public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] RDMA/cxgb4: don't declare wr_wait objects on the stack.
@ 2011-05-13 18:37 Steve Wise
       [not found] ` <20110513183727.32157.8873.stgit-T4OLL4TyM9aNDNWfRnPdfg@public.gmane.org>
  0 siblings, 1 reply; 6+ messages in thread
From: Steve Wise @ 2011-05-13 18:37 UTC (permalink / raw)
  To: roland-DgEjT+Ai2ygdnm+yROfE0A; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

There exists a race condition when using c4iw_wr_wait objects that are
declared on the stack.  This was being done in a few places where we are
sending work requests to the FW and awaiting replies, but we don't have
an endpoint structure with an embedded c4iw_wr_wait struct.  So the code
was allocating it locally on the stack.  Bad design. The race is this:

thread on cpuX declares the wait object on the stack, then posts
a firmware WR with that wait object ptr as the cookie to be returned
in the WR reply.  This thread will proceed to block in wait_event_timeout()
but before it does:

An interrupt runs on cpuY with the WR reply.  fw6_msg() handles this
and calls c4iw_wake_up().  c4iw_wake_up() sets the condition variable in
the c4iw_wr_wait object to TRUE and will call wake_up(), but before it
calls wake_up():

The thread on cpuX calls c4iw_wait_for_reply(), which calls
wait_event_timeout().  The wait_event_timeout() macro checks the condition
variable and returns immediately since it is TRUE.  So this thread
never blocks/sleeps. The function then returns effectively deallocating
the c4iw_wr_wait object that was on the stack.

So at this point cpuY has a pointer to the c4iw_wr_wait object that is no
longer valid.  Further its pointing to a stack frame that might now be in
use by some other context/thread.  So cpuY continues execution and calls
wake_up() on a ptr to a wait object that as been effectively deallocated.

This race, when it hits, can cause a crash in wake_up(), which I've seen
under heavy stress. It can also corrupt the referenced stack which can
cause any number of failures.

The fix:

Allocate temporary c4iw_wr_wait objects and keep references on them so that it
is never freed until both execution paths are done using it.

Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
---

 drivers/infiniband/hw/cxgb4/cm.c       |   12 +++++++++++-
 drivers/infiniband/hw/cxgb4/cq.c       |   33 ++++++++++++++++++++++----------
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h |   29 +++++++++++++++++++++++++++-
 drivers/infiniband/hw/cxgb4/mem.c      |   10 ++++++----
 drivers/infiniband/hw/cxgb4/qp.c       |   16 +++++++++++-----
 5 files changed, 79 insertions(+), 21 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index d7ee70f..fcbc803 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -255,6 +255,13 @@ static void *alloc_ep(int size, gfp_t gfp)
 	return epc;
 }
 
+void _c4iw_free_tmp_wr_wait(struct kref *kref)
+{
+	struct c4iw_wr_wait *wr_waitp;
+	wr_waitp = container_of(kref, struct c4iw_wr_wait, kref);
+	kfree(wr_waitp);
+}
+
 void _c4iw_free_ep(struct kref *kref)
 {
 	struct c4iw_ep *ep;
@@ -2284,8 +2291,11 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
 		ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff);
 		wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1];
 		PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret);
-		if (wr_waitp)
+		if (wr_waitp) {
 			c4iw_wake_up(wr_waitp, ret ? -ret : 0);
+			if (wr_waitp->tmp)
+				c4iw_put_tmp_wr_wait(wr_waitp);
+		}
 		kfree_skb(skb);
 		break;
 	case 2:
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 8d8f8ad..4fbe82e 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -38,7 +38,7 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 	struct fw_ri_res_wr *res_wr;
 	struct fw_ri_res *res;
 	int wr_len;
-	struct c4iw_wr_wait wr_wait;
+	struct c4iw_wr_wait *wr_waitp;
 	struct sk_buff *skb;
 	int ret;
 
@@ -46,6 +46,13 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 	skb = alloc_skb(wr_len, GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
+
+	wr_waitp = c4iw_get_tmp_wr_wait();
+	if (!wr_waitp) {
+		kfree_skb(skb);
+		return -ENOMEM;
+	}
+		
 	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
 
 	res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
@@ -55,16 +62,15 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 			V_FW_RI_RES_WR_NRES(1) |
 			FW_WR_COMPL(1));
 	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
-	res_wr->cookie = (unsigned long) &wr_wait;
+	res_wr->cookie = (unsigned long) wr_waitp;
 	res = res_wr->res;
 	res->u.cq.restype = FW_RI_RES_TYPE_CQ;
 	res->u.cq.op = FW_RI_RES_OP_RESET;
 	res->u.cq.iqid = cpu_to_be32(cq->cqid);
 
-	c4iw_init_wr_wait(&wr_wait);
 	ret = c4iw_ofld_send(rdev, skb);
 	if (!ret) {
-		ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__);
+		ret = c4iw_wait_for_reply(rdev, wr_waitp, 0, 0, __func__);
 	}
 
 	kfree(cq->sw_queue);
@@ -82,7 +88,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 	struct fw_ri_res *res;
 	int wr_len;
 	int user = (uctx != &rdev->uctx);
-	struct c4iw_wr_wait wr_wait;
+	struct c4iw_wr_wait *wr_waitp;
 	int ret;
 	struct sk_buff *skb;
 
@@ -116,6 +122,13 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 		ret = -ENOMEM;
 		goto err4;
 	}
+
+	wr_waitp = c4iw_get_tmp_wr_wait();
+	if (!wr_waitp) {
+		ret = -ENOMEM;
+		goto err5;
+	}
+
 	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
 
 	res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
@@ -125,7 +138,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 			V_FW_RI_RES_WR_NRES(1) |
 			FW_WR_COMPL(1));
 	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
-	res_wr->cookie = (unsigned long) &wr_wait;
+	res_wr->cookie = (unsigned long) wr_waitp;
 	res = res_wr->res;
 	res->u.cq.restype = FW_RI_RES_TYPE_CQ;
 	res->u.cq.op = FW_RI_RES_OP_WRITE;
@@ -144,13 +157,11 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 	res->u.cq.iqsize = cpu_to_be16(cq->size);
 	res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr);
 
-	c4iw_init_wr_wait(&wr_wait);
-
 	ret = c4iw_ofld_send(rdev, skb);
 	if (ret)
 		goto err4;
-	PDBG("%s wait_event wr_wait %p\n", __func__, &wr_wait);
-	ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__);
+	PDBG("%s wait_event wr_wait %p\n", __func__, wr_waitp);
+	ret = c4iw_wait_for_reply(rdev, wr_waitp, 0, 0, __func__);
 	if (ret)
 		goto err4;
 
@@ -163,6 +174,8 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 		cq->ugts &= PAGE_MASK;
 	}
 	return 0;
+err5:
+	kfree_skb(skb);
 err4:
 	dma_free_coherent(&rdev->lldi.pdev->dev, cq->memsize, cq->queue,
 			  dma_unmap_addr(cq, mapping));
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 35d2a5d..63d9ca8 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -139,15 +139,39 @@ struct c4iw_wr_wait {
 	wait_queue_head_t wait;
 	unsigned long status;
 	int ret;
+	struct kref kref;
+	int tmp;
 };
 
 static inline void c4iw_init_wr_wait(struct c4iw_wr_wait *wr_waitp)
 {
+	wr_waitp->tmp = 0;
 	wr_waitp->ret = 0;
 	wr_waitp->status = 0;
 	init_waitqueue_head(&wr_waitp->wait);
 }
 
+static inline struct c4iw_wr_wait *c4iw_get_tmp_wr_wait(void)
+{
+	struct c4iw_wr_wait *wr_waitp;
+
+	wr_waitp = kmalloc(sizeof *wr_waitp, GFP_KERNEL);
+	if (wr_waitp) {
+		c4iw_init_wr_wait(wr_waitp);
+		kref_init(&wr_waitp->kref);
+		kref_get(&wr_waitp->kref);
+		wr_waitp->tmp = 1;
+	}
+	return wr_waitp;
+}
+
+void _c4iw_free_tmp_wr_wait(struct kref *kref);
+
+static inline void c4iw_put_tmp_wr_wait(struct c4iw_wr_wait *wr_waitp)
+{
+	kref_put(&wr_waitp->kref, _c4iw_free_tmp_wr_wait);
+}
+
 static inline void c4iw_wake_up(struct c4iw_wr_wait *wr_waitp, int ret)
 {
 	wr_waitp->ret = ret;
@@ -180,7 +204,10 @@ static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev,
 	if (wr_waitp->ret)
 		PDBG("%s: FW reply %d tid %u qpid %u\n",
 		     pci_name(rdev->lldi.pdev), wr_waitp->ret, hwtid, qpid);
-	return wr_waitp->ret;
+	ret = wr_waitp->ret;
+	if (wr_waitp->tmp)
+		c4iw_put_tmp_wr_wait(wr_waitp);
+	return ret;
 }
 
 struct c4iw_dev {
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 273ffe4..685928d 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -46,12 +46,14 @@ static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len,
 	struct ulptx_idata *sc;
 	u8 wr_len, *to_dp, *from_dp;
 	int copy_len, num_wqe, i, ret = 0;
-	struct c4iw_wr_wait wr_wait;
+	struct c4iw_wr_wait *wr_waitp;
 
 	addr &= 0x7FFFFFF;
 	PDBG("%s addr 0x%x len %u\n", __func__, addr, len);
 	num_wqe = DIV_ROUND_UP(len, C4IW_MAX_INLINE_SIZE);
-	c4iw_init_wr_wait(&wr_wait);
+	wr_waitp = c4iw_get_tmp_wr_wait();
+	if (!wr_waitp)
+		return -ENOMEM;
 	for (i = 0; i < num_wqe; i++) {
 
 		copy_len = len > C4IW_MAX_INLINE_SIZE ? C4IW_MAX_INLINE_SIZE :
@@ -71,7 +73,7 @@ static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len,
 		if (i == (num_wqe-1)) {
 			req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR) |
 						    FW_WR_COMPL(1));
-			req->wr.wr_lo = (__force __be64)(unsigned long) &wr_wait;
+			req->wr.wr_lo = (__force __be64)(unsigned long) wr_waitp;
 		} else
 			req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR));
 		req->wr.wr_mid = cpu_to_be32(
@@ -103,7 +105,7 @@ static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len,
 		len -= C4IW_MAX_INLINE_SIZE;
 	}
 
-	ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__);
+	ret = c4iw_wait_for_reply(rdev, wr_waitp, 0, 0, __func__);
 	return ret;
 }
 
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index a1824a5..d295a20 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -115,7 +115,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 	struct fw_ri_res_wr *res_wr;
 	struct fw_ri_res *res;
 	int wr_len;
-	struct c4iw_wr_wait wr_wait;
+	struct c4iw_wr_wait *wr_waitp;
 	struct sk_buff *skb;
 	int ret;
 	int eqsize;
@@ -191,6 +191,12 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 		ret = -ENOMEM;
 		goto err7;
 	}
+	wr_waitp = c4iw_get_tmp_wr_wait();
+	if (!wr_waitp) {
+		ret = -ENOMEM;
+		goto err8;
+	}
+		
 	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
 
 	res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
@@ -200,7 +206,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 			V_FW_RI_RES_WR_NRES(2) |
 			FW_WR_COMPL(1));
 	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
-	res_wr->cookie = (unsigned long) &wr_wait;
+	res_wr->cookie = (unsigned long) wr_waitp;
 	res = res_wr->res;
 	res->u.sqrq.restype = FW_RI_RES_TYPE_SQ;
 	res->u.sqrq.op = FW_RI_RES_OP_WRITE;
@@ -250,12 +256,10 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 	res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid);
 	res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr);
 
-	c4iw_init_wr_wait(&wr_wait);
-
 	ret = c4iw_ofld_send(rdev, skb);
 	if (ret)
 		goto err7;
-	ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, wq->sq.qid, __func__);
+	ret = c4iw_wait_for_reply(rdev, wr_waitp, 0, wq->sq.qid, __func__);
 	if (ret)
 		goto err7;
 
@@ -264,6 +268,8 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 	     (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb);
 
 	return 0;
+err8:
+	kfree_skb(skb);
 err7:
 	dma_free_coherent(&(rdev->lldi.pdev->dev),
 			  wq->rq.memsize, wq->rq.queue,

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2011-05-16 16:47 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-05-13 18:37 [PATCH] RDMA/cxgb4: don't declare wr_wait objects on the stack Steve Wise
     [not found] ` <20110513183727.32157.8873.stgit-T4OLL4TyM9aNDNWfRnPdfg@public.gmane.org>
2011-05-16 15:18   ` Steve Wise
     [not found]     ` <4DD14036.9050704-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2011-05-16 15:53       ` Roland Dreier
     [not found]         ` <BANLkTimGzzo1xDTvApAVxvv4C8WcYuCdUA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2011-05-16 15:56           ` Steve Wise
     [not found]             ` <4DD1493D.3060401-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2011-05-16 16:05               ` Steve Wise
     [not found]                 ` <4DD14B2D.9090104-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2011-05-16 16:47                   ` Steve Wise

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox