public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] RDMA/nes: multiple disconnect cause crash during AE handling
@ 2010-02-12 19:58 Faisal Latif
  2010-02-19 19:41 ` Roland Dreier
  0 siblings, 1 reply; 2+ messages in thread
From: Faisal Latif @ 2010-02-12 19:58 UTC (permalink / raw)
  To: Roland Dreier, -c, linux-rdma-u79uwXL29TY76Z2rM5mHXA

There is double disconnect during AE processing causing crash. While fixing the
crash, also simplify the AE handling code.

Signed-off-by: Faisal Latif <faisal.latif-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
 drivers/infiniband/hw/nes/nes_hw.c |   88 +++++++++++------------------------
 1 files changed, 28 insertions(+), 60 deletions(-)

diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index b1c2cbb..310cc7c 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -3352,8 +3352,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
 	u16 async_event_id;
 	u8 tcp_state;
 	u8 iwarp_state;
-	int must_disconn = 1;
-	int must_terminate = 0;
 	struct ib_event ibevent;
 
 	nes_debug(NES_DBG_AEQ, "\n");
@@ -3367,6 +3365,8 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
 		BUG_ON(!context);
 	}
 
+	/* context is nesqp unless async_event_id == CQ ERROR */
+	nesqp = (struct nes_qp *)(unsigned long)context;
 	async_event_id = (u16)aeq_info;
 	tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
 	iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
@@ -3378,8 +3378,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
 
 	switch (async_event_id) {
 		case NES_AEQE_AEID_LLP_FIN_RECEIVED:
-			nesqp = (struct nes_qp *)(unsigned long)context;
-
 			if (nesqp->term_flags)
 				return; /* Ignore it, wait for close complete */
 
@@ -3394,79 +3392,48 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
 						async_event_id, nesqp->last_aeq, tcp_state);
 			}
 
-			if ((tcp_state != NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
-					(nesqp->ibqp_state != IB_QPS_RTS)) {
-				/* FIN Received but tcp state or IB state moved on,
-						should expect a	close complete */
-				return;
-			}
-
+			break;
 		case NES_AEQE_AEID_LLP_CLOSE_COMPLETE:
-			nesqp = (struct nes_qp *)(unsigned long)context;
 			if (nesqp->term_flags) {
 				nes_terminate_done(nesqp, 0);
 				return;
 			}
+			spin_lock_irqsave(&nesqp->lock, flags);
+			nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
+			spin_unlock_irqrestore(&nesqp->lock, flags);
+			nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_CLOSING, 0, 0);
+			nes_cm_disconn(nesqp);
+			break;
 
-		case NES_AEQE_AEID_LLP_CONNECTION_RESET:
 		case NES_AEQE_AEID_RESET_SENT:
-			nesqp = (struct nes_qp *)(unsigned long)context;
-			if (async_event_id == NES_AEQE_AEID_RESET_SENT) {
-				tcp_state = NES_AEQE_TCP_STATE_CLOSED;
-			}
+			tcp_state = NES_AEQE_TCP_STATE_CLOSED;
 			spin_lock_irqsave(&nesqp->lock, flags);
 			nesqp->hw_iwarp_state = iwarp_state;
 			nesqp->hw_tcp_state = tcp_state;
 			nesqp->last_aeq = async_event_id;
-
-			if ((tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
-					(tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT)) {
-				nesqp->hte_added = 0;
-				next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE;
-			}
-
-			if ((nesqp->ibqp_state == IB_QPS_RTS) &&
-					((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
-					(async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
-				switch (nesqp->hw_iwarp_state) {
-					case NES_AEQE_IWARP_STATE_RTS:
-						next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
-						nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
-						break;
-					case NES_AEQE_IWARP_STATE_TERMINATE:
-						must_disconn = 0; /* terminate path takes care of disconn */
-						if (nesqp->term_flags == 0)
-							must_terminate = 1;
-						break;
-				}
-			} else {
-				if (async_event_id ==  NES_AEQE_AEID_LLP_FIN_RECEIVED) {
-					/* FIN Received but ib state not RTS,
-							close complete will be on its way */
-					must_disconn = 0;
-				}
-			}
+			nesqp->hte_added = 0;
 			spin_unlock_irqrestore(&nesqp->lock, flags);
+			next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE;
+			nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0);
+			nes_cm_disconn(nesqp);
+			break;
 
-			if (must_terminate)
-				nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
-			else if (must_disconn) {
-				if (next_iwarp_state) {
-					nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X\n",
-						  nesqp->hwqp.qp_id, next_iwarp_state);
-					nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0);
-				}
-				nes_cm_disconn(nesqp);
-			}
+		case NES_AEQE_AEID_LLP_CONNECTION_RESET:
+			if (atomic_read(&nesqp->close_timer_started))
+				return;
+			spin_lock_irqsave(&nesqp->lock, flags);
+			nesqp->hw_iwarp_state = iwarp_state;
+			nesqp->hw_tcp_state = tcp_state;
+			nesqp->last_aeq = async_event_id;
+			spin_unlock_irqrestore(&nesqp->lock, flags);
+			nes_cm_disconn(nesqp);
 			break;
 
 		case NES_AEQE_AEID_TERMINATE_SENT:
-			nesqp = (struct nes_qp *)(unsigned long)context;
 			nes_terminate_send_fin(nesdev, nesqp, aeqe);
 			break;
 
 		case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED:
-			nesqp = (struct nes_qp *)(unsigned long)context;
 			nes_terminate_received(nesdev, nesqp, aeqe);
 			break;
 
@@ -3480,7 +3447,8 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
 		case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
 		case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
 		case NES_AEQE_AEID_AMP_TO_WRAP:
-			nesqp = (struct nes_qp *)(unsigned long)context;
+			printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_ACCESS_ERR\n",
+					nesqp->hwqp.qp_id, async_event_id);
 			nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_ACCESS_ERR);
 			break;
 
@@ -3488,7 +3456,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
 		case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL:
 		case NES_AEQE_AEID_DDP_UBE_INVALID_MO:
 		case NES_AEQE_AEID_DDP_UBE_INVALID_QN:
-			nesqp = (struct nes_qp *)(unsigned long)context;
 			if (iwarp_opcode(nesqp, aeq_info) > IWARP_OPCODE_TERM) {
 				aeq_info &= 0xffff0000;
 				aeq_info |= NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE;
@@ -3530,7 +3497,8 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
 		case NES_AEQE_AEID_STAG_ZERO_INVALID:
 		case NES_AEQE_AEID_ROE_INVALID_RDMA_READ_REQUEST:
 		case NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP:
-			nesqp = (struct nes_qp *)(unsigned long)context;
+			printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_FATAL\n",
+					nesqp->hwqp.qp_id, async_event_id);
 			nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
 			break;
 
-- 
1.5.3.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] RDMA/nes: multiple disconnect cause crash during AE handling
  2010-02-12 19:58 [PATCH] RDMA/nes: multiple disconnect cause crash during AE handling Faisal Latif
@ 2010-02-19 19:41 ` Roland Dreier
  0 siblings, 0 replies; 2+ messages in thread
From: Roland Dreier @ 2010-02-19 19:41 UTC (permalink / raw)
  To: Faisal Latif; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

thanks, applied all 3 patches.
-- 
Roland Dreier  <rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
For corporate legal information go to:
http://www.cisco.com/web/about/doing_business/legal/cri/index.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2010-02-19 19:41 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-02-12 19:58 [PATCH] RDMA/nes: multiple disconnect cause crash during AE handling Faisal Latif
2010-02-19 19:41 ` Roland Dreier

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox