public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
From: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
To: dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Mike Marciniszyn
	<mike.marciniszyn-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>,
	Sebastian Sanchez
	<sebastian.sanchez-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Subject: [PATCH 02/27] IB/hfi1: Process qp wait list in IRQ thread periodically
Date: Wed, 08 Feb 2017 05:26:02 -0800	[thread overview]
Message-ID: <20170208132601.16442.69915.stgit@scvm10.sc.intel.com> (raw)
In-Reply-To: <20170208132142.16442.69329.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>

From: Mike Marciniszyn <mike.marciniszyn-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

In the event that the IRQ thread is extremely busy, the
processing of an rcd wait list can be delayed by quite
a bit until the IRQ thread completes its work.

The QP reset reference count wait can then appear to be stuck, thus
causing up a QP destroy to emit the hung task diagnostic.

Fix by processing the qp wait list periodically from the thread.  The
interval is a multiple (currently 4) of the MAX_PKT_RECV.

Also, reduce some of the excessive inlining.   The guidelines
are per packet is ok inline, otherwise the choice is based on
likelyhood of execution.

Reviewed-by: Sebastian Sanchez <sebastian.sanchez-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
 drivers/infiniband/hw/hfi1/driver.c |  121 +++++++++++++++++++----------------
 1 files changed, 67 insertions(+), 54 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 4fbaee6..29db673 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -100,6 +100,11 @@
  * MAX_PKT_RCV is the max # if packets processed per receive interrupt.
  */
 #define MAX_PKT_RECV 64
+/*
+ * MAX_PKT_THREAD_RCV is the max # of packets processed before
+ * the qp_wait_list queue is flushed.
+ */
+#define MAX_PKT_RECV_THREAD (MAX_PKT_RECV * 4)
 #define EGR_HEAD_UPDATE_THRESHOLD 16
 
 struct hfi1_ib_stats hfi1_stats;
@@ -259,7 +264,7 @@ int hfi1_count_units(int *npresentp, int *nupp)
  * allowed size ranges for the respective type and, optionally,
  * return the proper encoding.
  */
-inline int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded)
+int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded)
 {
 	if (unlikely(!PAGE_ALIGNED(size)))
 		return 0;
@@ -654,24 +659,68 @@ static void __prescan_rxq(struct hfi1_packet *packet)
 	}
 }
 
-static inline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
+static void process_rcv_qp_work(struct hfi1_ctxtdata *rcd)
+{
+	struct rvt_qp *qp, *nqp;
+
+	/*
+	 * Iterate over all QPs waiting to respond.
+	 * The list won't change since the IRQ is only run on one CPU.
+	 */
+	list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
+		list_del_init(&qp->rspwait);
+		if (qp->r_flags & RVT_R_RSP_NAK) {
+			qp->r_flags &= ~RVT_R_RSP_NAK;
+			hfi1_send_rc_ack(rcd, qp, 0);
+		}
+		if (qp->r_flags & RVT_R_RSP_SEND) {
+			unsigned long flags;
+
+			qp->r_flags &= ~RVT_R_RSP_SEND;
+			spin_lock_irqsave(&qp->s_lock, flags);
+			if (ib_rvt_state_ops[qp->state] &
+					RVT_PROCESS_OR_FLUSH_SEND)
+				hfi1_schedule_send(qp);
+			spin_unlock_irqrestore(&qp->s_lock, flags);
+		}
+		rvt_put_qp(qp);
+	}
+}
+
+static noinline int max_packet_exceeded(struct hfi1_packet *packet, int thread)
+{
+	if (thread) {
+		if ((packet->numpkt & (MAX_PKT_RECV_THREAD - 1)) == 0)
+			/* allow defered processing */
+			process_rcv_qp_work(packet->rcd);
+		cond_resched();
+		return RCV_PKT_OK;
+	} else {
+		this_cpu_inc(*packet->rcd->dd->rcv_limit);
+		return RCV_PKT_LIMIT;
+	}
+}
+
+static inline int check_max_packet(struct hfi1_packet *packet, int thread)
 {
 	int ret = RCV_PKT_OK;
 
+	if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0))
+		ret = max_packet_exceeded(packet, thread);
+	return ret;
+}
+
+static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
+{
+	int ret;
+
 	/* Set up for the next packet */
 	packet->rhqoff += packet->rsize;
 	if (packet->rhqoff >= packet->maxcnt)
 		packet->rhqoff = 0;
 
 	packet->numpkt++;
-	if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0)) {
-		if (thread) {
-			cond_resched();
-		} else {
-			ret = RCV_PKT_LIMIT;
-			this_cpu_inc(*packet->rcd->dd->rcv_limit);
-		}
-	}
+	ret = check_max_packet(packet, thread);
 
 	packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
 				     packet->rcd->dd->rhf_offset;
@@ -682,7 +731,7 @@ static inline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
 
 static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
 {
-	int ret = RCV_PKT_OK;
+	int ret;
 
 	packet->hdr = hfi1_get_msgheader(packet->rcd->dd,
 					 packet->rhf_addr);
@@ -723,14 +772,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
 	if (packet->rhqoff >= packet->maxcnt)
 		packet->rhqoff = 0;
 
-	if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0)) {
-		if (thread) {
-			cond_resched();
-		} else {
-			ret = RCV_PKT_LIMIT;
-			this_cpu_inc(*packet->rcd->dd->rcv_limit);
-		}
-	}
+	ret = check_max_packet(packet, thread);
 
 	packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
 				      packet->rcd->dd->rhf_offset;
@@ -767,38 +809,6 @@ static inline void finish_packet(struct hfi1_packet *packet)
 		       packet->etail, rcv_intr_dynamic, packet->numpkt);
 }
 
-static inline void process_rcv_qp_work(struct hfi1_packet *packet)
-{
-	struct hfi1_ctxtdata *rcd;
-	struct rvt_qp *qp, *nqp;
-
-	rcd = packet->rcd;
-	rcd->head = packet->rhqoff;
-
-	/*
-	 * Iterate over all QPs waiting to respond.
-	 * The list won't change since the IRQ is only run on one CPU.
-	 */
-	list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
-		list_del_init(&qp->rspwait);
-		if (qp->r_flags & RVT_R_RSP_NAK) {
-			qp->r_flags &= ~RVT_R_RSP_NAK;
-			hfi1_send_rc_ack(rcd, qp, 0);
-		}
-		if (qp->r_flags & RVT_R_RSP_SEND) {
-			unsigned long flags;
-
-			qp->r_flags &= ~RVT_R_RSP_SEND;
-			spin_lock_irqsave(&qp->s_lock, flags);
-			if (ib_rvt_state_ops[qp->state] &
-					RVT_PROCESS_OR_FLUSH_SEND)
-				hfi1_schedule_send(qp);
-			spin_unlock_irqrestore(&qp->s_lock, flags);
-		}
-		rvt_put_qp(qp);
-	}
-}
-
 /*
  * Handle receive interrupts when using the no dma rtail option.
  */
@@ -826,7 +836,8 @@ int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
 			last = RCV_PKT_DONE;
 		process_rcv_update(last, &packet);
 	}
-	process_rcv_qp_work(&packet);
+	process_rcv_qp_work(rcd);
+	rcd->head = packet.rhqoff;
 bail:
 	finish_packet(&packet);
 	return last;
@@ -854,7 +865,8 @@ int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread)
 			last = RCV_PKT_DONE;
 		process_rcv_update(last, &packet);
 	}
-	process_rcv_qp_work(&packet);
+	process_rcv_qp_work(rcd);
+	rcd->head = packet.rhqoff;
 bail:
 	finish_packet(&packet);
 	return last;
@@ -1024,7 +1036,8 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
 		process_rcv_update(last, &packet);
 	}
 
-	process_rcv_qp_work(&packet);
+	process_rcv_qp_work(rcd);
+	rcd->head = packet.rhqoff;
 
 bail:
 	/*

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2017-02-08 13:26 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-02-08 13:25 [PATCH 00/27] IB/hfi1,qib,rdmavt: Patches for 4.11 Dennis Dalessandro
     [not found] ` <20170208132142.16442.69329.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-02-08 13:25   ` [PATCH 01/27] IB/hfi1: Correct defered count after processing qp_wait_list Dennis Dalessandro
2017-02-08 13:26   ` Dennis Dalessandro [this message]
2017-02-08 13:26   ` [PATCH 03/27] IB/hfi1: Ensure read of producer s_head is correct Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 04/27] IB/hfi1: Use static CTLE with Preset 6 for integrated HFIs Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 05/27] IB/hfi1: Correct error calldown locking Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 06/27] IB/hfi1: Access hfi1_ibport through rcd pointer Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 07/27] IB/rdmavt: Use per-CPU reference count for MRs Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 08/27] IB/hfi1: Allocate context data on memory node Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 09/27] IB/hfi1: Add additional fields to qp_stats Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 10/27] IB/hfi1: Reduce oversized fields in struct hfi1_packet Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 11/27] IB/hfi1: Check upper-case EFI variables Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 12/27] IB/hfi1, qib, rdmavt: Move two IB event functions into rdmavt Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 13/27] IB/hfi1, qib, rdmavt: Move AETH credit " Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 14/27] IB/rdmavt: Adding timer logic to rdmavt Dennis Dalessandro
     [not found]     ` <20170208132712.16442.57028.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-02-12 17:33       ` Leon Romanovsky
2017-02-08 13:27   ` [PATCH 15/27] IB/hfi1: Use new rdmavt timers Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 16/27] IB/qib: " Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 17/27] IB/hfi1, rdmavt: Update copy_sge to use boolean arguments Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 18/27] IB/hfi1, rdmavt: Move SGE state helper routines into rdmavt Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 19/27] IB/qib: Updates to use rdmavt's SGE helper routines Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 20/27] IB/rdmavt, IB/hfi1, IB/qib: Correct ack count for passive (RTR) QPs Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 21/27] IB/hfi1: Modify logging frequency of DCC errors Dennis Dalessandro
2017-02-08 13:28   ` [PATCH 22/27] IB/hfi1: Add receive fault injection feature Dennis Dalessandro
     [not found]     ` <20170208132800.16442.94549.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-02-12 17:42       ` Leon Romanovsky
     [not found]         ` <20170212174205.GI14015-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2017-02-14 21:51           ` Doug Ledford
     [not found]             ` <1487109065.86943.86.camel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2017-02-28 17:55               ` Dennis Dalessandro
2017-02-08 13:28   ` [PATCH 23/27] IB/hfi1: Add transmit " Dennis Dalessandro
2017-02-08 13:28   ` [PATCH 24/27] IB/hfi1: Do not set physical link state if DC is in the shutdown state Dennis Dalessandro
2017-02-08 13:28   ` [PATCH 25/27] IB/hfi1: Add rvt_rnr_tbl_to_usec function Dennis Dalessandro
     [not found]     ` <20170208132818.16442.38634.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-02-12 17:48       ` Leon Romanovsky
2017-02-08 13:28   ` [PATCH 26/27] IB/hfi1, qib, rdmavt: Move AETH defines to rdma/ib_hdrs.h Dennis Dalessandro
     [not found]     ` <20170208132824.16442.61753.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-02-12 17:47       ` Leon Romanovsky
2017-02-08 13:28   ` [PATCH 27/27] IB/hfi1: Code reuse with memdup_copy Dennis Dalessandro
2017-02-19 13:47   ` [PATCH 00/27] IB/hfi1,qib,rdmavt: Patches for 4.11 Doug Ledford

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170208132601.16442.69915.stgit@scvm10.sc.intel.com \
    --to=dennis.dalessandro-ral2jqcrhueavxtiumwx3w@public.gmane.org \
    --cc=dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=mike.marciniszyn-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org \
    --cc=sebastian.sanchez-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox