All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH][REPOST] SVCRDMA: Add refs to RDMA transport close path
@ 2008-03-06 20:01 Tom Tucker
       [not found] ` <1204833673.21546.31.camel-SMNkleLxa3ZimH42XvhXlA@public.gmane.org>
  0 siblings, 1 reply; 3+ messages in thread
From: Tom Tucker @ 2008-03-06 20:01 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs

Bruce: 

This patch is for 2.6.25 if possible. The previous version of this patch had a warning
that I missed in my "make && make modules_install && make install" test cycle. 

This patch has been tested with multiple concurrent mounts of both IB and iWARP. The
connect/destroy test was as follows:

#  while true; do \
   mount.nfs vic1-10g:/dsfs /mnt/iwarp -i -o rdma,port=2050,vers=3;\
   iozone -aec -i 0 -i 1 -s 10M -q 128k -y 32k -f /mnt/iwarp/foo; \
   umount /mnt/iwarp; \
   sleep 1; \
done

A similar test is run concurrently for IB as follows:

#  while true; do \
   mount.nfs vic1-ib:/dsfs /mnt/ib -i -o rdma,port=2050,vers=3;\
   iozone -aec -i 0 -i 1 -s 10M -q 128k -y 32k -f /mnt/ib/bar; \
   umount /mnt/ib; \
   sleep 1; \
done

I also ran Connectathon for grins.

Sorry for the noise on the warning...
    
    The close code was re-factored so that transport close in error and close
    are performed by the same code.
    
    Additional transport references were added as follows:
    - A reference when on the DTO Q to avoid having the transport
      deleted while queued for I/O.
    - A reference while there is a QP able to generate events.
    - A reference until the DISCONNECTED event is received on the CM ID
    
    Finally, the QP and CQ event handler messages were changed to printk's to
    make it easier for system administrators to diagnose RDMA transport
    problems.
    
    Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 net/sunrpc/xprtrdma/svc_rdma_transport.c |  113 ++++++++++++++++++------------
 1 files changed, 68 insertions(+), 45 deletions(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index f09444c..7340abb 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -54,7 +54,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 					int flags);
 static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
 static void svc_rdma_release_rqst(struct svc_rqst *);
-static void rdma_destroy_xprt(struct svcxprt_rdma *xprt);
 static void dto_tasklet_func(unsigned long data);
 static void svc_rdma_detach(struct svc_xprt *xprt);
 static void svc_rdma_free(struct svc_xprt *xprt);
@@ -174,8 +173,9 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
 static void cq_event_handler(struct ib_event *event, void *context)
 {
 	struct svc_xprt *xprt = context;
-	dprintk("svcrdma: received CQ event id=%d, context=%p\n",
-		event->event, context);
+	printk(KERN_INFO
+	       "svcrdma: received CQ event id=%d, context=%p\n",
+	       event->event, context);
 	set_bit(XPT_CLOSE, &xprt->xpt_flags);
 }
 
@@ -190,8 +190,9 @@ static void qp_event_handler(struct ib_event *event, void *context)
 	case IB_EVENT_COMM_EST:
 	case IB_EVENT_SQ_DRAINED:
 	case IB_EVENT_QP_LAST_WQE_REACHED:
-		dprintk("svcrdma: QP event %d received for QP=%p\n",
-			event->event, event->element.qp);
+		printk(KERN_INFO
+		       "svcrdma: QP event %d received for QP=%p\n",
+		       event->event, event->element.qp);
 		break;
 	/* These are considered fatal events */
 	case IB_EVENT_PATH_MIG_ERR:
@@ -200,9 +201,10 @@ static void qp_event_handler(struct ib_event *event, void *context)
 	case IB_EVENT_QP_ACCESS_ERR:
 	case IB_EVENT_DEVICE_FATAL:
 	default:
-		dprintk("svcrdma: QP ERROR event %d received for QP=%p, "
-			"closing transport\n",
-			event->event, event->element.qp);
+		printk(KERN_INFO
+		       "svcrdma: QP ERROR event %d received for QP=%p, "
+		       "closing transport\n",
+		       event->event, event->element.qp);
 		set_bit(XPT_CLOSE, &xprt->xpt_flags);
 		break;
 	}
@@ -247,6 +249,7 @@ static void dto_tasklet_func(unsigned long data)
 			sq_cq_reap(xprt);
 		}
 
+		svc_xprt_put(&xprt->sc_xprt);
 		spin_lock_irqsave(&dto_lock, flags);
 	}
 	spin_unlock_irqrestore(&dto_lock, flags);
@@ -275,8 +278,10 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context)
 	 * add it
 	 */
 	spin_lock_irqsave(&dto_lock, flags);
-	if (list_empty(&xprt->sc_dto_q))
+	if (list_empty(&xprt->sc_dto_q)) {
+		svc_xprt_get(&xprt->sc_xprt);
 		list_add_tail(&xprt->sc_dto_q, &dto_xprt_q);
+	}
 	spin_unlock_irqrestore(&dto_lock, flags);
 
 	/* Tasklet does all the work to avoid irqsave locks. */
@@ -386,8 +391,10 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context)
 	 * add it
 	 */
 	spin_lock_irqsave(&dto_lock, flags);
-	if (list_empty(&xprt->sc_dto_q))
+	if (list_empty(&xprt->sc_dto_q)) {
+		svc_xprt_get(&xprt->sc_xprt);
 		list_add_tail(&xprt->sc_dto_q, &dto_xprt_q);
+	}
 	spin_unlock_irqrestore(&dto_lock, flags);
 
 	/* Tasklet does all the work to avoid irqsave locks. */
@@ -611,6 +618,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
 	switch (event->event) {
 	case RDMA_CM_EVENT_ESTABLISHED:
 		/* Accept complete */
+		svc_xprt_get(xprt);
 		dprintk("svcrdma: Connection completed on DTO xprt=%p, "
 			"cm_id=%p\n", xprt, cma_id);
 		clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags);
@@ -661,15 +669,15 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 
 	listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP);
 	if (IS_ERR(listen_id)) {
-		rdma_destroy_xprt(cma_xprt);
+		svc_xprt_put(&cma_xprt->sc_xprt);
 		dprintk("svcrdma: rdma_create_id failed = %ld\n",
 			PTR_ERR(listen_id));
 		return (void *)listen_id;
 	}
 	ret = rdma_bind_addr(listen_id, sa);
 	if (ret) {
-		rdma_destroy_xprt(cma_xprt);
 		rdma_destroy_id(listen_id);
+		svc_xprt_put(&cma_xprt->sc_xprt);
 		dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret);
 		return ERR_PTR(ret);
 	}
@@ -678,8 +686,9 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 	ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
 	if (ret) {
 		rdma_destroy_id(listen_id);
-		rdma_destroy_xprt(cma_xprt);
+		svc_xprt_put(&cma_xprt->sc_xprt);
 		dprintk("svcrdma: rdma_listen failed = %d\n", ret);
+		return ERR_PTR(ret);
 	}
 
 	/*
@@ -820,6 +829,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 		newxprt->sc_sq_depth = qp_attr.cap.max_send_wr;
 		newxprt->sc_max_requests = qp_attr.cap.max_recv_wr;
 	}
+	svc_xprt_get(&newxprt->sc_xprt);
 	newxprt->sc_qp = newxprt->sc_cm_id->qp;
 
 	/* Register all of physical memory */
@@ -891,8 +901,15 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 
  errout:
 	dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret);
+	/* Take a reference in case the DTO handler runs */
+	svc_xprt_get(&newxprt->sc_xprt);
+	if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) {
+		ib_destroy_qp(newxprt->sc_qp);
+		svc_xprt_put(&newxprt->sc_xprt);
+	}
 	rdma_destroy_id(newxprt->sc_cm_id);
-	rdma_destroy_xprt(newxprt);
+	/* This call to put will destroy the transport */
+	svc_xprt_put(&newxprt->sc_xprt);
 	return NULL;
 }
 
@@ -919,54 +936,60 @@ static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
 	rqstp->rq_xprt_ctxt = NULL;
 }
 
-/* Disable data ready events for this connection */
+/*
+ * When connected, an svc_xprt has at least three references:
+ *
+ * - A reference held by the QP. We still hold that here because this
+ *   code deletes the QP and puts the reference.
+ *
+ * - A reference held by the cm_id between the ESTABLISHED and
+ *   DISCONNECTED events. If the remote peer disconnected first, this
+ *   reference could be gone.
+ *
+ * - A reference held by the svc_recv code that called this function
+ *   as part of close processing.
+ *
+ * At a minimum two references should still be held.
+ */
 static void svc_rdma_detach(struct svc_xprt *xprt)
 {
 	struct svcxprt_rdma *rdma =
 		container_of(xprt, struct svcxprt_rdma, sc_xprt);
-	unsigned long flags;
-
 	dprintk("svc: svc_rdma_detach(%p)\n", xprt);
-	/*
-	 * Shutdown the connection. This will ensure we don't get any
-	 * more events from the provider.
-	 */
+
+	/* Disconnect and flush posted WQE */
 	rdma_disconnect(rdma->sc_cm_id);
-	rdma_destroy_id(rdma->sc_cm_id);
 
-	/* We may already be on the DTO list */
-	spin_lock_irqsave(&dto_lock, flags);
-	if (!list_empty(&rdma->sc_dto_q))
-		list_del_init(&rdma->sc_dto_q);
-	spin_unlock_irqrestore(&dto_lock, flags);
+	/* Destroy the QP if present (not a listener) */
+	if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) {
+		ib_destroy_qp(rdma->sc_qp);
+		svc_xprt_put(xprt);
+	}
+
+	/* Destroy the CM ID */
+	rdma_destroy_id(rdma->sc_cm_id);
 }
 
 static void svc_rdma_free(struct svc_xprt *xprt)
 {
 	struct svcxprt_rdma *rdma = (struct svcxprt_rdma *)xprt;
 	dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
-	rdma_destroy_xprt(rdma);
-	kfree(rdma);
-}
-
-static void rdma_destroy_xprt(struct svcxprt_rdma *xprt)
-{
-	if (xprt->sc_qp && !IS_ERR(xprt->sc_qp))
-		ib_destroy_qp(xprt->sc_qp);
-
-	if (xprt->sc_sq_cq && !IS_ERR(xprt->sc_sq_cq))
-		ib_destroy_cq(xprt->sc_sq_cq);
+	/* We should only be called from kref_put */
+	BUG_ON(atomic_read(&xprt->xpt_ref.refcount) != 0);
+	if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq))
+		ib_destroy_cq(rdma->sc_sq_cq);
 
-	if (xprt->sc_rq_cq && !IS_ERR(xprt->sc_rq_cq))
-		ib_destroy_cq(xprt->sc_rq_cq);
+	if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq))
+		ib_destroy_cq(rdma->sc_rq_cq);
 
-	if (xprt->sc_phys_mr && !IS_ERR(xprt->sc_phys_mr))
-		ib_dereg_mr(xprt->sc_phys_mr);
+	if (rdma->sc_phys_mr && !IS_ERR(rdma->sc_phys_mr))
+		ib_dereg_mr(rdma->sc_phys_mr);
 
-	if (xprt->sc_pd && !IS_ERR(xprt->sc_pd))
-		ib_dealloc_pd(xprt->sc_pd);
+	if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
+		ib_dealloc_pd(rdma->sc_pd);
 
-	destroy_context_cache(xprt->sc_ctxt_head);
+	destroy_context_cache(rdma->sc_ctxt_head);
+	kfree(rdma);
 }
 
 static int svc_rdma_has_wspace(struct svc_xprt *xprt)


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH][REPOST] SVCRDMA: Add refs to RDMA transport close path
       [not found] ` <1204833673.21546.31.camel-SMNkleLxa3ZimH42XvhXlA@public.gmane.org>
@ 2008-03-10 20:31   ` J. Bruce Fields
  2008-03-10 21:01     ` Tom Tucker
  0 siblings, 1 reply; 3+ messages in thread
From: J. Bruce Fields @ 2008-03-10 20:31 UTC (permalink / raw)
  To: Tom Tucker; +Cc: linux-nfs

On Thu, Mar 06, 2008 at 02:01:13PM -0600, Tom Tucker wrote:
> Bruce: 
> 
> This patch is for 2.6.25 if possible. The previous version of this patch had a warning
> that I missed in my "make && make modules_install && make install" test cycle. 
> 
> This patch has been tested with multiple concurrent mounts of both IB and iWARP. The
> connect/destroy test was as follows:
> 
> #  while true; do \
>    mount.nfs vic1-10g:/dsfs /mnt/iwarp -i -o rdma,port=2050,vers=3;\
>    iozone -aec -i 0 -i 1 -s 10M -q 128k -y 32k -f /mnt/iwarp/foo; \
>    umount /mnt/iwarp; \
>    sleep 1; \
> done
> 
> A similar test is run concurrently for IB as follows:
> 
> #  while true; do \
>    mount.nfs vic1-ib:/dsfs /mnt/ib -i -o rdma,port=2050,vers=3;\
>    iozone -aec -i 0 -i 1 -s 10M -q 128k -y 32k -f /mnt/ib/bar; \
>    umount /mnt/ib; \
>    sleep 1; \
> done
> 
> I also ran Connectathon for grins.

Thanks, and sorry for the delay!

> 
> Sorry for the noise on the warning...
>     
>     The close code was re-factored so that transport close in error and close
>     are performed by the same code.
>     
>     Additional transport references were added as follows:
>     - A reference when on the DTO Q to avoid having the transport
>       deleted while queued for I/O.
>     - A reference while there is a QP able to generate events.
>     - A reference until the DISCONNECTED event is received on the CM ID
>     
>     Finally, the QP and CQ event handler messages were changed to printk's to
>     make it easier for system administrators to diagnose RDMA transport
>     problems.

Could I get just a couple superficial changes, and then I'll pass this
along for 2.6.25?:

	- We should keep the bugfix separated out from the rest.  (So
	  the printk changes, at the very least, should probably go in
	  later patch.)
	- Could we get just a short summary of the actual bug symptoms?
	  (E.g., "Multiple concurrent rdma mounts and unmounts could
	  result in a null-dereference in nfs_rdma_foo()...".)

--b.

>     
>     Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
> ---
>  net/sunrpc/xprtrdma/svc_rdma_transport.c |  113 ++++++++++++++++++------------
>  1 files changed, 68 insertions(+), 45 deletions(-)
> 
> diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
> index f09444c..7340abb 100644
> --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
> +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
> @@ -54,7 +54,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
>  					int flags);
>  static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
>  static void svc_rdma_release_rqst(struct svc_rqst *);
> -static void rdma_destroy_xprt(struct svcxprt_rdma *xprt);
>  static void dto_tasklet_func(unsigned long data);
>  static void svc_rdma_detach(struct svc_xprt *xprt);
>  static void svc_rdma_free(struct svc_xprt *xprt);
> @@ -174,8 +173,9 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
>  static void cq_event_handler(struct ib_event *event, void *context)
>  {
>  	struct svc_xprt *xprt = context;
> -	dprintk("svcrdma: received CQ event id=%d, context=%p\n",
> -		event->event, context);
> +	printk(KERN_INFO
> +	       "svcrdma: received CQ event id=%d, context=%p\n",
> +	       event->event, context);
>  	set_bit(XPT_CLOSE, &xprt->xpt_flags);
>  }
>  
> @@ -190,8 +190,9 @@ static void qp_event_handler(struct ib_event *event, void *context)
>  	case IB_EVENT_COMM_EST:
>  	case IB_EVENT_SQ_DRAINED:
>  	case IB_EVENT_QP_LAST_WQE_REACHED:
> -		dprintk("svcrdma: QP event %d received for QP=%p\n",
> -			event->event, event->element.qp);
> +		printk(KERN_INFO
> +		       "svcrdma: QP event %d received for QP=%p\n",
> +		       event->event, event->element.qp);
>  		break;
>  	/* These are considered fatal events */
>  	case IB_EVENT_PATH_MIG_ERR:
> @@ -200,9 +201,10 @@ static void qp_event_handler(struct ib_event *event, void *context)
>  	case IB_EVENT_QP_ACCESS_ERR:
>  	case IB_EVENT_DEVICE_FATAL:
>  	default:
> -		dprintk("svcrdma: QP ERROR event %d received for QP=%p, "
> -			"closing transport\n",
> -			event->event, event->element.qp);
> +		printk(KERN_INFO
> +		       "svcrdma: QP ERROR event %d received for QP=%p, "
> +		       "closing transport\n",
> +		       event->event, event->element.qp);
>  		set_bit(XPT_CLOSE, &xprt->xpt_flags);
>  		break;
>  	}
> @@ -247,6 +249,7 @@ static void dto_tasklet_func(unsigned long data)
>  			sq_cq_reap(xprt);
>  		}
>  
> +		svc_xprt_put(&xprt->sc_xprt);
>  		spin_lock_irqsave(&dto_lock, flags);
>  	}
>  	spin_unlock_irqrestore(&dto_lock, flags);
> @@ -275,8 +278,10 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context)
>  	 * add it
>  	 */
>  	spin_lock_irqsave(&dto_lock, flags);
> -	if (list_empty(&xprt->sc_dto_q))
> +	if (list_empty(&xprt->sc_dto_q)) {
> +		svc_xprt_get(&xprt->sc_xprt);
>  		list_add_tail(&xprt->sc_dto_q, &dto_xprt_q);
> +	}
>  	spin_unlock_irqrestore(&dto_lock, flags);
>  
>  	/* Tasklet does all the work to avoid irqsave locks. */
> @@ -386,8 +391,10 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context)
>  	 * add it
>  	 */
>  	spin_lock_irqsave(&dto_lock, flags);
> -	if (list_empty(&xprt->sc_dto_q))
> +	if (list_empty(&xprt->sc_dto_q)) {
> +		svc_xprt_get(&xprt->sc_xprt);
>  		list_add_tail(&xprt->sc_dto_q, &dto_xprt_q);
> +	}
>  	spin_unlock_irqrestore(&dto_lock, flags);
>  
>  	/* Tasklet does all the work to avoid irqsave locks. */
> @@ -611,6 +618,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
>  	switch (event->event) {
>  	case RDMA_CM_EVENT_ESTABLISHED:
>  		/* Accept complete */
> +		svc_xprt_get(xprt);
>  		dprintk("svcrdma: Connection completed on DTO xprt=%p, "
>  			"cm_id=%p\n", xprt, cma_id);
>  		clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags);
> @@ -661,15 +669,15 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
>  
>  	listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP);
>  	if (IS_ERR(listen_id)) {
> -		rdma_destroy_xprt(cma_xprt);
> +		svc_xprt_put(&cma_xprt->sc_xprt);
>  		dprintk("svcrdma: rdma_create_id failed = %ld\n",
>  			PTR_ERR(listen_id));
>  		return (void *)listen_id;
>  	}
>  	ret = rdma_bind_addr(listen_id, sa);
>  	if (ret) {
> -		rdma_destroy_xprt(cma_xprt);
>  		rdma_destroy_id(listen_id);
> +		svc_xprt_put(&cma_xprt->sc_xprt);
>  		dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret);
>  		return ERR_PTR(ret);
>  	}
> @@ -678,8 +686,9 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
>  	ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
>  	if (ret) {
>  		rdma_destroy_id(listen_id);
> -		rdma_destroy_xprt(cma_xprt);
> +		svc_xprt_put(&cma_xprt->sc_xprt);
>  		dprintk("svcrdma: rdma_listen failed = %d\n", ret);
> +		return ERR_PTR(ret);
>  	}
>  
>  	/*
> @@ -820,6 +829,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
>  		newxprt->sc_sq_depth = qp_attr.cap.max_send_wr;
>  		newxprt->sc_max_requests = qp_attr.cap.max_recv_wr;
>  	}
> +	svc_xprt_get(&newxprt->sc_xprt);
>  	newxprt->sc_qp = newxprt->sc_cm_id->qp;
>  
>  	/* Register all of physical memory */
> @@ -891,8 +901,15 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
>  
>   errout:
>  	dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret);
> +	/* Take a reference in case the DTO handler runs */
> +	svc_xprt_get(&newxprt->sc_xprt);
> +	if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) {
> +		ib_destroy_qp(newxprt->sc_qp);
> +		svc_xprt_put(&newxprt->sc_xprt);
> +	}
>  	rdma_destroy_id(newxprt->sc_cm_id);
> -	rdma_destroy_xprt(newxprt);
> +	/* This call to put will destroy the transport */
> +	svc_xprt_put(&newxprt->sc_xprt);
>  	return NULL;
>  }
>  
> @@ -919,54 +936,60 @@ static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
>  	rqstp->rq_xprt_ctxt = NULL;
>  }
>  
> -/* Disable data ready events for this connection */
> +/*
> + * When connected, an svc_xprt has at least three references:
> + *
> + * - A reference held by the QP. We still hold that here because this
> + *   code deletes the QP and puts the reference.
> + *
> + * - A reference held by the cm_id between the ESTABLISHED and
> + *   DISCONNECTED events. If the remote peer disconnected first, this
> + *   reference could be gone.
> + *
> + * - A reference held by the svc_recv code that called this function
> + *   as part of close processing.
> + *
> + * At a minimum two references should still be held.
> + */
>  static void svc_rdma_detach(struct svc_xprt *xprt)
>  {
>  	struct svcxprt_rdma *rdma =
>  		container_of(xprt, struct svcxprt_rdma, sc_xprt);
> -	unsigned long flags;
> -
>  	dprintk("svc: svc_rdma_detach(%p)\n", xprt);
> -	/*
> -	 * Shutdown the connection. This will ensure we don't get any
> -	 * more events from the provider.
> -	 */
> +
> +	/* Disconnect and flush posted WQE */
>  	rdma_disconnect(rdma->sc_cm_id);
> -	rdma_destroy_id(rdma->sc_cm_id);
>  
> -	/* We may already be on the DTO list */
> -	spin_lock_irqsave(&dto_lock, flags);
> -	if (!list_empty(&rdma->sc_dto_q))
> -		list_del_init(&rdma->sc_dto_q);
> -	spin_unlock_irqrestore(&dto_lock, flags);
> +	/* Destroy the QP if present (not a listener) */
> +	if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) {
> +		ib_destroy_qp(rdma->sc_qp);
> +		svc_xprt_put(xprt);
> +	}
> +
> +	/* Destroy the CM ID */
> +	rdma_destroy_id(rdma->sc_cm_id);
>  }
>  
>  static void svc_rdma_free(struct svc_xprt *xprt)
>  {
>  	struct svcxprt_rdma *rdma = (struct svcxprt_rdma *)xprt;
>  	dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
> -	rdma_destroy_xprt(rdma);
> -	kfree(rdma);
> -}
> -
> -static void rdma_destroy_xprt(struct svcxprt_rdma *xprt)
> -{
> -	if (xprt->sc_qp && !IS_ERR(xprt->sc_qp))
> -		ib_destroy_qp(xprt->sc_qp);
> -
> -	if (xprt->sc_sq_cq && !IS_ERR(xprt->sc_sq_cq))
> -		ib_destroy_cq(xprt->sc_sq_cq);
> +	/* We should only be called from kref_put */
> +	BUG_ON(atomic_read(&xprt->xpt_ref.refcount) != 0);
> +	if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq))
> +		ib_destroy_cq(rdma->sc_sq_cq);
>  
> -	if (xprt->sc_rq_cq && !IS_ERR(xprt->sc_rq_cq))
> -		ib_destroy_cq(xprt->sc_rq_cq);
> +	if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq))
> +		ib_destroy_cq(rdma->sc_rq_cq);
>  
> -	if (xprt->sc_phys_mr && !IS_ERR(xprt->sc_phys_mr))
> -		ib_dereg_mr(xprt->sc_phys_mr);
> +	if (rdma->sc_phys_mr && !IS_ERR(rdma->sc_phys_mr))
> +		ib_dereg_mr(rdma->sc_phys_mr);
>  
> -	if (xprt->sc_pd && !IS_ERR(xprt->sc_pd))
> -		ib_dealloc_pd(xprt->sc_pd);
> +	if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
> +		ib_dealloc_pd(rdma->sc_pd);
>  
> -	destroy_context_cache(xprt->sc_ctxt_head);
> +	destroy_context_cache(rdma->sc_ctxt_head);
> +	kfree(rdma);
>  }
>  
>  static int svc_rdma_has_wspace(struct svc_xprt *xprt)
> 

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH][REPOST] SVCRDMA: Add refs to RDMA transport close path
  2008-03-10 20:31   ` J. Bruce Fields
@ 2008-03-10 21:01     ` Tom Tucker
  0 siblings, 0 replies; 3+ messages in thread
From: Tom Tucker @ 2008-03-10 21:01 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: linux-nfs


On Mon, 2008-03-10 at 16:31 -0400, J. Bruce Fields wrote:
> On Thu, Mar 06, 2008 at 02:01:13PM -0600, Tom Tucker wrote:
> > Bruce: 
> > 
> > This patch is for 2.6.25 if possible. The previous version of this patch had a warning
> > that I missed in my "make && make modules_install && make install" test cycle. 
> > 
> > This patch has been tested with multiple concurrent mounts of both IB and iWARP. The
> > connect/destroy test was as follows:
> > 
> > #  while true; do \
> >    mount.nfs vic1-10g:/dsfs /mnt/iwarp -i -o rdma,port=2050,vers=3;\
> >    iozone -aec -i 0 -i 1 -s 10M -q 128k -y 32k -f /mnt/iwarp/foo; \
> >    umount /mnt/iwarp; \
> >    sleep 1; \
> > done
> > 
> > A similar test is run concurrently for IB as follows:
> > 
> > #  while true; do \
> >    mount.nfs vic1-ib:/dsfs /mnt/ib -i -o rdma,port=2050,vers=3;\
> >    iozone -aec -i 0 -i 1 -s 10M -q 128k -y 32k -f /mnt/ib/bar; \
> >    umount /mnt/ib; \
> >    sleep 1; \
> > done
> > 
> > I also ran Connectathon for grins.
> 
> Thanks, and sorry for the delay!

np, happy vacation!

> 
> > 
> > Sorry for the noise on the warning...
> >     
> >     The close code was re-factored so that transport close in error and close
> >     are performed by the same code.
> >     
> >     Additional transport references were added as follows:
> >     - A reference when on the DTO Q to avoid having the transport
> >       deleted while queued for I/O.
> >     - A reference while there is a QP able to generate events.
> >     - A reference until the DISCONNECTED event is received on the CM ID
> >     
> >     Finally, the QP and CQ event handler messages were changed to printk's to
> >     make it easier for system administrators to diagnose RDMA transport
> >     problems.
> 
> Could I get just a couple superficial changes, and then I'll pass this
> along for 2.6.25?:

Sure, I'll pull out the printk..


> 
> 	- We should keep the bugfix separated out from the rest.  (So
> 	  the printk changes, at the very least, should probably go in
> 	  later patch.)
> 	- Could we get just a short summary of the actual bug symptoms?
> 	  (E.g., "Multiple concurrent rdma mounts and unmounts could
> 	  result in a null-dereference in nfs_rdma_foo()...".)
> 

... and I'll add the following (embarrassing) description. 

"RDMA connection shutdown on an SMP machine can cause a kernel crash due
to the transport shutdown path racing with the I/O tasklet."


> --b.
> 
> >     
> >     Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
> > ---
> >  net/sunrpc/xprtrdma/svc_rdma_transport.c |  113 ++++++++++++++++++------------
> >  1 files changed, 68 insertions(+), 45 deletions(-)
> > 
> > diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
> > index f09444c..7340abb 100644
> > --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
> > +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
> > @@ -54,7 +54,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
> >  					int flags);
> >  static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
> >  static void svc_rdma_release_rqst(struct svc_rqst *);
> > -static void rdma_destroy_xprt(struct svcxprt_rdma *xprt);
> >  static void dto_tasklet_func(unsigned long data);
> >  static void svc_rdma_detach(struct svc_xprt *xprt);
> >  static void svc_rdma_free(struct svc_xprt *xprt);
> > @@ -174,8 +173,9 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
> >  static void cq_event_handler(struct ib_event *event, void *context)
> >  {
> >  	struct svc_xprt *xprt = context;
> > -	dprintk("svcrdma: received CQ event id=%d, context=%p\n",
> > -		event->event, context);
> > +	printk(KERN_INFO
> > +	       "svcrdma: received CQ event id=%d, context=%p\n",
> > +	       event->event, context);
> >  	set_bit(XPT_CLOSE, &xprt->xpt_flags);
> >  }
> >  
> > @@ -190,8 +190,9 @@ static void qp_event_handler(struct ib_event *event, void *context)
> >  	case IB_EVENT_COMM_EST:
> >  	case IB_EVENT_SQ_DRAINED:
> >  	case IB_EVENT_QP_LAST_WQE_REACHED:
> > -		dprintk("svcrdma: QP event %d received for QP=%p\n",
> > -			event->event, event->element.qp);
> > +		printk(KERN_INFO
> > +		       "svcrdma: QP event %d received for QP=%p\n",
> > +		       event->event, event->element.qp);
> >  		break;
> >  	/* These are considered fatal events */
> >  	case IB_EVENT_PATH_MIG_ERR:
> > @@ -200,9 +201,10 @@ static void qp_event_handler(struct ib_event *event, void *context)
> >  	case IB_EVENT_QP_ACCESS_ERR:
> >  	case IB_EVENT_DEVICE_FATAL:
> >  	default:
> > -		dprintk("svcrdma: QP ERROR event %d received for QP=%p, "
> > -			"closing transport\n",
> > -			event->event, event->element.qp);
> > +		printk(KERN_INFO
> > +		       "svcrdma: QP ERROR event %d received for QP=%p, "
> > +		       "closing transport\n",
> > +		       event->event, event->element.qp);
> >  		set_bit(XPT_CLOSE, &xprt->xpt_flags);
> >  		break;
> >  	}
> > @@ -247,6 +249,7 @@ static void dto_tasklet_func(unsigned long data)
> >  			sq_cq_reap(xprt);
> >  		}
> >  
> > +		svc_xprt_put(&xprt->sc_xprt);
> >  		spin_lock_irqsave(&dto_lock, flags);
> >  	}
> >  	spin_unlock_irqrestore(&dto_lock, flags);
> > @@ -275,8 +278,10 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context)
> >  	 * add it
> >  	 */
> >  	spin_lock_irqsave(&dto_lock, flags);
> > -	if (list_empty(&xprt->sc_dto_q))
> > +	if (list_empty(&xprt->sc_dto_q)) {
> > +		svc_xprt_get(&xprt->sc_xprt);
> >  		list_add_tail(&xprt->sc_dto_q, &dto_xprt_q);
> > +	}
> >  	spin_unlock_irqrestore(&dto_lock, flags);
> >  
> >  	/* Tasklet does all the work to avoid irqsave locks. */
> > @@ -386,8 +391,10 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context)
> >  	 * add it
> >  	 */
> >  	spin_lock_irqsave(&dto_lock, flags);
> > -	if (list_empty(&xprt->sc_dto_q))
> > +	if (list_empty(&xprt->sc_dto_q)) {
> > +		svc_xprt_get(&xprt->sc_xprt);
> >  		list_add_tail(&xprt->sc_dto_q, &dto_xprt_q);
> > +	}
> >  	spin_unlock_irqrestore(&dto_lock, flags);
> >  
> >  	/* Tasklet does all the work to avoid irqsave locks. */
> > @@ -611,6 +618,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
> >  	switch (event->event) {
> >  	case RDMA_CM_EVENT_ESTABLISHED:
> >  		/* Accept complete */
> > +		svc_xprt_get(xprt);
> >  		dprintk("svcrdma: Connection completed on DTO xprt=%p, "
> >  			"cm_id=%p\n", xprt, cma_id);
> >  		clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags);
> > @@ -661,15 +669,15 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
> >  
> >  	listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP);
> >  	if (IS_ERR(listen_id)) {
> > -		rdma_destroy_xprt(cma_xprt);
> > +		svc_xprt_put(&cma_xprt->sc_xprt);
> >  		dprintk("svcrdma: rdma_create_id failed = %ld\n",
> >  			PTR_ERR(listen_id));
> >  		return (void *)listen_id;
> >  	}
> >  	ret = rdma_bind_addr(listen_id, sa);
> >  	if (ret) {
> > -		rdma_destroy_xprt(cma_xprt);
> >  		rdma_destroy_id(listen_id);
> > +		svc_xprt_put(&cma_xprt->sc_xprt);
> >  		dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret);
> >  		return ERR_PTR(ret);
> >  	}
> > @@ -678,8 +686,9 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
> >  	ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
> >  	if (ret) {
> >  		rdma_destroy_id(listen_id);
> > -		rdma_destroy_xprt(cma_xprt);
> > +		svc_xprt_put(&cma_xprt->sc_xprt);
> >  		dprintk("svcrdma: rdma_listen failed = %d\n", ret);
> > +		return ERR_PTR(ret);
> >  	}
> >  
> >  	/*
> > @@ -820,6 +829,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
> >  		newxprt->sc_sq_depth = qp_attr.cap.max_send_wr;
> >  		newxprt->sc_max_requests = qp_attr.cap.max_recv_wr;
> >  	}
> > +	svc_xprt_get(&newxprt->sc_xprt);
> >  	newxprt->sc_qp = newxprt->sc_cm_id->qp;
> >  
> >  	/* Register all of physical memory */
> > @@ -891,8 +901,15 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
> >  
> >   errout:
> >  	dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret);
> > +	/* Take a reference in case the DTO handler runs */
> > +	svc_xprt_get(&newxprt->sc_xprt);
> > +	if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) {
> > +		ib_destroy_qp(newxprt->sc_qp);
> > +		svc_xprt_put(&newxprt->sc_xprt);
> > +	}
> >  	rdma_destroy_id(newxprt->sc_cm_id);
> > -	rdma_destroy_xprt(newxprt);
> > +	/* This call to put will destroy the transport */
> > +	svc_xprt_put(&newxprt->sc_xprt);
> >  	return NULL;
> >  }
> >  
> > @@ -919,54 +936,60 @@ static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
> >  	rqstp->rq_xprt_ctxt = NULL;
> >  }
> >  
> > -/* Disable data ready events for this connection */
> > +/*
> > + * When connected, an svc_xprt has at least three references:
> > + *
> > + * - A reference held by the QP. We still hold that here because this
> > + *   code deletes the QP and puts the reference.
> > + *
> > + * - A reference held by the cm_id between the ESTABLISHED and
> > + *   DISCONNECTED events. If the remote peer disconnected first, this
> > + *   reference could be gone.
> > + *
> > + * - A reference held by the svc_recv code that called this function
> > + *   as part of close processing.
> > + *
> > + * At a minimum two references should still be held.
> > + */
> >  static void svc_rdma_detach(struct svc_xprt *xprt)
> >  {
> >  	struct svcxprt_rdma *rdma =
> >  		container_of(xprt, struct svcxprt_rdma, sc_xprt);
> > -	unsigned long flags;
> > -
> >  	dprintk("svc: svc_rdma_detach(%p)\n", xprt);
> > -	/*
> > -	 * Shutdown the connection. This will ensure we don't get any
> > -	 * more events from the provider.
> > -	 */
> > +
> > +	/* Disconnect and flush posted WQE */
> >  	rdma_disconnect(rdma->sc_cm_id);
> > -	rdma_destroy_id(rdma->sc_cm_id);
> >  
> > -	/* We may already be on the DTO list */
> > -	spin_lock_irqsave(&dto_lock, flags);
> > -	if (!list_empty(&rdma->sc_dto_q))
> > -		list_del_init(&rdma->sc_dto_q);
> > -	spin_unlock_irqrestore(&dto_lock, flags);
> > +	/* Destroy the QP if present (not a listener) */
> > +	if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) {
> > +		ib_destroy_qp(rdma->sc_qp);
> > +		svc_xprt_put(xprt);
> > +	}
> > +
> > +	/* Destroy the CM ID */
> > +	rdma_destroy_id(rdma->sc_cm_id);
> >  }
> >  
> >  static void svc_rdma_free(struct svc_xprt *xprt)
> >  {
> >  	struct svcxprt_rdma *rdma = (struct svcxprt_rdma *)xprt;
> >  	dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
> > -	rdma_destroy_xprt(rdma);
> > -	kfree(rdma);
> > -}
> > -
> > -static void rdma_destroy_xprt(struct svcxprt_rdma *xprt)
> > -{
> > -	if (xprt->sc_qp && !IS_ERR(xprt->sc_qp))
> > -		ib_destroy_qp(xprt->sc_qp);
> > -
> > -	if (xprt->sc_sq_cq && !IS_ERR(xprt->sc_sq_cq))
> > -		ib_destroy_cq(xprt->sc_sq_cq);
> > +	/* We should only be called from kref_put */
> > +	BUG_ON(atomic_read(&xprt->xpt_ref.refcount) != 0);
> > +	if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq))
> > +		ib_destroy_cq(rdma->sc_sq_cq);
> >  
> > -	if (xprt->sc_rq_cq && !IS_ERR(xprt->sc_rq_cq))
> > -		ib_destroy_cq(xprt->sc_rq_cq);
> > +	if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq))
> > +		ib_destroy_cq(rdma->sc_rq_cq);
> >  
> > -	if (xprt->sc_phys_mr && !IS_ERR(xprt->sc_phys_mr))
> > -		ib_dereg_mr(xprt->sc_phys_mr);
> > +	if (rdma->sc_phys_mr && !IS_ERR(rdma->sc_phys_mr))
> > +		ib_dereg_mr(rdma->sc_phys_mr);
> >  
> > -	if (xprt->sc_pd && !IS_ERR(xprt->sc_pd))
> > -		ib_dealloc_pd(xprt->sc_pd);
> > +	if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
> > +		ib_dealloc_pd(rdma->sc_pd);
> >  
> > -	destroy_context_cache(xprt->sc_ctxt_head);
> > +	destroy_context_cache(rdma->sc_ctxt_head);
> > +	kfree(rdma);
> >  }
> >  
> >  static int svc_rdma_has_wspace(struct svc_xprt *xprt)
> > 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2008-03-10 20:51 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-03-06 20:01 [PATCH][REPOST] SVCRDMA: Add refs to RDMA transport close path Tom Tucker
     [not found] ` <1204833673.21546.31.camel-SMNkleLxa3ZimH42XvhXlA@public.gmane.org>
2008-03-10 20:31   ` J. Bruce Fields
2008-03-10 21:01     ` Tom Tucker

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.