From: "J. Bruce Fields" <bfields@fieldses.org>
To: Tom Tucker <tom@opengridcomputing.com>
Cc: linux-nfs <linux-nfs@vger.kernel.org>
Subject: Re: [PATCH][REPOST] SVCRDMA: Add refs to RDMA transport close path
Date: Mon, 10 Mar 2008 16:31:29 -0400 [thread overview]
Message-ID: <20080310203129.GD13660@fieldses.org> (raw)
In-Reply-To: <1204833673.21546.31.camel-SMNkleLxa3ZimH42XvhXlA@public.gmane.org>
On Thu, Mar 06, 2008 at 02:01:13PM -0600, Tom Tucker wrote:
> Bruce:
>
> This patch is for 2.6.25 if possible. The previous version of this patch had a warning
> that I missed in my "make && make modules_install && make install" test cycle.
>
> This patch has been tested with multiple concurrent mounts of both IB and iWARP. The
> connect/destroy test was as follows:
>
> # while true; do \
> mount.nfs vic1-10g:/dsfs /mnt/iwarp -i -o rdma,port=2050,vers=3;\
> iozone -aec -i 0 -i 1 -s 10M -q 128k -y 32k -f /mnt/iwarp/foo; \
> umount /mnt/iwarp; \
> sleep 1; \
> done
>
> A similar test is run concurrently for IB as follows:
>
> # while true; do \
> mount.nfs vic1-ib:/dsfs /mnt/ib -i -o rdma,port=2050,vers=3;\
> iozone -aec -i 0 -i 1 -s 10M -q 128k -y 32k -f /mnt/ib/bar; \
> umount /mnt/ib; \
> sleep 1; \
> done
>
> I also ran Connectathon for grins.
Thanks, and sorry for the delay!
>
> Sorry for the noise on the warning...
>
> The close code was re-factored so that transport close in error and close
> are performed by the same code.
>
> Additional transport references were added as follows:
> - A reference when on the DTO Q to avoid having the transport
> deleted while queued for I/O.
> - A reference while there is a QP able to generate events.
> - A reference until the DISCONNECTED event is received on the CM ID
>
> Finally, the QP and CQ event handler messages were changed to printk's to
> make it easier for system administrators to diagnose RDMA transport
> problems.
Could I get just a couple superficial changes, and then I'll pass this
along for 2.6.25?:
- We should keep the bugfix separated out from the rest. (So
the printk changes, at the very least, should probably go in
later patch.)
- Could we get just a short summary of the actual bug symptoms?
(E.g., "Multiple concurrent rdma mounts and unmounts could
result in a null-dereference in nfs_rdma_foo()...".)
--b.
>
> Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
> ---
> net/sunrpc/xprtrdma/svc_rdma_transport.c | 113 ++++++++++++++++++------------
> 1 files changed, 68 insertions(+), 45 deletions(-)
>
> diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
> index f09444c..7340abb 100644
> --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
> +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
> @@ -54,7 +54,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
> int flags);
> static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
> static void svc_rdma_release_rqst(struct svc_rqst *);
> -static void rdma_destroy_xprt(struct svcxprt_rdma *xprt);
> static void dto_tasklet_func(unsigned long data);
> static void svc_rdma_detach(struct svc_xprt *xprt);
> static void svc_rdma_free(struct svc_xprt *xprt);
> @@ -174,8 +173,9 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
> static void cq_event_handler(struct ib_event *event, void *context)
> {
> struct svc_xprt *xprt = context;
> - dprintk("svcrdma: received CQ event id=%d, context=%p\n",
> - event->event, context);
> + printk(KERN_INFO
> + "svcrdma: received CQ event id=%d, context=%p\n",
> + event->event, context);
> set_bit(XPT_CLOSE, &xprt->xpt_flags);
> }
>
> @@ -190,8 +190,9 @@ static void qp_event_handler(struct ib_event *event, void *context)
> case IB_EVENT_COMM_EST:
> case IB_EVENT_SQ_DRAINED:
> case IB_EVENT_QP_LAST_WQE_REACHED:
> - dprintk("svcrdma: QP event %d received for QP=%p\n",
> - event->event, event->element.qp);
> + printk(KERN_INFO
> + "svcrdma: QP event %d received for QP=%p\n",
> + event->event, event->element.qp);
> break;
> /* These are considered fatal events */
> case IB_EVENT_PATH_MIG_ERR:
> @@ -200,9 +201,10 @@ static void qp_event_handler(struct ib_event *event, void *context)
> case IB_EVENT_QP_ACCESS_ERR:
> case IB_EVENT_DEVICE_FATAL:
> default:
> - dprintk("svcrdma: QP ERROR event %d received for QP=%p, "
> - "closing transport\n",
> - event->event, event->element.qp);
> + printk(KERN_INFO
> + "svcrdma: QP ERROR event %d received for QP=%p, "
> + "closing transport\n",
> + event->event, event->element.qp);
> set_bit(XPT_CLOSE, &xprt->xpt_flags);
> break;
> }
> @@ -247,6 +249,7 @@ static void dto_tasklet_func(unsigned long data)
> sq_cq_reap(xprt);
> }
>
> + svc_xprt_put(&xprt->sc_xprt);
> spin_lock_irqsave(&dto_lock, flags);
> }
> spin_unlock_irqrestore(&dto_lock, flags);
> @@ -275,8 +278,10 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context)
> * add it
> */
> spin_lock_irqsave(&dto_lock, flags);
> - if (list_empty(&xprt->sc_dto_q))
> + if (list_empty(&xprt->sc_dto_q)) {
> + svc_xprt_get(&xprt->sc_xprt);
> list_add_tail(&xprt->sc_dto_q, &dto_xprt_q);
> + }
> spin_unlock_irqrestore(&dto_lock, flags);
>
> /* Tasklet does all the work to avoid irqsave locks. */
> @@ -386,8 +391,10 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context)
> * add it
> */
> spin_lock_irqsave(&dto_lock, flags);
> - if (list_empty(&xprt->sc_dto_q))
> + if (list_empty(&xprt->sc_dto_q)) {
> + svc_xprt_get(&xprt->sc_xprt);
> list_add_tail(&xprt->sc_dto_q, &dto_xprt_q);
> + }
> spin_unlock_irqrestore(&dto_lock, flags);
>
> /* Tasklet does all the work to avoid irqsave locks. */
> @@ -611,6 +618,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
> switch (event->event) {
> case RDMA_CM_EVENT_ESTABLISHED:
> /* Accept complete */
> + svc_xprt_get(xprt);
> dprintk("svcrdma: Connection completed on DTO xprt=%p, "
> "cm_id=%p\n", xprt, cma_id);
> clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags);
> @@ -661,15 +669,15 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
>
> listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP);
> if (IS_ERR(listen_id)) {
> - rdma_destroy_xprt(cma_xprt);
> + svc_xprt_put(&cma_xprt->sc_xprt);
> dprintk("svcrdma: rdma_create_id failed = %ld\n",
> PTR_ERR(listen_id));
> return (void *)listen_id;
> }
> ret = rdma_bind_addr(listen_id, sa);
> if (ret) {
> - rdma_destroy_xprt(cma_xprt);
> rdma_destroy_id(listen_id);
> + svc_xprt_put(&cma_xprt->sc_xprt);
> dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret);
> return ERR_PTR(ret);
> }
> @@ -678,8 +686,9 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
> ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
> if (ret) {
> rdma_destroy_id(listen_id);
> - rdma_destroy_xprt(cma_xprt);
> + svc_xprt_put(&cma_xprt->sc_xprt);
> dprintk("svcrdma: rdma_listen failed = %d\n", ret);
> + return ERR_PTR(ret);
> }
>
> /*
> @@ -820,6 +829,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
> newxprt->sc_sq_depth = qp_attr.cap.max_send_wr;
> newxprt->sc_max_requests = qp_attr.cap.max_recv_wr;
> }
> + svc_xprt_get(&newxprt->sc_xprt);
> newxprt->sc_qp = newxprt->sc_cm_id->qp;
>
> /* Register all of physical memory */
> @@ -891,8 +901,15 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
>
> errout:
> dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret);
> + /* Take a reference in case the DTO handler runs */
> + svc_xprt_get(&newxprt->sc_xprt);
> + if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) {
> + ib_destroy_qp(newxprt->sc_qp);
> + svc_xprt_put(&newxprt->sc_xprt);
> + }
> rdma_destroy_id(newxprt->sc_cm_id);
> - rdma_destroy_xprt(newxprt);
> + /* This call to put will destroy the transport */
> + svc_xprt_put(&newxprt->sc_xprt);
> return NULL;
> }
>
> @@ -919,54 +936,60 @@ static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
> rqstp->rq_xprt_ctxt = NULL;
> }
>
> -/* Disable data ready events for this connection */
> +/*
> + * When connected, an svc_xprt has at least three references:
> + *
> + * - A reference held by the QP. We still hold that here because this
> + * code deletes the QP and puts the reference.
> + *
> + * - A reference held by the cm_id between the ESTABLISHED and
> + * DISCONNECTED events. If the remote peer disconnected first, this
> + * reference could be gone.
> + *
> + * - A reference held by the svc_recv code that called this function
> + * as part of close processing.
> + *
> + * At a minimum two references should still be held.
> + */
> static void svc_rdma_detach(struct svc_xprt *xprt)
> {
> struct svcxprt_rdma *rdma =
> container_of(xprt, struct svcxprt_rdma, sc_xprt);
> - unsigned long flags;
> -
> dprintk("svc: svc_rdma_detach(%p)\n", xprt);
> - /*
> - * Shutdown the connection. This will ensure we don't get any
> - * more events from the provider.
> - */
> +
> + /* Disconnect and flush posted WQE */
> rdma_disconnect(rdma->sc_cm_id);
> - rdma_destroy_id(rdma->sc_cm_id);
>
> - /* We may already be on the DTO list */
> - spin_lock_irqsave(&dto_lock, flags);
> - if (!list_empty(&rdma->sc_dto_q))
> - list_del_init(&rdma->sc_dto_q);
> - spin_unlock_irqrestore(&dto_lock, flags);
> + /* Destroy the QP if present (not a listener) */
> + if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) {
> + ib_destroy_qp(rdma->sc_qp);
> + svc_xprt_put(xprt);
> + }
> +
> + /* Destroy the CM ID */
> + rdma_destroy_id(rdma->sc_cm_id);
> }
>
> static void svc_rdma_free(struct svc_xprt *xprt)
> {
> struct svcxprt_rdma *rdma = (struct svcxprt_rdma *)xprt;
> dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
> - rdma_destroy_xprt(rdma);
> - kfree(rdma);
> -}
> -
> -static void rdma_destroy_xprt(struct svcxprt_rdma *xprt)
> -{
> - if (xprt->sc_qp && !IS_ERR(xprt->sc_qp))
> - ib_destroy_qp(xprt->sc_qp);
> -
> - if (xprt->sc_sq_cq && !IS_ERR(xprt->sc_sq_cq))
> - ib_destroy_cq(xprt->sc_sq_cq);
> + /* We should only be called from kref_put */
> + BUG_ON(atomic_read(&xprt->xpt_ref.refcount) != 0);
> + if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq))
> + ib_destroy_cq(rdma->sc_sq_cq);
>
> - if (xprt->sc_rq_cq && !IS_ERR(xprt->sc_rq_cq))
> - ib_destroy_cq(xprt->sc_rq_cq);
> + if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq))
> + ib_destroy_cq(rdma->sc_rq_cq);
>
> - if (xprt->sc_phys_mr && !IS_ERR(xprt->sc_phys_mr))
> - ib_dereg_mr(xprt->sc_phys_mr);
> + if (rdma->sc_phys_mr && !IS_ERR(rdma->sc_phys_mr))
> + ib_dereg_mr(rdma->sc_phys_mr);
>
> - if (xprt->sc_pd && !IS_ERR(xprt->sc_pd))
> - ib_dealloc_pd(xprt->sc_pd);
> + if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
> + ib_dealloc_pd(rdma->sc_pd);
>
> - destroy_context_cache(xprt->sc_ctxt_head);
> + destroy_context_cache(rdma->sc_ctxt_head);
> + kfree(rdma);
> }
>
> static int svc_rdma_has_wspace(struct svc_xprt *xprt)
>
next prev parent reply other threads:[~2008-03-10 20:31 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-03-06 20:01 [PATCH][REPOST] SVCRDMA: Add refs to RDMA transport close path Tom Tucker
[not found] ` <1204833673.21546.31.camel-SMNkleLxa3ZimH42XvhXlA@public.gmane.org>
2008-03-10 20:31 ` J. Bruce Fields [this message]
2008-03-10 21:01 ` Tom Tucker
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080310203129.GD13660@fieldses.org \
--to=bfields@fieldses.org \
--cc=linux-nfs@vger.kernel.org \
--cc=tom@opengridcomputing.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.