Re: [PATCH 02/10] svcrdma: Add FRMR get/put services

Linux NFS development
 help / color / mirror / Atom feed

From: Tom Tucker <tom@opengridcomputing.com>
To: "J. Bruce Fields" <bfields@fieldses.org>
Cc: linux-nfs@vger.kernel.org
Subject: Re: [PATCH 02/10] svcrdma: Add FRMR get/put services
Date: Thu, 25 Sep 2008 09:25:30 -0500	[thread overview]
Message-ID: <48DB9F5A.4090401@opengridcomputing.com> (raw)
In-Reply-To: <20080924194538.GM5772@fieldses.org>

J. Bruce Fields wrote:
> On Tue, Sep 16, 2008 at 06:34:31AM -0500, Tom Tucker wrote:
>> Add services for the allocating, freeing, and unmapping Fast Reg MR. These
>> services will be used by the transport connection setup, send and receive
>> routines.
>>
>> Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
>>
>> ---
>>  include/linux/sunrpc/svc_rdma.h          |    3 +
>>  net/sunrpc/xprtrdma/svc_rdma_transport.c |  125 ++++++++++++++++++++++++++++-
>>  2 files changed, 123 insertions(+), 5 deletions(-)
>>
>> diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
>> index 295ebbc..100754e 100644
>> --- a/include/linux/sunrpc/svc_rdma.h
>> +++ b/include/linux/sunrpc/svc_rdma.h
>> @@ -219,6 +219,9 @@ extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
>>  extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
>>  extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
>>  extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
>> +extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
>> +extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
>> +			      struct svc_rdma_fastreg_mr *);
>>  extern void svc_sq_reap(struct svcxprt_rdma *);
>>  extern void svc_rq_reap(struct svcxprt_rdma *);
>>  extern struct svc_xprt_class svc_rdma_class;
>> diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
>> index 900cb69..f200345 100644
>> --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
>> +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
>> @@ -100,6 +100,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
>>  	ctxt->xprt = xprt;
>>  	INIT_LIST_HEAD(&ctxt->dto_q);
>>  	ctxt->count = 0;
>> +	ctxt->frmr = NULL;
>>  	atomic_inc(&xprt->sc_ctxt_used);
>>  	return ctxt;
>>  }
>> @@ -109,11 +110,19 @@ static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
>>  	struct svcxprt_rdma *xprt = ctxt->xprt;
>>  	int i;
>>  	for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
>> -		atomic_dec(&xprt->sc_dma_used);
>> -		ib_dma_unmap_single(xprt->sc_cm_id->device,
>> -				    ctxt->sge[i].addr,
>> -				    ctxt->sge[i].length,
>> -				    ctxt->direction);
>> +		/*
>> +		 * Unmap the DMA addr in the SGE if the lkey matches
>> +		 * the sc_dma_lkey, otherwise, ignore it since it is
>> +		 * an FRMR lkey and will be unmapped later when the
>> +		 * last WR that uses it completes.
> 
> I kinda wish at the start of this that we'd made a rule that any acronym
> that won't doesn't have a definition in the first page of its google
> results should have at least its expansion added to a glossary someplace
> in Documentation/ or the c source.  Maybe it's too late now.
> 

I suppose I could add a glossary to the doc,

>> +		 */
>> +		if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
> 
> I'd probably have done
> 
> 		if (ctx->sge[i].lkey != xprt->sc_dma_lkey)
> 			continue;

I'm game, it reduces the indent level.

> 
> and left the rest alone; but OK.
> 
> (By the way, is it ever possible this test will return different results
> on different passes through the loop, or could we just as well break out
> the first time we see this?:
> 
> 		if (ctx->sge[i].lkey != xprt->sc_dma_lkey)
> 			break;
> 

I believe that you could have a head that's an lkey and a tail that's an 
lkey, but the page_list is fetched with RDMA and was fastreg'd. Mapping 
that to an SGE would give you a mix.


> )
> 
>> +			atomic_dec(&xprt->sc_dma_used);
>> +			ib_dma_unmap_single(xprt->sc_cm_id->device,
>> +					    ctxt->sge[i].addr,
>> +					    ctxt->sge[i].length,
>> +					    ctxt->direction);
>> +		}
>>  	}
>>  }
>>  
>> @@ -150,6 +159,7 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void)
>>  		schedule_timeout_uninterruptible(msecs_to_jiffies(500));
>>  	}
>>  	map->count = 0;
>> +	map->frmr = NULL;
>>  	return map;
>>  }
>>  
>> @@ -425,10 +435,12 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
>>  	INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
>>  	INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
>>  	INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
>> +	INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
>>  	init_waitqueue_head(&cma_xprt->sc_send_wait);
>>  
>>  	spin_lock_init(&cma_xprt->sc_lock);
>>  	spin_lock_init(&cma_xprt->sc_rq_dto_lock);
>> +	spin_lock_init(&cma_xprt->sc_frmr_q_lock);
>>  
>>  	cma_xprt->sc_ord = svcrdma_ord;
>>  
>> @@ -686,6 +698,106 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
>>  	return ERR_PTR(ret);
>>  }
>>  
>> +static int rdma_alloc_frmr(struct svcxprt_rdma *xprt)
>> +{
>> +	struct ib_mr *mr;
>> +	struct ib_fast_reg_page_list *pl;
>> +	struct svc_rdma_fastreg_mr *frmr;
>> +
>> +	mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES);
>> +	if (!mr)
>> +		goto errout;
>> +	pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device,
>> +					 RPCSVC_MAXPAGES);
>> +	if (!pl) {
>> +		ib_dereg_mr(mr);
>> +		goto errout;
>> +	}
>> +	frmr = kmalloc(sizeof(*frmr), GFP_KERNEL);
>> +	if (!frmr) {
>> +		ib_dereg_mr(mr);
>> +		ib_free_fast_reg_page_list(pl);
>> +		goto errout;
>> +	}
>> +	frmr->mr = mr;
>> +	frmr->page_list = pl;
>> +	INIT_LIST_HEAD(&frmr->frmr_list);
> 
> This INIT_LIST_HEAD() is redundant given the following list_add().
> 
>> +	spin_lock_bh(&xprt->sc_frmr_q_lock);
>> +	list_add(&frmr->frmr_list, &xprt->sc_frmr_q);
>> +	spin_unlock_bh(&xprt->sc_frmr_q_lock);
>> +
>> +	return 0;
>> +
>> + errout:
>> +	return -ENOMEM;
> 
> Just a style nit: I like the exit-in-one-place thing, but if it's just a
> bare "return" I don't see much advantage over replacing each goto by a
> bare return.  The standard kernel style here would be:
> 
> 	...
> 	if (!mr)
> 		goto errout;
> 	...
> 	if (!pl)
> 		goto err_free_mr;
> 	...
> 	if (!frmr)
> 		goto err_free_frmr;
> 	...
> 	return 0;
> err_free_frmr:
> 	ib_free_fast_reg_page_list(pl);
> err_free_mr:
> 	ib_dereg_mr(mr);
> errout:
> 	return -ENOMEM;
> }
> 

agreed.

>> +}
>> +
>> +static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
>> +{
>> +	struct svc_rdma_fastreg_mr *frmr;
>> +
>> +	while (!list_empty(&xprt->sc_frmr_q)) {
>> +		frmr = list_entry(xprt->sc_frmr_q.next,
>> +				  struct svc_rdma_fastreg_mr, frmr_list);
>> +		list_del_init(&frmr->frmr_list);
>> +		ib_dereg_mr(frmr->mr);
>> +		ib_free_fast_reg_page_list(frmr->page_list);
>> +		kfree(frmr);
>> +	}
>> +}
>> +
>> +struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
>> +{
>> +	struct svc_rdma_fastreg_mr *frmr = NULL;
>> +
>> +	while (1) {
>> +		spin_lock_bh(&rdma->sc_frmr_q_lock);
>> +		if (!list_empty(&rdma->sc_frmr_q)) {
>> +			frmr = list_entry(rdma->sc_frmr_q.next,
>> +					  struct svc_rdma_fastreg_mr, frmr_list);
>> +			list_del_init(&frmr->frmr_list);
>> +		}
>> +		spin_unlock_bh(&rdma->sc_frmr_q_lock);
>> +		if (frmr)
>> +			break;
>> +		if (rdma_alloc_frmr(rdma))
>> +			return ERR_PTR(-ENOMEM);
> 
> This business with allocating stuff and adding it to the list, then
> looking for it next time around, seems a little more clever than
> necessary.  

Are you sure "clever" is the word you're looking for here :-)

> ... Why not ditch the loop, take the list_add() out of
> rdma_alloc_frmr() and have it return the newly allocated frmr or NULL
> instead, and then do something like this?:
> 
> 	spin_lock_bh(&rdma->sc_frmr_q_lock);
> 	if (!list_empty(&rdma->sc_frmr_q)) {
> 		frmr = list_entry(rdma->sc_frmr_q.next, struct
> 				  svc_rdma_fastreg_mr, frmr_list);
> 		list_del_init(&frmr->frmr_list);
> 	}
> 	spin_unlock_bh(&rdma->sc_frmr_q_lock);
> 	if (!frmr) {
> 		frmr = rdma_alloc_frmr(rdma);
> 		if (!frmr)
> 			return ERR_PTR(-ENOMEM);
> 	}
> 	frmr->map_len = 0;
> 	...
> 
> 
> There doesn't seem to be much point to adding to sc_frmr_q just to take
> it right back off again immediately.  Maybe I'm missing something.
> 

Umm. No. This code used to preallocate some entries and then grow. I 
removed that figuring that the frmr cache would very quickly reach it's 
natural size auto-magically. This "some other thread scammed my frmr" 
loop is a left-over.

Nice clean-up.


>> +	}
>> +	frmr->map_len = 0;
>> +	frmr->page_list_len = 0;
>> +
>> +	return frmr;
>> +}
>> +
>> +static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
>> +			   struct svc_rdma_fastreg_mr *frmr)
>> +{
>> +	int page_no;
>> +	dprintk("svcrdma:%s: xprt %p page_list_len %d\n",
>> +		__func__, xprt, frmr->page_list_len);
>> +	for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
>> +		dma_addr_t addr = frmr->page_list->page_list[page_no];
>> +		dprintk("svcrdma:      %08x %llx\n", frmr->mr->lkey, addr);
> 
> Are these dprintk's going to be useful for debugging user issues
> remotely, or were they just for your personal use while writing the
> code?
> 
> We saw recently that we may already have too many dprintk's for them to
> be useful in production, and the above seem likely to be rather
> frequent.

Agreed. It needs to go.

> 
> --b.
> 
>> +		if (ib_dma_mapping_error(frmr->mr->device, addr))
>> +			continue;
>> +		atomic_dec(&xprt->sc_dma_used);
>> +		ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE,
>> +				    frmr->direction);
>> +	}
>> +}
>> +
>> +void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
>> +		       struct svc_rdma_fastreg_mr *frmr)
>> +{
>> +	if (frmr) {
>> +		frmr_unmap_dma(rdma, frmr);
>> +		spin_lock_bh(&rdma->sc_frmr_q_lock);
>> +		BUG_ON(!list_empty(&frmr->frmr_list));
>> +		list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
>> +		spin_unlock_bh(&rdma->sc_frmr_q_lock);
>> +	}
>> +}
>> +
>>  /*
>>   * This is the xpo_recvfrom function for listening endpoints. Its
>>   * purpose is to accept incoming connections. The CMA callback handler
>> @@ -961,6 +1073,9 @@ static void __svc_rdma_free(struct work_struct *work)
>>  	WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
>>  	WARN_ON(atomic_read(&rdma->sc_dma_used) != 0);
>>  
>> +	/* De-allocate fastreg mr */
>> +	rdma_dealloc_frmr_q(rdma);
>> +
>>  	/* Destroy the QP if present (not a listener) */
>>  	if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
>>  		ib_destroy_qp(rdma->sc_qp);

next prev parent reply	other threads:[~2008-09-25 14:25 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <1221564879-85046-1-git-send-email-tom@opengridcomputing.com>
     [not found] ` <1221564879-85046-2-git-send-email-tom@opengridcomputing.com>
2008-09-24 19:11   ` [PATCH 01/10] svcrdma: Add Fast Reg MR Data Types J. Bruce Fields
2008-09-25 14:27     ` Tom Tucker
     [not found]   ` <1221564879-85046-3-git-send-email-tom@opengridcomputing.com>
2008-09-24 19:45     ` [PATCH 02/10] svcrdma: Add FRMR get/put services J. Bruce Fields
2008-09-25 14:25       ` Tom Tucker [this message]
2008-09-25 14:44         ` J. Bruce Fields
2008-09-25 20:31           ` Tom Tucker
     [not found]     ` <1221564879-85046-4-git-send-email-tom@opengridcomputing.com>
2008-09-24 20:10       ` [PATCH 03/10] svcrdma: Query device for Fast Reg support during connection setup J. Bruce Fields
2008-09-25 14:08         ` Tom Tucker
     [not found]       ` <1221564879-85046-5-git-send-email-tom@opengridcomputing.com>
2008-09-24 20:25         ` [PATCH 04/10] svcrdma: Add a service to register a Fast Reg MR with the device J. Bruce Fields
2008-09-25 13:31           ` Tom Tucker
     [not found]         ` <1221564879-85046-6-git-send-email-tom@opengridcomputing.com>
2008-09-24 20:31           ` [PATCH 05/10] svcrdma: Modify post recv path to use local dma key J. Bruce Fields
2008-09-25 13:36             ` Tom Tucker
     [not found]           ` <1221564879-85046-7-git-send-email-tom@opengridcomputing.com>
     [not found]             ` <1221564879-85046-8-git-send-email-tom@opengridcomputing.com>
     [not found]               ` <1221564879-85046-9-git-send-email-tom@opengridcomputing.com>
     [not found]                 ` <1221564879-85046-10-git-send-email-tom@opengridcomputing.com>
     [not found]                   ` <1221564879-85046-11-git-send-email-tom@opengridcomputing.com>
2008-09-24 21:21                     ` [PATCH 10/10] svcrdma: Documentation update for the FastReg memory model J. Bruce Fields
2008-09-25 13:35                       ` Tom Tucker
2008-09-26 16:01                         ` Talpey, Thomas
     [not found]                           ` <RTPCLUEXC2-PRDGryWt0000003c-rtwIt2gI0FxT+ZUat5FNkAK/GNPrWCqfQQ4Iyu8u01E@public.gmane.org>
2008-09-30  2:59                             ` Tom Tucker
2008-09-26 23:40                         ` J. Bruce Fields
2008-09-30  3:07                           ` Tom Tucker
2008-09-30 18:44                             ` J. Bruce Fields
2008-09-30 18:55                               ` Tom Tucker
2008-09-30 18:57                                 ` J. Bruce Fields
2008-09-30 20:17                                   ` Tom Tucker
2008-10-01 16:17                                     ` J. Bruce Fields
2008-10-02  0:38                                       ` Tom Tucker
2008-09-30 19:04                               ` Talpey, Thomas
     [not found]                                 ` <RTPCLUEXC2-PRDgFrYI00000094-rtwIt2gI0FxT+ZUat5FNkAK/GNPrWCqfQQ4Iyu8u01E@public.gmane.org>
2008-10-01 18:26                                   ` J. Bruce Fields
2008-10-01 19:18                                     ` Talpey, Thomas
     [not found]                                     ` <RTPCLUEXC2-PRDVjCRG000000bb-rtwIt2gI0FxT+ZUat5FNkAK/GNPrWCqfQQ4Iyu8u01E@public.gmane.org>
2008-10-01 19:23                                       ` Talpey, Thomas

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=48DB9F5A.4090401@opengridcomputing.com \
    --to=tom@opengridcomputing.com \
    --cc=bfields@fieldses.org \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox