* Re: [PATCH 04/12] svcrdma: Add a service to register a Fast Reg MR with the device [not found] ` <1223063486-4136-5-git-send-email-tom@opengridcomputing.com> @ 2008-10-03 20:34 ` Tom Tucker 0 siblings, 0 replies; 2+ messages in thread From: Tom Tucker @ 2008-10-03 20:34 UTC (permalink / raw) To: bfields; +Cc: linux-nfs Bruce: This patch has a gratuitous dprintk. The version below doesn't have it. The version in my git repository doesn't either. Sorry for the noise... Tom Fast Reg MR introduces a new WR type. Add a service to register the region with the adapter and update the completion handling to support completions with a NULL WR context. Signed-off-by: Tom Tucker <tom@opengridcomputing.com> --- include/linux/sunrpc/svc_rdma.h | 1 + net/sunrpc/xprtrdma/svc_rdma_transport.c | 118 +++++++++++++++++++++--------- 2 files changed, 84 insertions(+), 35 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 3425268..1402d19 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -214,6 +214,7 @@ extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); extern struct svc_rdma_req_map *svc_rdma_get_req_map(void); extern void svc_rdma_put_req_map(struct svc_rdma_req_map *); +extern int svc_rdma_fastreg(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *); extern void svc_rdma_put_frmr(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 5918285..60d98db 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -326,6 +326,45 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt) } /* + * Processs a completion context + */ +static void process_context(struct svcxprt_rdma *xprt, + struct svc_rdma_op_ctxt *ctxt) +{ + svc_rdma_unmap_dma(ctxt); + + switch (ctxt->wr_op) { + case IB_WR_SEND: + svc_rdma_put_context(ctxt, 1); + break; + + case IB_WR_RDMA_WRITE: + svc_rdma_put_context(ctxt, 0); + break; + + case IB_WR_RDMA_READ: + if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { + struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; + BUG_ON(!read_hdr); + spin_lock_bh(&xprt->sc_rq_dto_lock); + set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); + list_add_tail(&read_hdr->dto_q, + &xprt->sc_read_complete_q); + spin_unlock_bh(&xprt->sc_rq_dto_lock); + svc_xprt_enqueue(&xprt->sc_xprt); + } + svc_rdma_put_context(ctxt, 0); + break; + + default: + printk(KERN_ERR "svcrdma: unexpected completion type, " + "opcode=%d\n", + ctxt->wr_op); + break; + } +} + +/* * Send Queue Completion Handler - potentially called on interrupt context. * * Note that caller must hold a transport reference. @@ -337,17 +376,12 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) struct ib_cq *cq = xprt->sc_sq_cq; int ret; - if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) return; ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); atomic_inc(&rdma_stat_sq_poll); while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { - ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; - xprt = ctxt->xprt; - - svc_rdma_unmap_dma(ctxt); if (wc.status != IB_WC_SUCCESS) /* Close the transport */ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); @@ -356,35 +390,10 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) atomic_dec(&xprt->sc_sq_count); wake_up(&xprt->sc_send_wait); - switch (ctxt->wr_op) { - case IB_WR_SEND: - svc_rdma_put_context(ctxt, 1); - break; - - case IB_WR_RDMA_WRITE: - svc_rdma_put_context(ctxt, 0); - break; - - case IB_WR_RDMA_READ: - if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { - struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; - BUG_ON(!read_hdr); - spin_lock_bh(&xprt->sc_rq_dto_lock); - set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); - list_add_tail(&read_hdr->dto_q, - &xprt->sc_read_complete_q); - spin_unlock_bh(&xprt->sc_rq_dto_lock); - svc_xprt_enqueue(&xprt->sc_xprt); - } - svc_rdma_put_context(ctxt, 0); - break; + ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; + if (ctxt) + process_context(xprt, ctxt); - default: - printk(KERN_ERR "svcrdma: unexpected completion type, " - "opcode=%d, status=%d\n", - wc.opcode, wc.status); - break; - } svc_xprt_put(&xprt->sc_xprt); } @@ -1190,6 +1199,47 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) return 1; } +/* + * Attempt to register the kvec representing the RPC memory with the + * device. + * + * Returns: + * NULL : The device does not support fastreg or there were no more + * fastreg mr. + * frmr : The kvec register request was successfully posted. + * <0 : An error was encountered attempting to register the kvec. + */ +int svc_rdma_fastreg(struct svcxprt_rdma *xprt, + struct svc_rdma_fastreg_mr *frmr) +{ + struct ib_send_wr fastreg_wr; + u8 key; + int ret; + + /* Bump the key */ + key = (u8)(frmr->mr->lkey & 0x000000FF); + ib_update_fast_reg_key(frmr->mr, ++key); + + /* Prepare FASTREG WR */ + memset(&fastreg_wr, 0, sizeof fastreg_wr); + fastreg_wr.opcode = IB_WR_FAST_REG_MR; + fastreg_wr.send_flags = IB_SEND_SIGNALED; + fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; + fastreg_wr.wr.fast_reg.page_list = frmr->page_list; + fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; + fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; + fastreg_wr.wr.fast_reg.length = frmr->map_len; + fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags; + fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey; + ret = svc_rdma_send(xprt, &fastreg_wr); + dprintk("svcrdma:%s: reg lkey %08x kva %p mrlen %lu pages %d ret %d\n", + __func__, frmr->mr->lkey, frmr->kva, frmr->map_len, + frmr->page_list_len, + ret); + + return ret; +} + int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) { struct ib_send_wr *bad_wr; @@ -1199,8 +1249,6 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) return -ENOTCONN; BUG_ON(wr->send_flags != IB_SEND_SIGNALED); - BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op != - wr->opcode); /* If the SQ is full, wait until an SQ entry is available */ while (1) { spin_lock_bh(&xprt->sc_lock); ^ permalink raw reply related [flat|nested] 2+ messages in thread
* [PATCH 00/12] svcrdma: Fast memory registration support @ 2008-10-03 21:33 Tom Tucker 2008-10-03 21:33 ` [PATCH 01/12] svcrdma: Add Fast Reg MR Data Types Tom Tucker 0 siblings, 1 reply; 2+ messages in thread From: Tom Tucker @ 2008-10-03 21:33 UTC (permalink / raw) To: bfields; +Cc: thomas.talpey, linux-nfs, Tom Tucker Bruce: This updated patchset implements support for Fast Memory Registration in the NFS server. The second to last patch has been updated to provide better guidance on whether the connection is safe or not. Basically the code gives you a qualification string instead of expecting the administrator to know how to interpret the memory registration strategy and transport idiosyncrasies. The documentation has also been updated given your suggestions and the change to the informative log message. The string is certainly informative, but may be a little long. For this reason, there is a proc file entry to disable it. Fast Memory Regstration is the ability to quickly map a kernel memory page list as a logically contiguous memory region from the perspective of the adapter. This mapping is created and invalidated using work requests posted on the SQ. There are two benefits of this new memory registration strategy: - It allows for large amounts of data to be transferred between the client and server with a single work request - It improves security since the effective lifetime and scope of an RKEY is a single RPC and WR. The documentation file Documentation/filesystems/nfs-rdma.txt file has been updated with information about NFSRDMA security in general and how it is affected by the new Fast Memory Registration capability. This new capability is only enabled if the underlying device advertises that it is supported. It is not necessary that both the client and server use the same strategy, however, for Fast Memory Registration, it improves performance. These patches are also available here: git://git.linux-nfs.org/projects/tomtucker/xprt-switch-2.6.git Signed-off-by: Tom Tucker <tom@opengridcomputing.com> [PATCH 01/12] svcrdma: Add Fast Reg MR Data Types include/linux/sunrpc/svc_rdma.h | 22 +++++++++++++++++++++- 1 files changed, 21 insertions(+), 1 deletions(-) [PATCH 02/12] svcrdma: Add FRMR get/put services include/linux/sunrpc/svc_rdma.h | 3 + net/sunrpc/xprtrdma/svc_rdma_transport.c | 122 ++++++++++++++++++++++++++++- 2 files changed, 120 insertions(+), 5 deletions(-) [PATCH 03/12] svcrdma: Query device for Fast Reg support during connection setup net/sunrpc/xprtrdma/svc_rdma_transport.c | 76 +++++++++++++++++++++++++++-- 1 files changed, 70 insertions(+), 6 deletions(-) [PATCH 04/12] svcrdma: Add a service to register a Fast Reg MR with the device include/linux/sunrpc/svc_rdma.h | 1 + net/sunrpc/xprtrdma/svc_rdma_transport.c | 118 +++++++++++++++++++++--------- 2 files changed, 84 insertions(+), 35 deletions(-) [PATCH 05/12] svcrdma: Modify post recv path to use local dma key net/sunrpc/xprtrdma/svc_rdma_transport.c | 12 +++++++++--- 1 files changed, 9 insertions(+), 3 deletions(-) [PATCH 06/12] svcrdma: Add support to svc_rdma_send to handle chained WR net/sunrpc/xprtrdma/svc_rdma_transport.c | 29 +++++++++++++++++++++-------- 1 files changed, 21 insertions(+), 8 deletions(-) [PATCH 07/12] svcrdma: Modify the RPC recv path to use FRMR when available include/linux/sunrpc/svc_rdma.h | 1 + net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 187 ++++++++++++++++++++++++++---- net/sunrpc/xprtrdma/svc_rdma_transport.c | 5 +- 3 files changed, 171 insertions(+), 22 deletions(-) [PATCH 08/12] svcrdma: Modify the RPC reply path to use FRMR when available net/sunrpc/xprtrdma/svc_rdma_sendto.c | 263 +++++++++++++++++++++++++----- net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 + 2 files changed, 225 insertions(+), 40 deletions(-) [PATCH 09/12] svcrdma: Update svc_rdma_send_error to use DMA LKEY net/sunrpc/xprtrdma/svc_rdma_transport.c | 11 +++++++++-- 1 files changed, 9 insertions(+), 2 deletions(-) [PATCH 10/12] svcrdma: Fix IRD/ORD polarity net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) [PATCH 11/12] svcrdma: Add a message log string to indicate if FastReg is being used include/linux/sunrpc/svc_rdma.h | 1 + net/sunrpc/xprtrdma/svc_rdma.c | 8 ++++ net/sunrpc/xprtrdma/svc_rdma_transport.c | 63 ++++++++++++++++++----------- 3 files changed, 48 insertions(+), 24 deletions(-) [PATCH 12/12] svcrdma: Documentation update for the FastReg memory model Documentation/filesystems/nfs-rdma.txt | 126 ++++++++++++++++++++++++++++++++ 1 files changed, 126 insertions(+), 0 deletions(-) ^ permalink raw reply [flat|nested] 2+ messages in thread
* [PATCH 01/12] svcrdma: Add Fast Reg MR Data Types 2008-10-03 21:33 [PATCH 00/12] svcrdma: Fast memory registration support Tom Tucker @ 2008-10-03 21:33 ` Tom Tucker 2008-10-03 21:33 ` [PATCH 02/12] svcrdma: Add FRMR get/put services Tom Tucker 0 siblings, 1 reply; 2+ messages in thread From: Tom Tucker @ 2008-10-03 21:33 UTC (permalink / raw) To: bfields; +Cc: thomas.talpey, linux-nfs, Tom Tucker Add data types to track Fast Reg Memory Regions. The core data type is svc_rdma_fastreg_mr that associates a device MR with a host kva and page list. A field is added to the WR context to keep track of the FRMR used to map the local memory for an RPC. An FRMR list and spin lock are added to the transport instance to keep track of all FRMR allocated for the transport. Also added are device capability flags to indicate what the memory registration capabilities are for the underlying device and whether or not fast memory registration is supported. Signed-off-by: Tom Tucker <tom@opengridcomputing.com> --- include/linux/sunrpc/svc_rdma.h | 22 +++++++++++++++++++++- 1 files changed, 21 insertions(+), 1 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index dc05b54..49e458d 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -72,6 +72,7 @@ extern atomic_t rdma_stat_sq_prod; */ struct svc_rdma_op_ctxt { struct svc_rdma_op_ctxt *read_hdr; + struct svc_rdma_fastreg_mr *frmr; int hdr_count; struct xdr_buf arg; struct list_head dto_q; @@ -103,16 +104,30 @@ struct svc_rdma_chunk_sge { int start; /* sge no for this chunk */ int count; /* sge count for this chunk */ }; +struct svc_rdma_fastreg_mr { + struct ib_mr *mr; + void *kva; + struct ib_fast_reg_page_list *page_list; + int page_list_len; + unsigned long access_flags; + unsigned long map_len; + enum dma_data_direction direction; + struct list_head frmr_list; +}; struct svc_rdma_req_map { + struct svc_rdma_fastreg_mr *frmr; unsigned long count; union { struct kvec sge[RPCSVC_MAXPAGES]; struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES]; }; }; - +#define RDMACTXT_F_FAST_UNREG 1 #define RDMACTXT_F_LAST_CTXT 2 +#define SVCRDMA_DEVCAP_FAST_REG 1 /* fast mr registration */ +#define SVCRDMA_DEVCAP_READ_W_INV 2 /* read w/ invalidate */ + struct svcxprt_rdma { struct svc_xprt sc_xprt; /* SVC transport structure */ struct rdma_cm_id *sc_cm_id; /* RDMA connection id */ @@ -136,6 +151,11 @@ struct svcxprt_rdma { struct ib_cq *sc_rq_cq; struct ib_cq *sc_sq_cq; struct ib_mr *sc_phys_mr; /* MR for server memory */ + u32 sc_dev_caps; /* distilled device caps */ + u32 sc_dma_lkey; /* local dma key */ + unsigned int sc_frmr_pg_list_len; + struct list_head sc_frmr_q; + spinlock_t sc_frmr_q_lock; spinlock_t sc_lock; /* transport lock */ ^ permalink raw reply related [flat|nested] 2+ messages in thread
* [PATCH 02/12] svcrdma: Add FRMR get/put services 2008-10-03 21:33 ` [PATCH 01/12] svcrdma: Add Fast Reg MR Data Types Tom Tucker @ 2008-10-03 21:33 ` Tom Tucker 2008-10-03 21:33 ` [PATCH 03/12] svcrdma: Query device for Fast Reg support during connection setup Tom Tucker 0 siblings, 1 reply; 2+ messages in thread From: Tom Tucker @ 2008-10-03 21:33 UTC (permalink / raw) To: bfields; +Cc: thomas.talpey, linux-nfs, Tom Tucker Add services for the allocating, freeing, and unmapping Fast Reg MR. These services will be used by the transport connection setup, send and receive routines. Signed-off-by: Tom Tucker <tom@opengridcomputing.com> --- include/linux/sunrpc/svc_rdma.h | 3 + net/sunrpc/xprtrdma/svc_rdma_transport.c | 122 ++++++++++++++++++++++++++++- 2 files changed, 120 insertions(+), 5 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 49e458d..3425268 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -214,6 +214,9 @@ extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); extern struct svc_rdma_req_map *svc_rdma_get_req_map(void); extern void svc_rdma_put_req_map(struct svc_rdma_req_map *); +extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *); +extern void svc_rdma_put_frmr(struct svcxprt_rdma *, + struct svc_rdma_fastreg_mr *); extern void svc_sq_reap(struct svcxprt_rdma *); extern void svc_rq_reap(struct svcxprt_rdma *); extern struct svc_xprt_class svc_rdma_class; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 900cb69..dd170af 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -100,6 +100,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) ctxt->xprt = xprt; INIT_LIST_HEAD(&ctxt->dto_q); ctxt->count = 0; + ctxt->frmr = NULL; atomic_inc(&xprt->sc_ctxt_used); return ctxt; } @@ -109,11 +110,19 @@ static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) struct svcxprt_rdma *xprt = ctxt->xprt; int i; for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) { - atomic_dec(&xprt->sc_dma_used); - ib_dma_unmap_single(xprt->sc_cm_id->device, - ctxt->sge[i].addr, - ctxt->sge[i].length, - ctxt->direction); + /* + * Unmap the DMA addr in the SGE if the lkey matches + * the sc_dma_lkey, otherwise, ignore it since it is + * an FRMR lkey and will be unmapped later when the + * last WR that uses it completes. + */ + if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) { + atomic_dec(&xprt->sc_dma_used); + ib_dma_unmap_single(xprt->sc_cm_id->device, + ctxt->sge[i].addr, + ctxt->sge[i].length, + ctxt->direction); + } } } @@ -150,6 +159,7 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void) schedule_timeout_uninterruptible(msecs_to_jiffies(500)); } map->count = 0; + map->frmr = NULL; return map; } @@ -425,10 +435,12 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, INIT_LIST_HEAD(&cma_xprt->sc_dto_q); INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); + INIT_LIST_HEAD(&cma_xprt->sc_frmr_q); init_waitqueue_head(&cma_xprt->sc_send_wait); spin_lock_init(&cma_xprt->sc_lock); spin_lock_init(&cma_xprt->sc_rq_dto_lock); + spin_lock_init(&cma_xprt->sc_frmr_q_lock); cma_xprt->sc_ord = svcrdma_ord; @@ -686,6 +698,103 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, return ERR_PTR(ret); } +static int rdma_alloc_frmr(struct svcxprt_rdma *xprt) +{ + struct ib_mr *mr; + struct ib_fast_reg_page_list *pl; + struct svc_rdma_fastreg_mr *frmr; + + mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES); + if (!mr) + goto errout; + pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device, + RPCSVC_MAXPAGES); + if (!pl) { + ib_dereg_mr(mr); + goto errout; + } + frmr = kmalloc(sizeof(*frmr), GFP_KERNEL); + if (!frmr) { + ib_dereg_mr(mr); + ib_free_fast_reg_page_list(pl); + goto errout; + } + frmr->mr = mr; + frmr->page_list = pl; + INIT_LIST_HEAD(&frmr->frmr_list); + spin_lock_bh(&xprt->sc_frmr_q_lock); + list_add(&frmr->frmr_list, &xprt->sc_frmr_q); + spin_unlock_bh(&xprt->sc_frmr_q_lock); + + return 0; + + errout: + return -ENOMEM; +} + +static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt) +{ + struct svc_rdma_fastreg_mr *frmr; + + while (!list_empty(&xprt->sc_frmr_q)) { + frmr = list_entry(xprt->sc_frmr_q.next, + struct svc_rdma_fastreg_mr, frmr_list); + list_del_init(&frmr->frmr_list); + ib_dereg_mr(frmr->mr); + ib_free_fast_reg_page_list(frmr->page_list); + kfree(frmr); + } +} + +struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma) +{ + struct svc_rdma_fastreg_mr *frmr = NULL; + + while (1) { + spin_lock_bh(&rdma->sc_frmr_q_lock); + if (!list_empty(&rdma->sc_frmr_q)) { + frmr = list_entry(rdma->sc_frmr_q.next, + struct svc_rdma_fastreg_mr, frmr_list); + list_del_init(&frmr->frmr_list); + } + spin_unlock_bh(&rdma->sc_frmr_q_lock); + if (frmr) + break; + if (rdma_alloc_frmr(rdma)) + return ERR_PTR(-ENOMEM); + } + frmr->map_len = 0; + frmr->page_list_len = 0; + + return frmr; +} + +static void frmr_unmap_dma(struct svcxprt_rdma *xprt, + struct svc_rdma_fastreg_mr *frmr) +{ + int page_no; + for (page_no = 0; page_no < frmr->page_list_len; page_no++) { + dma_addr_t addr = frmr->page_list->page_list[page_no]; + if (ib_dma_mapping_error(frmr->mr->device, addr)) + continue; + atomic_dec(&xprt->sc_dma_used); + ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE, + frmr->direction); + } +} + +void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, + struct svc_rdma_fastreg_mr *frmr) +{ + if (frmr) { + frmr_unmap_dma(rdma, frmr); + spin_lock_bh(&rdma->sc_frmr_q_lock); + BUG_ON(!list_empty(&frmr->frmr_list)); + list_add(&frmr->frmr_list, &rdma->sc_frmr_q); + spin_unlock_bh(&rdma->sc_frmr_q_lock); + } +} + /* * This is the xpo_recvfrom function for listening endpoints. Its * purpose is to accept incoming connections. The CMA callback handler @@ -961,6 +1070,9 @@ static void __svc_rdma_free(struct work_struct *work) WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0); WARN_ON(atomic_read(&rdma->sc_dma_used) != 0); + /* De-allocate fastreg mr */ + rdma_dealloc_frmr_q(rdma); + /* Destroy the QP if present (not a listener) */ if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) ib_destroy_qp(rdma->sc_qp); ^ permalink raw reply related [flat|nested] 2+ messages in thread
* [PATCH 03/12] svcrdma: Query device for Fast Reg support during connection setup 2008-10-03 21:33 ` [PATCH 02/12] svcrdma: Add FRMR get/put services Tom Tucker @ 2008-10-03 21:33 ` Tom Tucker 2008-10-03 21:33 ` [PATCH 04/12] svcrdma: Add a service to register a Fast Reg MR with the device Tom Tucker 0 siblings, 1 reply; 2+ messages in thread From: Tom Tucker @ 2008-10-03 21:33 UTC (permalink / raw) To: bfields; +Cc: thomas.talpey, linux-nfs, Tom Tucker Query the device capabilities in the svc_rdma_accept function to determine what advanced memory management capabilities are supported by the device. Based on the query, select the most secure model available given the requirements of the transport and capabilities of the adapter. Signed-off-by: Tom Tucker <tom@opengridcomputing.com> --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 76 +++++++++++++++++++++++++++-- 1 files changed, 70 insertions(+), 6 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index dd170af..5918285 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -813,6 +813,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) struct rdma_conn_param conn_param; struct ib_qp_init_attr qp_attr; struct ib_device_attr devattr; + int dma_mr_acc; + int need_dma_mr; int ret; int i; @@ -928,15 +930,77 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) } newxprt->sc_qp = newxprt->sc_cm_id->qp; - /* Register all of physical memory */ - newxprt->sc_phys_mr = ib_get_dma_mr(newxprt->sc_pd, - IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE); - if (IS_ERR(newxprt->sc_phys_mr)) { - dprintk("svcrdma: Failed to create DMA MR ret=%d\n", ret); + /* + * Use the most secure set of MR resources based on the + * transport type and available memory management features in + * the device. Here's the table implemented below: + * + * Fast Global DMA Remote WR + * Reg LKEY MR Access + * Sup'd Sup'd Needed Needed + * + * IWARP N N Y Y + * N Y Y Y + * Y N Y N + * Y Y N - + * + * IB N N Y N + * N Y N - + * Y N Y N + * Y Y N - + * + * NB: iWARP requires remote write access for the data sink + * of an RDMA_READ. IB does not. + */ + if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { + newxprt->sc_frmr_pg_list_len = + devattr.max_fast_reg_page_list_len; + newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG; + } + + /* + * Determine if a DMA MR is required and if so, what privs are required + */ + switch (rdma_node_get_transport(newxprt->sc_cm_id->device->node_type)) { + case RDMA_TRANSPORT_IWARP: + newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV; + if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) { + need_dma_mr = 1; + dma_mr_acc = + (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE); + } else if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) { + need_dma_mr = 1; + dma_mr_acc = IB_ACCESS_LOCAL_WRITE; + } else + need_dma_mr = 0; + break; + case RDMA_TRANSPORT_IB: + if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) { + need_dma_mr = 1; + dma_mr_acc = IB_ACCESS_LOCAL_WRITE; + } else + need_dma_mr = 0; + break; + default: goto errout; } + /* Create the DMA MR if needed, otherwise, use the DMA LKEY */ + if (need_dma_mr) { + /* Register all of physical memory */ + newxprt->sc_phys_mr = + ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc); + if (IS_ERR(newxprt->sc_phys_mr)) { + dprintk("svcrdma: Failed to create DMA MR ret=%d\n", + ret); + goto errout; + } + newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey; + } else + newxprt->sc_dma_lkey = + newxprt->sc_cm_id->device->local_dma_lkey; + /* Post receive buffers */ for (i = 0; i < newxprt->sc_max_requests; i++) { ret = svc_rdma_post_recv(newxprt); ^ permalink raw reply related [flat|nested] 2+ messages in thread
* [PATCH 04/12] svcrdma: Add a service to register a Fast Reg MR with the device 2008-10-03 21:33 ` [PATCH 03/12] svcrdma: Query device for Fast Reg support during connection setup Tom Tucker @ 2008-10-03 21:33 ` Tom Tucker 0 siblings, 0 replies; 2+ messages in thread From: Tom Tucker @ 2008-10-03 21:33 UTC (permalink / raw) To: bfields; +Cc: thomas.talpey, linux-nfs, Tom Tucker Fast Reg MR introduces a new WR type. Add a service to register the region with the adapter and update the completion handling to support completions with a NULL WR context. Signed-off-by: Tom Tucker <tom@opengridcomputing.com> --- include/linux/sunrpc/svc_rdma.h | 1 + net/sunrpc/xprtrdma/svc_rdma_transport.c | 111 ++++++++++++++++++++--------- 2 files changed, 77 insertions(+), 35 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 3425268..1402d19 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -214,6 +214,7 @@ extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); extern struct svc_rdma_req_map *svc_rdma_get_req_map(void); extern void svc_rdma_put_req_map(struct svc_rdma_req_map *); +extern int svc_rdma_fastreg(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *); extern void svc_rdma_put_frmr(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 5918285..4b70282 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -326,6 +326,45 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt) } /* + * Processs a completion context + */ +static void process_context(struct svcxprt_rdma *xprt, + struct svc_rdma_op_ctxt *ctxt) +{ + svc_rdma_unmap_dma(ctxt); + + switch (ctxt->wr_op) { + case IB_WR_SEND: + svc_rdma_put_context(ctxt, 1); + break; + + case IB_WR_RDMA_WRITE: + svc_rdma_put_context(ctxt, 0); + break; + + case IB_WR_RDMA_READ: + if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { + struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; + BUG_ON(!read_hdr); + spin_lock_bh(&xprt->sc_rq_dto_lock); + set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); + list_add_tail(&read_hdr->dto_q, + &xprt->sc_read_complete_q); + spin_unlock_bh(&xprt->sc_rq_dto_lock); + svc_xprt_enqueue(&xprt->sc_xprt); + } + svc_rdma_put_context(ctxt, 0); + break; + + default: + printk(KERN_ERR "svcrdma: unexpected completion type, " + "opcode=%d\n", + ctxt->wr_op); + break; + } +} + +/* * Send Queue Completion Handler - potentially called on interrupt context. * * Note that caller must hold a transport reference. @@ -337,17 +376,12 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) struct ib_cq *cq = xprt->sc_sq_cq; int ret; - if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) return; ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); atomic_inc(&rdma_stat_sq_poll); while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { - ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; - xprt = ctxt->xprt; - - svc_rdma_unmap_dma(ctxt); if (wc.status != IB_WC_SUCCESS) /* Close the transport */ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); @@ -356,35 +390,10 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) atomic_dec(&xprt->sc_sq_count); wake_up(&xprt->sc_send_wait); - switch (ctxt->wr_op) { - case IB_WR_SEND: - svc_rdma_put_context(ctxt, 1); - break; - - case IB_WR_RDMA_WRITE: - svc_rdma_put_context(ctxt, 0); - break; - - case IB_WR_RDMA_READ: - if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { - struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; - BUG_ON(!read_hdr); - spin_lock_bh(&xprt->sc_rq_dto_lock); - set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); - list_add_tail(&read_hdr->dto_q, - &xprt->sc_read_complete_q); - spin_unlock_bh(&xprt->sc_rq_dto_lock); - svc_xprt_enqueue(&xprt->sc_xprt); - } - svc_rdma_put_context(ctxt, 0); - break; + ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; + if (ctxt) + process_context(xprt, ctxt); - default: - printk(KERN_ERR "svcrdma: unexpected completion type, " - "opcode=%d, status=%d\n", - wc.opcode, wc.status); - break; - } svc_xprt_put(&xprt->sc_xprt); } @@ -1190,6 +1199,40 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) return 1; } +/* + * Attempt to register the kvec representing the RPC memory with the + * device. + * + * Returns: + * NULL : The device does not support fastreg or there were no more + * fastreg mr. + * frmr : The kvec register request was successfully posted. + * <0 : An error was encountered attempting to register the kvec. + */ +int svc_rdma_fastreg(struct svcxprt_rdma *xprt, + struct svc_rdma_fastreg_mr *frmr) +{ + struct ib_send_wr fastreg_wr; + u8 key; + + /* Bump the key */ + key = (u8)(frmr->mr->lkey & 0x000000FF); + ib_update_fast_reg_key(frmr->mr, ++key); + + /* Prepare FASTREG WR */ + memset(&fastreg_wr, 0, sizeof fastreg_wr); + fastreg_wr.opcode = IB_WR_FAST_REG_MR; + fastreg_wr.send_flags = IB_SEND_SIGNALED; + fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; + fastreg_wr.wr.fast_reg.page_list = frmr->page_list; + fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; + fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; + fastreg_wr.wr.fast_reg.length = frmr->map_len; + fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags; + fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey; + return svc_rdma_send(xprt, &fastreg_wr); +} + int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) { struct ib_send_wr *bad_wr; @@ -1199,8 +1242,6 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) return -ENOTCONN; BUG_ON(wr->send_flags != IB_SEND_SIGNALED); - BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op != - wr->opcode); /* If the SQ is full, wait until an SQ entry is available */ while (1) { spin_lock_bh(&xprt->sc_lock); ^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2008-10-03 21:33 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <1223063486-4136-1-git-send-email-tom@opengridcomputing.com>
[not found] ` <1223063486-4136-2-git-send-email-tom@opengridcomputing.com>
[not found] ` <1223063486-4136-3-git-send-email-tom@opengridcomputing.com>
[not found] ` <1223063486-4136-4-git-send-email-tom@opengridcomputing.com>
[not found] ` <1223063486-4136-5-git-send-email-tom@opengridcomputing.com>
2008-10-03 20:34 ` [PATCH 04/12] svcrdma: Add a service to register a Fast Reg MR with the device Tom Tucker
2008-10-03 21:33 [PATCH 00/12] svcrdma: Fast memory registration support Tom Tucker
2008-10-03 21:33 ` [PATCH 01/12] svcrdma: Add Fast Reg MR Data Types Tom Tucker
2008-10-03 21:33 ` [PATCH 02/12] svcrdma: Add FRMR get/put services Tom Tucker
2008-10-03 21:33 ` [PATCH 03/12] svcrdma: Query device for Fast Reg support during connection setup Tom Tucker
2008-10-03 21:33 ` [PATCH 04/12] svcrdma: Add a service to register a Fast Reg MR with the device Tom Tucker
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.