From mboxrd@z Thu Jan 1 00:00:00 1970 From: Sagi Grimberg Subject: Re: [PATCH libibverbs] Add MR re-registeration Date: Sun, 02 Nov 2014 14:35:49 +0200 Message-ID: <54562525.6060803@dev.mellanox.co.il> References: <1414920605-18943-1-git-send-email-ogerlitz@mellanox.com> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1414920605-18943-1-git-send-email-ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org> Sender: linux-rdma-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org To: Or Gerlitz , Roland Dreier , Doug Ledford Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, Yishai Hadas , Matan Barak List-Id: linux-rdma@vger.kernel.org On 11/2/2014 11:30 AM, Or Gerlitz wrote: > From: Matan Barak > > Memory re-registeration is a feature that enables one to change > the attributes of a memory region, including PD, translation > (address and length) and access flags. > > Signed-off-by: Matan Barak > Signed-off-by: Or Gerlitz > --- > include/infiniband/driver.h | 5 +++ > include/infiniband/kern-abi.h | 20 +++++++++++ > include/infiniband/verbs.h | 11 +++++- > man/ibv_rereg_mr.3 | 72 +++++++++++++++++++++++++++++++++++++++++ > src/cmd.c | 29 ++++++++++++++++ > src/libibverbs.map | 1 + > src/verbs.c | 55 +++++++++++++++++++++++++++++++ > 7 files changed, 191 insertions(+), 2 deletions(-) > create mode 100644 man/ibv_rereg_mr.3 > > diff --git a/include/infiniband/driver.h b/include/infiniband/driver.h > index 5cc092b..4aa2a03 100644 > --- a/include/infiniband/driver.h > +++ b/include/infiniband/driver.h > @@ -128,6 +128,11 @@ int ibv_cmd_reg_mr(struct ibv_pd *pd, void *addr, size_t length, > struct ibv_mr *mr, struct ibv_reg_mr *cmd, > size_t cmd_size, > struct ibv_reg_mr_resp *resp, size_t resp_size); > +int ibv_cmd_rereg_mr(struct ibv_mr *mr, uint32_t flags, void *addr, > + size_t length, uint64_t hca_va, int access, > + struct ibv_pd *pd, struct ibv_rereg_mr *cmd, > + size_t cmd_sz, struct ibv_rereg_mr_resp *resp, > + size_t resp_sz); > int ibv_cmd_dereg_mr(struct ibv_mr *mr); > int ibv_cmd_create_cq(struct ibv_context *context, int cqe, > struct ibv_comp_channel *channel, > diff --git a/include/infiniband/kern-abi.h b/include/infiniband/kern-abi.h > index 20812c3..6001a7e 100644 > --- a/include/infiniband/kern-abi.h > +++ b/include/infiniband/kern-abi.h > @@ -333,6 +333,26 @@ struct ibv_reg_mr_resp { > __u32 rkey; > }; > > +struct ibv_rereg_mr { > + __u32 command; > + __u16 in_words; > + __u16 out_words; > + __u64 response; > + __u32 mr_handle; > + __u32 flags; > + __u64 start; > + __u64 length; > + __u64 hca_va; > + __u32 pd_handle; > + __u32 access_flags; > + __u64 driver_data[0]; > +}; > + > +struct ibv_rereg_mr_resp { > + __u32 lkey; > + __u32 rkey; > +}; > + > struct ibv_dereg_mr { > __u32 command; > __u16 in_words; > diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h > index 045a42b..4b30880 100644 > --- a/include/infiniband/verbs.h > +++ b/include/infiniband/verbs.h > @@ -342,7 +342,8 @@ enum ibv_rereg_mr_flags { > IBV_REREG_MR_CHANGE_TRANSLATION = (1 << 0), > IBV_REREG_MR_CHANGE_PD = (1 << 1), > IBV_REREG_MR_CHANGE_ACCESS = (1 << 2), > - IBV_REREG_MR_KEEP_VALID = (1 << 3) > + IBV_REREG_MR_KEEP_VALID = (1 << 3), > + IBV_REREG_MR_FLAGS_SUPPORTED = ((IBV_REREG_MR_KEEP_VALID << 1) - 1) > }; > > struct ibv_mr { > @@ -886,7 +887,7 @@ struct ibv_context_ops { > int (*dealloc_pd)(struct ibv_pd *pd); > struct ibv_mr * (*reg_mr)(struct ibv_pd *pd, void *addr, size_t length, > int access); > - struct ibv_mr * (*rereg_mr)(struct ibv_mr *mr, > + int (*rereg_mr)(struct ibv_mr *mr, > int flags, > struct ibv_pd *pd, void *addr, > size_t length, > @@ -1162,6 +1163,12 @@ struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr, > size_t length, int access); > > /** > + * ibv_rereg_mr - Re-Register a memory region > + */ > +int ibv_rereg_mr(struct ibv_mr *mr, int flags, > + struct ibv_pd *pd, void *addr, > + size_t length, uint64_t access); > +/** > * ibv_dereg_mr - Deregister a memory region > */ > int ibv_dereg_mr(struct ibv_mr *mr); > diff --git a/man/ibv_rereg_mr.3 b/man/ibv_rereg_mr.3 > new file mode 100644 > index 0000000..b13f610 > --- /dev/null > +++ b/man/ibv_rereg_mr.3 > @@ -0,0 +1,72 @@ > +.\" -*- nroff -*- > +.\" > +.TH IBV_REREG_MR 3 2014-08-27 libibverbs "Libibverbs Programmer's Manual" > +.SH "NAME" > +ibv_rereg_mr \- re-register a memory region (MR) > +.SH "SYNOPSIS" > +.nf > +.B #include > +.sp > +.BI "int ibv_rereg_mr(struct ibv_mr " "*mr" ", int " " flags" , > +.BI " struct ibv_pd * " "pd" ", void " " *addr", > +.BI " size_t " " length" ", uint64_t " " access", > +.BI " struct ibv_rereg_mr_attr *" " attr"); > +.fi > +.fi > +.SH "DESCRIPTION" > +.B ibv_rereg_mr() > +Modifies the attributes of an existing memory region (MR) > +.I mr\fR. > +Conceptually, this call performs the functions deregister memory region > +followed by register memory region. Where possible, > +resources are reused instead of deallocated and reallocated. > +.PP > +.I flags\fR > +is a bit-mask used to indicate which of the following properties of the memory region are being modified. Flags should be a combination (bit field) of: > +.PP > +.TP > +.B IBV_REREG_MR_CHANGE_TRANSLATION \fR Change translation (location and length) > +.TP > +.B IBV_REREG_MR_CHANGE_PD \fR Change protection domain > +.TP > +.B IBV_REREG_MR_CHANGE_ACCESS \fR Change access flags > +.PP > +When > +.B IBV_REREG_MR_CHANGE_PD > +is used, > +.I pd\fR > +represents the new PD this MR should be registered to. > +.br > +When > +.B IBV_REREG_MR_CHANGE_TRANSLATION > +is used, > +.I addr\fR. > +represents the virtual address (user-space pointer) of the new MR, while > +.I length\fR > +represents its length. > +.PP > +The access and other flags are represented in the field > +.I access\fR. > +This field describes the desired memory protection attributes; it is either 0 or the bitwise OR of one or more of the following flags: > +.PP > +.TP > +.B IBV_ACCESS_LOCAL_WRITE \fR Enable Local Write Access > +.TP > +.B IBV_ACCESS_REMOTE_WRITE \fR Enable Remote Write Access > +.TP > +.B IBV_ACCESS_REMOTE_READ\fR Enable Remote Read Access > +.TP > +.I attr\fR > +is available for future extensions. > +.SH "RETURN VALUE" > +.B ibv_rereg_mr() > +returns 0 on success, or the value of errno on failure (which indicates the failure reason). > +.SH "NOTES" > +If the memory re-registration call failed, the MR can't be used. > +Even on a failure, the user still needs to call ibv_dereg_mr on this MR. > +.SH "SEE ALSO" > +.BR ibv_reg_mr (3), > +.BR ibv_dereg_mr (3), > +.SH "AUTHORS" > +.TP > +Matan Barak > diff --git a/src/cmd.c b/src/cmd.c > index 04477f7..e037aa5 100644 > --- a/src/cmd.c > +++ b/src/cmd.c > @@ -267,6 +267,35 @@ int ibv_cmd_reg_mr(struct ibv_pd *pd, void *addr, size_t length, > return 0; > } > > +int ibv_cmd_rereg_mr(struct ibv_mr *mr, uint32_t flags, void *addr, > + size_t length, uint64_t hca_va, int access, > + struct ibv_pd *pd, struct ibv_rereg_mr *cmd, > + size_t cmd_sz, struct ibv_rereg_mr_resp *resp, > + size_t resp_sz) > +{ > + IBV_INIT_CMD_RESP(cmd, cmd_sz, REREG_MR, resp, resp_sz); > + > + cmd->mr_handle = mr->handle; > + cmd->flags = flags; > + cmd->start = (uintptr_t) addr; > + cmd->length = length; > + cmd->hca_va = hca_va; > + cmd->pd_handle = (flags & IBV_REREG_MR_CHANGE_PD) ? pd->handle : 0; > + cmd->access_flags = access; > + > + if (write(mr->context->cmd_fd, cmd, cmd_sz) != cmd_sz) > + return errno; > + > + (void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_sz); > + > + mr->lkey = resp->lkey; > + mr->rkey = resp->rkey; > + if (flags & IBV_REREG_MR_CHANGE_PD) > + mr->context = pd->context; > + > + return 0; > +} > + > int ibv_cmd_dereg_mr(struct ibv_mr *mr) > { > struct ibv_dereg_mr cmd; > diff --git a/src/libibverbs.map b/src/libibverbs.map > index 30212f3..307f412 100644 > --- a/src/libibverbs.map > +++ b/src/libibverbs.map > @@ -108,5 +108,6 @@ IBVERBS_1.1 { > ibv_cmd_create_srq_ex; > ibv_cmd_create_qp_ex; > ibv_cmd_open_qp; > + ibv_cmd_rereg_mr; > > } IBVERBS_1.0; > diff --git a/src/verbs.c b/src/verbs.c > index a6aae70..2e5abde 100644 > --- a/src/verbs.c > +++ b/src/verbs.c > @@ -223,6 +223,61 @@ struct ibv_mr *__ibv_reg_mr(struct ibv_pd *pd, void *addr, > } > default_symver(__ibv_reg_mr, ibv_reg_mr); > > +int __ibv_rereg_mr(struct ibv_mr *mr, int flags, > + struct ibv_pd *pd, void *addr, > + size_t length, uint64_t access) > +{ > + int dofork_onfail = 0; > + int err; > + void *old_addr; > + size_t old_len; > + > + if (flags & ~IBV_REREG_MR_FLAGS_SUPPORTED) > + return errno = EINVAL; > + > + if ((flags & IBV_REREG_MR_CHANGE_TRANSLATION) && > + (0 >= length)) > + return errno = EINVAL; > + > + if (!(flags & IBV_REREG_MR_CHANGE_ACCESS)) > + access = 0; > + > + if ((flags & IBV_REREG_MR_CHANGE_TRANSLATION) && > + (addr == NULL)) > + return errno = EINVAL; > + > + if (NULL == mr->context->ops.rereg_mr) > + return errno = ENOSYS; > + > + /* If address will be allocated internally we should > + call don't fork later after having addr. > + */ > + if (flags & IBV_REREG_MR_CHANGE_TRANSLATION) { > + err = ibv_dontfork_range(addr, length); > + if (err) > + return err; > + dofork_onfail = 1; > + } > + > + old_addr = mr->addr; > + old_len = mr->length; > + err = mr->context->ops.rereg_mr(mr, flags, pd, addr, length, access); > + if (!err) { > + if (flags & IBV_REREG_MR_CHANGE_TRANSLATION) { > + ibv_dofork_range(old_addr, old_len); > + mr->addr = addr; > + mr->length = length; > + } > + if (flags & IBV_REREG_MR_CHANGE_PD) > + mr->pd = pd; > + } else if (dofork_onfail) { > + ibv_dofork_range(addr, length); > + } > + > + return err; > +} > +default_symver(__ibv_rereg_mr, ibv_rereg_mr); > + > int __ibv_dereg_mr(struct ibv_mr *mr) > { > int ret; > This looks fine to me. Reviewed-by: Sagi Grimberg -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at http://vger.kernel.org/majordomo-info.html