* [PATCH RFC 1/2] IB/core: Introduce Fast Indirect Memory Registration verbs API
[not found] ` <1412693281-6161-1-git-send-email-sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
@ 2014-10-07 14:48 ` Sagi Grimberg
[not found] ` <1412693281-6161-2-git-send-email-sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
0 siblings, 1 reply; 12+ messages in thread
From: Sagi Grimberg @ 2014-10-07 14:48 UTC (permalink / raw)
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA
Cc: bvanassche-HInyCGIudOg, roland-DgEjT+Ai2ygdnm+yROfE0A,
eli-VPRAkNaXOzVWk0Htik3J/w, ogerlitz-VPRAkNaXOzVWk0Htik3J/w,
oren-VPRAkNaXOzVWk0Htik3J/w, sean.hefty-ral2JQCrhuEAvxtiuMwx3w
In order to support that we provide the user with an interface
to pass a scattered list of buffers to the IB core layer called
ib_indir_reg_list and provide a new send work request opcode
called IB_WR_REG_INDIR_MR. We extend wr union with a new type of
memory registration called indir_reg where the user can place the
relevant information to perform such a memory registration.
The verbs user is expected to perform these steps:
0. Make sure that the device supports Indirect memory registration via
ib_device_cap_flag IB_DEVICE_INDIR_REGISTRATION and make sure
that ib_device_attr max_indir_reg_mr_list_len suffices for the
expected scatterlist length
1. Allocate a memory region with IB_MR_INDIRECT_REG creation flag
This is done via ib_create_mr() with mr_init_attr.flags = IB_MR_INDIRECT_REG
2. Allocate an ib_indir_reg_list structure to hold the scattered buffers
pointers. This is done via new ib_alloc_indir_reg_list() verb
3. Populate the scattered buffers in ib_indir_reg_list.sg_list
4. Post a work request with a new opcode IB_WR_REG_INDIR_MR and
provide the populated ib_indir_reg_list
5. Perform data transfer
6. Get completion of kind IB_WC_REG_INDIR_MR (if requested)
7. Free indirect MR and ib_indir_reg_list via
ib_destroy_mr() and ib_free_indir_reg_list()
Signed-off-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
drivers/infiniband/core/verbs.c | 29 ++++++++++++++++++++
include/rdma/ib_verbs.h | 55 +++++++++++++++++++++++++++++++++++++-
2 files changed, 82 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index c2b89cc..0364551 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1445,3 +1445,32 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS;
}
EXPORT_SYMBOL(ib_check_mr_status);
+
+struct ib_indir_reg_list *
+ib_alloc_indir_reg_list(struct ib_device *device,
+ unsigned int max_indir_list_len)
+{
+ struct ib_indir_reg_list *indir_list;
+
+ if (!device->alloc_indir_reg_list)
+ return ERR_PTR(-ENOSYS);
+
+ indir_list = device->alloc_indir_reg_list(device,
+ max_indir_list_len);
+ if (!IS_ERR(indir_list)) {
+ indir_list->device = device;
+ indir_list->max_indir_list_len = max_indir_list_len;
+ }
+
+ return indir_list;
+}
+EXPORT_SYMBOL(ib_alloc_indir_reg_list);
+
+void
+ib_free_indir_reg_list(struct ib_device *device,
+ struct ib_indir_reg_list *indir_list)
+{
+ if (device->free_indir_reg_list)
+ device->free_indir_reg_list(device, indir_list);
+}
+EXPORT_SYMBOL(ib_free_indir_reg_list);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 470a011..f5fe53c 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -123,7 +123,8 @@ enum ib_device_cap_flags {
IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23),
IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24),
IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
- IB_DEVICE_SIGNATURE_HANDOVER = (1<<30)
+ IB_DEVICE_SIGNATURE_HANDOVER = (1<<30),
+ IB_DEVICE_INDIR_REGISTRATION = (1<<31)
};
enum ib_signature_prot_cap {
@@ -182,6 +183,7 @@ struct ib_device_attr {
int max_srq_wr;
int max_srq_sge;
unsigned int max_fast_reg_page_list_len;
+ unsigned int max_indir_reg_mr_list_len;
u16 max_pkeys;
u8 local_ca_ack_delay;
int sig_prot_cap;
@@ -476,7 +478,8 @@ __attribute_const__ int ib_rate_to_mult(enum ib_rate rate);
__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate);
enum ib_mr_create_flags {
- IB_MR_SIGNATURE_EN = 1,
+ IB_MR_SIGNATURE_EN = 1 << 0,
+ IB_MR_INDIRECT_REG = 1 << 1
};
/**
@@ -651,6 +654,7 @@ enum ib_wc_opcode {
IB_WC_FAST_REG_MR,
IB_WC_MASKED_COMP_SWAP,
IB_WC_MASKED_FETCH_ADD,
+ IB_WC_REG_INDIR_MR,
/*
* Set value of IB_WC_RECV so consumers can test if a completion is a
* receive by testing (opcode & IB_WC_RECV).
@@ -945,6 +949,7 @@ enum ib_wr_opcode {
IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
IB_WR_BIND_MW,
IB_WR_REG_SIG_MR,
+ IB_WR_REG_INDIR_MR,
/* reserve values for low level drivers' internal use.
* These values will not be used at all in the ib core layer.
*/
@@ -984,6 +989,12 @@ struct ib_fast_reg_page_list {
unsigned int max_page_list_len;
};
+struct ib_indir_reg_list {
+ struct ib_device *device;
+ struct ib_sge *sg_list;
+ unsigned int max_indir_list_len;
+};
+
/**
* struct ib_mw_bind_info - Parameters for a memory window bind operation.
* @mr: A memory region to bind the memory window to.
@@ -1056,6 +1067,14 @@ struct ib_send_wr {
int access_flags;
struct ib_sge *prot;
} sig_handover;
+ struct {
+ u64 iova_start;
+ struct ib_indir_reg_list *indir_list;
+ unsigned int indir_list_len;
+ u64 length;
+ unsigned int access_flags;
+ u32 mkey;
+ } indir_reg;
} wr;
u32 xrc_remote_srq_num; /* XRC TGT QPs only */
};
@@ -1562,6 +1581,10 @@ struct ib_device {
struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device,
int page_list_len);
void (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list);
+ struct ib_indir_reg_list * (*alloc_indir_reg_list)(struct ib_device *device,
+ unsigned int indir_list_len);
+ void (*free_indir_reg_list)(struct ib_device *device,
+ struct ib_indir_reg_list *indir_list);
int (*rereg_phys_mr)(struct ib_mr *mr,
int mr_rereg_mask,
struct ib_pd *pd,
@@ -2460,6 +2483,34 @@ struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(
void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
/**
+ * ib_alloc_indir_reg_list() - Allocates an indirect list array
+ * @device: ib device pointer
+ * @indir_list_len: size of the list array to be allocated
+ *
+ * Allocate a struct ib_indir_reg_list and a sg_list array
+ * that is at least indir_list_len in size. The actual size is
+ * returned in max_indir_list_len. The caller is responsible for
+ * initializing the contents of the sg_list array before posting
+ * a send work request with the IB_WR_REG_INDIR_MR opcode.
+ *
+ * The sg_list array entries should be set exactly the same way
+ * as the ib_send_wr sg_list entries {lkey, addr, length}.
+ */
+struct ib_indir_reg_list *
+ib_alloc_indir_reg_list(struct ib_device *device,
+ unsigned int indir_list_len);
+
+/**
+ * ib_free_indir_reg_list() - Deallocates a previously allocated
+ * indirect list array
+ * @device: ib device pointer
+ * @indir_list: pointer to be deallocated
+ */
+void
+ib_free_indir_reg_list(struct ib_device *device,
+ struct ib_indir_reg_list *indir_list);
+
+/**
* ib_update_fast_reg_key - updates the key portion of the fast_reg MR
* R_Key and L_Key.
* @mr - struct ib_mr pointer to be updated.
--
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH RFC 1/2] IB/core: Introduce Fast Indirect Memory Registration verbs API
[not found] ` <1412693281-6161-2-git-send-email-sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
@ 2014-10-07 18:12 ` Steve Wise
[not found] ` <54342D0C.6050103-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2014-10-14 5:40 ` Bart Van Assche
1 sibling, 1 reply; 12+ messages in thread
From: Steve Wise @ 2014-10-07 18:12 UTC (permalink / raw)
To: Sagi Grimberg, linux-rdma-u79uwXL29TY76Z2rM5mHXA
Cc: bvanassche-HInyCGIudOg, roland-DgEjT+Ai2ygdnm+yROfE0A,
eli-VPRAkNaXOzVWk0Htik3J/w, ogerlitz-VPRAkNaXOzVWk0Htik3J/w,
oren-VPRAkNaXOzVWk0Htik3J/w, sean.hefty-ral2JQCrhuEAvxtiuMwx3w
On 10/7/2014 9:48 AM, Sagi Grimberg wrote:
> In order to support that we provide the user with an interface
> to pass a scattered list of buffers to the IB core layer called
> ib_indir_reg_list and provide the a new send work request opcode
> called IB_WR_REG_INDIR_MR. We extend wr union with a new type of
> memory registration called indir_reg where the user can place the
> relevant information to perform such a memory registration.
>
> The verbs user is expected to perform these steps:
> 0. Make sure that the device supports Indirect memory registration via
> ib_device_cap_flag IB_DEVICE_INDIR_REGISTRATION and make sure
> that ib_device_attr max_indir_reg_mr_list_len suffice for the
> expected scatterlist length
>
> 1. Allocate a memory region with IB_MR_INDIRECT_REG creation flag
> This is done via ib_create_mr() with mr_init_attr.flags = IB_MR_INDIRECT_REG
>
> 2. Allocate an ib_indir_reg_list structure to hold the scattered buffers
> pointers. This is done via new ib_alloc_indir_reg_list() verb
>
> 3. Populate the scattered buffers in ib_indir_reg_list.sg_list
>
> 4. Post a work request with a new opcode IB_WR_REG_INDIR_MR and
> provide the populated ib_indir_reg_list
>
> 5. Perform data transfer
>
> 6. Get completion of kind IB_WC_REG_INDIR_MR (if requested)
>
> 7. Free indirect MR and ib_indir_reg_list via
> ib_destroy_mr() and ib_free_indir_reg_list()
>
> Signed-off-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> ---
> drivers/infiniband/core/verbs.c | 29 ++++++++++++++++++++
> include/rdma/ib_verbs.h | 55 +++++++++++++++++++++++++++++++++++++-
> 2 files changed, 82 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
> index c2b89cc..0364551 100644
> --- a/drivers/infiniband/core/verbs.c
> +++ b/drivers/infiniband/core/verbs.c
> @@ -1445,3 +1445,32 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
> mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS;
> }
> EXPORT_SYMBOL(ib_check_mr_status);
> +
> +struct ib_indir_reg_list *
> +ib_alloc_indir_reg_list(struct ib_device *device,
> + unsigned int max_indir_list_len)
> +{
> + struct ib_indir_reg_list *indir_list;
> +
> + if (!device->alloc_indir_reg_list)
> + return ERR_PTR(-ENOSYS);
> +
> + indir_list = device->alloc_indir_reg_list(device,
> + max_indir_list_len);
> + if (!IS_ERR(indir_list)) {
> + indir_list->device = device;
> + indir_list->max_indir_list_len = max_indir_list_len;
> + }
> +
> + return indir_list;
> +}
> +EXPORT_SYMBOL(ib_alloc_indir_reg_list);
> +
> +void
> +ib_free_indir_reg_list(struct ib_device *device,
> + struct ib_indir_reg_list *indir_list)
> +{
> + if (device->free_indir_reg_list)
> + device->free_indir_reg_list(device, indir_list);
> +}
> +EXPORT_SYMBOL(ib_free_indir_reg_list);
> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> index 470a011..f5fe53c 100644
> --- a/include/rdma/ib_verbs.h
> +++ b/include/rdma/ib_verbs.h
> @@ -123,7 +123,8 @@ enum ib_device_cap_flags {
> IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23),
> IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24),
> IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
> - IB_DEVICE_SIGNATURE_HANDOVER = (1<<30)
> + IB_DEVICE_SIGNATURE_HANDOVER = (1<<30),
> + IB_DEVICE_INDIR_REGISTRATION = (1<<31)
> };
>
> enum ib_signature_prot_cap {
> @@ -182,6 +183,7 @@ struct ib_device_attr {
> int max_srq_wr;
> int max_srq_sge;
> unsigned int max_fast_reg_page_list_len;
> + unsigned int max_indir_reg_mr_list_len;
> u16 max_pkeys;
> u8 local_ca_ack_delay;
> int sig_prot_cap;
> @@ -476,7 +478,8 @@ __attribute_const__ int ib_rate_to_mult(enum ib_rate rate);
> __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate);
>
> enum ib_mr_create_flags {
> - IB_MR_SIGNATURE_EN = 1,
> + IB_MR_SIGNATURE_EN = 1 << 0,
> + IB_MR_INDIRECT_REG = 1 << 1
> };
>
> /**
> @@ -651,6 +654,7 @@ enum ib_wc_opcode {
> IB_WC_FAST_REG_MR,
> IB_WC_MASKED_COMP_SWAP,
> IB_WC_MASKED_FETCH_ADD,
> + IB_WC_REG_INDIR_MR,
> /*
> * Set value of IB_WC_RECV so consumers can test if a completion is a
> * receive by testing (opcode & IB_WC_RECV).
> @@ -945,6 +949,7 @@ enum ib_wr_opcode {
> IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
> IB_WR_BIND_MW,
> IB_WR_REG_SIG_MR,
> + IB_WR_REG_INDIR_MR,
> /* reserve values for low level drivers' internal use.
> * These values will not be used at all in the ib core layer.
> */
> @@ -984,6 +989,12 @@ struct ib_fast_reg_page_list {
> unsigned int max_page_list_len;
> };
>
> +struct ib_indir_reg_list {
> + struct ib_device *device;
> + struct ib_sge *sg_list;
> + unsigned int max_indir_list_len;
> +};
> +
> /**
> * struct ib_mw_bind_info - Parameters for a memory window bind operation.
> * @mr: A memory region to bind the memory window to.
> @@ -1056,6 +1067,14 @@ struct ib_send_wr {
> int access_flags;
> struct ib_sge *prot;
> } sig_handover;
> + struct {
> + u64 iova_start;
> + struct ib_indir_reg_list *indir_list;
> + unsigned int indir_list_len;
> + u64 length;
> + unsigned int access_flags;
> + u32 mkey;
> + } indir_reg;
What is mkey? Shouldn't this be an rkey?
> } wr;
> u32 xrc_remote_srq_num; /* XRC TGT QPs only */
> };
> @@ -1562,6 +1581,10 @@ struct ib_device {
> struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device,
> int page_list_len);
> void (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list);
> + struct ib_indir_reg_list * (*alloc_indir_reg_list)(struct ib_device *device,
> + unsigned int indir_list_len);
> + void (*free_indir_reg_list)(struct ib_device *device,
> + struct ib_indir_reg_list *indir_list);
> int (*rereg_phys_mr)(struct ib_mr *mr,
> int mr_rereg_mask,
> struct ib_pd *pd,
> @@ -2460,6 +2483,34 @@ struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(
> void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
>
> /**
> + * ib_alloc_indir_reg_list() - Allocates an indirect list array
> + * @device: ib device pointer
> + * @indir_list_len: size of the list array to be allocated
> + *
> + * Allocate a struct ib_indir_reg_list and a sg_list array
> + * that is at least indir_list_len in size. The actual size is
> + * returned in max_indir_list_len. The caller is responsible for
> + * initializing the contents of the sg_list array before posting
> + * a send work request with the IB_WC_INDIR_REG_MR opcode.
> + *
> + * The sg_list array entries should be set exactly the same way
> + * the ib_send_wr sg_list {lkey, addr, length}.
> + */
> +struct ib_indir_reg_list *
> +ib_alloc_indir_reg_list(struct ib_device *device,
> + unsigned int indir_list_len);
> +
> +/**
> + * ib_free_indir_reg_list() - Deallocates a previously allocated
> + * indirect list array
> + * @device: ib device pointer
> + * @indir_list: pointer to be deallocated
> + */
> +void
> +ib_free_indir_reg_list(struct ib_device *device,
> + struct ib_indir_reg_list *indir_list);
> +
> +/**
> * ib_update_fast_reg_key - updates the key portion of the fast_reg MR
> * R_Key and L_Key.
> * @mr - struct ib_mr pointer to be updated.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH RFC 1/2] IB/core: Introduce Fast Indirect Memory Registration verbs API
[not found] ` <54342D0C.6050103-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
@ 2014-10-08 5:48 ` Sagi Grimberg
[not found] ` <5434D037.4040208-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
0 siblings, 1 reply; 12+ messages in thread
From: Sagi Grimberg @ 2014-10-08 5:48 UTC (permalink / raw)
To: Steve Wise, Sagi Grimberg, linux-rdma-u79uwXL29TY76Z2rM5mHXA
Cc: bvanassche-HInyCGIudOg, roland-DgEjT+Ai2ygdnm+yROfE0A,
eli-VPRAkNaXOzVWk0Htik3J/w, ogerlitz-VPRAkNaXOzVWk0Htik3J/w,
oren-VPRAkNaXOzVWk0Htik3J/w, sean.hefty-ral2JQCrhuEAvxtiuMwx3w
On 10/7/2014 9:12 PM, Steve Wise wrote:
> On 10/7/2014 9:48 AM, Sagi Grimberg wrote:
>> In order to support that we provide the user with an interface
>> to pass a scattered list of buffers to the IB core layer called
>> ib_indir_reg_list and provide the a new send work request opcode
>> called IB_WR_REG_INDIR_MR. We extend wr union with a new type of
>> memory registration called indir_reg where the user can place the
>> relevant information to perform such a memory registration.
>>
>> The verbs user is expected to perform these steps:
>> 0. Make sure that the device supports Indirect memory registration via
>> ib_device_cap_flag IB_DEVICE_INDIR_REGISTRATION and make sure
>> that ib_device_attr max_indir_reg_mr_list_len suffice for the
>> expected scatterlist length
>>
>> 1. Allocate a memory region with IB_MR_INDIRECT_REG creation flag
>> This is done via ib_create_mr() with mr_init_attr.flags =
>> IB_MR_INDIRECT_REG
>>
>> 2. Allocate an ib_indir_reg_list structure to hold the scattered buffers
>> pointers. This is done via new ib_alloc_indir_reg_list() verb
>>
>> 3. Populate the scattered buffers in ib_indir_reg_list.sg_list
>>
>> 4. Post a work request with a new opcode IB_WR_REG_INDIR_MR and
>> provide the populated ib_indir_reg_list
>>
>> 5. Perform data transfer
>>
>> 6. Get completion of kind IB_WC_REG_INDIR_MR (if requested)
>>
>> 7. Free indirect MR and ib_indir_reg_list via
>> ib_destroy_mr() and ib_free_indir_reg_list()
>>
>> Signed-off-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
>> ---
>> drivers/infiniband/core/verbs.c | 29 ++++++++++++++++++++
>> include/rdma/ib_verbs.h | 55
>> +++++++++++++++++++++++++++++++++++++-
>> 2 files changed, 82 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/infiniband/core/verbs.c
>> b/drivers/infiniband/core/verbs.c
>> index c2b89cc..0364551 100644
>> --- a/drivers/infiniband/core/verbs.c
>> +++ b/drivers/infiniband/core/verbs.c
>> @@ -1445,3 +1445,32 @@ int ib_check_mr_status(struct ib_mr *mr, u32
>> check_mask,
>> mr->device->check_mr_status(mr, check_mask, mr_status) :
>> -ENOSYS;
>> }
>> EXPORT_SYMBOL(ib_check_mr_status);
>> +
>> +struct ib_indir_reg_list *
>> +ib_alloc_indir_reg_list(struct ib_device *device,
>> + unsigned int max_indir_list_len)
>> +{
>> + struct ib_indir_reg_list *indir_list;
>> +
>> + if (!device->alloc_indir_reg_list)
>> + return ERR_PTR(-ENOSYS);
>> +
>> + indir_list = device->alloc_indir_reg_list(device,
>> + max_indir_list_len);
>> + if (!IS_ERR(indir_list)) {
>> + indir_list->device = device;
>> + indir_list->max_indir_list_len = max_indir_list_len;
>> + }
>> +
>> + return indir_list;
>> +}
>> +EXPORT_SYMBOL(ib_alloc_indir_reg_list);
>> +
>> +void
>> +ib_free_indir_reg_list(struct ib_device *device,
>> + struct ib_indir_reg_list *indir_list)
>> +{
>> + if (device->free_indir_reg_list)
>> + device->free_indir_reg_list(device, indir_list);
>> +}
>> +EXPORT_SYMBOL(ib_free_indir_reg_list);
>> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
>> index 470a011..f5fe53c 100644
>> --- a/include/rdma/ib_verbs.h
>> +++ b/include/rdma/ib_verbs.h
>> @@ -123,7 +123,8 @@ enum ib_device_cap_flags {
>> IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23),
>> IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24),
>> IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
>> - IB_DEVICE_SIGNATURE_HANDOVER = (1<<30)
>> + IB_DEVICE_SIGNATURE_HANDOVER = (1<<30),
>> + IB_DEVICE_INDIR_REGISTRATION = (1<<31)
>> };
>> enum ib_signature_prot_cap {
>> @@ -182,6 +183,7 @@ struct ib_device_attr {
>> int max_srq_wr;
>> int max_srq_sge;
>> unsigned int max_fast_reg_page_list_len;
>> + unsigned int max_indir_reg_mr_list_len;
>> u16 max_pkeys;
>> u8 local_ca_ack_delay;
>> int sig_prot_cap;
>> @@ -476,7 +478,8 @@ __attribute_const__ int ib_rate_to_mult(enum
>> ib_rate rate);
>> __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate);
>> enum ib_mr_create_flags {
>> - IB_MR_SIGNATURE_EN = 1,
>> + IB_MR_SIGNATURE_EN = 1 << 0,
>> + IB_MR_INDIRECT_REG = 1 << 1
>> };
>> /**
>> @@ -651,6 +654,7 @@ enum ib_wc_opcode {
>> IB_WC_FAST_REG_MR,
>> IB_WC_MASKED_COMP_SWAP,
>> IB_WC_MASKED_FETCH_ADD,
>> + IB_WC_REG_INDIR_MR,
>> /*
>> * Set value of IB_WC_RECV so consumers can test if a completion is a
>> * receive by testing (opcode & IB_WC_RECV).
>> @@ -945,6 +949,7 @@ enum ib_wr_opcode {
>> IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
>> IB_WR_BIND_MW,
>> IB_WR_REG_SIG_MR,
>> + IB_WR_REG_INDIR_MR,
>> /* reserve values for low level drivers' internal use.
>> * These values will not be used at all in the ib core layer.
>> */
>> @@ -984,6 +989,12 @@ struct ib_fast_reg_page_list {
>> unsigned int max_page_list_len;
>> };
>> +struct ib_indir_reg_list {
>> + struct ib_device *device;
>> + struct ib_sge *sg_list;
>> + unsigned int max_indir_list_len;
>> +};
>> +
>> /**
>> * struct ib_mw_bind_info - Parameters for a memory window bind
>> operation.
>> * @mr: A memory region to bind the memory window to.
>> @@ -1056,6 +1067,14 @@ struct ib_send_wr {
>> int access_flags;
>> struct ib_sge *prot;
>> } sig_handover;
>> + struct {
>> + u64 iova_start;
>> + struct ib_indir_reg_list *indir_list;
>> + unsigned int indir_list_len;
>> + u64 length;
>> + unsigned int access_flags;
>> + u32 mkey;
>> + } indir_reg;
>
> What is mkey? Shouldn't this be an rkey?
mkey means memory key. I can change it to rkey if that
is clearer.
Sagi.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 12+ messages in thread
* RE: [PATCH RFC 1/2] IB/core: Introduce Fast Indirect Memory Registration verbs API
[not found] ` <5434D037.4040208-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2014-10-08 13:54 ` Steve Wise
2014-10-13 7:57 ` Sagi Grimberg
0 siblings, 1 reply; 12+ messages in thread
From: Steve Wise @ 2014-10-08 13:54 UTC (permalink / raw)
To: 'Sagi Grimberg', 'Sagi Grimberg',
linux-rdma-u79uwXL29TY76Z2rM5mHXA
Cc: bvanassche-HInyCGIudOg, roland-DgEjT+Ai2ygdnm+yROfE0A,
eli-VPRAkNaXOzVWk0Htik3J/w, ogerlitz-VPRAkNaXOzVWk0Htik3J/w,
oren-VPRAkNaXOzVWk0Htik3J/w, sean.hefty-ral2JQCrhuEAvxtiuMwx3w
> >> @@ -1056,6 +1067,14 @@ struct ib_send_wr {
> >> int access_flags;
> >> struct ib_sge *prot;
> >> } sig_handover;
> >> + struct {
> >> + u64 iova_start;
> >> + struct ib_indir_reg_list *indir_list;
> >> + unsigned int indir_list_len;
> >> + u64 length;
> >> + unsigned int access_flags;
> >> + u32 mkey;
> >> + } indir_reg;
> >
> > What is mkey? Shouldn't this be an rkey?
>
> mkey means memory key. I can change it to rkey if that
> is clearer.
Is it valid to use an lkey here? Or is an rkey required? If an rkey is required, then I'd say it is clearer to name it rkey (and
that aligns with the fastreg struct).
Steve.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH RFC 1/2] IB/core: Introduce Fast Indirect Memory Registration verbs API
@ 2014-10-09 20:13 Or Gerlitz
[not found] ` <CAJ3xEMjdnNNbhRC0T_=hmRedwJFvSR9r-JccLZ2m0zaece5OQQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
0 siblings, 1 reply; 12+ messages in thread
From: Or Gerlitz @ 2014-10-09 20:13 UTC (permalink / raw)
To: Sagi Grimberg
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
Bart Van Assche, Roland Dreier, Eli Cohen, Or Gerlitz,
oren-VPRAkNaXOzVWk0Htik3J/w, sean.hefty-ral2JQCrhuEAvxtiuMwx3w
On Tue, Oct 7, 2014, Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org> wrote:
[...]
> enum ib_signature_prot_cap {
> @@ -182,6 +183,7 @@ struct ib_device_attr {
> int max_srq_wr;
> int max_srq_sge;
> unsigned int max_fast_reg_page_list_len;
> + unsigned int max_indir_reg_mr_list_len;
The indirection registration list is basically made of struct ib_sge
objects which are posted on a send-like work-request, any reason to
have a dedicated dev cap attribute for that and not use the already
existing one (max_sge)?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH RFC 1/2] IB/core: Introduce Fast Indirect Memory Registration verbs API
2014-10-08 13:54 ` Steve Wise
@ 2014-10-13 7:57 ` Sagi Grimberg
[not found] ` <543B85F7.1060000-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
0 siblings, 1 reply; 12+ messages in thread
From: Sagi Grimberg @ 2014-10-13 7:57 UTC (permalink / raw)
To: Steve Wise, 'Sagi Grimberg',
linux-rdma-u79uwXL29TY76Z2rM5mHXA
Cc: bvanassche-HInyCGIudOg, roland-DgEjT+Ai2ygdnm+yROfE0A,
eli-VPRAkNaXOzVWk0Htik3J/w, ogerlitz-VPRAkNaXOzVWk0Htik3J/w,
oren-VPRAkNaXOzVWk0Htik3J/w, sean.hefty-ral2JQCrhuEAvxtiuMwx3w
On 10/8/2014 4:54 PM, Steve Wise wrote:
>>>> @@ -1056,6 +1067,14 @@ struct ib_send_wr {
>>>> int access_flags;
>>>> struct ib_sge *prot;
>>>> } sig_handover;
>>>> + struct {
>>>> + u64 iova_start;
>>>> + struct ib_indir_reg_list *indir_list;
>>>> + unsigned int indir_list_len;
>>>> + u64 length;
>>>> + unsigned int access_flags;
>>>> + u32 mkey;
>>>> + } indir_reg;
>>>
>>> What is mkey? Shouldn't this be an rkey?
>>
>> mkey means memory key. I can change it to rkey if that
>> is clearer.
>
> Is it valid to use an lkey here? Or is an rkey required? If an rkey is required, then I'd say it is clearer to name it rkey (and
> that aligns with the fastreg struct).
>
It is valid. The memory key depends on the use case.
In case a client wants to send an rkey to a peer, it will register using
rkey. In case a server wants to transfer data from its local buffer
it will register using lkey.
So I didn't impose a specific key here - this is why I chose mkey.
I can modify it to rkey to mimic the well known fastreg, but it's not
a must.
Sagi.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH RFC 1/2] IB/core: Introduce Fast Indirect Memory Registration verbs API
[not found] ` <CAJ3xEMjdnNNbhRC0T_=hmRedwJFvSR9r-JccLZ2m0zaece5OQQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2014-10-13 8:10 ` Sagi Grimberg
2014-10-13 8:11 ` Sagi Grimberg
1 sibling, 0 replies; 12+ messages in thread
From: Sagi Grimberg @ 2014-10-13 8:10 UTC (permalink / raw)
To: Or Gerlitz
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
Bart Van Assche, Roland Dreier, Eli Cohen, Or Gerlitz,
oren-VPRAkNaXOzVWk0Htik3J/w, sean.hefty-ral2JQCrhuEAvxtiuMwx3w
On 10/9/2014 11:13 PM, Or Gerlitz wrote:
> On Tue, Oct 7, 2014, Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org> wrote:
> [...]
>> enum ib_signature_prot_cap {
>> @@ -182,6 +183,7 @@ struct ib_device_attr {
>> int max_srq_wr;
>> int max_srq_sge;
>> unsigned int max_fast_reg_page_list_len;
>> + unsigned int max_indir_reg_mr_list_len;
>
> The indirection registration list is basically made of struct ib_sge
> objects which are posted on a send-like work-request, any reason to
> have a dedicated dev cap attribute for that and not use the already
> existing one (max_sge)?
>
Hi Or,
max_send_sge capability is how many ib_sge's the device can handle in a
single send work request which takes into consideration elements such as
wqe control segment size and sq reservations. This is different from how
many ib_sge's the device can register to a single indirect memory key
which is free of such limitations.
So given these are different capabilities I prefer to expose them in
different attributes.
Sagi.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH RFC 1/2] IB/core: Introduce Fast Indirect Memory Registration verbs API
[not found] ` <CAJ3xEMjdnNNbhRC0T_=hmRedwJFvSR9r-JccLZ2m0zaece5OQQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2014-10-13 8:10 ` Sagi Grimberg
@ 2014-10-13 8:11 ` Sagi Grimberg
1 sibling, 0 replies; 12+ messages in thread
From: Sagi Grimberg @ 2014-10-13 8:11 UTC (permalink / raw)
To: Or Gerlitz
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
Bart Van Assche, Roland Dreier, Eli Cohen, Or Gerlitz,
oren-VPRAkNaXOzVWk0Htik3J/w, sean.hefty-ral2JQCrhuEAvxtiuMwx3w
On 10/9/2014 11:13 PM, Or Gerlitz wrote:
> On Tue, Oct 7, 2014, Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org> wrote:
> [...]
>> enum ib_signature_prot_cap {
>> @@ -182,6 +183,7 @@ struct ib_device_attr {
>> int max_srq_wr;
>> int max_srq_sge;
>> unsigned int max_fast_reg_page_list_len;
>> + unsigned int max_indir_reg_mr_list_len;
>
> The indirection registration list is basically made of struct ib_sge
> objects which are posted on a send-like work-request, any reason to
> have a dedicated dev cap attribute for that and not use the already
> existing one (max_sge)?
>
Hi Or,
max_send_sge capability is how many ib_sge's the device can handle in a
single send work request which takes into consideration elements such as
wqe control segment size and sq reservations. This is different from how
many ib_sge's the device can register to a single indirect memory key
which is free of such limitations.
So given these are different capabilities I prefer to expose them in
different attributes.
Sagi.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 12+ messages in thread
* RE: [PATCH RFC 1/2] IB/core: Introduce Fast Indirect Memory Registration verbs API
[not found] ` <543B85F7.1060000-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2014-10-13 14:41 ` Steve Wise
0 siblings, 0 replies; 12+ messages in thread
From: Steve Wise @ 2014-10-13 14:41 UTC (permalink / raw)
To: 'Sagi Grimberg', 'Sagi Grimberg',
linux-rdma-u79uwXL29TY76Z2rM5mHXA
Cc: bvanassche-HInyCGIudOg, roland-DgEjT+Ai2ygdnm+yROfE0A,
eli-VPRAkNaXOzVWk0Htik3J/w, ogerlitz-VPRAkNaXOzVWk0Htik3J/w,
oren-VPRAkNaXOzVWk0Htik3J/w, sean.hefty-ral2JQCrhuEAvxtiuMwx3w
> On 10/8/2014 4:54 PM, Steve Wise wrote:
> >>>> @@ -1056,6 +1067,14 @@ struct ib_send_wr {
> >>>> int access_flags;
> >>>> struct ib_sge *prot;
> >>>> } sig_handover;
> >>>> + struct {
> >>>> + u64 iova_start;
> >>>> + struct ib_indir_reg_list *indir_list;
> >>>> + unsigned int indir_list_len;
> >>>> + u64 length;
> >>>> + unsigned int access_flags;
> >>>> + u32 mkey;
> >>>> + } indir_reg;
> >>>
> >>> What is mkey? Shouldn't this be an rkey?
> >>
> >> mkey means memory key. I can change it to rkey if that
> >> is clearer.
> >
> > Is it valid to use an lkey here? Or is an rkey required? If an rkey is required, then I'd say it is clearer to name it rkey
(and
> > that aligns with the fastreg struct).
> >
>
> It is valid. The memory key depends on the use case.
> In case a client want to send an rkey to a peer, it will register using
> rkey. In case a server wants to transfer data from it's local buffer
> it will register using lkey.
>
> So I didn't impose a specific key here - this is why I chose mkey.
>
> I can modify it to rkey to mimic the well known fastreg, but its not
> a must.
>
If both local-only and local/remote are valid, then I agree mkey is good. I was thinking an application wouldn't need this API for
local-only registrations; it could just use the local dma lkey and a bus address in the sge. But perhaps the indirect fastreg
allows a deeper sgl than is supported by providers via the SEND/READ/WRITE/RECV work requests...
Steve.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH RFC 1/2] IB/core: Introduce Fast Indirect Memory Registration verbs API
[not found] ` <1412693281-6161-2-git-send-email-sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-10-07 18:12 ` Steve Wise
@ 2014-10-14 5:40 ` Bart Van Assche
[not found] ` <543CB76B.7020208-HInyCGIudOg@public.gmane.org>
1 sibling, 1 reply; 12+ messages in thread
From: Bart Van Assche @ 2014-10-14 5:40 UTC (permalink / raw)
To: Sagi Grimberg, linux-rdma-u79uwXL29TY76Z2rM5mHXA
Cc: roland-DgEjT+Ai2ygdnm+yROfE0A, eli-VPRAkNaXOzVWk0Htik3J/w,
ogerlitz-VPRAkNaXOzVWk0Htik3J/w, oren-VPRAkNaXOzVWk0Htik3J/w,
sean.hefty-ral2JQCrhuEAvxtiuMwx3w
On 10/07/14 16:48, Sagi Grimberg wrote:
> In order to support that we provide the user with an interface
> to pass a scattered list of buffers to the IB core layer called
> ib_indir_reg_list and provide the a new send work request opcode
> called IB_WR_REG_INDIR_MR. We extend wr union with a new type of
> memory registration called indir_reg where the user can place the
> relevant information to perform such a memory registration.
>
> The verbs user is expected to perform these steps:
> 0. Make sure that the device supports Indirect memory registration via
> ib_device_cap_flag IB_DEVICE_INDIR_REGISTRATION and make sure
> that ib_device_attr max_indir_reg_mr_list_len suffice for the
> expected scatterlist length
>
> 1. Allocate a memory region with IB_MR_INDIRECT_REG creation flag
> This is done via ib_create_mr() with mr_init_attr.flags = IB_MR_INDIRECT_REG
>
> 2. Allocate an ib_indir_reg_list structure to hold the scattered buffers
> pointers. This is done via new ib_alloc_indir_reg_list() verb
>
> 3. Populate the scattered buffers in ib_indir_reg_list.sg_list
>
> 4. Post a work request with a new opcode IB_WR_REG_INDIR_MR and
> provide the populated ib_indir_reg_list
>
> 5. Perform data transfer
>
> 6. Get completion of kind IB_WC_REG_INDIR_MR (if requested)
>
> 7. Free indirect MR and ib_indir_reg_list via
> ib_destroy_mr() and ib_free_indir_reg_list()
Hello Sagi,
Is there documentation available somewhere about the order in which an
HCA must execute an indirect memory registration request relative to
other work requests, similar to the "Work Request Operation Ordering"
table in the InfiniBand specification ? I think such documentation is
needed to ensure consistent behavior across HCA models.
Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH RFC 1/2] IB/core: Introduce Fast Indirect Memory Registration verbs API
[not found] ` <543CB76B.7020208-HInyCGIudOg@public.gmane.org>
@ 2014-10-19 19:01 ` Sagi Grimberg
[not found] ` <54440A7E.200-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
0 siblings, 1 reply; 12+ messages in thread
From: Sagi Grimberg @ 2014-10-19 19:01 UTC (permalink / raw)
To: Bart Van Assche, Sagi Grimberg, linux-rdma-u79uwXL29TY76Z2rM5mHXA
Cc: roland-DgEjT+Ai2ygdnm+yROfE0A, eli-VPRAkNaXOzVWk0Htik3J/w,
ogerlitz-VPRAkNaXOzVWk0Htik3J/w, oren-VPRAkNaXOzVWk0Htik3J/w,
sean.hefty-ral2JQCrhuEAvxtiuMwx3w, Chuck Lever, Steve Wise
On 10/14/2014 8:40 AM, Bart Van Assche wrote:
> On 10/07/14 16:48, Sagi Grimberg wrote:
>> In order to support that we provide the user with an interface
>> to pass a scattered list of buffers to the IB core layer called
>> ib_indir_reg_list and provide the a new send work request opcode
>> called IB_WR_REG_INDIR_MR. We extend wr union with a new type of
>> memory registration called indir_reg where the user can place the
>> relevant information to perform such a memory registration.
>>
>> The verbs user is expected to perform these steps:
>> 0. Make sure that the device supports Indirect memory registration via
>> ib_device_cap_flag IB_DEVICE_INDIR_REGISTRATION and make sure
>> that ib_device_attr max_indir_reg_mr_list_len suffice for the
>> expected scatterlist length
>>
>> 1. Allocate a memory region with IB_MR_INDIRECT_REG creation flag
>> This is done via ib_create_mr() with mr_init_attr.flags =
>> IB_MR_INDIRECT_REG
>>
>> 2. Allocate an ib_indir_reg_list structure to hold the scattered buffers
>> pointers. This is done via new ib_alloc_indir_reg_list() verb
>>
>> 3. Populate the scattered buffers in ib_indir_reg_list.sg_list
>>
>> 4. Post a work request with a new opcode IB_WR_REG_INDIR_MR and
>> provide the populated ib_indir_reg_list
>>
>> 5. Perform data transfer
>>
>> 6. Get completion of kind IB_WC_REG_INDIR_MR (if requested)
>>
>> 7. Free indirect MR and ib_indir_reg_list via
>> ib_destroy_mr() and ib_free_indir_reg_list()
>
> Hello Sagi,
>
> Is there documentation available somewhere about the order in which an
> HCA must execute an indirect memory registration request relative to
> other work requests, similar to the "Work Request Operation Ordering"
> table in the InfiniBand specification ? I think such documentation is
> needed to ensure consistent behavior across HCA models.
>
So basically Indirect registration request generalizes fast registration
work request, so it naturally complies with the same operation ordering
specification as fast memory registration operations.
Does it make sense to add some form of
"Documentation/infiniband/registration_ordering_rules.txt"? This should
probably include bind_mw, fastreg, indirect_reg, local_inv..
I'd like to hear more opinions here before I add it...
Roland, Sean, Steve, Chuck, Or?
Sagi.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 12+ messages in thread
* RE: [PATCH RFC 1/2] IB/core: Introduce Fast Indirect Memory Registration verbs API
[not found] ` <54440A7E.200-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2014-10-20 14:54 ` Steve Wise
0 siblings, 0 replies; 12+ messages in thread
From: Steve Wise @ 2014-10-20 14:54 UTC (permalink / raw)
To: 'Sagi Grimberg', 'Bart Van Assche',
'Sagi Grimberg', linux-rdma-u79uwXL29TY76Z2rM5mHXA
Cc: roland-DgEjT+Ai2ygdnm+yROfE0A, eli-VPRAkNaXOzVWk0Htik3J/w,
ogerlitz-VPRAkNaXOzVWk0Htik3J/w, oren-VPRAkNaXOzVWk0Htik3J/w,
sean.hefty-ral2JQCrhuEAvxtiuMwx3w, 'Chuck Lever'
> -----Original Message-----
> From: linux-rdma-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org [mailto:linux-rdma-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org] On Behalf Of Sagi Grimberg
> Sent: Sunday, October 19, 2014 2:01 PM
> To: Bart Van Assche; Sagi Grimberg; linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> Cc: roland-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org; eli-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org; ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org; oren-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org; sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org; Chuck Lever; Steve
> Wise
> Subject: Re: [PATCH RFC 1/2] IB/core: Introduce Fast Indirect Memory Registration verbs API
>
> On 10/14/2014 8:40 AM, Bart Van Assche wrote:
> > On 10/07/14 16:48, Sagi Grimberg wrote:
> >> In order to support that we provide the user with an interface
> >> to pass a scattered list of buffers to the IB core layer called
> >> ib_indir_reg_list and provide the a new send work request opcode
> >> called IB_WR_REG_INDIR_MR. We extend wr union with a new type of
> >> memory registration called indir_reg where the user can place the
> >> relevant information to perform such a memory registration.
> >>
> >> The verbs user is expected to perform these steps:
> >> 0. Make sure that the device supports Indirect memory registration via
> >> ib_device_cap_flag IB_DEVICE_INDIR_REGISTRATION and make sure
> >> that ib_device_attr max_indir_reg_mr_list_len suffice for the
> >> expected scatterlist length
> >>
> >> 1. Allocate a memory region with IB_MR_INDIRECT_REG creation flag
> >> This is done via ib_create_mr() with mr_init_attr.flags =
> >> IB_MR_INDIRECT_REG
> >>
> >> 2. Allocate an ib_indir_reg_list structure to hold the scattered buffers
> >> pointers. This is done via new ib_alloc_indir_reg_list() verb
> >>
> >> 3. Populate the scattered buffers in ib_indir_reg_list.sg_list
> >>
> >> 4. Post a work request with a new opcode IB_WR_REG_INDIR_MR and
> >> provide the populated ib_indir_reg_list
> >>
> >> 5. Perform data transfer
> >>
> >> 6. Get completion of kind IB_WC_REG_INDIR_MR (if requested)
> >>
> >> 7. Free indirect MR and ib_indir_reg_list via
> >> ib_destroy_mr() and ib_free_indir_reg_list()
> >
> > Hello Sagi,
> >
> > Is there documentation available somewhere about the order in which an
> > HCA must execute an indirect memory registration request relative to
> > other work requests, similar to the "Work Request Operation Ordering"
> > table in the InfiniBand specification ? I think such documentation is
> > needed to ensure consistent behavior across HCA models.
> >
>
> So basically Indirect registration request generalizes fast registration
> work request, so it naturally complies with the same operation ordering
> specification as fast memory registration operations.
>
> Does it make sense to add some form of
> "Documentation/infiniband/registration_ordering_rules.txt"? This should
> probably include bind_mw, fastreg, indirect_reg, local_inv..
>
> I'd like to hear more opinions here before I add it...
> Roland, Sean, Steve, Chuck, Or?
>
> Sagi.
I wouldn't replicate the IB and IW specs in Documentation/infiniband/. Perhaps just something referencing the specs and then
saying the indirect registration adheres exactly to the fast registration rules?
Steve
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2014-10-20 14:54 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2014-10-09 20:13 [PATCH RFC 1/2] IB/core: Introduce Fast Indirect Memory Registration verbs API Or Gerlitz
[not found] ` <CAJ3xEMjdnNNbhRC0T_=hmRedwJFvSR9r-JccLZ2m0zaece5OQQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2014-10-13 8:10 ` Sagi Grimberg
2014-10-13 8:11 ` Sagi Grimberg
-- strict thread matches above, loose matches on Subject: below --
2014-10-07 14:47 [PATCH RFC 0/2] Indirect Fast Memory registration support Sagi Grimberg
[not found] ` <1412693281-6161-1-git-send-email-sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-10-07 14:48 ` [PATCH RFC 1/2] IB/core: Introduce Fast Indirect Memory Registration verbs API Sagi Grimberg
[not found] ` <1412693281-6161-2-git-send-email-sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-10-07 18:12 ` Steve Wise
[not found] ` <54342D0C.6050103-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2014-10-08 5:48 ` Sagi Grimberg
[not found] ` <5434D037.4040208-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2014-10-08 13:54 ` Steve Wise
2014-10-13 7:57 ` Sagi Grimberg
[not found] ` <543B85F7.1060000-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2014-10-13 14:41 ` Steve Wise
2014-10-14 5:40 ` Bart Van Assche
[not found] ` <543CB76B.7020208-HInyCGIudOg@public.gmane.org>
2014-10-19 19:01 ` Sagi Grimberg
[not found] ` <54440A7E.200-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2014-10-20 14:54 ` Steve Wise
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.