linux-rdma.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
To: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Cc: bvanassche-HInyCGIudOg@public.gmane.org,
	roland-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org,
	eli-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org,
	ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org,
	oren-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org
Subject: Re: [PATCH RFC 1/2] IB/core: Introduce Fast Indirect Memory Registration verbs API
Date: Tue, 07 Oct 2014 13:12:28 -0500	[thread overview]
Message-ID: <54342D0C.6050103@opengridcomputing.com> (raw)
In-Reply-To: <1412693281-6161-2-git-send-email-sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

On 10/7/2014 9:48 AM, Sagi Grimberg wrote:
> In order to support that we provide the user with an interface
> to pass a scattered list of buffers to the IB core layer called
> ib_indir_reg_list and provide a new send work request opcode
> called IB_WR_REG_INDIR_MR. We extend wr union with a new type of
> memory registration called indir_reg where the user can place the
> relevant information to perform such a memory registration.
>
> The verbs user is expected to perform these steps:
> 0. Make sure that the device supports Indirect memory registration via
>     ib_device_cap_flag IB_DEVICE_INDIR_REGISTRATION and make sure
>     that ib_device_attr max_indir_reg_mr_list_len suffices for the
>     expected scatterlist length
>
> 1. Allocate a memory region with IB_MR_INDIRECT_REG creation flag
>     This is done via ib_create_mr() with mr_init_attr.flags = IB_MR_INDIRECT_REG
>
> 2. Allocate an ib_indir_reg_list structure to hold the scattered buffers
>     pointers. This is done via new ib_alloc_indir_reg_list() verb
>
> 3. Populate the scattered buffers in ib_indir_reg_list.sg_list
>
> 4. Post a work request with a new opcode IB_WR_REG_INDIR_MR and
>     provide the populated ib_indir_reg_list
>
> 5. Perform data transfer
>
> 6. Get completion of kind IB_WC_REG_INDIR_MR (if requested)
>
> 7. Free indirect MR and ib_indir_reg_list via
>     ib_destroy_mr() and ib_free_indir_reg_list()
>
> Signed-off-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> ---
>   drivers/infiniband/core/verbs.c |   29 ++++++++++++++++++++
>   include/rdma/ib_verbs.h         |   55 +++++++++++++++++++++++++++++++++++++-
>   2 files changed, 82 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
> index c2b89cc..0364551 100644
> --- a/drivers/infiniband/core/verbs.c
> +++ b/drivers/infiniband/core/verbs.c
> @@ -1445,3 +1445,32 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
>   		mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS;
>   }
>   EXPORT_SYMBOL(ib_check_mr_status);
> +
> +struct ib_indir_reg_list *
> +ib_alloc_indir_reg_list(struct ib_device *device,
> +			unsigned int max_indir_list_len)
> +{
> +	struct ib_indir_reg_list *indir_list;
> +
> +	if (!device->alloc_indir_reg_list)
> +		return ERR_PTR(-ENOSYS);
> +
> +	indir_list = device->alloc_indir_reg_list(device,
> +						  max_indir_list_len);
> +	if (!IS_ERR(indir_list)) {
> +		indir_list->device = device;
> +		indir_list->max_indir_list_len = max_indir_list_len;
> +	}
> +
> +	return indir_list;
> +}
> +EXPORT_SYMBOL(ib_alloc_indir_reg_list);
> +
> +void
> +ib_free_indir_reg_list(struct ib_device *device,
> +		       struct ib_indir_reg_list *indir_list)
> +{
> +	if (device->free_indir_reg_list)
> +		device->free_indir_reg_list(device, indir_list);
> +}
> +EXPORT_SYMBOL(ib_free_indir_reg_list);
> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> index 470a011..f5fe53c 100644
> --- a/include/rdma/ib_verbs.h
> +++ b/include/rdma/ib_verbs.h
> @@ -123,7 +123,8 @@ enum ib_device_cap_flags {
>   	IB_DEVICE_MEM_WINDOW_TYPE_2A	= (1<<23),
>   	IB_DEVICE_MEM_WINDOW_TYPE_2B	= (1<<24),
>   	IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
> -	IB_DEVICE_SIGNATURE_HANDOVER	= (1<<30)
> +	IB_DEVICE_SIGNATURE_HANDOVER	= (1<<30),
> +	IB_DEVICE_INDIR_REGISTRATION	= (1<<31)
>   };
>   
>   enum ib_signature_prot_cap {
> @@ -182,6 +183,7 @@ struct ib_device_attr {
>   	int			max_srq_wr;
>   	int			max_srq_sge;
>   	unsigned int		max_fast_reg_page_list_len;
> +	unsigned int		max_indir_reg_mr_list_len;
>   	u16			max_pkeys;
>   	u8			local_ca_ack_delay;
>   	int			sig_prot_cap;
> @@ -476,7 +478,8 @@ __attribute_const__ int ib_rate_to_mult(enum ib_rate rate);
>   __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate);
>   
>   enum ib_mr_create_flags {
> -	IB_MR_SIGNATURE_EN = 1,
> +	IB_MR_SIGNATURE_EN = 1 << 0,
> +	IB_MR_INDIRECT_REG = 1 << 1
>   };
>   
>   /**
> @@ -651,6 +654,7 @@ enum ib_wc_opcode {
>   	IB_WC_FAST_REG_MR,
>   	IB_WC_MASKED_COMP_SWAP,
>   	IB_WC_MASKED_FETCH_ADD,
> +	IB_WC_REG_INDIR_MR,
>   /*
>    * Set value of IB_WC_RECV so consumers can test if a completion is a
>    * receive by testing (opcode & IB_WC_RECV).
> @@ -945,6 +949,7 @@ enum ib_wr_opcode {
>   	IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
>   	IB_WR_BIND_MW,
>   	IB_WR_REG_SIG_MR,
> +	IB_WR_REG_INDIR_MR,
>   	/* reserve values for low level drivers' internal use.
>   	 * These values will not be used at all in the ib core layer.
>   	 */
> @@ -984,6 +989,12 @@ struct ib_fast_reg_page_list {
>   	unsigned int		max_page_list_len;
>   };
>   
> +struct ib_indir_reg_list {
> +	struct ib_device       *device;
> +	struct ib_sge          *sg_list;
> +	unsigned int		max_indir_list_len;
> +};
> +
>   /**
>    * struct ib_mw_bind_info - Parameters for a memory window bind operation.
>    * @mr: A memory region to bind the memory window to.
> @@ -1056,6 +1067,14 @@ struct ib_send_wr {
>   			int			access_flags;
>   			struct ib_sge	       *prot;
>   		} sig_handover;
> +		struct {
> +			u64				iova_start;
> +			struct ib_indir_reg_list       *indir_list;
> +			unsigned int			indir_list_len;
> +			u64				length;
> +			unsigned int			access_flags;
> +			u32				mkey;
> +		} indir_reg;

What is mkey?  Shouldn't this be an rkey?

>   	} wr;
>   	u32			xrc_remote_srq_num;	/* XRC TGT QPs only */
>   };
> @@ -1562,6 +1581,10 @@ struct ib_device {
>   	struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device,
>   								   int page_list_len);
>   	void			   (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list);
> +	struct ib_indir_reg_list * (*alloc_indir_reg_list)(struct ib_device *device,
> +							   unsigned int indir_list_len);
> +	void			   (*free_indir_reg_list)(struct ib_device *device,
> +							  struct ib_indir_reg_list *indir_list);
>   	int                        (*rereg_phys_mr)(struct ib_mr *mr,
>   						    int mr_rereg_mask,
>   						    struct ib_pd *pd,
> @@ -2460,6 +2483,34 @@ struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(
>   void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
>   
>   /**
> + * ib_alloc_indir_reg_list() - Allocates an indirect list array
> + * @device: ib device pointer
> + * @indir_list_len: size of the list array to be allocated
> + *
> + * Allocate a struct ib_indir_reg_list and a sg_list array
> + * that is at least indir_list_len in size. The actual size is
> + * returned in max_indir_list_len. The caller is responsible for
> + * initializing the contents of the sg_list array before posting
> + * a send work request with the IB_WR_REG_INDIR_MR opcode.
> + *
> + * The sg_list array entries should be set exactly the same way as
> + * the ib_send_wr sg_list {lkey, addr, length}.
> + */
> +struct ib_indir_reg_list *
> +ib_alloc_indir_reg_list(struct ib_device *device,
> +			unsigned int indir_list_len);
> +
> +/**
> + * ib_free_indir_reg_list() - Deallocates a previously allocated
> + *     indirect list array
> + * @device: ib device pointer
> + * @indir_list: pointer to be deallocated
> + */
> +void
> +ib_free_indir_reg_list(struct ib_device *device,
> +		       struct ib_indir_reg_list *indir_list);
> +
> +/**
>    * ib_update_fast_reg_key - updates the key portion of the fast_reg MR
>    *   R_Key and L_Key.
>    * @mr - struct ib_mr pointer to be updated.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2014-10-07 18:12 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-10-07 14:47 [PATCH RFC 0/2] Indirect Fast Memory registration support Sagi Grimberg
     [not found] ` <1412693281-6161-1-git-send-email-sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-10-07 14:48   ` [PATCH RFC 1/2] IB/core: Introduce Fast Indirect Memory Registration verbs API Sagi Grimberg
     [not found]     ` <1412693281-6161-2-git-send-email-sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-10-07 18:12       ` Steve Wise [this message]
     [not found]         ` <54342D0C.6050103-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2014-10-08  5:48           ` Sagi Grimberg
     [not found]             ` <5434D037.4040208-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2014-10-08 13:54               ` Steve Wise
2014-10-13  7:57                 ` Sagi Grimberg
     [not found]                   ` <543B85F7.1060000-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2014-10-13 14:41                     ` Steve Wise
2014-10-14  5:40       ` Bart Van Assche
     [not found]         ` <543CB76B.7020208-HInyCGIudOg@public.gmane.org>
2014-10-19 19:01           ` Sagi Grimberg
     [not found]             ` <54440A7E.200-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2014-10-20 14:54               ` Steve Wise
2014-10-07 14:48   ` [PATCH RFC 2/2] IB/mlx5: Implement Fast Indirect Memory Registration Feature Sagi Grimberg
     [not found]     ` <1412693281-6161-3-git-send-email-sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-10-12 19:39       ` Or Gerlitz
     [not found]         ` <543AD8DE.5060404-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-10-13  8:32           ` Sagi Grimberg
     [not found]             ` <543B8E09.6090606-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2014-10-13 12:57               ` Or Gerlitz
2014-10-13 13:00               ` Or Gerlitz
2014-10-21 13:12               ` Eli Cohen
2014-10-14  5:41       ` Bart Van Assche
     [not found]         ` <543CB79B.6050400-HInyCGIudOg@public.gmane.org>
2014-10-14 10:50           ` Sagi Grimberg
2014-10-19 19:34           ` Sagi Grimberg
     [not found]             ` <5444122C.6070804-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2014-10-20  7:46               ` Bart Van Assche
     [not found]                 ` <5444BDDC.1060507-HInyCGIudOg@public.gmane.org>
2014-10-21  9:32                   ` Sagi Grimberg
     [not found]                     ` <54462842.8080701-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2014-10-21 10:54                       ` Bart Van Assche
     [not found]                         ` <54463B4A.1070308-HInyCGIudOg@public.gmane.org>
2014-10-21 10:59                           ` Sagi Grimberg
2014-10-21 14:20               ` Eli Cohen
2014-10-21 14:30                 ` Sagi Grimberg
2014-10-08 11:06   ` [PATCH RFC 0/2] Indirect Fast Memory registration support Devesh Sharma
     [not found]     ` <EE7902D3F51F404C82415C4803930ACD40C4114B-DWYeeINJQrxExQ8dmkPuX0M9+F4ksjoh@public.gmane.org>
2014-10-13  8:01       ` Sagi Grimberg
2014-10-12 19:43   ` Or Gerlitz
     [not found]     ` <543AD9CD.80803-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-10-13  8:48       ` Bart Van Assche
     [not found]         ` <543B91E2.70206-HInyCGIudOg@public.gmane.org>
2014-10-13 11:18           ` Sagi Grimberg
     [not found]             ` <543BB4F4.8090203-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2014-10-13 14:51               ` Steve Wise
  -- strict thread matches above, loose matches on Subject: below --
2014-10-09 20:13 [PATCH RFC 1/2] IB/core: Introduce Fast Indirect Memory Registration verbs API Or Gerlitz
     [not found] ` <CAJ3xEMjdnNNbhRC0T_=hmRedwJFvSR9r-JccLZ2m0zaece5OQQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2014-10-13  8:10   ` Sagi Grimberg
2014-10-13  8:11   ` Sagi Grimberg

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=54342D0C.6050103@opengridcomputing.com \
    --to=swise-7bpotxp6k4+p2yhjcf5u+vpxobypeauw@public.gmane.org \
    --cc=bvanassche-HInyCGIudOg@public.gmane.org \
    --cc=eli-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org \
    --cc=oren-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org \
    --cc=roland-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org \
    --cc=sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org \
    --cc=sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).