public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
From: Yann Droneaud <ydroneaud-RlY5vtjFyJ3QT0dZR+AlfA@public.gmane.org>
To: Yishai Hadas <yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Cc: roland-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	raindel-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org
Subject: Re: [PATCH for-next 5/9] IB/core: Invalidation support for peer memory
Date: Wed, 01 Oct 2014 18:25:04 +0200	[thread overview]
Message-ID: <1412180704.4380.40.camel@localhost.localdomain> (raw)
In-Reply-To: <1412176717-11979-6-git-send-email-yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

Le mercredi 01 octobre 2014 à 18:18 +0300, Yishai Hadas a écrit :
> Adds the required functionality to invalidate a given peer
> memory represented by some core context.
> 
> Each umem that was built over peer memory and supports invalidation has
> an invalidation context assigned to it, with the data required to
> manage it. Once the peer calls the invalidation callback, the following
> actions are taken:
> 
> 1) Take the lock on the peer client to sync with any in-flight dereg_mr
> on that memory.
> 2) Once the lock is taken, look up the ticket id to find the matching
> core context.
> 3) If a match is found, call the umem invalidation function; otherwise
> the call simply returns.
> 
> Some notes:
> 1) As the peer invalidate callback is defined to be blocking, it must
> return only once the pages are no longer going to be accessed. For that
> reason ib_invalidate_peer_memory waits for a completion event in
> case another in-flight call is coming in as part of dereg_mr.
> 
> 2) The peer memory API assumes that a lock might be taken by a peer
> client to protect its memory operations. Specifically, its invalidate
> callback might be called under that lock, which may lead to an AB/BA
> deadlock if the IB core calls the get/put pages APIs with the IB core
> peer's lock taken. For that reason, ib_umem_activate_invalidation_notifier
> takes the lock and then checks for any in-flight invalidation state
> before activating the notifier.
> 
> 3) Once a peer client declares as part of its registration that it may
> require invalidation support, it can't be the owner of a memory range
> that doesn't support it.
> 
> Signed-off-by: Yishai Hadas <yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> Signed-off-by: Shachar Raindel <raindel-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> ---
>  drivers/infiniband/core/peer_mem.c |   86 +++++++++++++++++++++++++++++++++---
>  drivers/infiniband/core/umem.c     |   51 ++++++++++++++++++---
>  include/rdma/ib_peer_mem.h         |    4 +-
>  include/rdma/ib_umem.h             |   17 +++++++
>  4 files changed, 143 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/infiniband/core/peer_mem.c b/drivers/infiniband/core/peer_mem.c
> index ad10672..d6bd192 100644
> --- a/drivers/infiniband/core/peer_mem.c
> +++ b/drivers/infiniband/core/peer_mem.c
> @@ -38,10 +38,57 @@ static DEFINE_MUTEX(peer_memory_mutex);
>  static LIST_HEAD(peer_memory_list);
>  static int num_registered_peers;
>  
> -static int ib_invalidate_peer_memory(void *reg_handle, void *core_context)
> +/* Caller should be holding the peer client lock, ib_peer_client->lock */
> +static struct core_ticket *ib_peer_search_context(struct ib_peer_memory_client *ib_peer_client,
> +						  unsigned long key)
> +{
> +	struct core_ticket *core_ticket;
> +
> +	list_for_each_entry(core_ticket, &ib_peer_client->core_ticket_list,
> +			    ticket_list) {
> +		if (core_ticket->key == key)
> +			return core_ticket;
> +	}
>  
> +	return NULL;
> +}
> +

You now have two functions that look up a key in the ticket list:
see peer_ticket_exists().

> +static int ib_invalidate_peer_memory(void *reg_handle, void *core_context)
>  {
> -	return -ENOSYS;
> +	struct ib_peer_memory_client *ib_peer_client =
> +		(struct ib_peer_memory_client *)reg_handle;
> +	struct invalidation_ctx *invalidation_ctx;
> +	struct core_ticket *core_ticket;
> +	int need_unlock = 1;
> +
> +	mutex_lock(&ib_peer_client->lock);
> +	core_ticket = ib_peer_search_context(ib_peer_client,
> +					     (unsigned long)core_context);
> +	if (!core_ticket)
> +		goto out;
> +
> +	invalidation_ctx = (struct invalidation_ctx *)core_ticket->context;
> +	/* If context is not ready yet, mark it to be invalidated */
> +	if (!invalidation_ctx->func) {
> +		invalidation_ctx->peer_invalidated = 1;
> +		goto out;
> +	}
> +	invalidation_ctx->func(invalidation_ctx->cookie,
> +					invalidation_ctx->umem, 0, 0);
> +	if (invalidation_ctx->inflight_invalidation) {
> +		/* init the completion to wait on before letting other thread to run */
> +		init_completion(&invalidation_ctx->comp);
> +		mutex_unlock(&ib_peer_client->lock);
> +		need_unlock = 0;
> +		wait_for_completion(&invalidation_ctx->comp);
> +	}
> +
> +	kfree(invalidation_ctx);
> +out:
> +	if (need_unlock)
> +		mutex_unlock(&ib_peer_client->lock);
> +
> +	return 0;
>  }
>  
>  static int peer_ticket_exists(struct ib_peer_memory_client *ib_peer_client,
> @@ -168,11 +215,30 @@ int ib_peer_create_invalidation_ctx(struct ib_peer_memory_client *ib_peer_mem, s
>  void ib_peer_destroy_invalidation_ctx(struct ib_peer_memory_client *ib_peer_mem,
>  				      struct invalidation_ctx *invalidation_ctx)
>  {
> -	mutex_lock(&ib_peer_mem->lock);
> -	ib_peer_remove_context(ib_peer_mem, invalidation_ctx->context_ticket);
> -	mutex_unlock(&ib_peer_mem->lock);
> +	int peer_callback;
> +	int inflight_invalidation;
>  
> -	kfree(invalidation_ctx);
> +	/* If we are under peer callback lock was already taken.*/
> +	if (!invalidation_ctx->peer_callback)
> +		mutex_lock(&ib_peer_mem->lock);
> +	ib_peer_remove_context(ib_peer_mem, invalidation_ctx->context_ticket);
> +	/* make sure to check inflight flag after took the lock and remove from tree.
> +	 * in addition, from that point using local variables for peer_callback and
> +	 * inflight_invalidation as after the complete invalidation_ctx can't be accessed
> +	 * any more as it may be freed by the callback.
> +	 */
> +	peer_callback = invalidation_ctx->peer_callback;
> +	inflight_invalidation = invalidation_ctx->inflight_invalidation;
> +	if (inflight_invalidation)
> +		complete(&invalidation_ctx->comp);
> +
> +	/* On peer callback lock is handled externally */
> +	if (!peer_callback)
> +		mutex_unlock(&ib_peer_mem->lock);
> +
> +	/* in case under callback context or callback is pending let it free the invalidation context */
> +	if (!peer_callback && !inflight_invalidation)
> +		kfree(invalidation_ctx);
>  }
>  static int ib_memory_peer_check_mandatory(const struct peer_memory_client
>  						     *peer_client)
> @@ -261,13 +327,19 @@ void ib_unregister_peer_memory_client(void *reg_handle)
>  EXPORT_SYMBOL(ib_unregister_peer_memory_client);
>  
>  struct ib_peer_memory_client *ib_get_peer_client(struct ib_ucontext *context, unsigned long addr,
> -						 size_t size, void **peer_client_context)
> +						 size_t size, unsigned long peer_mem_flags,
> +						 void **peer_client_context)
>  {
>  	struct ib_peer_memory_client *ib_peer_client;
>  	int ret;
>  
>  	mutex_lock(&peer_memory_mutex);
>  	list_for_each_entry(ib_peer_client, &peer_memory_list, core_peer_list) {
> +		/* In case peer requires invalidation it can't own memory which doesn't support it */
> +		if (ib_peer_client->invalidation_required &&
> +		    (!(peer_mem_flags & IB_PEER_MEM_INVAL_SUPP)))
> +			continue;
> +
>  		ret = ib_peer_client->peer_mem->acquire(addr, size,
>  						   context->peer_mem_private_data,
>  						   context->peer_mem_name,
> diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
> index 0de9916..51f32a1 100644
> --- a/drivers/infiniband/core/umem.c
> +++ b/drivers/infiniband/core/umem.c
> @@ -44,12 +44,19 @@
>  
>  static struct ib_umem *peer_umem_get(struct ib_peer_memory_client *ib_peer_mem,
>  				     struct ib_umem *umem, unsigned long addr,
> -				     int dmasync)
> +				     int dmasync, unsigned long peer_mem_flags)
>  {
>  	int ret;
>  	const struct peer_memory_client *peer_mem = ib_peer_mem->peer_mem;
> +	struct invalidation_ctx *invalidation_ctx = NULL;
>  
>  	umem->ib_peer_mem = ib_peer_mem;
> +	if (peer_mem_flags & IB_PEER_MEM_INVAL_SUPP) {
> +		ret = ib_peer_create_invalidation_ctx(ib_peer_mem, umem, &invalidation_ctx);
> +		if (ret)
> +			goto end;
> +	}
> +
>  	/*
>  	 * We always request write permissions to the pages, to force breaking of any CoW
>  	 * during the registration of the MR. For read-only MRs we use the "force" flag to
> @@ -60,7 +67,9 @@ static struct ib_umem *peer_umem_get(struct ib_peer_memory_client *ib_peer_mem,
>  				  1, !umem->writable,
>  				  &umem->sg_head,
>  				  umem->peer_mem_client_context,
> -				  NULL);
> +				  invalidation_ctx ?
> +				  (void *)invalidation_ctx->context_ticket : NULL);
> +

NULL may be a valid "ticket" once converted to unsigned long and looked
up in the ticket list, so it cannot reliably mean "no invalidation
context" here.

>  	if (ret)
>  		goto out;
>  
> @@ -84,6 +93,9 @@ put_pages:
>  	peer_mem->put_pages(umem->peer_mem_client_context,
>  					&umem->sg_head);
>  out:
> +	if (invalidation_ctx)
> +		ib_peer_destroy_invalidation_ctx(ib_peer_mem, invalidation_ctx);
> +end:
>  	ib_put_peer_client(ib_peer_mem, umem->peer_mem_client_context);
>  	kfree(umem);
>  	return ERR_PTR(ret);
> @@ -91,15 +103,19 @@ out:
>  
>  static void peer_umem_release(struct ib_umem *umem)
>  {
> -	const struct peer_memory_client *peer_mem =
> -				umem->ib_peer_mem->peer_mem;
> +	struct ib_peer_memory_client *ib_peer_mem = umem->ib_peer_mem;
> +	const struct peer_memory_client *peer_mem = ib_peer_mem->peer_mem;
> +	struct invalidation_ctx *invalidation_ctx = umem->invalidation_ctx;
> +
> +	if (invalidation_ctx)
> +		ib_peer_destroy_invalidation_ctx(ib_peer_mem, invalidation_ctx);
>  
>  	peer_mem->dma_unmap(&umem->sg_head,
>  			    umem->peer_mem_client_context,
>  			    umem->context->device->dma_device);
>  	peer_mem->put_pages(&umem->sg_head,
>  			    umem->peer_mem_client_context);
> -	ib_put_peer_client(umem->ib_peer_mem, umem->peer_mem_client_context);
> +	ib_put_peer_client(ib_peer_mem, umem->peer_mem_client_context);
>  	kfree(umem);
>  }
>  
> @@ -127,6 +143,27 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
>  
>  }
>  
> +int ib_umem_activate_invalidation_notifier(struct ib_umem *umem,
> +					   umem_invalidate_func_t func,
> +					   void *cookie)
> +{
> +	struct invalidation_ctx *invalidation_ctx = umem->invalidation_ctx;
> +	int ret = 0;
> +
> +	mutex_lock(&umem->ib_peer_mem->lock);
> +	if (invalidation_ctx->peer_invalidated) {
> +		pr_err("ib_umem_activate_invalidation_notifier: pages were invalidated by peer\n");
> +		ret = -EINVAL;
> +		goto end;
> +	}
> +	invalidation_ctx->func = func;
> +	invalidation_ctx->cookie = cookie;
> +	/* from that point any pending invalidations can be called */
> +end:
> +	mutex_unlock(&umem->ib_peer_mem->lock);
> +	return ret;
> +}
> +EXPORT_SYMBOL(ib_umem_activate_invalidation_notifier);
>  /**
>   * ib_umem_get - Pin and DMA map userspace memory.
>   * @context: userspace context to pin memory for
> @@ -179,11 +216,11 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
>  	if (peer_mem_flags & IB_PEER_MEM_ALLOW) {
>  		struct ib_peer_memory_client *peer_mem_client;
>  
> -		peer_mem_client =  ib_get_peer_client(context, addr, size,
> +		peer_mem_client =  ib_get_peer_client(context, addr, size, peer_mem_flags,
>  						      &umem->peer_mem_client_context);
>  		if (peer_mem_client)
>  			return peer_umem_get(peer_mem_client, umem, addr,
> -					dmasync);
> +					dmasync, peer_mem_flags);
>  	}
>  
>  	/* We assume the memory is from hugetlb until proved otherwise */
> diff --git a/include/rdma/ib_peer_mem.h b/include/rdma/ib_peer_mem.h
> index d3fbb50..8f67aaf 100644
> --- a/include/rdma/ib_peer_mem.h
> +++ b/include/rdma/ib_peer_mem.h
> @@ -22,6 +22,7 @@ struct ib_peer_memory_client {
>  
>  enum ib_peer_mem_flags {
>  	IB_PEER_MEM_ALLOW	= 1,
> +	IB_PEER_MEM_INVAL_SUPP = (1<<1),
>  };
>  
>  struct core_ticket {
> @@ -31,7 +32,8 @@ struct core_ticket {
>  };
>  
>  struct ib_peer_memory_client *ib_get_peer_client(struct ib_ucontext *context, unsigned long addr,
> -						 size_t size, void **peer_client_context);
> +						 size_t size, unsigned long peer_mem_flags,
> +						 void **peer_client_context);
>  
>  void ib_put_peer_client(struct ib_peer_memory_client *ib_peer_client,
>  			void *peer_client_context);
> diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
> index 4b8a042..83d6059 100644
> --- a/include/rdma/ib_umem.h
> +++ b/include/rdma/ib_umem.h
> @@ -39,10 +39,21 @@
>  #include <rdma/ib_peer_mem.h>
>  
>  struct ib_ucontext;
> +struct ib_umem;
> +
> +typedef void (*umem_invalidate_func_t)(void *invalidation_cookie,
> +					    struct ib_umem *umem,
> +					    unsigned long addr, size_t size);
>  
>  struct invalidation_ctx {
>  	struct ib_umem *umem;
>  	unsigned long context_ticket;
> +	umem_invalidate_func_t func;
> +	void *cookie;
> +	int peer_callback;
> +	int inflight_invalidation;
> +	int peer_invalidated;
> +	struct completion comp;
>  };
>  
>  struct ib_umem {
> @@ -73,6 +84,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
>  			       unsigned long peer_mem_flags);
>  void ib_umem_release(struct ib_umem *umem);
>  int ib_umem_page_count(struct ib_umem *umem);
> +int  ib_umem_activate_invalidation_notifier(struct ib_umem *umem,
> +					    umem_invalidate_func_t func,
> +					    void *cookie);
>  
>  #else /* CONFIG_INFINIBAND_USER_MEM */
>  
> @@ -87,6 +101,9 @@ static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context,
>  static inline void ib_umem_release(struct ib_umem *umem) { }
>  static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; }
>  
> +static inline int ib_umem_activate_invalidation_notifier(struct ib_umem *umem,
> +							 umem_invalidate_func_t func,
> +							 void *cookie) {return 0; }
>  #endif /* CONFIG_INFINIBAND_USER_MEM */
>  
>  #endif /* IB_UMEM_H */


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2014-10-01 16:25 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-10-01 15:18 [PATCH for-next 0/9] Peer-Direct support Yishai Hadas
     [not found] ` <1412176717-11979-1-git-send-email-yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-10-01 15:18   ` [PATCH for-next 1/9] IB/core: Introduce peer client interface Yishai Hadas
     [not found]     ` <1412176717-11979-2-git-send-email-yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-10-01 16:34       ` Bart Van Assche
     [not found]         ` <542C2D23.30508-HInyCGIudOg@public.gmane.org>
2014-10-02 14:37           ` Yishai Hadas
2014-10-01 15:18   ` [PATCH for-next 2/9] IB/core: Get/put peer memory client Yishai Hadas
2014-10-01 15:18   ` [PATCH for-next 3/9] IB/core: Umem tunneling peer memory APIs Yishai Hadas
2014-10-01 15:18   ` [PATCH for-next 4/9] IB/core: Infrastructure to manage peer core context Yishai Hadas
     [not found]     ` <1412176717-11979-5-git-send-email-yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-10-01 22:24       ` Yann Droneaud
     [not found]         ` <1412202261.28184.0.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2014-10-02 15:02           ` Shachar Raindel
2014-10-01 15:18   ` [PATCH for-next 5/9] IB/core: Invalidation support for peer memory Yishai Hadas
     [not found]     ` <1412176717-11979-6-git-send-email-yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-10-01 16:25       ` Yann Droneaud [this message]
     [not found]         ` <1412180704.4380.40.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2014-10-02 15:05           ` Shachar Raindel
2014-10-01 15:18   ` [PATCH for-next 6/9] IB/core: Sysfs " Yishai Hadas
     [not found]     ` <1412176717-11979-7-git-send-email-yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-10-02 16:41       ` Jason Gunthorpe
2014-10-01 15:18   ` [PATCH for-next 7/9] IB/mlx4: Invalidation support for MR over " Yishai Hadas
2014-10-01 15:18   ` [PATCH for-next 8/9] IB/mlx5: " Yishai Hadas
2014-10-01 15:18   ` [PATCH for-next 9/9] Samples: Peer memory client example Yishai Hadas
     [not found]     ` <1412176717-11979-10-git-send-email-yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-10-01 17:16       ` Hefty, Sean
     [not found]         ` <1828884A29C6694DAF28B7E6B8A8237399DE5096-P5GAC/sN6hkd3b2yrw5b5LfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2014-10-02  3:14           ` Jason Gunthorpe
     [not found]             ` <20141002031441.GA10386-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2014-10-02 13:35               ` Shachar Raindel
2014-10-07 16:57           ` Davis, Arlin R
     [not found]             ` <54347E5A035A054EAE9D05927FB467F977CC9244-P5GAC/sN6hmkrb+BlOpmy7fspsVTdybXVpNB7YpNyf8@public.gmane.org>
2014-10-07 17:09               ` Hefty, Sean

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1412180704.4380.40.camel@localhost.localdomain \
    --to=ydroneaud-rly5vtjfyj3qt0dzr+alfa@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=raindel-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org \
    --cc=roland-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org \
    --cc=yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox