All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yann Droneaud <ydroneaud-RlY5vtjFyJ3QT0dZR+AlfA@public.gmane.org>
To: Yishai Hadas <yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Cc: roland-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	raindel-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org
Subject: Re: [PATCH for-next 5/9] IB/core: Invalidation support for peer memory
Date: Wed, 01 Oct 2014 18:25:04 +0200	[thread overview]
Message-ID: <1412180704.4380.40.camel@localhost.localdomain> (raw)
In-Reply-To: <1412176717-11979-6-git-send-email-yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

Le mercredi 01 octobre 2014 à 18:18 +0300, Yishai Hadas a écrit :
> Adds the required functionality to invalidate a given peer
> memory represented by some core context.
> 
> Each umem that was built over peer memory and supports invalidation has
> some invalidation context assigned to it with the required data to
> manage, once peer will call the invalidation callback below actions are
> taken:
> 
> 1) Taking lock on peer client to sync with inflight dereg_mr on that
> memory.
> 2) Once lock is taken have a lookup for ticket id to find the matching
> core context.
> 3) In case found will call umem invalidation function, otherwise call is
> returned.
> 
> Some notes:
> 1) As peer invalidate callback defined to be blocking it must return
> just after that pages are not going to be accessed any more. For that
> reason ib_invalidate_peer_memory is waiting for a completion event in
> case there is other inflight call coming as part of dereg_mr.
> 
> 2) The peer memory API assumes that a lock might be taken by a peer
> client to protect its memory operations. Specifically, its invalidate
> callback might be called under that lock which may lead to an AB/BA
> dead-lock in case IB core will call get/put pages APIs with the IB core peer's lock taken,
> for that reason as part of  ib_umem_activate_invalidation_notifier lock is taken
> then checking for some inflight invalidation state before activating it.
> 
> 3) Once a peer client admits as part of its registration that it may
> require invalidation support, it can't be an owner of a memory range
> which doesn't support it.
> 
> Signed-off-by: Yishai Hadas <yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> Signed-off-by: Shachar Raindel <raindel-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> ---
>  drivers/infiniband/core/peer_mem.c |   86 +++++++++++++++++++++++++++++++++---
>  drivers/infiniband/core/umem.c     |   51 ++++++++++++++++++---
>  include/rdma/ib_peer_mem.h         |    4 +-
>  include/rdma/ib_umem.h             |   17 +++++++
>  4 files changed, 143 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/infiniband/core/peer_mem.c b/drivers/infiniband/core/peer_mem.c
> index ad10672..d6bd192 100644
> --- a/drivers/infiniband/core/peer_mem.c
> +++ b/drivers/infiniband/core/peer_mem.c
> @@ -38,10 +38,57 @@ static DEFINE_MUTEX(peer_memory_mutex);
>  static LIST_HEAD(peer_memory_list);
>  static int num_registered_peers;
>  
> -static int ib_invalidate_peer_memory(void *reg_handle, void *core_context)
> +/* Caller should be holding the peer client lock, ib_peer_client->lock */
> +static struct core_ticket *ib_peer_search_context(struct ib_peer_memory_client *ib_peer_client,
> +						  unsigned long key)
> +{
> +	struct core_ticket *core_ticket;
> +
> +	list_for_each_entry(core_ticket, &ib_peer_client->core_ticket_list,
> +			    ticket_list) {
> +		if (core_ticket->key == key)
> +			return core_ticket;
> +	}
>  
> +	return NULL;
> +}
> +

You have now two functions to lookup key in ticket list:
see peer_ticket_exists().

> +static int ib_invalidate_peer_memory(void *reg_handle, void *core_context)
>  {
> -	return -ENOSYS;
> +	struct ib_peer_memory_client *ib_peer_client =
> +		(struct ib_peer_memory_client *)reg_handle;
> +	struct invalidation_ctx *invalidation_ctx;
> +	struct core_ticket *core_ticket;
> +	int need_unlock = 1;
> +
> +	mutex_lock(&ib_peer_client->lock);
> +	core_ticket = ib_peer_search_context(ib_peer_client,
> +					     (unsigned long)core_context);
> +	if (!core_ticket)
> +		goto out;
> +
> +	invalidation_ctx = (struct invalidation_ctx *)core_ticket->context;
> +	/* If context is not ready yet, mark it to be invalidated */
> +	if (!invalidation_ctx->func) {
> +		invalidation_ctx->peer_invalidated = 1;
> +		goto out;
> +	}
> +	invalidation_ctx->func(invalidation_ctx->cookie,
> +					invalidation_ctx->umem, 0, 0);
> +	if (invalidation_ctx->inflight_invalidation) {
> +		/* init the completion to wait on before letting other thread to run */
> +		init_completion(&invalidation_ctx->comp);
> +		mutex_unlock(&ib_peer_client->lock);
> +		need_unlock = 0;
> +		wait_for_completion(&invalidation_ctx->comp);
> +	}
> +
> +	kfree(invalidation_ctx);
> +out:
> +	if (need_unlock)
> +		mutex_unlock(&ib_peer_client->lock);
> +
> +	return 0;
>  }
>  
>  static int peer_ticket_exists(struct ib_peer_memory_client *ib_peer_client,
> @@ -168,11 +215,30 @@ int ib_peer_create_invalidation_ctx(struct ib_peer_memory_client *ib_peer_mem, s
>  void ib_peer_destroy_invalidation_ctx(struct ib_peer_memory_client *ib_peer_mem,
>  				      struct invalidation_ctx *invalidation_ctx)
>  {
> -	mutex_lock(&ib_peer_mem->lock);
> -	ib_peer_remove_context(ib_peer_mem, invalidation_ctx->context_ticket);
> -	mutex_unlock(&ib_peer_mem->lock);
> +	int peer_callback;
> +	int inflight_invalidation;
>  
> -	kfree(invalidation_ctx);
> +	/* If we are under peer callback lock was already taken.*/
> +	if (!invalidation_ctx->peer_callback)
> +		mutex_lock(&ib_peer_mem->lock);
> +	ib_peer_remove_context(ib_peer_mem, invalidation_ctx->context_ticket);
> +	/* make sure to check inflight flag after took the lock and remove from tree.
> +	 * in addition, from that point using local variables for peer_callback and
> +	 * inflight_invalidation as after the complete invalidation_ctx can't be accessed
> +	 * any more as it may be freed by the callback.
> +	 */
> +	peer_callback = invalidation_ctx->peer_callback;
> +	inflight_invalidation = invalidation_ctx->inflight_invalidation;
> +	if (inflight_invalidation)
> +		complete(&invalidation_ctx->comp);
> +
> +	/* On peer callback lock is handled externally */
> +	if (!peer_callback)
> +		mutex_unlock(&ib_peer_mem->lock);
> +
> +	/* in case under callback context or callback is pending let it free the invalidation context */
> +	if (!peer_callback && !inflight_invalidation)
> +		kfree(invalidation_ctx);
>  }
>  static int ib_memory_peer_check_mandatory(const struct peer_memory_client
>  						     *peer_client)
> @@ -261,13 +327,19 @@ void ib_unregister_peer_memory_client(void *reg_handle)
>  EXPORT_SYMBOL(ib_unregister_peer_memory_client);
>  
>  struct ib_peer_memory_client *ib_get_peer_client(struct ib_ucontext *context, unsigned long addr,
> -						 size_t size, void **peer_client_context)
> +						 size_t size, unsigned long peer_mem_flags,
> +						 void **peer_client_context)
>  {
>  	struct ib_peer_memory_client *ib_peer_client;
>  	int ret;
>  
>  	mutex_lock(&peer_memory_mutex);
>  	list_for_each_entry(ib_peer_client, &peer_memory_list, core_peer_list) {
> +		/* In case peer requires invalidation it can't own memory which doesn't support it */
> +		if (ib_peer_client->invalidation_required &&
> +		    (!(peer_mem_flags & IB_PEER_MEM_INVAL_SUPP)))
> +			continue;
> +
>  		ret = ib_peer_client->peer_mem->acquire(addr, size,
>  						   context->peer_mem_private_data,
>  						   context->peer_mem_name,
> diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
> index 0de9916..51f32a1 100644
> --- a/drivers/infiniband/core/umem.c
> +++ b/drivers/infiniband/core/umem.c
> @@ -44,12 +44,19 @@
>  
>  static struct ib_umem *peer_umem_get(struct ib_peer_memory_client *ib_peer_mem,
>  				     struct ib_umem *umem, unsigned long addr,
> -				     int dmasync)
> +				     int dmasync, unsigned long peer_mem_flags)
>  {
>  	int ret;
>  	const struct peer_memory_client *peer_mem = ib_peer_mem->peer_mem;
> +	struct invalidation_ctx *invalidation_ctx = NULL;
>  
>  	umem->ib_peer_mem = ib_peer_mem;
> +	if (peer_mem_flags & IB_PEER_MEM_INVAL_SUPP) {
> +		ret = ib_peer_create_invalidation_ctx(ib_peer_mem, umem, &invalidation_ctx);
> +		if (ret)
> +			goto end;
> +	}
> +
>  	/*
>  	 * We always request write permissions to the pages, to force breaking of any CoW
>  	 * during the registration of the MR. For read-only MRs we use the "force" flag to
> @@ -60,7 +67,9 @@ static struct ib_umem *peer_umem_get(struct ib_peer_memory_client *ib_peer_mem,
>  				  1, !umem->writable,
>  				  &umem->sg_head,
>  				  umem->peer_mem_client_context,
> -				  NULL);
> +				  invalidation_ctx ?
> +				  (void *)invalidation_ctx->context_ticket : NULL);
> +

NULL may be a valid "ticket" once converted to unsigned long and looked
up in the ticket list.

>  	if (ret)
>  		goto out;
>  
> @@ -84,6 +93,9 @@ put_pages:
>  	peer_mem->put_pages(umem->peer_mem_client_context,
>  					&umem->sg_head);
>  out:
> +	if (invalidation_ctx)
> +		ib_peer_destroy_invalidation_ctx(ib_peer_mem, invalidation_ctx);
> +end:
>  	ib_put_peer_client(ib_peer_mem, umem->peer_mem_client_context);
>  	kfree(umem);
>  	return ERR_PTR(ret);
> @@ -91,15 +103,19 @@ out:
>  
>  static void peer_umem_release(struct ib_umem *umem)
>  {
> -	const struct peer_memory_client *peer_mem =
> -				umem->ib_peer_mem->peer_mem;
> +	struct ib_peer_memory_client *ib_peer_mem = umem->ib_peer_mem;
> +	const struct peer_memory_client *peer_mem = ib_peer_mem->peer_mem;
> +	struct invalidation_ctx *invalidation_ctx = umem->invalidation_ctx;
> +
> +	if (invalidation_ctx)
> +		ib_peer_destroy_invalidation_ctx(ib_peer_mem, invalidation_ctx);
>  
>  	peer_mem->dma_unmap(&umem->sg_head,
>  			    umem->peer_mem_client_context,
>  			    umem->context->device->dma_device);
>  	peer_mem->put_pages(&umem->sg_head,
>  			    umem->peer_mem_client_context);
> -	ib_put_peer_client(umem->ib_peer_mem, umem->peer_mem_client_context);
> +	ib_put_peer_client(ib_peer_mem, umem->peer_mem_client_context);
>  	kfree(umem);
>  }
>  
> @@ -127,6 +143,27 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
>  
>  }
>  
> +int ib_umem_activate_invalidation_notifier(struct ib_umem *umem,
> +					   umem_invalidate_func_t func,
> +					   void *cookie)
> +{
> +	struct invalidation_ctx *invalidation_ctx = umem->invalidation_ctx;
> +	int ret = 0;
> +
> +	mutex_lock(&umem->ib_peer_mem->lock);
> +	if (invalidation_ctx->peer_invalidated) {
> +		pr_err("ib_umem_activate_invalidation_notifier: pages were invalidated by peer\n");
> +		ret = -EINVAL;
> +		goto end;
> +	}
> +	invalidation_ctx->func = func;
> +	invalidation_ctx->cookie = cookie;
> +	/* from that point any pending invalidations can be called */
> +end:
> +	mutex_unlock(&umem->ib_peer_mem->lock);
> +	return ret;
> +}
> +EXPORT_SYMBOL(ib_umem_activate_invalidation_notifier);
>  /**
>   * ib_umem_get - Pin and DMA map userspace memory.
>   * @context: userspace context to pin memory for
> @@ -179,11 +216,11 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
>  	if (peer_mem_flags & IB_PEER_MEM_ALLOW) {
>  		struct ib_peer_memory_client *peer_mem_client;
>  
> -		peer_mem_client =  ib_get_peer_client(context, addr, size,
> +		peer_mem_client =  ib_get_peer_client(context, addr, size, peer_mem_flags,
>  						      &umem->peer_mem_client_context);
>  		if (peer_mem_client)
>  			return peer_umem_get(peer_mem_client, umem, addr,
> -					dmasync);
> +					dmasync, peer_mem_flags);
>  	}
>  
>  	/* We assume the memory is from hugetlb until proved otherwise */
> diff --git a/include/rdma/ib_peer_mem.h b/include/rdma/ib_peer_mem.h
> index d3fbb50..8f67aaf 100644
> --- a/include/rdma/ib_peer_mem.h
> +++ b/include/rdma/ib_peer_mem.h
> @@ -22,6 +22,7 @@ struct ib_peer_memory_client {
>  
>  enum ib_peer_mem_flags {
>  	IB_PEER_MEM_ALLOW	= 1,
> +	IB_PEER_MEM_INVAL_SUPP = (1<<1),
>  };
>  
>  struct core_ticket {
> @@ -31,7 +32,8 @@ struct core_ticket {
>  };
>  
>  struct ib_peer_memory_client *ib_get_peer_client(struct ib_ucontext *context, unsigned long addr,
> -						 size_t size, void **peer_client_context);
> +						 size_t size, unsigned long peer_mem_flags,
> +						 void **peer_client_context);
>  
>  void ib_put_peer_client(struct ib_peer_memory_client *ib_peer_client,
>  			void *peer_client_context);
> diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
> index 4b8a042..83d6059 100644
> --- a/include/rdma/ib_umem.h
> +++ b/include/rdma/ib_umem.h
> @@ -39,10 +39,21 @@
>  #include <rdma/ib_peer_mem.h>
>  
>  struct ib_ucontext;
> +struct ib_umem;
> +
> +typedef void (*umem_invalidate_func_t)(void *invalidation_cookie,
> +					    struct ib_umem *umem,
> +					    unsigned long addr, size_t size);
>  
>  struct invalidation_ctx {
>  	struct ib_umem *umem;
>  	unsigned long context_ticket;
> +	umem_invalidate_func_t func;
> +	void *cookie;
> +	int peer_callback;
> +	int inflight_invalidation;
> +	int peer_invalidated;
> +	struct completion comp;
>  };
>  
>  struct ib_umem {
> @@ -73,6 +84,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
>  			       unsigned long peer_mem_flags);
>  void ib_umem_release(struct ib_umem *umem);
>  int ib_umem_page_count(struct ib_umem *umem);
> +int  ib_umem_activate_invalidation_notifier(struct ib_umem *umem,
> +					    umem_invalidate_func_t func,
> +					    void *cookie);
>  
>  #else /* CONFIG_INFINIBAND_USER_MEM */
>  
> @@ -87,6 +101,9 @@ static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context,
>  static inline void ib_umem_release(struct ib_umem *umem) { }
>  static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; }
>  
> +static inline int ib_umem_activate_invalidation_notifier(struct ib_umem *umem,
> +							 umem_invalidate_func_t func,
> +							 void *cookie) {return 0; }
>  #endif /* CONFIG_INFINIBAND_USER_MEM */
>  
>  #endif /* IB_UMEM_H */


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2014-10-01 16:25 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-10-01 15:18 [PATCH for-next 0/9] Peer-Direct support Yishai Hadas
     [not found] ` <1412176717-11979-1-git-send-email-yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-10-01 15:18   ` [PATCH for-next 1/9] IB/core: Introduce peer client interface Yishai Hadas
     [not found]     ` <1412176717-11979-2-git-send-email-yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-10-01 16:34       ` Bart Van Assche
     [not found]         ` <542C2D23.30508-HInyCGIudOg@public.gmane.org>
2014-10-02 14:37           ` Yishai Hadas
2014-10-01 15:18   ` [PATCH for-next 2/9] IB/core: Get/put peer memory client Yishai Hadas
2014-10-01 15:18   ` [PATCH for-next 3/9] IB/core: Umem tunneling peer memory APIs Yishai Hadas
2014-10-01 15:18   ` [PATCH for-next 4/9] IB/core: Infrastructure to manage peer core context Yishai Hadas
     [not found]     ` <1412176717-11979-5-git-send-email-yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-10-01 22:24       ` Yann Droneaud
     [not found]         ` <1412202261.28184.0.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2014-10-02 15:02           ` Shachar Raindel
2014-10-01 15:18   ` [PATCH for-next 5/9] IB/core: Invalidation support for peer memory Yishai Hadas
     [not found]     ` <1412176717-11979-6-git-send-email-yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-10-01 16:25       ` Yann Droneaud [this message]
     [not found]         ` <1412180704.4380.40.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2014-10-02 15:05           ` Shachar Raindel
2014-10-01 15:18   ` [PATCH for-next 6/9] IB/core: Sysfs " Yishai Hadas
     [not found]     ` <1412176717-11979-7-git-send-email-yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-10-02 16:41       ` Jason Gunthorpe
2014-10-01 15:18   ` [PATCH for-next 7/9] IB/mlx4: Invalidation support for MR over " Yishai Hadas
2014-10-01 15:18   ` [PATCH for-next 8/9] IB/mlx5: " Yishai Hadas
2014-10-01 15:18   ` [PATCH for-next 9/9] Samples: Peer memory client example Yishai Hadas
     [not found]     ` <1412176717-11979-10-git-send-email-yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-10-01 17:16       ` Hefty, Sean
     [not found]         ` <1828884A29C6694DAF28B7E6B8A8237399DE5096-P5GAC/sN6hkd3b2yrw5b5LfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2014-10-02  3:14           ` Jason Gunthorpe
     [not found]             ` <20141002031441.GA10386-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2014-10-02 13:35               ` Shachar Raindel
2014-10-07 16:57           ` Davis, Arlin R
     [not found]             ` <54347E5A035A054EAE9D05927FB467F977CC9244-P5GAC/sN6hmkrb+BlOpmy7fspsVTdybXVpNB7YpNyf8@public.gmane.org>
2014-10-07 17:09               ` Hefty, Sean

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1412180704.4380.40.camel@localhost.localdomain \
    --to=ydroneaud-rly5vtjfyj3qt0dzr+alfa@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=raindel-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org \
    --cc=roland-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org \
    --cc=yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.