Linux RDMA and InfiniBand development
 help / color / mirror / Atom feed
From: Zhu Yanjun <yanjun.zhu@linux.dev>
To: Yonatan Nachum <ynachum@amazon.com>,
	jgg@nvidia.com, leon@kernel.org, linux-rdma@vger.kernel.org,
	"yanjun.zhu@linux.dev" <yanjun.zhu@linux.dev>
Cc: mrgolin@amazon.com, sleybo@amazon.com, matua@amazon.com,
	gal.pressman@linux.dev, Firas Jahjah <firasj@amazon.com>
Subject: Re: [PATCH for-next v3 2/2] RDMA/efa: Add AH cache handling on create and destroy AH
Date: Sun, 7 Jun 2026 22:00:58 -0700	[thread overview]
Message-ID: <1c0463ef-79bf-4e59-9b95-fe5f5f67612a@linux.dev> (raw)
In-Reply-To: <20260607161753.1607559-3-ynachum@amazon.com>

在 2026/6/7 9:17, Yonatan Nachum 写道:
> On create AH, first check if the AH cache entry already exists and if
> so, return the already stored AH number. If the entry doesn't exist,
> the driver creates it and calls the device to create the AH. A per-entry
> mutex serializes concurrent device commands on the same AH cache entry,
> ensuring only one thread issues the device create while others wait and
> reuse the result. If the device create fails, the entry remains
> uninitialized so subsequent calls can create the AH.
> 
> On destroy AH, the refcount is checked and if it's the last reference,
> the driver issues the device destroy command while holding the entry
> mutex. The entry remains in the hashtable during destroy to allow
> concurrent create threads to find it and wait on the entry mutex,
> preventing create-before-destroy races on the device. After the device
> destroy completes, the entry is either recycled if new users arrived or
> removed and freed.
> 
> Reviewed-by: Firas Jahjah <firasj@amazon.com>
> Reviewed-by: Michael Margolin <mrgolin@amazon.com>
> Signed-off-by: Yonatan Nachum <ynachum@amazon.com>
> ---
>   drivers/infiniband/hw/efa/efa_ah_cache.c | 122 +++++++++++++++++++++++
>   drivers/infiniband/hw/efa/efa_ah_cache.h |   5 +
>   drivers/infiniband/hw/efa/efa_com_cmd.c  |  73 ++++++++++----
>   drivers/infiniband/hw/efa/efa_com_cmd.h  |   1 +
>   drivers/infiniband/hw/efa/efa_verbs.c    |   9 +-
>   5 files changed, 187 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/efa/efa_ah_cache.c b/drivers/infiniband/hw/efa/efa_ah_cache.c
> index ab763b06b9bb..b8314cdbf054 100644
> --- a/drivers/infiniband/hw/efa/efa_ah_cache.c
> +++ b/drivers/infiniband/hw/efa/efa_ah_cache.c
> @@ -39,3 +39,125 @@ void efa_ah_cache_destroy(struct efa_ah_cache *ah_cache)
>   	rhashtable_free_and_destroy(&ah_cache->hashtable, efa_ah_cache_entry_free, NULL);
>   	mutex_destroy(&ah_cache->lock);
>   }
> +
> +static struct efa_ah_cache_entry *efa_ah_cache_lookup(struct efa_ah_cache *ah_cache, u16 pd,
> +						      u8 *gid)
> +	__must_hold(&ah_cache->lock)
> +{
> +	struct efa_ah_cache_key key = {};
> +
> +	memcpy(key.gid, gid, sizeof(key.gid));
> +	key.pd = pd;
> +
> +	return rhashtable_lookup_fast(&ah_cache->hashtable, &key, ah_cache_params);
> +}
> +
> +/**
> + * efa_ah_cache_get_or_create - Get or create an AH cache entry
> + * @ah_cache: AH cache
> + * @pd: Protection domain number
> + * @gid: GID address
> + *
> + * Look up an AH cache entry by PD and GID. If found, increment the refcount and
> + * return it. If not found, allocate a new entry and insert it into the
> + * hashtable. The entry is returned unlocked.
> + *
> + * Return: Pointer to the entry on success, ERR_PTR on failure.
> + */
> +struct efa_ah_cache_entry *efa_ah_cache_get_or_create(struct efa_ah_cache *ah_cache, u16 pd,
> +						      u8 *gid)
> +{
> +	struct efa_ah_cache_entry *entry;
> +	int err;
> +
> +	mutex_lock(&ah_cache->lock);
> +
> +	entry = efa_ah_cache_lookup(ah_cache, pd, gid);
> +	if (entry) {
> +		refcount_inc(&entry->refcount);
> +		mutex_unlock(&ah_cache->lock);
> +		return entry;
> +	}
> +
> +	entry = kzalloc(sizeof(*entry), GFP_KERNEL);

Please use kzalloc_obj() here instead of open-coding kzalloc(sizeof(*entry), GFP_KERNEL).

Zhu Yanjun

> +	if (!entry) {
> +		mutex_unlock(&ah_cache->lock);
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	memcpy(entry->key.gid, gid, sizeof(entry->key.gid));
> +	entry->key.pd = pd;
> +	refcount_set(&entry->refcount, 1);
> +	mutex_init(&entry->lock);
> +
> +	err = rhashtable_insert_fast(&ah_cache->hashtable, &entry->linkage, ah_cache_params);
> +	if (err) {
> +		mutex_destroy(&entry->lock);
> +		kfree(entry);
> +		mutex_unlock(&ah_cache->lock);
> +		return ERR_PTR(err);
> +	}
> +
> +	mutex_unlock(&ah_cache->lock);
> +	return entry;
> +}
> +
> +/**
> + * efa_ah_cache_put_unless_last - Release a reference to an AH cache entry
> + * @ah_cache: AH cache
> + * @pd: Protection domain number
> + * @gid: GID address
> + *
> + * If this is not the last reference, decrement the refcount and return NULL.
> + * If this is the last reference, return the entry with its mutex locked
> + * without decrementing.
> + *
> + * Return: Pointer to the locked entry if last reference, NULL otherwise.
> + */
> +struct efa_ah_cache_entry *efa_ah_cache_put_unless_last(struct efa_ah_cache *ah_cache, u16 pd,
> +							u8 *gid)
> +{
> +	struct efa_ah_cache_entry *entry;
> +
> +	mutex_lock(&ah_cache->lock);
> +	entry = efa_ah_cache_lookup(ah_cache, pd, gid);
> +	if (!entry) {
> +		mutex_unlock(&ah_cache->lock);
> +		return NULL;
> +	}
> +
> +	if (refcount_dec_not_one(&entry->refcount)) {
> +		mutex_unlock(&ah_cache->lock);
> +		return NULL;
> +	}
> +
> +	mutex_lock(&entry->lock);
> +	mutex_unlock(&ah_cache->lock);
> +	return entry;
> +}
> +
> +/**
> + * efa_ah_cache_put - Release the final reference to an AH cache entry
> + * @ah_cache: AH cache
> + * @entry: AH cache entry
> + *
> + * Decrement the refcount. If it reaches zero, the entry is removed from the
> + * hashtable and freed. Otherwise, the entry is kept for reuse.
> + *
> + * Called after the device destroy completes or on a failed create to release
> + * the caller's reference.
> + */
> +void efa_ah_cache_put(struct efa_ah_cache *ah_cache, struct efa_ah_cache_entry *entry)
> +{
> +	mutex_lock(&ah_cache->lock);
> +	if (!refcount_dec_and_test(&entry->refcount)) {
> +		mutex_unlock(&ah_cache->lock);
> +		return;
> +	}
> +
> +	rhashtable_remove_fast(&ah_cache->hashtable, &entry->linkage, ah_cache_params);
> +	mutex_unlock(&ah_cache->lock);
> +
> +	mutex_destroy(&entry->lock);
> +	kfree_rcu(entry, rcu_head);
> +}
> diff --git a/drivers/infiniband/hw/efa/efa_ah_cache.h b/drivers/infiniband/hw/efa/efa_ah_cache.h
> index 133181b4466d..573fd29bb416 100644
> --- a/drivers/infiniband/hw/efa/efa_ah_cache.h
> +++ b/drivers/infiniband/hw/efa/efa_ah_cache.h
> @@ -33,5 +33,10 @@ struct efa_ah_cache {
>   
>   int efa_ah_cache_init(struct efa_ah_cache *ah_cache);
>   void efa_ah_cache_destroy(struct efa_ah_cache *ah_cache);
> +struct efa_ah_cache_entry *efa_ah_cache_get_or_create(struct efa_ah_cache *ah_cache, u16 pd,
> +						      u8 *gid);
> +struct efa_ah_cache_entry *efa_ah_cache_put_unless_last(struct efa_ah_cache *ah_cache, u16 pd,
> +							u8 *gid);
> +void efa_ah_cache_put(struct efa_ah_cache *ah_cache, struct efa_ah_cache_entry *entry);
>   
>   #endif /* _EFA_AH_CACHE_H_ */
> diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c
> index 63c7f07806a8..9eafcba5e028 100644
> --- a/drivers/infiniband/hw/efa/efa_com_cmd.c
> +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c
> @@ -313,19 +313,25 @@ int efa_com_dereg_mr(struct efa_com_dev *edev,
>   	return 0;
>   }
>   
> -int efa_com_create_ah(struct efa_com_dev *edev,
> -		      struct efa_com_create_ah_params *params,
> -		      struct efa_com_create_ah_result *result)
> +int efa_com_destroy_ah(struct efa_com_dev *edev,
> +		       struct efa_com_destroy_ah_params *params)
>   {
> -	struct efa_admin_create_ah_resp cmd_completion;
> +	struct efa_admin_destroy_ah_resp cmd_completion;
> +	struct efa_admin_destroy_ah_cmd ah_cmd = {};
>   	struct efa_com_admin_queue *aq = &edev->aq;
> -	struct efa_admin_create_ah_cmd ah_cmd = {};
> +	struct efa_ah_cache_entry *entry;
>   	int err;
>   
> -	ah_cmd.aq_common_desc.opcode = EFA_ADMIN_CREATE_AH;
> +	entry = efa_ah_cache_put_unless_last(&edev->ah_cache, params->pdn, params->gid);
> +	if (!entry)
> +		return 0;
>   
> -	memcpy(ah_cmd.dest_addr, params->dest_addr, sizeof(ah_cmd.dest_addr));
> -	ah_cmd.pd = params->pdn;
> +	if (!entry->initialized)
> +		goto out;
> +
> +	ah_cmd.aq_common_desc.opcode = EFA_ADMIN_DESTROY_AH;
> +	ah_cmd.ah = entry->ah;
> +	ah_cmd.pd = entry->key.pd;
>   
>   	err = efa_com_cmd_exec(aq,
>   			       (struct efa_admin_aq_entry *)&ah_cmd,
> @@ -333,27 +339,47 @@ int efa_com_create_ah(struct efa_com_dev *edev,
>   			       (struct efa_admin_acq_entry *)&cmd_completion,
>   			       sizeof(cmd_completion));
>   	if (err) {
> +		mutex_unlock(&entry->lock);
>   		ibdev_err_ratelimited(edev->efa_dev,
> -				      "Failed to create ah for %pI6 [%d]\n",
> -				      ah_cmd.dest_addr, err);
> +				      "Failed to destroy ah-%d pd-%d [%d]\n",
> +				      ah_cmd.ah, ah_cmd.pd, err);
>   		return err;
>   	}
>   
> -	result->ah = cmd_completion.ah;
> +	entry->initialized = false;
> +
> +out:
> +	mutex_unlock(&entry->lock);
> +	efa_ah_cache_put(&edev->ah_cache, entry);
>   
>   	return 0;
>   }
>   
> -int efa_com_destroy_ah(struct efa_com_dev *edev,
> -		       struct efa_com_destroy_ah_params *params)
> +int efa_com_create_ah(struct efa_com_dev *edev,
> +		      struct efa_com_create_ah_params *params,
> +		      struct efa_com_create_ah_result *result)
>   {
> -	struct efa_admin_destroy_ah_resp cmd_completion;
> -	struct efa_admin_destroy_ah_cmd ah_cmd = {};
> +	struct efa_com_destroy_ah_params destroy_params = {};
> +	struct efa_admin_create_ah_resp cmd_completion;
>   	struct efa_com_admin_queue *aq = &edev->aq;
> +	struct efa_admin_create_ah_cmd ah_cmd = {};
> +	struct efa_ah_cache_entry *entry;
>   	int err;
>   
> -	ah_cmd.aq_common_desc.opcode = EFA_ADMIN_DESTROY_AH;
> -	ah_cmd.ah = params->ah;
> +	entry = efa_ah_cache_get_or_create(&edev->ah_cache, params->pdn, params->dest_addr);
> +	if (IS_ERR(entry))
> +		return PTR_ERR(entry);
> +
> +	mutex_lock(&entry->lock);
> +	if (entry->initialized) {
> +		result->ah = entry->ah;
> +		mutex_unlock(&entry->lock);
> +		return 0;
> +	}
> +
> +	ah_cmd.aq_common_desc.opcode = EFA_ADMIN_CREATE_AH;
> +
> +	memcpy(ah_cmd.dest_addr, params->dest_addr, sizeof(ah_cmd.dest_addr));
>   	ah_cmd.pd = params->pdn;
>   
>   	err = efa_com_cmd_exec(aq,
> @@ -362,12 +388,21 @@ int efa_com_destroy_ah(struct efa_com_dev *edev,
>   			       (struct efa_admin_acq_entry *)&cmd_completion,
>   			       sizeof(cmd_completion));
>   	if (err) {
> +		mutex_unlock(&entry->lock);
> +		memcpy(destroy_params.gid, params->dest_addr, sizeof(destroy_params.gid));
> +		destroy_params.pdn = params->pdn;
> +		efa_com_destroy_ah(edev, &destroy_params);
>   		ibdev_err_ratelimited(edev->efa_dev,
> -				      "Failed to destroy ah-%d pd-%d [%d]\n",
> -				      ah_cmd.ah, ah_cmd.pd, err);
> +				      "Failed to create ah for %pI6 [%d]\n",
> +				      ah_cmd.dest_addr, err);
>   		return err;
>   	}
>   
> +	entry->ah = cmd_completion.ah;
> +	entry->initialized = true;
> +	result->ah = cmd_completion.ah;
> +	mutex_unlock(&entry->lock);
> +
>   	return 0;
>   }
>   
> diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h
> index ef15b3c38429..39bd4e06684a 100644
> --- a/drivers/infiniband/hw/efa/efa_com_cmd.h
> +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h
> @@ -106,6 +106,7 @@ struct efa_com_create_ah_result {
>   
>   struct efa_com_destroy_ah_params {
>   	u16 ah;
> +	u8 gid[EFA_GID_SIZE];
>   	u16 pdn;
>   };
>   
> diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
> index 434d60235945..6742a4037888 100644
> --- a/drivers/infiniband/hw/efa/efa_verbs.c
> +++ b/drivers/infiniband/hw/efa/efa_verbs.c
> @@ -2032,10 +2032,11 @@ int efa_mmap(struct ib_ucontext *ibucontext,
>   
>   static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
>   {
> -	struct efa_com_destroy_ah_params params = {
> -		.ah = ah->ah,
> -		.pdn = to_epd(ah->ibah.pd)->pdn,
> -	};
> +	struct efa_com_destroy_ah_params params = {};
> +
> +	params.ah = ah->ah;
> +	memcpy(params.gid, ah->id, sizeof(params.gid));
> +	params.pdn = to_epd(ah->ibah.pd)->pdn;
>   
>   	return efa_com_destroy_ah(&dev->edev, &params);
>   }


      reply	other threads:[~2026-06-08  5:01 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-07 16:17 [PATCH for-next v3 0/2] RDMA/efa: Add AH cache for AH reuse Yonatan Nachum
2026-06-07 16:17 ` [PATCH for-next v3 1/2] RDMA/efa: Add initialization of AH cache rhashtable Yonatan Nachum
2026-06-07 16:17 ` [PATCH for-next v3 2/2] RDMA/efa: Add AH cache handling on create and destroy AH Yonatan Nachum
2026-06-08  5:00   ` Zhu Yanjun [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1c0463ef-79bf-4e59-9b95-fe5f5f67612a@linux.dev \
    --to=yanjun.zhu@linux.dev \
    --cc=firasj@amazon.com \
    --cc=gal.pressman@linux.dev \
    --cc=jgg@nvidia.com \
    --cc=leon@kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=matua@amazon.com \
    --cc=mrgolin@amazon.com \
    --cc=sleybo@amazon.com \
    --cc=ynachum@amazon.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox