public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Jason Gunthorpe <jgg@nvidia.com>
To: Leon Romanovsky <leon@kernel.org>
Cc: Aharon Landau <aharonl@nvidia.com>,
	linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org
Subject: Re: [PATCH rdma-next 4/7] RDMA/mlx5: Change the cache structure to an RB-tree
Date: Wed, 8 Dec 2021 16:58:30 -0400	[thread overview]
Message-ID: <20211208205830.GH6385@nvidia.com> (raw)
In-Reply-To: <3afa3e9b25bb95894e65144c3fbbc5be456d7c16.1638781506.git.leonro@nvidia.com>

On Mon, Dec 06, 2021 at 11:10:49AM +0200, Leon Romanovsky wrote:

> +static struct mlx5_cache_ent *ent_search(struct mlx5_mr_cache *cache, void *mkc)

mlx5_cache_ent_find() ?

This search isn't really what I would expect, it should stop if it
finds a (somewhat) larger and otherwise compatible entry instead of
continuing to bin on power of two sizes.

> +	struct rb_node *node = cache->cache_root.rb_node;
> +	int size = MLX5_ST_SZ_BYTES(mkc);

size_t not int

> +	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
> +	if (dev->cache.maintained_cache && !force) {
> +		int order;

unsigned int

> +		/*
> +		 * Try to get an mkey from pool.
> +		 */
> +		order = order_base_2(ndescs) > 2 ? order_base_2(ndescs) : 2;
> +		MLX5_SET(mkc, mkc, translations_octword_size,
> +			 get_mkc_octo_size(access_mode, 1 << order));
> +		mutex_lock(&dev->cache.cache_lock);
> +		ent = ent_search(&dev->cache, mkc);
> +		mutex_unlock(&dev->cache.cache_lock);

Why is it OK to drop the lock here? What is protecting the ent pointer
against free?

> +	if (ent && (ent->limit || force)) {

What locking protects ent->limit ?

> +		xa_lock_irq(&ent->mkeys);
> +		if (!ent->stored) {
> +			if (ent->limit) {
> +				queue_adjust_cache_locked(ent);
> +				ent->miss++;
> +			}
> +			xa_unlock_irq(&ent->mkeys);
> +
> +			err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
> +			if (err)
> +				goto err;

So if there is no entry we create a bigger one rounded up to pow2?

> +
> +			WRITE_ONCE(ent->dev->cache.last_add, jiffies);
> +			xa_lock_irq(&ent->mkeys);
> +			ent->total_mrs++;
> +			xa_unlock_irq(&ent->mkeys);

May as well optimize for the normal case, do the total_mrs++ before
create_mkey while we still have the lock and undo it if it fails. It
is just minor stat reporting right?

> +		} else {
> +			xa_ent = __xa_store(&ent->mkeys, --ent->stored,
> +					  NULL, GFP_KERNEL);
> +			WARN_ON(xa_ent == NULL || xa_is_err(xa_ent));
> +			WARN_ON(__xa_erase(&ent->mkeys, --ent->reserved) !=
> +				NULL);

This whole bigger block wants to be in its own function 'mlx5_ent_get_mkey()'

Then you can write it simpler without all the duplicated error
handling and deep indenting

>  			queue_adjust_cache_locked(ent);
> -			ent->miss++;
> -		}
> -		xa_unlock_irq(&ent->mkeys);
> -		err = create_cache_mkey(ent, &mr->mmkey.key);
> -		if (err) {
> -			kfree(mr);
> -			return ERR_PTR(err);
> +			xa_unlock_irq(&ent->mkeys);
> +			mr->mmkey.key = (u32)xa_to_value(xa_ent);
>  		}
> +		mr->cache_ent = ent;
>  	} else {
> -		mr = __xa_store(&ent->mkeys, --ent->stored, NULL,
> -				GFP_KERNEL);
> -		WARN_ON(mr == NULL || xa_is_err(mr));
> -		WARN_ON(__xa_erase(&ent->mkeys, --ent->reserved) != NULL);
> -		queue_adjust_cache_locked(ent);
> -		xa_unlock_irq(&ent->mkeys);
> -
> -		mr->mmkey.key = (u32)xa_to_value(xa_ent);
> +		/*
> +		 * Can not use a cache mkey.
> +		 * Create an mkey with the exact needed size.
> +		 */
> +		MLX5_SET(mkc, mkc, translations_octword_size,
> +			 get_mkc_octo_size(access_mode, ndescs));
> +		err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
> +		if (err)
> +			goto err;
>  	}

I think this needs to be broken to functions, it is hard to read with
all these ifs and inlined locking

>  
> +static int ent_insert(struct mlx5_mr_cache *cache, struct mlx5_cache_ent *ent)
> +{
> +	struct rb_node **new = &cache->cache_root.rb_node, *parent = NULL;
> +	int size = MLX5_ST_SZ_BYTES(mkc);
> +	struct mlx5_cache_ent *cur;
> +	int cmp;
> +
> +	/* Figure out where to put new node */
> +	while (*new) {
> +		cur = container_of(*new, struct mlx5_cache_ent, node);
> +		parent = *new;
> +		cmp = memcmp(ent->mkc, cur->mkc, size);
> +		if (cmp < 0)
> +			new = &((*new)->rb_left);
> +		else if (cmp > 0)
> +			new = &((*new)->rb_right);
> +		else
> +			return -EEXIST;
> +	}
> +
> +	/* Add new node and rebalance tree. */
> +	rb_link_node(&ent->node, parent, new);
> +	rb_insert_color(&ent->node, &cache->cache_root);
> +
> +	return 0;
> +}

This should be near the find

> +
> +static struct mlx5_cache_ent *mlx5_ib_create_cache_ent(struct mlx5_ib_dev *dev,
> +						       int order)
> +{
> +	struct mlx5_cache_ent *ent;
> +	int ret;
> +
> +	ent = kzalloc(sizeof(*ent), GFP_KERNEL);
> +	if (!ent)
> +		return ERR_PTR(-ENOMEM);
> +
> +	ent->mkc = kzalloc(MLX5_ST_SZ_BYTES(mkc), GFP_KERNEL);
> +	if (!ent->mkc) {
> +		kfree(ent);
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	ent->ndescs = 1 << order;
> +	mlx5_set_cache_mkc(dev, ent->mkc, 0, MLX5_MKC_ACCESS_MODE_MTT,
> +			   PAGE_SHIFT);
> +	MLX5_SET(mkc, ent->mkc, translations_octword_size,
> +		 get_mkc_octo_size(MLX5_MKC_ACCESS_MODE_MTT, ent->ndescs));
> +	mutex_lock(&dev->cache.cache_lock);
> +	ret = ent_insert(&dev->cache, ent);
> +	mutex_unlock(&dev->cache.cache_lock);
> +	if (ret) {
> +		kfree(ent->mkc);
> +		kfree(ent);
> +		return ERR_PTR(ret);
> +	}
> +
> +	xa_init_flags(&ent->mkeys, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
> +	ent->dev = dev;
> +
> +	INIT_WORK(&ent->work, cache_work_func);
> +	INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);

And the ent should be fully setup before adding it to the cache

Jason

  reply	other threads:[~2021-12-08 20:58 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <cover.1638781506.git.leonro@nvidia.com>
2021-12-06  9:10 ` [PATCH rdma-next 1/7] RDMA/mlx5: Merge similar flows of allocating MR from the cache Leon Romanovsky
2021-12-06  9:10 ` [PATCH rdma-next 2/7] RDMA/mlx5: Replace cache list with Xarray Leon Romanovsky
2021-12-08 16:46   ` Jason Gunthorpe
2021-12-09  8:03     ` Leon Romanovsky
2021-12-09  8:21     ` Aharon Landau
2021-12-09 17:56       ` Jason Gunthorpe
2021-12-06  9:10 ` [PATCH rdma-next 3/7] RDMA/mlx5: Store in the cache mkeys instead of mrs Leon Romanovsky
2021-12-08 20:44   ` Jason Gunthorpe
2021-12-06  9:10 ` [PATCH rdma-next 4/7] RDMA/mlx5: Change the cache structure to an RB-tree Leon Romanovsky
2021-12-08 20:58   ` Jason Gunthorpe [this message]
2021-12-06  9:10 ` [PATCH rdma-next 5/7] RDMA/mlx5: Reorder calls to pcie_relaxed_ordering_enabled() Leon Romanovsky
2021-12-06  9:10 ` [PATCH rdma-next 6/7] RDMA/mlx5: Delay the deregistration of a non-cache mkey Leon Romanovsky
2021-12-06  9:10 ` [PATCH rdma-next 7/7] RDMA/mlx5: Rename the mkey cache variables and functions Leon Romanovsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211208205830.GH6385@nvidia.com \
    --to=jgg@nvidia.com \
    --cc=aharonl@nvidia.com \
    --cc=leon@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox