Re: [PATCH rdma-next 4/7] RDMA/mlx5: Change the cache structure to an RB-tree

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Jason Gunthorpe <jgg@nvidia.com>
To: Leon Romanovsky <leon@kernel.org>
Cc: Aharon Landau <aharonl@nvidia.com>,
	linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org
Subject: Re: [PATCH rdma-next 4/7] RDMA/mlx5: Change the cache structure to an RB-tree
Date: Wed, 8 Dec 2021 16:58:30 -0400	[thread overview]
Message-ID: <20211208205830.GH6385@nvidia.com> (raw)
In-Reply-To: <3afa3e9b25bb95894e65144c3fbbc5be456d7c16.1638781506.git.leonro@nvidia.com>

On Mon, Dec 06, 2021 at 11:10:49AM +0200, Leon Romanovsky wrote:

> +static struct mlx5_cache_ent *ent_search(struct mlx5_mr_cache *cache, void *mkc)

mlx5_cache_ent_find() ?

This search isn't really what I would expect, it should stop if it
finds a (somewhat) larger and otherwise compatible entry instead of
continuing to bin on power of two sizes.

> +	struct rb_node *node = cache->cache_root.rb_node;
> +	int size = MLX5_ST_SZ_BYTES(mkc);

size_t not int

> +	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
> +	if (dev->cache.maintained_cache && !force) {
> +		int order;

unsigned int

> +		/*
> +		 * Try to get an mkey from pool.
> +		 */
> +		order = order_base_2(ndescs) > 2 ? order_base_2(ndescs) : 2;
> +		MLX5_SET(mkc, mkc, translations_octword_size,
> +			 get_mkc_octo_size(access_mode, 1 << order));
> +		mutex_lock(&dev->cache.cache_lock);
> +		ent = ent_search(&dev->cache, mkc);
> +		mutex_unlock(&dev->cache.cache_lock);

Why is it OK to drop the lock here? What is protecting the ent pointer
against free?

> +	if (ent && (ent->limit || force)) {

What locking protects ent->limit ?

> +		xa_lock_irq(&ent->mkeys);
> +		if (!ent->stored) {
> +			if (ent->limit) {
> +				queue_adjust_cache_locked(ent);
> +				ent->miss++;
> +			}
> +			xa_unlock_irq(&ent->mkeys);
> +
> +			err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
> +			if (err)
> +				goto err;

So if there is no entry we create a bigger one rounded up to pow2?

> +
> +			WRITE_ONCE(ent->dev->cache.last_add, jiffies);
> +			xa_lock_irq(&ent->mkeys);
> +			ent->total_mrs++;
> +			xa_unlock_irq(&ent->mkeys);

May as well optimize for the normal case, do the total_mrs++ before
create_mkey while we still have the lock and undo it if it fails. It
is just minor stat reporting right?

> +		} else {
> +			xa_ent = __xa_store(&ent->mkeys, --ent->stored,
> +					  NULL, GFP_KERNEL);
> +			WARN_ON(xa_ent == NULL || xa_is_err(xa_ent));
> +			WARN_ON(__xa_erase(&ent->mkeys, --ent->reserved) !=
> +				NULL);

This whole bigger block want to be in its own function 'mlx5_ent_get_mkey()'

Then you can write it simpler without all the duplicated error
handling and deep indenting

>  			queue_adjust_cache_locked(ent);
> -			ent->miss++;
> -		}
> -		xa_unlock_irq(&ent->mkeys);
> -		err = create_cache_mkey(ent, &mr->mmkey.key);
> -		if (err) {
> -			kfree(mr);
> -			return ERR_PTR(err);
> +			xa_unlock_irq(&ent->mkeys);
> +			mr->mmkey.key = (u32)xa_to_value(xa_ent);
>  		}
> +		mr->cache_ent = ent;
>  	} else {
> -		mr = __xa_store(&ent->mkeys, --ent->stored, NULL,
> -				GFP_KERNEL);
> -		WARN_ON(mr == NULL || xa_is_err(mr));
> -		WARN_ON(__xa_erase(&ent->mkeys, --ent->reserved) != NULL);
> -		queue_adjust_cache_locked(ent);
> -		xa_unlock_irq(&ent->mkeys);
> -
> -		mr->mmkey.key = (u32)xa_to_value(xa_ent);
> +		/*
> +		 * Can not use a cache mkey.
> +		 * Create an mkey with the exact needed size.
> +		 */
> +		MLX5_SET(mkc, mkc, translations_octword_size,
> +			 get_mkc_octo_size(access_mode, ndescs));
> +		err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
> +		if (err)
> +			goto err;
>  	}

I think this needs to be broken to functions, it is hard to read with
all these ifs and inlined locking

>  
> +static int ent_insert(struct mlx5_mr_cache *cache, struct mlx5_cache_ent *ent)
> +{
> +	struct rb_node **new = &cache->cache_root.rb_node, *parent = NULL;
> +	int size = MLX5_ST_SZ_BYTES(mkc);
> +	struct mlx5_cache_ent *cur;
> +	int cmp;
> +
> +	/* Figure out where to put new node */
> +	while (*new) {
> +		cur = container_of(*new, struct mlx5_cache_ent, node);
> +		parent = *new;
> +		cmp = memcmp(ent->mkc, cur->mkc, size);
> +		if (cmp < 0)
> +			new = &((*new)->rb_left);
> +		else if (cmp > 0)
> +			new = &((*new)->rb_right);
> +		else
> +			return -EEXIST;
> +	}
> +
> +	/* Add new node and rebalance tree. */
> +	rb_link_node(&ent->node, parent, new);
> +	rb_insert_color(&ent->node, &cache->cache_root);
> +
> +	return 0;
> +}

This should be near the find

> +
> +static struct mlx5_cache_ent *mlx5_ib_create_cache_ent(struct mlx5_ib_dev *dev,
> +						       int order)
> +{
> +	struct mlx5_cache_ent *ent;
> +	int ret;
> +
> +	ent = kzalloc(sizeof(*ent), GFP_KERNEL);
> +	if (!ent)
> +		return ERR_PTR(-ENOMEM);
> +
> +	ent->mkc = kzalloc(MLX5_ST_SZ_BYTES(mkc), GFP_KERNEL);
> +	if (!ent->mkc) {
> +		kfree(ent);
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	ent->ndescs = 1 << order;
> +	mlx5_set_cache_mkc(dev, ent->mkc, 0, MLX5_MKC_ACCESS_MODE_MTT,
> +			   PAGE_SHIFT);
> +	MLX5_SET(mkc, ent->mkc, translations_octword_size,
> +		 get_mkc_octo_size(MLX5_MKC_ACCESS_MODE_MTT, ent->ndescs));
> +	mutex_lock(&dev->cache.cache_lock);
> +	ret = ent_insert(&dev->cache, ent);
> +	mutex_unlock(&dev->cache.cache_lock);
> +	if (ret) {
> +		kfree(ent->mkc);
> +		kfree(ent);
> +		return ERR_PTR(ret);
> +	}
> +
> +	xa_init_flags(&ent->mkeys, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
> +	ent->dev = dev;
> +
> +	INIT_WORK(&ent->work, cache_work_func);
> +	INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);

And the ent should be fully setup before adding it to the cache

Jason

next prev parent reply	other threads:[~2021-12-08 20:58 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-06  9:10 [PATCH rdma-next 0/7] MR cache enhancement Leon Romanovsky
2021-12-06  9:10 ` [PATCH rdma-next 1/7] RDMA/mlx5: Merge similar flows of allocating MR from the cache Leon Romanovsky
2021-12-06  9:10 ` [PATCH rdma-next 2/7] RDMA/mlx5: Replace cache list with Xarray Leon Romanovsky
2021-12-08 16:46   ` Jason Gunthorpe
2021-12-09  8:03     ` Leon Romanovsky
2021-12-09  8:21     ` Aharon Landau
2021-12-09 17:56       ` Jason Gunthorpe
2021-12-06  9:10 ` [PATCH rdma-next 3/7] RDMA/mlx5: Store in the cache mkeys instead of mrs Leon Romanovsky
2021-12-08 20:44   ` Jason Gunthorpe
2021-12-06  9:10 ` [PATCH rdma-next 4/7] RDMA/mlx5: Change the cache structure to an RB-tree Leon Romanovsky
2021-12-08 20:58   ` Jason Gunthorpe [this message]
2021-12-06  9:10 ` [PATCH rdma-next 5/7] RDMA/mlx5: Reorder calls to pcie_relaxed_ordering_enabled() Leon Romanovsky
2021-12-06  9:10 ` [PATCH rdma-next 6/7] RDMA/mlx5: Delay the deregistration of a non-cache mkey Leon Romanovsky
2021-12-06  9:10 ` [PATCH rdma-next 7/7] RDMA/mlx5: Rename the mkey cache variables and functions Leon Romanovsky
2021-12-09  8:40 ` [PATCH rdma-next 0/7] MR cache enhancement Leon Romanovsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211208205830.GH6385@nvidia.com \
    --to=jgg@nvidia.com \
    --cc=aharonl@nvidia.com \
    --cc=leon@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.