From: Jason Gunthorpe <jgg@nvidia.com>
To: Leon Romanovsky <leon@kernel.org>
Cc: Aharon Landau <aharonl@nvidia.com>,
linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org
Subject: Re: [PATCH rdma-next 2/7] RDMA/mlx5: Replace cache list with Xarray
Date: Wed, 8 Dec 2021 12:46:11 -0400 [thread overview]
Message-ID: <20211208164611.GB6385@nvidia.com> (raw)
In-Reply-To: <63a833106bcb03298489a80e88b1086684c76595.1638781506.git.leonro@nvidia.com>
> @@ -166,14 +169,14 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context)
>
> WRITE_ONCE(dev->cache.last_add, jiffies);
>
> - spin_lock_irqsave(&ent->lock, flags);
> - list_add_tail(&mr->list, &ent->head);
> - ent->available_mrs++;
> + xa_lock_irqsave(&ent->mkeys, flags);
> + xa_ent = __xa_store(&ent->mkeys, ent->stored++, mr, GFP_ATOMIC);
> + WARN_ON(xa_ent != NULL);
> + ent->pending--;
> ent->total_mrs++;
> /* If we are doing fill_to_high_water then keep going. */
> queue_adjust_cache_locked(ent);
> - ent->pending--;
> - spin_unlock_irqrestore(&ent->lock, flags);
> + xa_unlock_irqrestore(&ent->mkeys, flags);
> }
>
> static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
> @@ -196,6 +199,25 @@ static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
> return mr;
> }
>
> +static int _push_reserve_mkey(struct mlx5_cache_ent *ent)
> +{
> + unsigned long to_reserve;
> + int rc;
> +
> + while (true) {
> + to_reserve = ent->reserved;
> + rc = xa_err(__xa_cmpxchg(&ent->mkeys, to_reserve, NULL,
> + XA_ZERO_ENTRY, GFP_KERNEL));
> + if (rc)
> + return rc;
What about when old != NULL ?
> + if (to_reserve != ent->reserved)
> + continue;
There is an edge case where where reserved could have shrunk alot
while the lock was released, and xa_cmpxchg could succeed. The above
if will protect things, however a ZERO_ENTRY will have been written to
some weird place in the XA. It needs a
if (old == NULL) // ie we stored something someplace weird
__xa_erase(&ent->mkeys, to_reserve)
> + ent->reserved++;
> + break;
> + }
> + return 0;
> +}
> +
> /* Asynchronously schedule new MRs to be populated in the cache. */
> static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
> {
> @@ -217,23 +239,32 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
> err = -ENOMEM;
> break;
> }
> - spin_lock_irq(&ent->lock);
> + xa_lock_irq(&ent->mkeys);
> if (ent->pending >= MAX_PENDING_REG_MR) {
> + xa_unlock_irq(&ent->mkeys);
> err = -EAGAIN;
> - spin_unlock_irq(&ent->lock);
> + kfree(mr);
> + break;
> + }
> +
> + err = _push_reserve_mkey(ent);
The test of ent->pending is out of date now since this can drop the
lock
It feels like pending and (reserved - stored) are really the same
thing, so maybe just directly limit the number of reserved and test it
after
> @@ -287,39 +318,37 @@ static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
> {
> struct mlx5_ib_mr *mr;
>
> - lockdep_assert_held(&ent->lock);
> - if (list_empty(&ent->head))
> + if (!ent->stored)
> return;
> - mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
> - list_del(&mr->list);
> - ent->available_mrs--;
> + mr = __xa_store(&ent->mkeys, --ent->stored, NULL, GFP_KERNEL);
> + WARN_ON(mr == NULL || xa_is_err(mr));
Add a if (reserved != stored) before the below?
> + WARN_ON(__xa_erase(&ent->mkeys, --ent->reserved) != NULL);
Also please avoid writing WARN_ON(something with side effects)
old = __xa_erase(&ent->mkeys, --ent->reserved);
WARN_ON(old != NULL);
Same for all places
> static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
> bool limit_fill)
> + __acquires(&ent->lock) __releases(&ent->lock)
Why?
> {
> int err;
>
> - lockdep_assert_held(&ent->lock);
> -
Why?
> static void clean_keys(struct mlx5_ib_dev *dev, int c)
> {
> struct mlx5_mr_cache *cache = &dev->cache;
> struct mlx5_cache_ent *ent = &cache->ent[c];
> - struct mlx5_ib_mr *tmp_mr;
> struct mlx5_ib_mr *mr;
> - LIST_HEAD(del_list);
> + unsigned long index;
>
> cancel_delayed_work(&ent->dwork);
> - while (1) {
> - spin_lock_irq(&ent->lock);
> - if (list_empty(&ent->head)) {
> - spin_unlock_irq(&ent->lock);
> - break;
> - }
> - mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
> - list_move(&mr->list, &del_list);
> - ent->available_mrs--;
> + xa_for_each(&ent->mkeys, index, mr) {
This isn't quite the same thing, the above tolerates concurrent add,
this does not.
It should be more like
while (ent->stored) {
mr = __xa_erase(stored--);
> @@ -1886,6 +1901,17 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
> }
> }
>
> +static int push_reserve_mkey(struct mlx5_cache_ent *ent)
> +{
> + int ret;
> +
> + xa_lock_irq(&ent->mkeys);
> + ret = _push_reserve_mkey(ent);
> + xa_unlock_irq(&ent->mkeys);
> +
> + return ret;
> +}
Put this close to _push_reserve_mkey() please
Jason
next prev parent reply other threads:[~2021-12-08 16:46 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-12-06 9:10 [PATCH rdma-next 0/7] MR cache enhancement Leon Romanovsky
2021-12-06 9:10 ` [PATCH rdma-next 1/7] RDMA/mlx5: Merge similar flows of allocating MR from the cache Leon Romanovsky
2021-12-06 9:10 ` [PATCH rdma-next 2/7] RDMA/mlx5: Replace cache list with Xarray Leon Romanovsky
2021-12-08 16:46 ` Jason Gunthorpe [this message]
2021-12-09 8:03 ` Leon Romanovsky
2021-12-09 8:21 ` Aharon Landau
2021-12-09 17:56 ` Jason Gunthorpe
2021-12-06 9:10 ` [PATCH rdma-next 3/7] RDMA/mlx5: Store in the cache mkeys instead of mrs Leon Romanovsky
2021-12-08 20:44 ` Jason Gunthorpe
2021-12-06 9:10 ` [PATCH rdma-next 4/7] RDMA/mlx5: Change the cache structure to an RB-tree Leon Romanovsky
2021-12-08 20:58 ` Jason Gunthorpe
2021-12-06 9:10 ` [PATCH rdma-next 5/7] RDMA/mlx5: Reorder calls to pcie_relaxed_ordering_enabled() Leon Romanovsky
2021-12-06 9:10 ` [PATCH rdma-next 6/7] RDMA/mlx5: Delay the deregistration of a non-cache mkey Leon Romanovsky
2021-12-06 9:10 ` [PATCH rdma-next 7/7] RDMA/mlx5: Rename the mkey cache variables and functions Leon Romanovsky
2021-12-09 8:40 ` [PATCH rdma-next 0/7] MR cache enhancement Leon Romanovsky
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20211208164611.GB6385@nvidia.com \
--to=jgg@nvidia.com \
--cc=aharonl@nvidia.com \
--cc=leon@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-rdma@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.