From: Michael Guralnik <michaelgur@nvidia.com>
To: <jgg@nvidia.com>, <leonro@nvidia.com>, <linux-rdma@vger.kernel.org>
Cc: <maorg@nvidia.com>, <aharonl@nvidia.com>,
Michael Guralnik <michaelgur@nvidia.com>
Subject: [PATCH v5 rdma-next 6/6] RDMA/mlx5: Add work to remove temporary entries from the cache
Date: Thu, 26 Jan 2023 00:28:07 +0200 [thread overview]
Message-ID: <20230125222807.6921-7-michaelgur@nvidia.com> (raw)
In-Reply-To: <20230125222807.6921-1-michaelgur@nvidia.com>
The non-cache mkeys are stored in the cache only to shorten restarting
application time. Don't store them longer than needed.
Configure cache entries that store non-cache MRs as temporary entries.
If 30 seconds have passed and no user reclaimed the temporarily cached
mkeys, an asynchronous work will destroy the mkeys entries.
Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
---
drivers/infiniband/hw/mlx5/mlx5_ib.h | 9 ++-
drivers/infiniband/hw/mlx5/mr.c | 94 ++++++++++++++++++++++------
drivers/infiniband/hw/mlx5/odp.c | 2 +-
3 files changed, 82 insertions(+), 23 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 6e0c0a931d78..8e22bb7d4c35 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -750,6 +750,7 @@ struct mlx5_cache_ent {
struct rb_node node;
struct mlx5r_cache_rb_key rb_key;
+ u8 is_tmp:1;
u8 disabled:1;
u8 fill_to_high_water:1;
@@ -783,6 +784,7 @@ struct mlx5_mkey_cache {
struct mutex rb_lock;
struct dentry *fs_root;
unsigned long last_add;
+ struct delayed_work remove_ent_dwork;
};
struct mlx5_ib_port_resources {
@@ -1326,9 +1328,10 @@ void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev);
int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev);
-struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
- struct mlx5r_cache_rb_key rb_key,
- bool persistent_entry);
+struct mlx5_cache_ent *
+mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev,
+ struct mlx5r_cache_rb_key rb_key,
+ bool persistent_entry);
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
int access_flags, int access_mode,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 25e80529edd8..37f435cdcb52 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -140,19 +140,16 @@ static void create_mkey_warn(struct mlx5_ib_dev *dev, int status, void *out)
mlx5_cmd_out_err(dev->mdev, MLX5_CMD_OP_CREATE_MKEY, 0, out);
}
-
-static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings,
- void *to_store)
+static int push_mkey_locked(struct mlx5_cache_ent *ent, bool limit_pendings,
+ void *to_store)
{
XA_STATE(xas, &ent->mkeys, 0);
void *curr;
- xa_lock_irq(&ent->mkeys);
if (limit_pendings &&
- (ent->reserved - ent->stored) > MAX_PENDING_REG_MR) {
- xa_unlock_irq(&ent->mkeys);
+ (ent->reserved - ent->stored) > MAX_PENDING_REG_MR)
return -EAGAIN;
- }
+
while (1) {
/*
* This is cmpxchg (NULL, XA_ZERO_ENTRY) however this version
@@ -191,6 +188,7 @@ static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings,
break;
xa_lock_irq(&ent->mkeys);
}
+ xa_lock_irq(&ent->mkeys);
if (xas_error(&xas))
return xas_error(&xas);
if (WARN_ON(curr))
@@ -198,6 +196,17 @@ static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings,
return 0;
}
+static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings,
+ void *to_store)
+{
+ int ret;
+
+ xa_lock_irq(&ent->mkeys);
+ ret = push_mkey_locked(ent, limit_pendings, to_store);
+ xa_unlock_irq(&ent->mkeys);
+ return ret;
+}
+
static void undo_push_reserve_mkey(struct mlx5_cache_ent *ent)
{
void *old;
@@ -545,7 +554,7 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
{
lockdep_assert_held(&ent->mkeys.xa_lock);
- if (ent->disabled || READ_ONCE(ent->dev->fill_delay))
+ if (ent->disabled || READ_ONCE(ent->dev->fill_delay) || ent->is_tmp)
return;
if (ent->stored < ent->limit) {
ent->fill_to_high_water = true;
@@ -675,7 +684,6 @@ static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache,
struct mlx5_cache_ent *cur;
int cmp;
- mutex_lock(&cache->rb_lock);
/* Figure out where to put new node */
while (*new) {
cur = rb_entry(*new, struct mlx5_cache_ent, node);
@@ -695,7 +703,6 @@ static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache,
rb_link_node(&ent->node, parent, new);
rb_insert_color(&ent->node, &cache->rb_root);
- mutex_unlock(&cache->rb_lock);
return 0;
}
@@ -867,9 +874,10 @@ static void delay_time_func(struct timer_list *t)
WRITE_ONCE(dev->fill_delay, 0);
}
-struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
- struct mlx5r_cache_rb_key rb_key,
- bool persistent_entry)
+struct mlx5_cache_ent *
+mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev,
+ struct mlx5r_cache_rb_key rb_key,
+ bool persistent_entry)
{
struct mlx5_cache_ent *ent;
int order;
@@ -882,6 +890,7 @@ struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ);
ent->rb_key = rb_key;
ent->dev = dev;
+ ent->is_tmp = !persistent_entry;
INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
@@ -906,10 +915,43 @@ struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
mlx5_mkey_cache_debugfs_add_ent(dev, ent);
}
+ else
+ mod_delayed_work(ent->dev->cache.wq,
+ &ent->dev->cache.remove_ent_dwork,
+ msecs_to_jiffies(30 * 1000));
return ent;
}
+static void remove_ent_work_func(struct work_struct *work)
+{
+ struct mlx5_mkey_cache *cache;
+ struct mlx5_cache_ent *ent;
+ struct rb_node *cur;
+
+ cache = container_of(work, struct mlx5_mkey_cache,
+ remove_ent_dwork.work);
+ mutex_lock(&cache->rb_lock);
+ cur = rb_last(&cache->rb_root);
+ while (cur) {
+ ent = rb_entry(cur, struct mlx5_cache_ent, node);
+ cur = rb_prev(cur);
+ mutex_unlock(&cache->rb_lock);
+
+ xa_lock_irq(&ent->mkeys);
+ if (!ent->is_tmp) {
+ xa_unlock_irq(&ent->mkeys);
+ mutex_lock(&cache->rb_lock);
+ continue;
+ }
+ xa_unlock_irq(&ent->mkeys);
+
+ clean_keys(ent->dev, ent);
+ mutex_lock(&cache->rb_lock);
+ }
+ mutex_unlock(&cache->rb_lock);
+}
+
int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
{
struct mlx5_mkey_cache *cache = &dev->cache;
@@ -925,6 +967,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
mutex_init(&dev->slow_path_mutex);
mutex_init(&dev->cache.rb_lock);
dev->cache.rb_root = RB_ROOT;
+ INIT_DELAYED_WORK(&dev->cache.remove_ent_dwork, remove_ent_work_func);
cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
if (!cache->wq) {
mlx5_ib_warn(dev, "failed to create work queue\n");
@@ -934,9 +977,10 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
timer_setup(&dev->delay_timer, delay_time_func, 0);
mlx5_mkey_cache_debugfs_init(dev);
+ mutex_lock(&cache->rb_lock);
for (i = 0; i <= mkey_cache_max_order(dev); i++) {
rb_key.ndescs = 1 << (i + 2);
- ent = mlx5r_cache_create_ent(dev, rb_key, true);
+ ent = mlx5r_cache_create_ent_locked(dev, rb_key, true);
if (IS_ERR(ent)) {
ret = PTR_ERR(ent);
goto err;
@@ -947,6 +991,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
if (ret)
goto err;
+ mutex_unlock(&cache->rb_lock);
for (node = rb_first(root); node; node = rb_next(node)) {
ent = rb_entry(node, struct mlx5_cache_ent, node);
xa_lock_irq(&ent->mkeys);
@@ -957,6 +1002,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
return 0;
err:
+ mutex_unlock(&cache->rb_lock);
mlx5_ib_warn(dev, "failed to create mkey cache entry\n");
return ret;
}
@@ -970,6 +1016,7 @@ int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
if (!dev->cache.wq)
return 0;
+ cancel_delayed_work_sync(&dev->cache.remove_ent_dwork);
mutex_lock(&dev->cache.rb_lock);
for (node = rb_first(root); node; node = rb_next(node)) {
ent = rb_entry(node, struct mlx5_cache_ent, node);
@@ -1751,33 +1798,42 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,
{
struct mlx5_mkey_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
+ int ret;
if (mr->mmkey.cache_ent) {
xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
mr->mmkey.cache_ent->in_use--;
- xa_unlock_irq(&mr->mmkey.cache_ent->mkeys);
goto end;
}
mutex_lock(&cache->rb_lock);
ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key);
- mutex_unlock(&cache->rb_lock);
if (ent) {
if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) {
+ if (ent->disabled) {
+ mutex_unlock(&cache->rb_lock);
+ return -EOPNOTSUPP;
+ }
mr->mmkey.cache_ent = ent;
+ xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
+ mutex_unlock(&cache->rb_lock);
goto end;
}
}
- ent = mlx5r_cache_create_ent(dev, mr->mmkey.rb_key, false);
+ ent = mlx5r_cache_create_ent_locked(dev, mr->mmkey.rb_key, false);
+ mutex_unlock(&cache->rb_lock);
if (IS_ERR(ent))
return PTR_ERR(ent);
mr->mmkey.cache_ent = ent;
+ xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
end:
- return push_mkey(mr->mmkey.cache_ent, false,
- xa_mk_value(mr->mmkey.key));
+ ret = push_mkey_locked(mr->mmkey.cache_ent, false,
+ xa_mk_value(mr->mmkey.key));
+ xa_unlock_irq(&mr->mmkey.cache_ent->mkeys);
+ return ret;
}
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index c51d6c9a4c87..6f447095218f 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -1598,7 +1598,7 @@ int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev)
if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
return 0;
- ent = mlx5r_cache_create_ent(dev, rb_key, true);
+ ent = mlx5r_cache_create_ent_locked(dev, rb_key, true);
if (IS_ERR(ent))
return PTR_ERR(ent);
--
2.17.2
next prev parent reply other threads:[~2023-01-25 22:29 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-01-25 22:28 [PATCH v5 rdma-next 0/6] RDMA/mlx5: Switch MR cache to use RB-tree Michael Guralnik
2023-01-25 22:28 ` [PATCH v5 rdma-next 1/6] RDMA/mlx5: Don't keep umrable 'page_shift' in cache entries Michael Guralnik
2023-01-25 22:28 ` [PATCH v5 rdma-next 2/6] RDMA/mlx5: Remove implicit ODP cache entry Michael Guralnik
2023-01-25 22:28 ` [PATCH v5 rdma-next 3/6] RDMA/mlx5: Change the cache structure to an RB-tree Michael Guralnik
2023-01-25 22:28 ` [PATCH v5 rdma-next 4/6] RDMA/mlx5: Introduce mlx5r_cache_rb_key Michael Guralnik
2023-01-25 22:28 ` [PATCH v5 rdma-next 5/6] RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow Michael Guralnik
2023-01-25 22:28 ` Michael Guralnik [this message]
2023-01-27 17:17 ` [PATCH v5 rdma-next 0/6] RDMA/mlx5: Switch MR cache to use RB-tree Jason Gunthorpe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230125222807.6921-7-michaelgur@nvidia.com \
--to=michaelgur@nvidia.com \
--cc=aharonl@nvidia.com \
--cc=jgg@nvidia.com \
--cc=leonro@nvidia.com \
--cc=linux-rdma@vger.kernel.org \
--cc=maorg@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.