From: Leon Romanovsky <leon@kernel.org>
To: Doug Ledford <dledford@redhat.com>, Jason Gunthorpe <jgg@mellanox.com>
Cc: Eli Cohen <eli@mellanox.com>,
Jack Morgenstein <jackm@dev.mellanox.co.il>,
linux-rdma@vger.kernel.org, Or Gerlitz <ogerlitz@mellanox.com>,
Roland Dreier <roland@purestorage.com>
Subject: [PATCH rdma-next v1 08/12] RDMA/mlx5: Fix MR cache size and limit debugfs
Date: Tue, 10 Mar 2020 10:22:34 +0200
Message-ID: <20200310082238.239865-9-leon@kernel.org>
In-Reply-To: <20200310082238.239865-1-leon@kernel.org>

From: Jason Gunthorpe <jgg@mellanox.com>

The size_write function is supposed to adjust total_mrs to match the
user's request, but it lacks locking and safety checking.

total_mrs can only be adjusted by at most available_mrs; MRs already
assigned to users cannot be revoked. Ensure that the user provides a
target value within the range permitted by available_mrs and within the
high/low water marks.
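
For illustration, here is a minimal user-space sketch of that bounds
check (struct cache_ent and size_target_ok are hypothetical names that
only mirror the driver arithmetic; this is not the driver code):

  #include <stdio.h>

  /*
   * Model of the size_write check: the user writes a new total_mrs
   * target.  MRs already handed out (total_mrs - available_mrs) cannot
   * be reclaimed, and the derived available_mrs target must stay within
   * the limit..2*limit water marks.
   */
  struct cache_ent {
          unsigned int total_mrs;
          unsigned int available_mrs;
          unsigned int limit;
  };

  static int size_target_ok(const struct cache_ent *ent,
                            unsigned int target, unsigned int *avail)
  {
          unsigned int in_use = ent->total_mrs - ent->available_mrs;

          if (target < in_use)
                  return 0;       /* would revoke MRs in use */
          *avail = target - in_use;
          return *avail >= ent->limit && *avail <= 2 * ent->limit;
  }

  int main(void)
  {
          /* 6 MRs in use; target 9 -> avail target 3, inside 3..6 */
          struct cache_ent ent = { .total_mrs = 10, .available_mrs = 4,
                                   .limit = 3 };
          unsigned int avail;

          printf("9 -> %s\n",
                 size_target_ok(&ent, 9, &avail) ? "ok" : "EINVAL");
          printf("5 -> %s\n",
                 size_target_ok(&ent, 5, &avail) ? "ok" : "EINVAL");
          return 0;
  }
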
limit_write has confusing and incorrect sanity checking, and it lacks
the ability to deallocate MRs when the limit is reduced.

Since both functions use the same algorithm to adjust available_mrs,
consolidate it into one function and write it correctly. Fix the
locking by holding the spinlock for all accesses to ent->X.

Always fail if the user provides a malformed string.
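
A simplified user-space model of the consolidated loop follows
(try_add_keys and remove_one are hypothetical stand-ins for add_keys
and remove_cache_mr; the pending-MR accounting and the lock
drop/reacquire of the real resize_available_mrs are elided, and
struct cache_ent is from the sketch above):

  /*
   * Step available_mrs toward a target, one action per iteration.
   * With limit_fill the target is recomputed as 2 * limit on every
   * pass, so a concurrent limit change is honored.
   */
  static int try_add_keys(struct cache_ent *ent, unsigned int num)
  {
          ent->available_mrs += num;
          ent->total_mrs += num;
          return 0;       /* the driver can fail here and retry */
  }

  static void remove_one(struct cache_ent *ent)
  {
          ent->available_mrs--;
          ent->total_mrs--;
  }

  static int adjust_available(struct cache_ent *ent, unsigned int target,
                              int limit_fill)
  {
          while (1) {
                  if (limit_fill)
                          target = ent->limit * 2;
                  if (target == ent->available_mrs)
                          return 0;
                  if (target > ent->available_mrs) {
                          if (try_add_keys(ent,
                                           target - ent->available_mrs))
                                  return -1;
                  } else {
                          remove_one(ent);
                  }
          }
  }
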
Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/hw/mlx5/mr.c | 152 ++++++++++++++++++--------------
1 file changed, 88 insertions(+), 64 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 9b980ef326b4..091e24c58e2c 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -140,7 +140,7 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context)
complete(&ent->compl);
}
-static int add_keys(struct mlx5_cache_ent *ent, int num)
+static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
{
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
struct mlx5_ib_mr *mr;
@@ -200,30 +200,54 @@ static int add_keys(struct mlx5_cache_ent *ent, int num)
return err;
}
-static void remove_keys(struct mlx5_cache_ent *ent, int num)
+static void remove_cache_mr(struct mlx5_cache_ent *ent)
{
- struct mlx5_ib_mr *tmp_mr;
struct mlx5_ib_mr *mr;
- LIST_HEAD(del_list);
- int i;
- for (i = 0; i < num; i++) {
- spin_lock_irq(&ent->lock);
- if (list_empty(&ent->head)) {
- spin_unlock_irq(&ent->lock);
- break;
- }
- mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
- list_move(&mr->list, &del_list);
- ent->available_mrs--;
- ent->total_mrs--;
+ spin_lock_irq(&ent->lock);
+ if (list_empty(&ent->head)) {
spin_unlock_irq(&ent->lock);
- mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey);
+ return;
}
+ mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+ list_del(&mr->list);
+ ent->available_mrs--;
+ ent->total_mrs--;
+ spin_unlock_irq(&ent->lock);
+ mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey);
+ kfree(mr);
+}
- list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
- list_del(&mr->list);
- kfree(mr);
+static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
+ bool limit_fill)
+{
+ int err;
+
+ lockdep_assert_held(&ent->lock);
+
+ while (true) {
+ if (limit_fill)
+ target = ent->limit * 2;
+ if (target == ent->available_mrs + ent->pending)
+ return 0;
+ if (target > ent->available_mrs + ent->pending) {
+ u32 todo = target - (ent->available_mrs + ent->pending);
+
+ spin_unlock_irq(&ent->lock);
+ err = add_keys(ent, todo);
+ if (err == -EAGAIN)
+ usleep_range(3000, 5000);
+ spin_lock_irq(&ent->lock);
+ if (err) {
+ if (err != -EAGAIN)
+ return err;
+ } else
+ return 0;
+ } else {
+ spin_unlock_irq(&ent->lock);
+ remove_cache_mr(ent);
+ spin_lock_irq(&ent->lock);
+ }
}
}
@@ -231,33 +255,38 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct mlx5_cache_ent *ent = filp->private_data;
- char lbuf[20] = {0};
- u32 var;
+ u32 target;
int err;
- count = min(count, sizeof(lbuf) - 1);
- if (copy_from_user(lbuf, buf, count))
- return -EFAULT;
-
- if (sscanf(lbuf, "%u", &var) != 1)
- return -EINVAL;
-
- if (var < ent->limit)
- return -EINVAL;
-
- if (var > ent->total_mrs) {
- do {
- err = add_keys(ent, var - ent->total_mrs);
- if (err && err != -EAGAIN)
- return err;
+ err = kstrtou32_from_user(buf, count, 0, &target);
+ if (err)
+ return err;
- usleep_range(3000, 5000);
- } while (err);
- } else if (var < ent->total_mrs) {
- remove_keys(ent, ent->total_mrs - var);
+ /*
+ * Target is the new value of total_mrs the user requests, however we
+ * cannot free MRs that are in use. Compute the target value for
+ * available_mrs.
+ */
+ spin_lock_irq(&ent->lock);
+ if (target < ent->total_mrs - ent->available_mrs) {
+ err = -EINVAL;
+ goto err_unlock;
+ }
+ target = target - (ent->total_mrs - ent->available_mrs);
+ if (target < ent->limit || target > ent->limit*2) {
+ err = -EINVAL;
+ goto err_unlock;
}
+ err = resize_available_mrs(ent, target, false);
+ if (err)
+ goto err_unlock;
+ spin_unlock_irq(&ent->lock);
return count;
+
+err_unlock:
+ spin_unlock_irq(&ent->lock);
+ return err;
}
static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
@@ -285,28 +314,23 @@ static ssize_t limit_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct mlx5_cache_ent *ent = filp->private_data;
- char lbuf[20] = {0};
u32 var;
int err;
- count = min(count, sizeof(lbuf) - 1);
- if (copy_from_user(lbuf, buf, count))
- return -EFAULT;
-
- if (sscanf(lbuf, "%u", &var) != 1)
- return -EINVAL;
-
- if (var > ent->total_mrs)
- return -EINVAL;
+ err = kstrtou32_from_user(buf, count, 0, &var);
+ if (err)
+ return err;
+ /*
+ * Upon set we immediately fill the cache to high water mark implied by
+ * the limit.
+ */
+ spin_lock_irq(&ent->lock);
ent->limit = var;
-
- if (ent->available_mrs < ent->limit) {
- err = add_keys(ent, 2 * ent->limit - ent->available_mrs);
- if (err)
- return err;
- }
-
+ err = resize_available_mrs(ent, 0, true);
+ spin_unlock_irq(&ent->lock);
+ if (err)
+ return err;
return count;
}
@@ -371,20 +395,20 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
}
} else if (ent->available_mrs > 2 * ent->limit) {
/*
- * The remove_keys() logic is performed as garbage collection
- * task. Such task is intended to be run when no other active
- * processes are running.
+ * The remove_cache_mr() logic is performed as garbage
+ * collection task. Such task is intended to be run when no
+ * other active processes are running.
*
* The need_resched() will return TRUE if there are user tasks
* to be activated in near future.
*
- * In such case, we don't execute remove_keys() and postpone
- * the garbage collection work to try to run in next cycle,
- * in order to free CPU resources to other tasks.
+ * In such case, we don't execute remove_cache_mr() and postpone
+ * the garbage collection work to try to run in next cycle, in
+ * order to free CPU resources to other tasks.
*/
if (!need_resched() && !someone_adding(cache) &&
time_after(jiffies, cache->last_add + 300 * HZ)) {
- remove_keys(ent, 1);
+ remove_cache_mr(ent);
if (ent->available_mrs > ent->limit)
queue_work(cache->wq, &ent->work);
} else {
--
2.24.1