From: Leon Romanovsky <leon@kernel.org>
To: Doug Ledford <dledford@redhat.com>, Jason Gunthorpe <jgg@mellanox.com>
Cc: Eli Cohen <eli@mellanox.com>,
Jack Morgenstein <jackm@dev.mellanox.co.il>,
linux-rdma@vger.kernel.org, Or Gerlitz <ogerlitz@mellanox.com>,
Roland Dreier <roland@purestorage.com>
Subject: [PATCH rdma-next v1 08/12] RDMA/mlx5: Fix MR cache size and limit debugfs
Date: Tue, 10 Mar 2020 10:22:34 +0200 [thread overview]
Message-ID: <20200310082238.239865-9-leon@kernel.org> (raw)
In-Reply-To: <20200310082238.239865-1-leon@kernel.org>
From: Jason Gunthorpe <jgg@mellanox.com>
The size_write function is supposed to adjust the total_mr's to match
the user's request, but lacks locking and safety checking.
total_mrs can only be adjusted by at most available_mrs. mrs already
assigned to users cannot be revoked. Ensure that the user provides
a target value within the range of available_mrs and within the high/low
water mark.
limit_write has confusing and wrong sanity checking, and doesn't have the
ability to deallocate on limit reduction.
Since both functions use the same algorithm to adjust the available_mrs,
consolidate it into one function and write it correctly. Fix the locking
and by holding the spinlock for all accesses to ent->X.
Always fail if the user provides a malformed string.
Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/hw/mlx5/mr.c | 152 ++++++++++++++++++--------------
1 file changed, 88 insertions(+), 64 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 9b980ef326b4..091e24c58e2c 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -140,7 +140,7 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context)
complete(&ent->compl);
}
-static int add_keys(struct mlx5_cache_ent *ent, int num)
+static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
{
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
struct mlx5_ib_mr *mr;
@@ -200,30 +200,54 @@ static int add_keys(struct mlx5_cache_ent *ent, int num)
return err;
}
-static void remove_keys(struct mlx5_cache_ent *ent, int num)
+static void remove_cache_mr(struct mlx5_cache_ent *ent)
{
- struct mlx5_ib_mr *tmp_mr;
struct mlx5_ib_mr *mr;
- LIST_HEAD(del_list);
- int i;
- for (i = 0; i < num; i++) {
- spin_lock_irq(&ent->lock);
- if (list_empty(&ent->head)) {
- spin_unlock_irq(&ent->lock);
- break;
- }
- mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
- list_move(&mr->list, &del_list);
- ent->available_mrs--;
- ent->total_mrs--;
+ spin_lock_irq(&ent->lock);
+ if (list_empty(&ent->head)) {
spin_unlock_irq(&ent->lock);
- mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey);
+ return;
}
+ mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+ list_del(&mr->list);
+ ent->available_mrs--;
+ ent->total_mrs--;
+ spin_unlock_irq(&ent->lock);
+ mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey);
+ kfree(mr);
+}
- list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
- list_del(&mr->list);
- kfree(mr);
+static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
+ bool limit_fill)
+{
+ int err;
+
+ lockdep_assert_held(&ent->lock);
+
+ while (true) {
+ if (limit_fill)
+ target = ent->limit * 2;
+ if (target == ent->available_mrs + ent->pending)
+ return 0;
+ if (target > ent->available_mrs + ent->pending) {
+ u32 todo = target - (ent->available_mrs + ent->pending);
+
+ spin_unlock_irq(&ent->lock);
+ err = add_keys(ent, todo);
+ if (err == -EAGAIN)
+ usleep_range(3000, 5000);
+ spin_lock_irq(&ent->lock);
+ if (err) {
+ if (err != -EAGAIN)
+ return err;
+ } else
+ return 0;
+ } else {
+ spin_unlock_irq(&ent->lock);
+ remove_cache_mr(ent);
+ spin_lock_irq(&ent->lock);
+ }
}
}
@@ -231,33 +255,38 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct mlx5_cache_ent *ent = filp->private_data;
- char lbuf[20] = {0};
- u32 var;
+ u32 target;
int err;
- count = min(count, sizeof(lbuf) - 1);
- if (copy_from_user(lbuf, buf, count))
- return -EFAULT;
-
- if (sscanf(lbuf, "%u", &var) != 1)
- return -EINVAL;
-
- if (var < ent->limit)
- return -EINVAL;
-
- if (var > ent->total_mrs) {
- do {
- err = add_keys(ent, var - ent->total_mrs);
- if (err && err != -EAGAIN)
- return err;
+ err = kstrtou32_from_user(buf, count, 0, &target);
+ if (err)
+ return err;
- usleep_range(3000, 5000);
- } while (err);
- } else if (var < ent->total_mrs) {
- remove_keys(ent, ent->total_mrs - var);
+ /*
+ * Target is the new value of total_mrs the user requests, however we
+ * cannot free MRs that are in use. Compute the target value for
+ * available_mrs.
+ */
+ spin_lock_irq(&ent->lock);
+ if (target < ent->total_mrs - ent->available_mrs) {
+ err = -EINVAL;
+ goto err_unlock;
+ }
+ target = target - (ent->total_mrs - ent->available_mrs);
+ if (target < ent->limit || target > ent->limit*2) {
+ err = -EINVAL;
+ goto err_unlock;
}
+ err = resize_available_mrs(ent, target, false);
+ if (err)
+ goto err_unlock;
+ spin_unlock_irq(&ent->lock);
return count;
+
+err_unlock:
+ spin_unlock_irq(&ent->lock);
+ return err;
}
static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
@@ -285,28 +314,23 @@ static ssize_t limit_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct mlx5_cache_ent *ent = filp->private_data;
- char lbuf[20] = {0};
u32 var;
int err;
- count = min(count, sizeof(lbuf) - 1);
- if (copy_from_user(lbuf, buf, count))
- return -EFAULT;
-
- if (sscanf(lbuf, "%u", &var) != 1)
- return -EINVAL;
-
- if (var > ent->total_mrs)
- return -EINVAL;
+ err = kstrtou32_from_user(buf, count, 0, &var);
+ if (err)
+ return err;
+ /*
+ * Upon set we immediately fill the cache to high water mark implied by
+ * the limit.
+ */
+ spin_lock_irq(&ent->lock);
ent->limit = var;
-
- if (ent->available_mrs < ent->limit) {
- err = add_keys(ent, 2 * ent->limit - ent->available_mrs);
- if (err)
- return err;
- }
-
+ err = resize_available_mrs(ent, 0, true);
+ spin_unlock_irq(&ent->lock);
+ if (err)
+ return err;
return count;
}
@@ -371,20 +395,20 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
}
} else if (ent->available_mrs > 2 * ent->limit) {
/*
- * The remove_keys() logic is performed as garbage collection
- * task. Such task is intended to be run when no other active
- * processes are running.
+ * The remove_cache_mr() logic is performed as garbage
+ * collection task. Such task is intended to be run when no
+ * other active processes are running.
*
* The need_resched() will return TRUE if there are user tasks
* to be activated in near future.
*
- * In such case, we don't execute remove_keys() and postpone
- * the garbage collection work to try to run in next cycle,
- * in order to free CPU resources to other tasks.
+ * In such case, we don't execute remove_cache_mr() and postpone
+ * the garbage collection work to try to run in next cycle, in
+ * order to free CPU resources to other tasks.
*/
if (!need_resched() && !someone_adding(cache) &&
time_after(jiffies, cache->last_add + 300 * HZ)) {
- remove_keys(ent, 1);
+ remove_cache_mr(ent);
if (ent->available_mrs > ent->limit)
queue_work(cache->wq, &ent->work);
} else {
--
2.24.1
next prev parent reply other threads:[~2020-03-10 8:23 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20200310082238.239865-1-leon@kernel.org>
2020-03-10 8:22 ` [PATCH mlx5-next v1 01/12] {IB,net}/mlx5: Setup mkey variant before mr create command invocation Leon Romanovsky
2020-03-10 8:22 ` [PATCH mlx5-next v1 02/12] {IB,net}/mlx5: Assign mkey variant in mlx5_ib only Leon Romanovsky
2020-03-10 8:22 ` [PATCH rdma-next v1 03/12] IB/mlx5: Replace spinlock protected write with atomic var Leon Romanovsky
2020-03-10 8:22 ` [PATCH mlx5-next v1 04/12] {IB,net}/mlx5: Move asynchronous mkey creation to mlx5_ib Leon Romanovsky
2020-03-10 8:22 ` [PATCH rdma-next v1 05/12] RDMA/mlx5: Rename the tracking variables for the MR cache Leon Romanovsky
2020-03-10 8:22 ` [PATCH rdma-next v1 06/12] RDMA/mlx5: Simplify how the MR cache bucket is located Leon Romanovsky
2020-03-10 8:22 ` [PATCH rdma-next v1 07/12] RDMA/mlx5: Always remove MRs from the cache before destroying them Leon Romanovsky
2020-03-10 8:22 ` Leon Romanovsky [this message]
2020-03-10 8:22 ` [PATCH rdma-next v1 09/12] RDMA/mlx5: Lock access to ent->available_mrs/limit when doing queue_work Leon Romanovsky
2020-03-10 8:22 ` [PATCH rdma-next v1 10/12] RDMA/mlx5: Fix locking in MR cache work queue Leon Romanovsky
2020-03-10 8:22 ` [PATCH rdma-next v1 11/12] RDMA/mlx5: Revise how the hysteresis scheme works for cache filling Leon Romanovsky
2020-03-10 8:22 ` [PATCH rdma-next v1 12/12] RDMA/mlx5: Allow MRs to be created in the cache synchronously Leon Romanovsky
2020-03-10 8:35 ` [PATCH rdma-next v1 00/12] MR cache fixes and refactoring Leon Romanovsky
2020-03-13 13:41 ` Jason Gunthorpe
2020-03-13 13:50 ` Leon Romanovsky
2020-03-13 14:28 ` Jason Gunthorpe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200310082238.239865-9-leon@kernel.org \
--to=leon@kernel.org \
--cc=dledford@redhat.com \
--cc=eli@mellanox.com \
--cc=jackm@dev.mellanox.co.il \
--cc=jgg@mellanox.com \
--cc=linux-rdma@vger.kernel.org \
--cc=ogerlitz@mellanox.com \
--cc=roland@purestorage.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.