* [PATCH] IB: fix memlock limit handling code
@ 2023-09-15 20:03 Maxim Samoylov
2023-09-18 12:09 ` Leon Romanovsky
0 siblings, 1 reply; 5+ messages in thread
From: Maxim Samoylov @ 2023-09-15 20:03 UTC (permalink / raw)
To: linux-rdma
Cc: Jason Gunthorpe, Leon Romanovsky, Dennis Dalessandro,
Christian Benvenuti, Bernard Metzler, Vadim Fedorenko,
Maxim Samoylov
This patch fixes handling for RLIM_INFINITY value uniformly across
the infiniband/rdma subsystem.
Currently the infinity constant is treated as an actual limit
value, which can trigger unexpected ENOMEM errors in
corner-case configurations.
Let's also provide the single helper to check against process
MEMLOCK limit while registering user memory region mappings.
Signed-off-by: Maxim Samoylov <max7255@meta.com>
---
drivers/infiniband/core/umem.c | 7 ++-----
drivers/infiniband/hw/qib/qib_user_pages.c | 7 +++----
drivers/infiniband/hw/usnic/usnic_uiom.c | 6 ++----
drivers/infiniband/sw/siw/siw_mem.c | 6 +++---
drivers/infiniband/sw/siw/siw_verbs.c | 23 ++++++++++------------
include/rdma/ib_umem.h | 11 +++++++++++
6 files changed, 31 insertions(+), 29 deletions(-)
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index f9ab671c8eda..3b197bdc21bf 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -35,12 +35,12 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
-#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/count_zeros.h>
+#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include "uverbs.h"
@@ -150,7 +150,6 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
{
struct ib_umem *umem;
struct page **page_list;
- unsigned long lock_limit;
unsigned long new_pinned;
unsigned long cur_base;
unsigned long dma_attr = 0;
@@ -200,10 +199,8 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
goto out;
}
- lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
new_pinned = atomic64_add_return(npages, &mm->pinned_vm);
- if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
+ if (!ib_umem_check_rlimit_memlock(new_pinned)) {
atomic64_sub(npages, &mm->pinned_vm);
ret = -ENOMEM;
goto out;
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index 1bb7507325bc..3889aefdfc6b 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -32,8 +32,8 @@
*/
#include <linux/mm.h>
-#include <linux/sched/signal.h>
#include <linux/device.h>
+#include <rdma/ib_umem.h>
#include "qib.h"
@@ -94,14 +94,13 @@ int qib_map_page(struct pci_dev *hwdev, struct page *page, dma_addr_t *daddr)
int qib_get_user_pages(unsigned long start_page, size_t num_pages,
struct page **p)
{
- unsigned long locked, lock_limit;
+ unsigned long locked;
size_t got;
int ret;
- lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
locked = atomic64_add_return(num_pages, &current->mm->pinned_vm);
- if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
+ if (!ib_umem_check_rlimit_memlock(locked)) {
ret = -ENOMEM;
goto bail;
}
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 84e0f41e7dfa..fdbb9737c7f0 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -34,13 +34,13 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
-#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/hugetlb.h>
#include <linux/iommu.h>
#include <linux/workqueue.h>
#include <linux/list.h>
#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
#include "usnic_log.h"
#include "usnic_uiom.h"
@@ -90,7 +90,6 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
struct scatterlist *sg;
struct usnic_uiom_chunk *chunk;
unsigned long locked;
- unsigned long lock_limit;
unsigned long cur_base;
unsigned long npages;
int ret;
@@ -124,9 +123,8 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
mmap_read_lock(mm);
locked = atomic64_add_return(npages, &current->mm->pinned_vm);
- lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
- if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+ if (!ib_umem_check_rlimit_memlock(locked)) {
ret = -ENOMEM;
goto out;
}
diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
index e6e25f15567d..54991ddeabc7 100644
--- a/drivers/infiniband/sw/siw/siw_mem.c
+++ b/drivers/infiniband/sw/siw/siw_mem.c
@@ -5,6 +5,7 @@
#include <linux/gfp.h>
#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
@@ -367,7 +368,6 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
struct siw_umem *umem;
struct mm_struct *mm_s;
u64 first_page_va;
- unsigned long mlock_limit;
unsigned int foll_flags = FOLL_LONGTERM;
int num_pages, num_chunks, i, rv = 0;
@@ -396,9 +396,9 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
mmap_read_lock(mm_s);
- mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
- if (atomic64_add_return(num_pages, &mm_s->pinned_vm) > mlock_limit) {
+ if (!ib_umem_check_rlimit_memlock(
+ atomic64_add_return(num_pages, &mm_s->pinned_vm))) {
rv = -ENOMEM;
goto out_sem_up;
}
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index fdbef3254e30..ad63a8db5502 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -12,6 +12,7 @@
#include <rdma/iw_cm.h>
#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/uverbs_ioctl.h>
@@ -1321,8 +1322,8 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
struct siw_umem *umem = NULL;
struct siw_ureq_reg_mr ureq;
struct siw_device *sdev = to_siw_dev(pd->device);
-
- unsigned long mem_limit = rlimit(RLIMIT_MEMLOCK);
+ unsigned long num_pages =
+ (PAGE_ALIGN(len + (start & ~PAGE_MASK))) >> PAGE_SHIFT;
int rv;
siw_dbg_pd(pd, "start: 0x%pK, va: 0x%pK, len: %llu\n",
@@ -1338,19 +1339,15 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
rv = -EINVAL;
goto err_out;
}
- if (mem_limit != RLIM_INFINITY) {
- unsigned long num_pages =
- (PAGE_ALIGN(len + (start & ~PAGE_MASK))) >> PAGE_SHIFT;
- mem_limit >>= PAGE_SHIFT;
- if (num_pages > mem_limit - current->mm->locked_vm) {
- siw_dbg_pd(pd, "pages req %lu, max %lu, lock %lu\n",
- num_pages, mem_limit,
- current->mm->locked_vm);
- rv = -ENOMEM;
- goto err_out;
- }
+ if (!ib_umem_check_rlimit_memlock(num_pages + current->mm->locked_vm)) {
+ siw_dbg_pd(pd, "pages req %lu, max %lu, lock %lu\n",
+ num_pages, rlimit(RLIMIT_MEMLOCK),
+ current->mm->locked_vm);
+ rv = -ENOMEM;
+ goto err_out;
}
+
umem = siw_umem_get(start, len, ib_access_writable(rights));
if (IS_ERR(umem)) {
rv = PTR_ERR(umem);
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 95896472a82b..3970da64b01e 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -11,6 +11,7 @@
#include <linux/scatterlist.h>
#include <linux/workqueue.h>
#include <rdma/ib_verbs.h>
+#include <linux/sched/signal.h>
struct ib_ucontext;
struct ib_umem_odp;
@@ -71,6 +72,16 @@ static inline size_t ib_umem_num_pages(struct ib_umem *umem)
return ib_umem_num_dma_blocks(umem, PAGE_SIZE);
}
+static inline bool ib_umem_check_rlimit_memlock(unsigned long value)
+{
+ unsigned long lock_limit = rlimit(RLIMIT_MEMLOCK);
+
+ if (lock_limit == RLIM_INFINITY || capable(CAP_IPC_LOCK))
+ return true;
+
+ return value <= PFN_DOWN(lock_limit);
+}
+
static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
struct ib_umem *umem,
unsigned long pgsz)
--
2.39.3
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] IB: fix memlock limit handling code
2023-09-15 20:03 [PATCH] IB: fix memlock limit handling code Maxim Samoylov
@ 2023-09-18 12:09 ` Leon Romanovsky
2023-09-21 16:31 ` Maxim Samoylov
0 siblings, 1 reply; 5+ messages in thread
From: Leon Romanovsky @ 2023-09-18 12:09 UTC (permalink / raw)
To: Maxim Samoylov
Cc: linux-rdma, Jason Gunthorpe, Dennis Dalessandro,
Christian Benvenuti, Bernard Metzler, Vadim Fedorenko
On Fri, Sep 15, 2023 at 01:03:53PM -0700, Maxim Samoylov wrote:
> This patch fixes handling for RLIM_INFINITY value uniformly across
> the infiniband/rdma subsystem.
>
> Currently infinity constant is treated as actual limit
> value, which can trigger unexpected ENOMEM errors in
> corner-case configurations
Can you please provide an example and why these corner cases are
important?
BTW, The patch looks good to me, just need more information in commit message.
Thanks
>
> Let's also provide the single helper to check against process
> MEMLOCK limit while registering user memory region mappings.
>
> Signed-off-by: Maxim Samoylov <max7255@meta.com>
> ---
> drivers/infiniband/core/umem.c | 7 ++-----
> drivers/infiniband/hw/qib/qib_user_pages.c | 7 +++----
> drivers/infiniband/hw/usnic/usnic_uiom.c | 6 ++----
> drivers/infiniband/sw/siw/siw_mem.c | 6 +++---
> drivers/infiniband/sw/siw/siw_verbs.c | 23 ++++++++++------------
> include/rdma/ib_umem.h | 11 +++++++++++
> 6 files changed, 31 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
> index f9ab671c8eda..3b197bdc21bf 100644
> --- a/drivers/infiniband/core/umem.c
> +++ b/drivers/infiniband/core/umem.c
> @@ -35,12 +35,12 @@
>
> #include <linux/mm.h>
> #include <linux/dma-mapping.h>
> -#include <linux/sched/signal.h>
> #include <linux/sched/mm.h>
> #include <linux/export.h>
> #include <linux/slab.h>
> #include <linux/pagemap.h>
> #include <linux/count_zeros.h>
> +#include <rdma/ib_umem.h>
> #include <rdma/ib_umem_odp.h>
>
> #include "uverbs.h"
> @@ -150,7 +150,6 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
> {
> struct ib_umem *umem;
> struct page **page_list;
> - unsigned long lock_limit;
> unsigned long new_pinned;
> unsigned long cur_base;
> unsigned long dma_attr = 0;
> @@ -200,10 +199,8 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
> goto out;
> }
>
> - lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> -
> new_pinned = atomic64_add_return(npages, &mm->pinned_vm);
> - if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
> + if (!ib_umem_check_rlimit_memlock(new_pinned)) {
> atomic64_sub(npages, &mm->pinned_vm);
> ret = -ENOMEM;
> goto out;
> diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
> index 1bb7507325bc..3889aefdfc6b 100644
> --- a/drivers/infiniband/hw/qib/qib_user_pages.c
> +++ b/drivers/infiniband/hw/qib/qib_user_pages.c
> @@ -32,8 +32,8 @@
> */
>
> #include <linux/mm.h>
> -#include <linux/sched/signal.h>
> #include <linux/device.h>
> +#include <rdma/ib_umem.h>
>
> #include "qib.h"
>
> @@ -94,14 +94,13 @@ int qib_map_page(struct pci_dev *hwdev, struct page *page, dma_addr_t *daddr)
> int qib_get_user_pages(unsigned long start_page, size_t num_pages,
> struct page **p)
> {
> - unsigned long locked, lock_limit;
> + unsigned long locked;
> size_t got;
> int ret;
>
> - lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> locked = atomic64_add_return(num_pages, &current->mm->pinned_vm);
>
> - if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
> + if (!ib_umem_check_rlimit_memlock(locked)) {
> ret = -ENOMEM;
> goto bail;
> }
> diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
> index 84e0f41e7dfa..fdbb9737c7f0 100644
> --- a/drivers/infiniband/hw/usnic/usnic_uiom.c
> +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
> @@ -34,13 +34,13 @@
>
> #include <linux/mm.h>
> #include <linux/dma-mapping.h>
> -#include <linux/sched/signal.h>
> #include <linux/sched/mm.h>
> #include <linux/hugetlb.h>
> #include <linux/iommu.h>
> #include <linux/workqueue.h>
> #include <linux/list.h>
> #include <rdma/ib_verbs.h>
> +#include <rdma/ib_umem.h>
>
> #include "usnic_log.h"
> #include "usnic_uiom.h"
> @@ -90,7 +90,6 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
> struct scatterlist *sg;
> struct usnic_uiom_chunk *chunk;
> unsigned long locked;
> - unsigned long lock_limit;
> unsigned long cur_base;
> unsigned long npages;
> int ret;
> @@ -124,9 +123,8 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
> mmap_read_lock(mm);
>
> locked = atomic64_add_return(npages, &current->mm->pinned_vm);
> - lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
>
> - if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
> + if (!ib_umem_check_rlimit_memlock(locked)) {
> ret = -ENOMEM;
> goto out;
> }
> diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
> index e6e25f15567d..54991ddeabc7 100644
> --- a/drivers/infiniband/sw/siw/siw_mem.c
> +++ b/drivers/infiniband/sw/siw/siw_mem.c
> @@ -5,6 +5,7 @@
>
> #include <linux/gfp.h>
> #include <rdma/ib_verbs.h>
> +#include <rdma/ib_umem.h>
> #include <linux/dma-mapping.h>
> #include <linux/slab.h>
> #include <linux/sched/mm.h>
> @@ -367,7 +368,6 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
> struct siw_umem *umem;
> struct mm_struct *mm_s;
> u64 first_page_va;
> - unsigned long mlock_limit;
> unsigned int foll_flags = FOLL_LONGTERM;
> int num_pages, num_chunks, i, rv = 0;
>
> @@ -396,9 +396,9 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
>
> mmap_read_lock(mm_s);
>
> - mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
>
> - if (atomic64_add_return(num_pages, &mm_s->pinned_vm) > mlock_limit) {
> + if (!ib_umem_check_rlimit_memlock(
> + atomic64_add_return(num_pages, &mm_s->pinned_vm))) {
> rv = -ENOMEM;
> goto out_sem_up;
> }
> diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
> index fdbef3254e30..ad63a8db5502 100644
> --- a/drivers/infiniband/sw/siw/siw_verbs.c
> +++ b/drivers/infiniband/sw/siw/siw_verbs.c
> @@ -12,6 +12,7 @@
>
> #include <rdma/iw_cm.h>
> #include <rdma/ib_verbs.h>
> +#include <rdma/ib_umem.h>
> #include <rdma/ib_user_verbs.h>
> #include <rdma/uverbs_ioctl.h>
>
> @@ -1321,8 +1322,8 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
> struct siw_umem *umem = NULL;
> struct siw_ureq_reg_mr ureq;
> struct siw_device *sdev = to_siw_dev(pd->device);
> -
> - unsigned long mem_limit = rlimit(RLIMIT_MEMLOCK);
> + unsigned long num_pages =
> + (PAGE_ALIGN(len + (start & ~PAGE_MASK))) >> PAGE_SHIFT;
> int rv;
>
> siw_dbg_pd(pd, "start: 0x%pK, va: 0x%pK, len: %llu\n",
> @@ -1338,19 +1339,15 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
> rv = -EINVAL;
> goto err_out;
> }
> - if (mem_limit != RLIM_INFINITY) {
> - unsigned long num_pages =
> - (PAGE_ALIGN(len + (start & ~PAGE_MASK))) >> PAGE_SHIFT;
> - mem_limit >>= PAGE_SHIFT;
>
> - if (num_pages > mem_limit - current->mm->locked_vm) {
> - siw_dbg_pd(pd, "pages req %lu, max %lu, lock %lu\n",
> - num_pages, mem_limit,
> - current->mm->locked_vm);
> - rv = -ENOMEM;
> - goto err_out;
> - }
> + if (!ib_umem_check_rlimit_memlock(num_pages + current->mm->locked_vm)) {
> + siw_dbg_pd(pd, "pages req %lu, max %lu, lock %lu\n",
> + num_pages, rlimit(RLIMIT_MEMLOCK),
> + current->mm->locked_vm);
> + rv = -ENOMEM;
> + goto err_out;
> }
> +
> umem = siw_umem_get(start, len, ib_access_writable(rights));
> if (IS_ERR(umem)) {
> rv = PTR_ERR(umem);
> diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
> index 95896472a82b..3970da64b01e 100644
> --- a/include/rdma/ib_umem.h
> +++ b/include/rdma/ib_umem.h
> @@ -11,6 +11,7 @@
> #include <linux/scatterlist.h>
> #include <linux/workqueue.h>
> #include <rdma/ib_verbs.h>
> +#include <linux/sched/signal.h>
>
> struct ib_ucontext;
> struct ib_umem_odp;
> @@ -71,6 +72,16 @@ static inline size_t ib_umem_num_pages(struct ib_umem *umem)
> return ib_umem_num_dma_blocks(umem, PAGE_SIZE);
> }
>
> +static inline bool ib_umem_check_rlimit_memlock(unsigned long value)
> +{
> + unsigned long lock_limit = rlimit(RLIMIT_MEMLOCK);
> +
> + if (lock_limit == RLIM_INFINITY || capable(CAP_IPC_LOCK))
> + return true;
> +
> + return value <= PFN_DOWN(lock_limit);
> +}
> +
> static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
> struct ib_umem *umem,
> unsigned long pgsz)
> --
> 2.39.3
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] IB: fix memlock limit handling code
2023-09-18 12:09 ` Leon Romanovsky
@ 2023-09-21 16:31 ` Maxim Samoylov
2023-09-21 16:49 ` Jason Gunthorpe
2023-09-27 17:53 ` Leon Romanovsky
0 siblings, 2 replies; 5+ messages in thread
From: Maxim Samoylov @ 2023-09-21 16:31 UTC (permalink / raw)
To: Leon Romanovsky
Cc: linux-rdma@vger.kernel.org, Jason Gunthorpe, Dennis Dalessandro,
Christian Benvenuti, Bernard Metzler, Vadim Fedorenko
On 18/09/2023 14:09, Leon Romanovsky wrote:
> On Fri, Sep 15, 2023 at 01:03:53PM -0700, Maxim Samoylov wrote:
>> This patch fixes handling for RLIM_INFINITY value uniformly across
>> the infiniband/rdma subsystem.
>>
>> Currently infinity constant is treated as actual limit
>> value, which can trigger unexpected ENOMEM errors in
>> corner-case configurations
>
> Can you please provide an example and why these corner cases are
> important?
>
Actually, I’ve come up with proposing this minor patch to avoid
confusion I got while investigating production case with
ib_reg_user_mr() returning ENOMEM for (presumably) no particular reason.
Along with that I came across some curious repro.
Consider the following code:
addr = mmap(... , PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, ... )
/* IB objects initialisation */
while (1) {
ibv_reg_mr_iova(pd, (void*)addr, LENGTH, (uint64_t)addr,
IBV_ACCESS_LOCAL_WRITE|IBV_ACCESS_REMOTE_WRITE|
IBV_ACCESS_REMOTE_READ|IBV_ACCESS_RELAXED_ORDERING);
}
This cycle can work almost eternally without triggering any errors
- until the kernel will run out of memory or we finally bail out after
comparison against thread memlock rlimit.
As far as I understand, this means we can continuously register the same
memory region for a single device over and over, bloating number of
per-device MRs. Don't know for sure if it's wrong, but
I assume it constitutes some at least logical pitfall.
Furthermore, it also bumps per-mm VmPin counter over and over without
increasing any other memory usage metric,
which is probably misguiding from the memory accounting perspective.
> BTW, The patch looks good to me, just need more information in commit message.
>
Thanks for your quick response!
And I apologise that my answer took so long.
> Thanks
>
>
>>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] IB: fix memlock limit handling code
2023-09-21 16:31 ` Maxim Samoylov
@ 2023-09-21 16:49 ` Jason Gunthorpe
2023-09-27 17:53 ` Leon Romanovsky
1 sibling, 0 replies; 5+ messages in thread
From: Jason Gunthorpe @ 2023-09-21 16:49 UTC (permalink / raw)
To: Maxim Samoylov
Cc: Leon Romanovsky, linux-rdma@vger.kernel.org, Dennis Dalessandro,
Christian Benvenuti, Bernard Metzler, Vadim Fedorenko
On Thu, Sep 21, 2023 at 04:31:44PM +0000, Maxim Samoylov wrote:
> Furthermore, it also bumps per-mm VmPin counter over and over without
> increasing any other memory usage metric,
> which is probably misguiding from the memory accounting perspective.
pinned memory accounting doesn't really work sanely in linux. There
was a whole discussion about it that never concluded.
Jason
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] IB: fix memlock limit handling code
2023-09-21 16:31 ` Maxim Samoylov
2023-09-21 16:49 ` Jason Gunthorpe
@ 2023-09-27 17:53 ` Leon Romanovsky
1 sibling, 0 replies; 5+ messages in thread
From: Leon Romanovsky @ 2023-09-27 17:53 UTC (permalink / raw)
To: Maxim Samoylov
Cc: linux-rdma@vger.kernel.org, Jason Gunthorpe, Dennis Dalessandro,
Christian Benvenuti, Bernard Metzler, Vadim Fedorenko
On Thu, Sep 21, 2023 at 04:31:44PM +0000, Maxim Samoylov wrote:
> On 18/09/2023 14:09, Leon Romanovsky wrote:
> > On Fri, Sep 15, 2023 at 01:03:53PM -0700, Maxim Samoylov wrote:
> >> This patch fixes handling for RLIM_INFINITY value uniformly across
> >> the infiniband/rdma subsystem.
> >>
> >> Currently infinity constant is treated as actual limit
> >> value, which can trigger unexpected ENOMEM errors in
> >> corner-case configurations
> >
> > Can you please provide an example and why these corner cases are
> > important?
> >
>
> Actually, I’ve come up with proposing this minor patch to avoid
> confusion I got while investigating production case with
> ib_reg_user_mr() returning ENOMEM for (presumably) no particular reason.
>
> Along with that I came across some curious repro.
> Consider the following code:
>
>
> addr = mmap(... , PROT_READ | PROT_WRITE,
> MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, ... )
>
> /* IB objects initialisation */
>
> while (1) {
>
> ibv_reg_mr_iova(pd, (void*)addr, LENGTH, (uint64_t)addr,
> IBV_ACCESS_LOCAL_WRITE|IBV_ACCESS_REMOTE_WRITE|
> IBV_ACCESS_REMOTE_READ|IBV_ACCESS_RELAXED_ORDERING);
>
> }
>
>
> This cycle can work almost eternally without triggering any errors
> - until the kernel will run out of memory or we finally bail out after
> comparison against thread memlock rlimit.
>
> As far as I understand, this means we can continuously register the same
> memory region for a single device over and over, bloating number of
> per-device MRs. Don't know for sure if it's wrong, but
> I assume it constitutes some at least logical pitfall.
>
> Furthermore, it also bumps per-mm VmPin counter over and over without
> increasing any other memory usage metric,
> which is probably misguiding from the memory accounting perspective.
>
> > BTW, The patch looks good to me, just need more information in commit message.
> >
> Thanks for your quick response!
> And I apologise that my answer took so long.
Please improve your commit message, remove "fix" word as this patch
doesn't really fix anything and send v2.
Thanks
>
> > Thanks
> >
> >
> >>
>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2023-09-27 17:54 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-09-15 20:03 [PATCH] IB: fix memlock limit handling code Maxim Samoylov
2023-09-18 12:09 ` Leon Romanovsky
2023-09-21 16:31 ` Maxim Samoylov
2023-09-21 16:49 ` Jason Gunthorpe
2023-09-27 17:53 ` Leon Romanovsky
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).