From: Bernard Metzler <bernard.metzler@linux.dev>
To: rosenp@gmail.com
Cc: jgg@ziepe.ca, leon@kernel.org, kees@kernel.org,
gustavoars@kernel.org, linux-rdma@vger.kernel.org
Subject: Re: [RFC PATCH v3] RDMA/siw: use kzalloc_flex
Date: Wed, 13 May 2026 16:17:03 +0200 [thread overview]
Message-ID: <8226377c-0691-4368-bb82-48b620f784d2@linux.dev> (raw)
In-Reply-To: <20260511141149.52362-1-bernard.metzler@linux.dev>
Before bothering the list with v4,5,6 patches, I hereby
reach out for advise. Please see below my questions related
to recent findings of sashiko, which are really useful
and very relevant.
On 11.05.2026 16:11, bernard.metzler@linux.dev wrote:
> From: Bernard Metzler <bernard.metzler@linux.dev>
>
> Simplify umem allocation by using flexible array member.
> Add __counted_by to get extra runtime analysis.
>
> Suggested-by: Rosen Penev <rosenp@gmail.com>
> Signed-off-by: Bernard Metzler <bernard.metzler@linux.dev>
>
> ---
> v3:
> Considering comments from sashiko.dev to v2:
> - To minimize memory footprint of user page list, revert array
> allocation to original kzalloc_objs(),
> - Revert siw_get_upage() to original bounds check.
>
> v2:
> 1. Considering comments from sashiko.dev to original patch regarding:
> - avoiding allocation of extra empty page chunk entry,
> if number of pages is an exact multiplier
> of PAGES_PER_CHUNK,
> - fix potential signed index overflow case for
> page list generation and access,
> - restrict page list access in siw_get_upage() to
> allocated length.
>
> 2. Extend flexible array allocation to page list.
>
> 3. Remove useless PAGE_CHUNK_SIZE definition.
> ---
> drivers/infiniband/sw/siw/siw.h | 5 +--
> drivers/infiniband/sw/siw/siw_mem.c | 54 ++++++++++++++---------------
> drivers/infiniband/sw/siw/siw_mem.h | 3 +-
> 3 files changed, 30 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
> index f5fd71717b80..dbc998248b1e 100644
> --- a/drivers/infiniband/sw/siw/siw.h
> +++ b/drivers/infiniband/sw/siw/siw.h
> @@ -119,9 +119,10 @@ struct siw_page_chunk {
>
> struct siw_umem {
> struct ib_umem *base_mem;
> - struct siw_page_chunk *page_chunk;
> - int num_pages;
> + unsigned int num_pages;
> + unsigned int num_chunks;
> u64 fp_addr; /* First page base address */
> + struct siw_page_chunk page_chunk[] __counted_by(num_chunks);
> };
>
> struct siw_pble {
> diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
> index 98c802b3ed72..88ec3cacfa00 100644
> --- a/drivers/infiniband/sw/siw/siw_mem.c
> +++ b/drivers/infiniband/sw/siw/siw_mem.c
> @@ -41,16 +41,14 @@ struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
>
> void siw_umem_release(struct siw_umem *umem)
> {
> - int i, num_pages = umem->num_pages;
> + unsigned int i, num_chunks = umem->num_chunks;
>
> if (umem->base_mem)
> ib_umem_release(umem->base_mem);
>
> - for (i = 0; num_pages > 0; i++) {
> + for (i = 0; i < num_chunks; i++)
> kfree(umem->page_chunk[i].plist);
> - num_pages -= PAGES_PER_CHUNK;
> - }
> - kfree(umem->page_chunk);
> +
> kfree(umem);
> }
>
> @@ -188,7 +186,7 @@ int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
> * lookup is being done and mem is not released it check fails.
> */
> int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, struct siw_mem *mem[],
> - enum ib_access_flags perms, u32 off, int len)
> + enum ib_access_flags perms, u32 off, u32 len)
> {
> struct siw_device *sdev = to_siw_dev(pd->device);
> struct siw_mem *new = NULL;
> @@ -338,25 +336,20 @@ struct siw_umem *siw_umem_get(struct ib_device *base_dev, u64 start,
> struct sg_page_iter sg_iter;
> struct sg_table *sgt;
> u64 first_page_va;
> - int num_pages, num_chunks, i, rv = 0;
> + unsigned int num_pages, num_chunks, i;
> + int rv = 0;
>
> if (!len)
> return ERR_PTR(-EINVAL);
>
> first_page_va = start & PAGE_MASK;
> num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT;
> - num_chunks = (num_pages >> CHUNK_SHIFT) + 1;
> + num_chunks = ((num_pages - 1) >> CHUNK_SHIFT) + 1;
>
> - umem = kzalloc_obj(*umem);
> + umem = kzalloc_flex(*umem, page_chunk, num_chunks);
> if (!umem)
> return ERR_PTR(-ENOMEM);
>
I got the following comment from sashiko:
"Does umem->num_chunks need to be explicitly initialized here?"
sashiko correctly points out that only newer compilers do that
initialization (I checked: only supported since gcc >= 15,
clang >= 19).
The patch intentionally does not set that value, since I felt
old compilers will not be used for v7 kernels. Shall we better
do the redundant assignment? This maybe is a general question for
the new alloc_flex api.
> - umem->page_chunk =
> - kzalloc_objs(struct siw_page_chunk, num_chunks);
> - if (!umem->page_chunk) {
> - rv = -ENOMEM;
> - goto err_out;
> - }
> base_mem = ib_umem_get(base_dev, start, len, rights);
> if (IS_ERR(base_mem)) {
> rv = PTR_ERR(base_mem);
> @@ -365,33 +358,38 @@ struct siw_umem *siw_umem_get(struct ib_device *base_dev, u64 start,
> }
> umem->fp_addr = first_page_va;
> umem->base_mem = base_mem;
> + umem->num_pages = num_pages;
>
> sgt = &base_mem->sgt_append.sgt;
> __sg_page_iter_start(&sg_iter, sgt->sgl, sgt->orig_nents, 0);
>
> - if (!__sg_page_iter_next(&sg_iter)) {
> - rv = -EINVAL;
> - goto err_out;
> - }
> - for (i = 0; num_pages > 0; i++) {
> - int nents = min_t(int, num_pages, PAGES_PER_CHUNK);
> - struct page **plist =
> - kzalloc_objs(struct page *, nents);
> + for (i = 0; i < num_chunks; i++) {
> + struct page **plist;
> + unsigned int pix, nents = min(num_pages, PAGES_PER_CHUNK);
>
> + plist = kzalloc_objs(struct page *, nents);
> if (!plist) {
> rv = -ENOMEM;
> goto err_out;
> }
> umem->page_chunk[i].plist = plist;
> - while (nents--) {
> - *plist = sg_page_iter_page(&sg_iter);
> - umem->num_pages++;
> - num_pages--;
> - plist++;
> +
> + for (pix = 0; pix < nents; pix++) {
> if (!__sg_page_iter_next(&sg_iter))
> break;
> + plist[pix] = sg_page_iter_page(&sg_iter);
> + num_pages--;
> }
> }
> + if (unlikely(num_pages)) {
> + /*
> + * Unexpected size of sg list provided by ib_umem_get()
> + */
> + siw_dbg(base_dev, "Short SG list, missing %u pages\n",
> + num_pages);
> + rv = -EINVAL;
> + goto err_out;
> + }
> return umem;
> err_out:
> siw_umem_release(umem);
> diff --git a/drivers/infiniband/sw/siw/siw_mem.h b/drivers/infiniband/sw/siw/siw_mem.h
> index 8e769d30e2ac..2dff6640da5f 100644
> --- a/drivers/infiniband/sw/siw/siw_mem.h
> +++ b/drivers/infiniband/sw/siw/siw_mem.h
> @@ -17,7 +17,7 @@ int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
> enum ib_access_flags perms, int len);
> int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge,
> struct siw_mem *mem[], enum ib_access_flags perms,
> - u32 off, int len);
> + u32 off, u32 len);
Sashiko says:
"The parameter len in siw_check_sge() is changed to u32, but it looks like
siw_check_mem() still takes an int for its len parameter."
This is correct, but shall I correct code not related to that
kzalloc_flex patch? I propose sending an extra patch fixing that
broken signed buffer access len/offset treatment all over driver code
in an extra patch. It is indeed broken and a great finding, but I'd
like to keep things separately.
Thanks very much!
Bernard.
> void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op);
> int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
> u64 start, u64 len, int rights);
> @@ -45,7 +45,6 @@ static inline void siw_unref_mem_sgl(struct siw_mem **mem, unsigned int num_sge)
> #define CHUNK_SHIFT 9 /* sets number of pages per chunk */
> #define PAGES_PER_CHUNK (_AC(1, UL) << CHUNK_SHIFT)
> #define CHUNK_MASK (~(PAGES_PER_CHUNK - 1))
> -#define PAGE_CHUNK_SIZE (PAGES_PER_CHUNK * sizeof(struct page *))
>
> /*
> * siw_get_upage()
next prev parent reply other threads:[~2026-05-13 14:17 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-11 14:11 [RFC PATCH v3] RDMA/siw: use kzalloc_flex bernard.metzler
2026-05-13 14:17 ` Bernard Metzler [this message]
2026-05-13 14:28 ` Jason Gunthorpe
2026-05-14 16:12 ` Kees Cook
2026-05-13 17:45 ` Leon Romanovsky
2026-05-14 13:14 ` Bernard Metzler
2026-05-17 14:52 ` Leon Romanovsky
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=8226377c-0691-4368-bb82-48b620f784d2@linux.dev \
--to=bernard.metzler@linux.dev \
--cc=gustavoars@kernel.org \
--cc=jgg@ziepe.ca \
--cc=kees@kernel.org \
--cc=leon@kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=rosenp@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.