From: Bernard Metzler <bernard.metzler@linux.dev>
To: rosenp@gmail.com
Cc: jgg@ziepe.ca, leon@kernel.org, kees@kernel.org,
gustavoars@kernel.org, linux-rdma@vger.kernel.org
Subject: Re: [RFC PATCH v3] RDMA/siw: use kzalloc_flex
Date: Wed, 13 May 2026 16:17:03 +0200 [thread overview]
Message-ID: <8226377c-0691-4368-bb82-48b620f784d2@linux.dev> (raw)
In-Reply-To: <20260511141149.52362-1-bernard.metzler@linux.dev>
Before bothering the list with v4,5,6 patches, I hereby
reach out for advise. Please see below my questions related
to recent findings of sashiko, which are really useful
and very relevant.
On 11.05.2026 16:11, bernard.metzler@linux.dev wrote:
> From: Bernard Metzler <bernard.metzler@linux.dev>
>
> Simplify umem allocation by using flexible array member.
> Add __counted_by to get extra runtime analysis.
>
> Suggested-by: Rosen Penev <rosenp@gmail.com>
> Signed-off-by: Bernard Metzler <bernard.metzler@linux.dev>
>
> ---
> v3:
> Considering comments from sashiko.dev to v2:
> - To minimize memory footprint of user page list, revert array
> allocation to original kzalloc_objs(),
> - Revert siw_get_upage() to original bounds check.
>
> v2:
> 1. Considering comments from sashiko.dev to original patch regarding:
> - avoiding allocation of extra empty page chunk entry,
> if number of pages is an exact multiplier
> of PAGES_PER_CHUNK,
> - fix potential signed index overflow case for
> page list generation and access,
> - restrict page list access in siw_get_upage() to
> allocated length.
>
> 2. Extend flexible array allocation to page list.
>
> 3. Remove useless PAGE_CHUNK_SIZE definition.
> ---
> drivers/infiniband/sw/siw/siw.h | 5 +--
> drivers/infiniband/sw/siw/siw_mem.c | 54 ++++++++++++++---------------
> drivers/infiniband/sw/siw/siw_mem.h | 3 +-
> 3 files changed, 30 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
> index f5fd71717b80..dbc998248b1e 100644
> --- a/drivers/infiniband/sw/siw/siw.h
> +++ b/drivers/infiniband/sw/siw/siw.h
> @@ -119,9 +119,10 @@ struct siw_page_chunk {
>
> struct siw_umem {
> struct ib_umem *base_mem;
> - struct siw_page_chunk *page_chunk;
> - int num_pages;
> + unsigned int num_pages;
> + unsigned int num_chunks;
> u64 fp_addr; /* First page base address */
> + struct siw_page_chunk page_chunk[] __counted_by(num_chunks);
> };
>
> struct siw_pble {
> diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
> index 98c802b3ed72..88ec3cacfa00 100644
> --- a/drivers/infiniband/sw/siw/siw_mem.c
> +++ b/drivers/infiniband/sw/siw/siw_mem.c
> @@ -41,16 +41,14 @@ struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
>
> void siw_umem_release(struct siw_umem *umem)
> {
> - int i, num_pages = umem->num_pages;
> + unsigned int i, num_chunks = umem->num_chunks;
>
> if (umem->base_mem)
> ib_umem_release(umem->base_mem);
>
> - for (i = 0; num_pages > 0; i++) {
> + for (i = 0; i < num_chunks; i++)
> kfree(umem->page_chunk[i].plist);
> - num_pages -= PAGES_PER_CHUNK;
> - }
> - kfree(umem->page_chunk);
> +
> kfree(umem);
> }
>
> @@ -188,7 +186,7 @@ int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
> * lookup is being done and mem is not released it check fails.
> */
> int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, struct siw_mem *mem[],
> - enum ib_access_flags perms, u32 off, int len)
> + enum ib_access_flags perms, u32 off, u32 len)
> {
> struct siw_device *sdev = to_siw_dev(pd->device);
> struct siw_mem *new = NULL;
> @@ -338,25 +336,20 @@ struct siw_umem *siw_umem_get(struct ib_device *base_dev, u64 start,
> struct sg_page_iter sg_iter;
> struct sg_table *sgt;
> u64 first_page_va;
> - int num_pages, num_chunks, i, rv = 0;
> + unsigned int num_pages, num_chunks, i;
> + int rv = 0;
>
> if (!len)
> return ERR_PTR(-EINVAL);
>
> first_page_va = start & PAGE_MASK;
> num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT;
> - num_chunks = (num_pages >> CHUNK_SHIFT) + 1;
> + num_chunks = ((num_pages - 1) >> CHUNK_SHIFT) + 1;
>
> - umem = kzalloc_obj(*umem);
> + umem = kzalloc_flex(*umem, page_chunk, num_chunks);
> if (!umem)
> return ERR_PTR(-ENOMEM);
>
I got the following comment from sashiko:
"Does umem->num_chunks need to be explicitly initialized here?"
sashiko correctly points out that only newer compilers do that
initialization (I checked: only supported since gcc >= 15,
clang >= 19).
The patch intentionally does not set that value, since I felt
old compilers will not be used for v7 kernels. Shall we better
do the redundant assignment? This maybe is a general question for
the new alloc_flex api.
> - umem->page_chunk =
> - kzalloc_objs(struct siw_page_chunk, num_chunks);
> - if (!umem->page_chunk) {
> - rv = -ENOMEM;
> - goto err_out;
> - }
> base_mem = ib_umem_get(base_dev, start, len, rights);
> if (IS_ERR(base_mem)) {
> rv = PTR_ERR(base_mem);
> @@ -365,33 +358,38 @@ struct siw_umem *siw_umem_get(struct ib_device *base_dev, u64 start,
> }
> umem->fp_addr = first_page_va;
> umem->base_mem = base_mem;
> + umem->num_pages = num_pages;
>
> sgt = &base_mem->sgt_append.sgt;
> __sg_page_iter_start(&sg_iter, sgt->sgl, sgt->orig_nents, 0);
>
> - if (!__sg_page_iter_next(&sg_iter)) {
> - rv = -EINVAL;
> - goto err_out;
> - }
> - for (i = 0; num_pages > 0; i++) {
> - int nents = min_t(int, num_pages, PAGES_PER_CHUNK);
> - struct page **plist =
> - kzalloc_objs(struct page *, nents);
> + for (i = 0; i < num_chunks; i++) {
> + struct page **plist;
> + unsigned int pix, nents = min(num_pages, PAGES_PER_CHUNK);
>
> + plist = kzalloc_objs(struct page *, nents);
> if (!plist) {
> rv = -ENOMEM;
> goto err_out;
> }
> umem->page_chunk[i].plist = plist;
> - while (nents--) {
> - *plist = sg_page_iter_page(&sg_iter);
> - umem->num_pages++;
> - num_pages--;
> - plist++;
> +
> + for (pix = 0; pix < nents; pix++) {
> if (!__sg_page_iter_next(&sg_iter))
> break;
> + plist[pix] = sg_page_iter_page(&sg_iter);
> + num_pages--;
> }
> }
> + if (unlikely(num_pages)) {
> + /*
> + * Unexpected size of sg list provided by ib_umem_get()
> + */
> + siw_dbg(base_dev, "Short SG list, missing %u pages\n",
> + num_pages);
> + rv = -EINVAL;
> + goto err_out;
> + }
> return umem;
> err_out:
> siw_umem_release(umem);
> diff --git a/drivers/infiniband/sw/siw/siw_mem.h b/drivers/infiniband/sw/siw/siw_mem.h
> index 8e769d30e2ac..2dff6640da5f 100644
> --- a/drivers/infiniband/sw/siw/siw_mem.h
> +++ b/drivers/infiniband/sw/siw/siw_mem.h
> @@ -17,7 +17,7 @@ int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
> enum ib_access_flags perms, int len);
> int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge,
> struct siw_mem *mem[], enum ib_access_flags perms,
> - u32 off, int len);
> + u32 off, u32 len);
Sashiko says:
"The parameter len in siw_check_sge() is changed to u32, but it looks like
siw_check_mem() still takes an int for its len parameter."
This is correct, but shall I correct code not related to that
kzalloc_flex patch? I propose sending an extra patch fixing that
broken signed buffer access len/offset treatment all over driver code
in an extra patch. It is indeed broken and a great finding, but I'd
like to keep things separately.
Thanks very much!
Bernard.
> void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op);
> int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
> u64 start, u64 len, int rights);
> @@ -45,7 +45,6 @@ static inline void siw_unref_mem_sgl(struct siw_mem **mem, unsigned int num_sge)
> #define CHUNK_SHIFT 9 /* sets number of pages per chunk */
> #define PAGES_PER_CHUNK (_AC(1, UL) << CHUNK_SHIFT)
> #define CHUNK_MASK (~(PAGES_PER_CHUNK - 1))
> -#define PAGE_CHUNK_SIZE (PAGES_PER_CHUNK * sizeof(struct page *))
>
> /*
> * siw_get_upage()
next prev parent reply other threads:[~2026-05-13 14:17 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-11 14:11 [RFC PATCH v3] RDMA/siw: use kzalloc_flex bernard.metzler
2026-05-13 14:17 ` Bernard Metzler [this message]
2026-05-13 14:28 ` Jason Gunthorpe
2026-05-14 16:12 ` Kees Cook
2026-05-13 17:45 ` Leon Romanovsky
2026-05-14 13:14 ` Bernard Metzler
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=8226377c-0691-4368-bb82-48b620f784d2@linux.dev \
--to=bernard.metzler@linux.dev \
--cc=gustavoars@kernel.org \
--cc=jgg@ziepe.ca \
--cc=kees@kernel.org \
--cc=leon@kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=rosenp@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox