From: Ming Lei <ming.lei@redhat.com>
To: Jens Axboe <axboe@kernel.dk>, linux-block@vger.kernel.org
Cc: Caleb Sander Mateos <csander@purestorage.com>,
Ming Lei <ming.lei@redhat.com>
Subject: [PATCH v2 04/10] ublk: eliminate permanent pages[] array from struct ublk_buf
Date: Tue, 31 Mar 2026 23:31:55 +0800 [thread overview]
Message-ID: <20260331153207.3635125-5-ming.lei@redhat.com> (raw)
In-Reply-To: <20260331153207.3635125-1-ming.lei@redhat.com>
The pages[] array (kvmalloc'd, 8 bytes per page = 2MB for a 1GB buffer
with 4KB pages) was stored permanently in struct ublk_buf but is only
needed during pin_user_pages_fast() and maple tree construction. Since
the maple tree already stores PFN ranges via ublk_buf_range, struct page
pointers can be recovered via pfn_to_page() during unregistration.
Make pages[] a temporary allocation in ublk_ctrl_reg_buf(), freed
immediately after the maple tree is built. Rewrite __ublk_ctrl_unreg_buf()
to iterate the maple tree for matching buf_index entries, recovering
struct page pointers via pfn_to_page() and unpinning in batches of 32.
Simplify ublk_buf_erase_ranges() to iterate the maple tree by buf_index
instead of walking the now-removed pages[] array.
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
drivers/block/ublk_drv.c | 87 +++++++++++++++++++++++++---------------
1 file changed, 55 insertions(+), 32 deletions(-)
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index c2b9992503a4..2e475bdc54dd 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -296,7 +296,6 @@ struct ublk_queue {
/* Per-registered shared memory buffer */
struct ublk_buf {
- struct page **pages;
unsigned int nr_pages;
};
@@ -5261,27 +5260,25 @@ static void ublk_unquiesce_and_resume(struct gendisk *disk)
* coalescing consecutive PFNs into single range entries.
* Returns 0 on success, negative error with partial insertions unwound.
*/
-/* Erase coalesced PFN ranges from the maple tree for pages [0, nr_pages) */
-static void ublk_buf_erase_ranges(struct ublk_device *ub,
- struct ublk_buf *ubuf,
- unsigned long nr_pages)
+/* Erase coalesced PFN ranges from the maple tree matching buf_index */
+static void ublk_buf_erase_ranges(struct ublk_device *ub, int buf_index)
{
- unsigned long i;
-
- for (i = 0; i < nr_pages; ) {
- unsigned long pfn = page_to_pfn(ubuf->pages[i]);
- unsigned long start = i;
+ MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX);
+ struct ublk_buf_range *range;
- while (i + 1 < nr_pages &&
- page_to_pfn(ubuf->pages[i + 1]) == pfn + (i - start) + 1)
- i++;
- i++;
- kfree(mtree_erase(&ub->buf_tree, pfn));
+ mas_lock(&mas);
+ mas_for_each(&mas, range, ULONG_MAX) {
+ if (range->buf_index == buf_index) {
+ mas_erase(&mas);
+ kfree(range);
+ }
}
+ mas_unlock(&mas);
}
static int __ublk_ctrl_reg_buf(struct ublk_device *ub,
- struct ublk_buf *ubuf, int index,
+ struct ublk_buf *ubuf,
+ struct page **pages, int index,
unsigned short flags)
{
unsigned long nr_pages = ubuf->nr_pages;
@@ -5289,13 +5286,13 @@ static int __ublk_ctrl_reg_buf(struct ublk_device *ub,
int ret;
for (i = 0; i < nr_pages; ) {
- unsigned long pfn = page_to_pfn(ubuf->pages[i]);
+ unsigned long pfn = page_to_pfn(pages[i]);
unsigned long start = i;
struct ublk_buf_range *range;
/* Find run of consecutive PFNs */
while (i + 1 < nr_pages &&
- page_to_pfn(ubuf->pages[i + 1]) == pfn + (i - start) + 1)
+ page_to_pfn(pages[i + 1]) == pfn + (i - start) + 1)
i++;
i++; /* past the last page in this run */
@@ -5320,7 +5317,7 @@ static int __ublk_ctrl_reg_buf(struct ublk_device *ub,
return 0;
unwind:
- ublk_buf_erase_ranges(ub, ubuf, i);
+ ublk_buf_erase_ranges(ub, index);
return ret;
}
@@ -5335,6 +5332,7 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
void __user *argp = (void __user *)(unsigned long)header->addr;
struct ublk_shmem_buf_reg buf_reg;
unsigned long addr, size, nr_pages;
+ struct page **pages = NULL;
unsigned int gup_flags;
struct gendisk *disk;
struct ublk_buf *ubuf;
@@ -5371,9 +5369,8 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
goto put_disk;
}
- ubuf->pages = kvmalloc_array(nr_pages, sizeof(*ubuf->pages),
- GFP_KERNEL);
- if (!ubuf->pages) {
+ pages = kvmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL);
+ if (!pages) {
ret = -ENOMEM;
goto err_free;
}
@@ -5382,7 +5379,7 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
if (!(buf_reg.flags & UBLK_SHMEM_BUF_READ_ONLY))
gup_flags |= FOLL_WRITE;
- pinned = pin_user_pages_fast(addr, nr_pages, gup_flags, ubuf->pages);
+ pinned = pin_user_pages_fast(addr, nr_pages, gup_flags, pages);
if (pinned < 0) {
ret = pinned;
goto err_free_pages;
@@ -5406,7 +5403,7 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
if (ret)
goto err_unlock;
- ret = __ublk_ctrl_reg_buf(ub, ubuf, index, buf_reg.flags);
+ ret = __ublk_ctrl_reg_buf(ub, ubuf, pages, index, buf_reg.flags);
if (ret) {
xa_erase(&ub->bufs_xa, index);
goto err_unlock;
@@ -5414,6 +5411,7 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
mutex_unlock(&ub->mutex);
+ kvfree(pages);
ublk_unquiesce_and_resume(disk);
ublk_put_disk(disk);
return index;
@@ -5422,9 +5420,9 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
mutex_unlock(&ub->mutex);
ublk_unquiesce_and_resume(disk);
err_unpin:
- unpin_user_pages(ubuf->pages, pinned);
+ unpin_user_pages(pages, pinned);
err_free_pages:
- kvfree(ubuf->pages);
+ kvfree(pages);
err_free:
kfree(ubuf);
put_disk:
@@ -5433,11 +5431,36 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
}
static void __ublk_ctrl_unreg_buf(struct ublk_device *ub,
- struct ublk_buf *ubuf)
+ struct ublk_buf *ubuf, int buf_index)
{
- ublk_buf_erase_ranges(ub, ubuf, ubuf->nr_pages);
- unpin_user_pages(ubuf->pages, ubuf->nr_pages);
- kvfree(ubuf->pages);
+ MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX);
+ struct ublk_buf_range *range;
+ struct page *pages[32];
+
+ mas_lock(&mas);
+ mas_for_each(&mas, range, ULONG_MAX) {
+ unsigned long base, nr, off;
+
+ if (range->buf_index != buf_index)
+ continue;
+
+ base = range->base_pfn;
+ nr = mas.last - mas.index + 1;
+ mas_erase(&mas);
+
+ for (off = 0; off < nr; ) {
+ unsigned int batch = min_t(unsigned long,
+ nr - off, 32);
+ unsigned int j;
+
+ for (j = 0; j < batch; j++)
+ pages[j] = pfn_to_page(base + off + j);
+ unpin_user_pages(pages, batch);
+ off += batch;
+ }
+ kfree(range);
+ }
+ mas_unlock(&mas);
kfree(ubuf);
}
@@ -5468,7 +5491,7 @@ static int ublk_ctrl_unreg_buf(struct ublk_device *ub,
return -ENOENT;
}
- __ublk_ctrl_unreg_buf(ub, ubuf);
+ __ublk_ctrl_unreg_buf(ub, ubuf, index);
mutex_unlock(&ub->mutex);
@@ -5483,7 +5506,7 @@ static void ublk_buf_cleanup(struct ublk_device *ub)
unsigned long index;
xa_for_each(&ub->bufs_xa, index, ubuf)
- __ublk_ctrl_unreg_buf(ub, ubuf);
+ __ublk_ctrl_unreg_buf(ub, ubuf, index);
xa_destroy(&ub->bufs_xa);
mtree_destroy(&ub->buf_tree);
}
--
2.53.0
next prev parent reply other threads:[~2026-03-31 15:32 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-31 15:31 [PATCH v2 00/10] ublk: add shared memory zero-copy support Ming Lei
2026-03-31 15:31 ` [PATCH v2 01/10] ublk: add UBLK_U_CMD_REG_BUF/UNREG_BUF control commands Ming Lei
2026-04-07 19:35 ` Caleb Sander Mateos
2026-03-31 15:31 ` [PATCH v2 02/10] ublk: add PFN-based buffer matching in I/O path Ming Lei
2026-04-07 19:47 ` Caleb Sander Mateos
2026-03-31 15:31 ` [PATCH v2 03/10] ublk: enable UBLK_F_SHMEM_ZC feature flag Ming Lei
2026-04-07 19:47 ` Caleb Sander Mateos
2026-03-31 15:31 ` Ming Lei [this message]
2026-04-07 19:50 ` [PATCH v2 04/10] ublk: eliminate permanent pages[] array from struct ublk_buf Caleb Sander Mateos
2026-03-31 15:31 ` [PATCH v2 05/10] selftests/ublk: add shared memory zero-copy support in kublk Ming Lei
2026-03-31 15:31 ` [PATCH v2 06/10] selftests/ublk: add UBLK_F_SHMEM_ZC support for loop target Ming Lei
2026-03-31 15:31 ` [PATCH v2 07/10] selftests/ublk: add shared memory zero-copy test Ming Lei
2026-03-31 15:31 ` [PATCH v2 08/10] selftests/ublk: add hugetlbfs shmem_zc test for loop target Ming Lei
2026-03-31 15:32 ` [PATCH v2 09/10] selftests/ublk: add filesystem fio verify test for shmem_zc Ming Lei
2026-03-31 15:32 ` [PATCH v2 10/10] selftests/ublk: add read-only buffer registration test Ming Lei
2026-04-07 2:38 ` [PATCH v2 00/10] ublk: add shared memory zero-copy support Ming Lei
2026-04-07 13:34 ` Jens Axboe
2026-04-07 19:29 ` Caleb Sander Mateos
2026-04-07 13:44 ` Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260331153207.3635125-5-ming.lei@redhat.com \
--to=ming.lei@redhat.com \
--cc=axboe@kernel.dk \
--cc=csander@purestorage.com \
--cc=linux-block@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox