* [PATCH v3 1/2] ublk: use copy_{to,from}_iter() for user copy
2025-11-06 17:16 [PATCH v3 0/2] ublk: simplify user copy Caleb Sander Mateos
@ 2025-11-06 17:16 ` Caleb Sander Mateos
2025-11-06 17:16 ` [PATCH v3 2/2] ublk: use rq_for_each_segment() " Caleb Sander Mateos
2025-11-06 23:30 ` [PATCH v3 0/2] ublk: simplify " Jens Axboe
2 siblings, 0 replies; 5+ messages in thread
From: Caleb Sander Mateos @ 2025-11-06 17:16 UTC (permalink / raw)
To: Ming Lei, Jens Axboe; +Cc: linux-block, linux-kernel, Caleb Sander Mateos
ublk_copy_user_pages()/ublk_copy_io_pages() currently uses
iov_iter_get_pages2() to extract the pages from the iov_iter and
memcpy()s between the bvec_iter and the iov_iter's pages one at a time.
Switch to using copy_to_iter()/copy_from_iter() instead. This avoids the
user page reference count increments and decrements and needing to split
the memcpy() at user page boundaries. It also simplifies the code
considerably.
Ming reports a 40% throughput improvement when issuing I/O to the
selftests null ublk server with zero-copy disabled.
Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
---
drivers/block/ublk_drv.c | 62 +++++++++-------------------------------
1 file changed, 14 insertions(+), 48 deletions(-)
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 97cc4bc0a6ce..40eee3e15a4c 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -911,58 +911,47 @@ static const struct block_device_operations ub_fops = {
.open = ublk_open,
.free_disk = ublk_free_disk,
.report_zones = ublk_report_zones,
};
-#define UBLK_MAX_PIN_PAGES 32
-
struct ublk_io_iter {
- struct page *pages[UBLK_MAX_PIN_PAGES];
struct bio *bio;
struct bvec_iter iter;
};
-/* return how many pages are copied */
-static void ublk_copy_io_pages(struct ublk_io_iter *data,
- size_t total, size_t pg_off, int dir)
+/* return how many bytes are copied */
+static size_t ublk_copy_io_pages(struct ublk_io_iter *data,
+ struct iov_iter *uiter, int dir)
{
- unsigned done = 0;
- unsigned pg_idx = 0;
+ size_t done = 0;
- while (done < total) {
+ for (;;) {
struct bio_vec bv = bio_iter_iovec(data->bio, data->iter);
- unsigned int bytes = min3(bv.bv_len, (unsigned)total - done,
- (unsigned)(PAGE_SIZE - pg_off));
void *bv_buf = bvec_kmap_local(&bv);
- void *pg_buf = kmap_local_page(data->pages[pg_idx]);
+ size_t copied;
if (dir == ITER_DEST)
- memcpy(pg_buf + pg_off, bv_buf, bytes);
+ copied = copy_to_iter(bv_buf, bv.bv_len, uiter);
else
- memcpy(bv_buf, pg_buf + pg_off, bytes);
+ copied = copy_from_iter(bv_buf, bv.bv_len, uiter);
- kunmap_local(pg_buf);
kunmap_local(bv_buf);
- /* advance page array */
- pg_off += bytes;
- if (pg_off == PAGE_SIZE) {
- pg_idx += 1;
- pg_off = 0;
- }
-
- done += bytes;
+ done += copied;
+ if (copied < bv.bv_len)
+ break;
/* advance bio */
- bio_advance_iter_single(data->bio, &data->iter, bytes);
+ bio_advance_iter_single(data->bio, &data->iter, copied);
if (!data->iter.bi_size) {
data->bio = data->bio->bi_next;
if (data->bio == NULL)
break;
data->iter = data->bio->bi_iter;
}
}
+ return done;
}
static bool ublk_advance_io_iter(const struct request *req,
struct ublk_io_iter *iter, unsigned int offset)
{
@@ -986,38 +975,15 @@ static bool ublk_advance_io_iter(const struct request *req,
*/
static size_t ublk_copy_user_pages(const struct request *req,
unsigned offset, struct iov_iter *uiter, int dir)
{
struct ublk_io_iter iter;
- size_t done = 0;
if (!ublk_advance_io_iter(req, &iter, offset))
return 0;
- while (iov_iter_count(uiter) && iter.bio) {
- unsigned nr_pages;
- ssize_t len;
- size_t off;
- int i;
-
- len = iov_iter_get_pages2(uiter, iter.pages,
- iov_iter_count(uiter),
- UBLK_MAX_PIN_PAGES, &off);
- if (len <= 0)
- return done;
-
- ublk_copy_io_pages(&iter, len, off, dir);
- nr_pages = DIV_ROUND_UP(len + off, PAGE_SIZE);
- for (i = 0; i < nr_pages; i++) {
- if (dir == ITER_DEST)
- set_page_dirty(iter.pages[i]);
- put_page(iter.pages[i]);
- }
- done += len;
- }
-
- return done;
+ return ublk_copy_io_pages(&iter, uiter, dir);
}
static inline bool ublk_need_map_req(const struct request *req)
{
return ublk_rq_has_data(req) && req_op(req) == REQ_OP_WRITE;
--
2.45.2
^ permalink raw reply related [flat|nested] 5+ messages in thread

* [PATCH v3 2/2] ublk: use rq_for_each_segment() for user copy
2025-11-06 17:16 [PATCH v3 0/2] ublk: simplify user copy Caleb Sander Mateos
2025-11-06 17:16 ` [PATCH v3 1/2] ublk: use copy_{to,from}_iter() for " Caleb Sander Mateos
@ 2025-11-06 17:16 ` Caleb Sander Mateos
2025-11-07 0:10 ` Ming Lei
2025-11-06 23:30 ` [PATCH v3 0/2] ublk: simplify " Jens Axboe
2 siblings, 1 reply; 5+ messages in thread
From: Caleb Sander Mateos @ 2025-11-06 17:16 UTC (permalink / raw)
To: Ming Lei, Jens Axboe; +Cc: linux-block, linux-kernel, Caleb Sander Mateos
ublk_advance_io_iter() and ublk_copy_io_pages() currently open-code the
iteration over the request's bvecs. Switch to the rq_for_each_segment()
macro provided by blk-mq to avoid reaching into the bio internals and
simplify the code.
Suggested-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
---
drivers/block/ublk_drv.c | 70 +++++++++++-----------------------------
1 file changed, 19 insertions(+), 51 deletions(-)
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 40eee3e15a4c..5cf288809226 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -911,26 +911,33 @@ static const struct block_device_operations ub_fops = {
.open = ublk_open,
.free_disk = ublk_free_disk,
.report_zones = ublk_report_zones,
};
-struct ublk_io_iter {
- struct bio *bio;
- struct bvec_iter iter;
-};
-
-/* return how many bytes are copied */
-static size_t ublk_copy_io_pages(struct ublk_io_iter *data,
- struct iov_iter *uiter, int dir)
+/*
+ * Copy data between request pages and io_iter, and 'offset'
+ * is the start point of linear offset of request.
+ */
+static size_t ublk_copy_user_pages(const struct request *req,
+ unsigned offset, struct iov_iter *uiter, int dir)
{
+ struct req_iterator iter;
+ struct bio_vec bv;
size_t done = 0;
- for (;;) {
- struct bio_vec bv = bio_iter_iovec(data->bio, data->iter);
- void *bv_buf = bvec_kmap_local(&bv);
+ rq_for_each_segment(bv, req, iter) {
+ void *bv_buf;
size_t copied;
+ if (offset >= bv.bv_len) {
+ offset -= bv.bv_len;
+ continue;
+ }
+
+ bv.bv_offset += offset;
+ bv.bv_len -= offset;
+ bv_buf = bvec_kmap_local(&bv);
if (dir == ITER_DEST)
copied = copy_to_iter(bv_buf, bv.bv_len, uiter);
else
copied = copy_from_iter(bv_buf, bv.bv_len, uiter);
@@ -938,54 +945,15 @@ static size_t ublk_copy_io_pages(struct ublk_io_iter *data,
done += copied;
if (copied < bv.bv_len)
break;
- /* advance bio */
- bio_advance_iter_single(data->bio, &data->iter, copied);
- if (!data->iter.bi_size) {
- data->bio = data->bio->bi_next;
- if (data->bio == NULL)
- break;
- data->iter = data->bio->bi_iter;
- }
+ offset = 0;
}
return done;
}
-static bool ublk_advance_io_iter(const struct request *req,
- struct ublk_io_iter *iter, unsigned int offset)
-{
- struct bio *bio = req->bio;
-
- for_each_bio(bio) {
- if (bio->bi_iter.bi_size > offset) {
- iter->bio = bio;
- iter->iter = bio->bi_iter;
- bio_advance_iter(iter->bio, &iter->iter, offset);
- return true;
- }
- offset -= bio->bi_iter.bi_size;
- }
- return false;
-}
-
-/*
- * Copy data between request pages and io_iter, and 'offset'
- * is the start point of linear offset of request.
- */
-static size_t ublk_copy_user_pages(const struct request *req,
- unsigned offset, struct iov_iter *uiter, int dir)
-{
- struct ublk_io_iter iter;
-
- if (!ublk_advance_io_iter(req, &iter, offset))
- return 0;
-
- return ublk_copy_io_pages(&iter, uiter, dir);
-}
-
static inline bool ublk_need_map_req(const struct request *req)
{
return ublk_rq_has_data(req) && req_op(req) == REQ_OP_WRITE;
}
--
2.45.2
^ permalink raw reply related [flat|nested] 5+ messages in thread

* Re: [PATCH v3 2/2] ublk: use rq_for_each_segment() for user copy
2025-11-06 17:16 ` [PATCH v3 2/2] ublk: use rq_for_each_segment() " Caleb Sander Mateos
@ 2025-11-07 0:10 ` Ming Lei
0 siblings, 0 replies; 5+ messages in thread
From: Ming Lei @ 2025-11-07 0:10 UTC (permalink / raw)
To: Caleb Sander Mateos; +Cc: Jens Axboe, linux-block, linux-kernel
On Thu, Nov 06, 2025 at 10:16:47AM -0700, Caleb Sander Mateos wrote:
> ublk_advance_io_iter() and ublk_copy_io_pages() currently open-code the
> iteration over the request's bvecs. Switch to the rq_for_each_segment()
> macro provided by blk-mq to avoid reaching into the bio internals and
> simplify the code.
>
> Suggested-by: Ming Lei <ming.lei@redhat.com>
> Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Nice cleanup:
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Thanks,
Ming
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v3 0/2] ublk: simplify user copy
2025-11-06 17:16 [PATCH v3 0/2] ublk: simplify user copy Caleb Sander Mateos
2025-11-06 17:16 ` [PATCH v3 1/2] ublk: use copy_{to,from}_iter() for " Caleb Sander Mateos
2025-11-06 17:16 ` [PATCH v3 2/2] ublk: use rq_for_each_segment() " Caleb Sander Mateos
@ 2025-11-06 23:30 ` Jens Axboe
2 siblings, 0 replies; 5+ messages in thread
From: Jens Axboe @ 2025-11-06 23:30 UTC (permalink / raw)
To: Ming Lei, Caleb Sander Mateos; +Cc: linux-block, linux-kernel
On Thu, 06 Nov 2025 10:16:45 -0700, Caleb Sander Mateos wrote:
> Use copy_page_{to,from}_user() and rq_for_each_segment() to simplify the
> implementation of ublk_copy_user_pages(). Avoiding the page pinning and
> unpinning saves expensive atomic increments and decrements of the page
> reference counts. And copying via user virtual addresses avoids needing
> to split the copy at user page boundaries. Ming reports a 40% throughput
> improvement when issuing I/O to the selftests null ublk server with
> zero-copy disabled.
>
> [...]
Applied, thanks!
[1/2] ublk: use copy_{to,from}_iter() for user copy
commit: 2299ceec364eecdc0a5b4ec80c757551d130389c
[2/2] ublk: use rq_for_each_segment() for user copy
commit: e87d66ab27ac89494b75ddc3fed697b5aa8417f1
Best regards,
--
Jens Axboe
^ permalink raw reply [flat|nested] 5+ messages in thread