* [PATCH v2 0/2] ublk: simplify user copy
@ 2025-11-05 20:28 Caleb Sander Mateos
2025-11-05 20:28 ` [PATCH v2 1/2] ublk: use copy_{to,from}_iter() for " Caleb Sander Mateos
2025-11-05 20:28 ` [PATCH v2 2/2] ublk: use rq_for_each_bvec() " Caleb Sander Mateos
0 siblings, 2 replies; 6+ messages in thread
From: Caleb Sander Mateos @ 2025-11-05 20:28 UTC (permalink / raw)
To: Ming Lei, Jens Axboe; +Cc: linux-block, linux-kernel, Caleb Sander Mateos
Use copy_page_{to,from}_user() and rq_for_each_bvec() to simplify the
implementation of ublk_copy_user_pages(). Avoiding the page pinning and
unpinning saves expensive atomic increments and decrements of the page
reference counts. And copying via user virtual addresses avoids needing
to split the copy at user page boundaries. Ming reports a 40% throughput
improvement when issuing I/O to the selftests null ublk server with
zero-copy disabled.
v2:
- Use rq_for_each_bvec() to further simplify the code (Ming)
- Add performance measurements from Ming
Caleb Sander Mateos (2):
ublk: use copy_{to,from}_iter() for user copy
ublk: use rq_for_each_bvec() for user copy
drivers/block/ublk_drv.c | 113 ++++++++-------------------------------
1 file changed, 23 insertions(+), 90 deletions(-)
--
2.45.2
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH v2 1/2] ublk: use copy_{to,from}_iter() for user copy
2025-11-05 20:28 [PATCH v2 0/2] ublk: simplify user copy Caleb Sander Mateos
@ 2025-11-05 20:28 ` Caleb Sander Mateos
2025-11-06 4:22 ` Ming Lei
2025-11-05 20:28 ` [PATCH v2 2/2] ublk: use rq_for_each_bvec() " Caleb Sander Mateos
1 sibling, 1 reply; 6+ messages in thread
From: Caleb Sander Mateos @ 2025-11-05 20:28 UTC (permalink / raw)
To: Ming Lei, Jens Axboe; +Cc: linux-block, linux-kernel, Caleb Sander Mateos
ublk_copy_user_pages()/ublk_copy_io_pages() currently uses
iov_iter_get_pages2() to extract the pages from the iov_iter and
memcpy()s between the bvec_iter and the iov_iter's pages one at a time.
Switch to using copy_to_iter()/copy_from_iter() instead. This avoids the
user page reference count increments and decrements and needing to split
the memcpy() at user page boundaries. It also simplifies the code
considerably.
Ming reports a 40% throughput improvement when issuing I/O to the
selftests null ublk server with zero-copy disabled.
Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
---
drivers/block/ublk_drv.c | 62 +++++++++-------------------------------
1 file changed, 14 insertions(+), 48 deletions(-)
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 97cc4bc0a6ce..40eee3e15a4c 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -911,58 +911,47 @@ static const struct block_device_operations ub_fops = {
.open = ublk_open,
.free_disk = ublk_free_disk,
.report_zones = ublk_report_zones,
};
-#define UBLK_MAX_PIN_PAGES 32
-
struct ublk_io_iter {
- struct page *pages[UBLK_MAX_PIN_PAGES];
struct bio *bio;
struct bvec_iter iter;
};
-/* return how many pages are copied */
-static void ublk_copy_io_pages(struct ublk_io_iter *data,
- size_t total, size_t pg_off, int dir)
+/* return how many bytes are copied */
+static size_t ublk_copy_io_pages(struct ublk_io_iter *data,
+ struct iov_iter *uiter, int dir)
{
- unsigned done = 0;
- unsigned pg_idx = 0;
+ size_t done = 0;
- while (done < total) {
+ for (;;) {
struct bio_vec bv = bio_iter_iovec(data->bio, data->iter);
- unsigned int bytes = min3(bv.bv_len, (unsigned)total - done,
- (unsigned)(PAGE_SIZE - pg_off));
void *bv_buf = bvec_kmap_local(&bv);
- void *pg_buf = kmap_local_page(data->pages[pg_idx]);
+ size_t copied;
if (dir == ITER_DEST)
- memcpy(pg_buf + pg_off, bv_buf, bytes);
+ copied = copy_to_iter(bv_buf, bv.bv_len, uiter);
else
- memcpy(bv_buf, pg_buf + pg_off, bytes);
+ copied = copy_from_iter(bv_buf, bv.bv_len, uiter);
- kunmap_local(pg_buf);
kunmap_local(bv_buf);
- /* advance page array */
- pg_off += bytes;
- if (pg_off == PAGE_SIZE) {
- pg_idx += 1;
- pg_off = 0;
- }
-
- done += bytes;
+ done += copied;
+ if (copied < bv.bv_len)
+ break;
/* advance bio */
- bio_advance_iter_single(data->bio, &data->iter, bytes);
+ bio_advance_iter_single(data->bio, &data->iter, copied);
if (!data->iter.bi_size) {
data->bio = data->bio->bi_next;
if (data->bio == NULL)
break;
data->iter = data->bio->bi_iter;
}
}
+ return done;
}
static bool ublk_advance_io_iter(const struct request *req,
struct ublk_io_iter *iter, unsigned int offset)
{
@@ -986,38 +975,15 @@ static bool ublk_advance_io_iter(const struct request *req,
*/
static size_t ublk_copy_user_pages(const struct request *req,
unsigned offset, struct iov_iter *uiter, int dir)
{
struct ublk_io_iter iter;
- size_t done = 0;
if (!ublk_advance_io_iter(req, &iter, offset))
return 0;
- while (iov_iter_count(uiter) && iter.bio) {
- unsigned nr_pages;
- ssize_t len;
- size_t off;
- int i;
-
- len = iov_iter_get_pages2(uiter, iter.pages,
- iov_iter_count(uiter),
- UBLK_MAX_PIN_PAGES, &off);
- if (len <= 0)
- return done;
-
- ublk_copy_io_pages(&iter, len, off, dir);
- nr_pages = DIV_ROUND_UP(len + off, PAGE_SIZE);
- for (i = 0; i < nr_pages; i++) {
- if (dir == ITER_DEST)
- set_page_dirty(iter.pages[i]);
- put_page(iter.pages[i]);
- }
- done += len;
- }
-
- return done;
+ return ublk_copy_io_pages(&iter, uiter, dir);
}
static inline bool ublk_need_map_req(const struct request *req)
{
return ublk_rq_has_data(req) && req_op(req) == REQ_OP_WRITE;
--
2.45.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v2 2/2] ublk: use rq_for_each_bvec() for user copy
2025-11-05 20:28 [PATCH v2 0/2] ublk: simplify user copy Caleb Sander Mateos
2025-11-05 20:28 ` [PATCH v2 1/2] ublk: use copy_{to,from}_iter() for " Caleb Sander Mateos
@ 2025-11-05 20:28 ` Caleb Sander Mateos
2025-11-06 4:27 ` Ming Lei
1 sibling, 1 reply; 6+ messages in thread
From: Caleb Sander Mateos @ 2025-11-05 20:28 UTC (permalink / raw)
To: Ming Lei, Jens Axboe; +Cc: linux-block, linux-kernel, Caleb Sander Mateos
ublk_advance_io_iter() and ublk_copy_io_pages() currently open-code the
iteration over request's bvecs. Switch to the rq_for_each_bvec() macro
provided by blk-mq to avoid reaching into the bio internals and simplify
the code. Unlike bio_iter_iovec(), rq_for_each_bvec() can return
multi-page bvecs. So switch from copy_{to,from}_iter() to
copy_page_{to,from}_iter() to map and copy each page in the bvec.
Suggested-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
---
drivers/block/ublk_drv.c | 78 ++++++++++++----------------------------
1 file changed, 23 insertions(+), 55 deletions(-)
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 40eee3e15a4c..929d40fe0250 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -911,81 +911,49 @@ static const struct block_device_operations ub_fops = {
.open = ublk_open,
.free_disk = ublk_free_disk,
.report_zones = ublk_report_zones,
};
-struct ublk_io_iter {
- struct bio *bio;
- struct bvec_iter iter;
-};
-
-/* return how many bytes are copied */
-static size_t ublk_copy_io_pages(struct ublk_io_iter *data,
- struct iov_iter *uiter, int dir)
+/*
+ * Copy data between request pages and io_iter, and 'offset'
+ * is the start point of linear offset of request.
+ */
+static size_t ublk_copy_user_pages(const struct request *req,
+ unsigned offset, struct iov_iter *uiter, int dir)
{
+ struct req_iterator iter;
+ struct bio_vec bv;
size_t done = 0;
- for (;;) {
- struct bio_vec bv = bio_iter_iovec(data->bio, data->iter);
- void *bv_buf = bvec_kmap_local(&bv);
+ rq_for_each_bvec(bv, req, iter) {
size_t copied;
+ if (offset >= bv.bv_len) {
+ offset -= bv.bv_len;
+ continue;
+ }
+
+ bv.bv_offset += offset;
+ bv.bv_len -= offset;
+ bv.bv_page += bv.bv_offset / PAGE_SIZE;
+ bv.bv_offset %= PAGE_SIZE;
if (dir == ITER_DEST)
- copied = copy_to_iter(bv_buf, bv.bv_len, uiter);
+ copied = copy_page_to_iter(
+ bv.bv_page, bv.bv_offset, bv.bv_len, uiter);
else
- copied = copy_from_iter(bv_buf, bv.bv_len, uiter);
-
- kunmap_local(bv_buf);
+ copied = copy_page_from_iter(
+ bv.bv_page, bv.bv_offset, bv.bv_len, uiter);
done += copied;
if (copied < bv.bv_len)
break;
- /* advance bio */
- bio_advance_iter_single(data->bio, &data->iter, copied);
- if (!data->iter.bi_size) {
- data->bio = data->bio->bi_next;
- if (data->bio == NULL)
- break;
- data->iter = data->bio->bi_iter;
- }
+ offset = 0;
}
return done;
}
-static bool ublk_advance_io_iter(const struct request *req,
- struct ublk_io_iter *iter, unsigned int offset)
-{
- struct bio *bio = req->bio;
-
- for_each_bio(bio) {
- if (bio->bi_iter.bi_size > offset) {
- iter->bio = bio;
- iter->iter = bio->bi_iter;
- bio_advance_iter(iter->bio, &iter->iter, offset);
- return true;
- }
- offset -= bio->bi_iter.bi_size;
- }
- return false;
-}
-
-/*
- * Copy data between request pages and io_iter, and 'offset'
- * is the start point of linear offset of request.
- */
-static size_t ublk_copy_user_pages(const struct request *req,
- unsigned offset, struct iov_iter *uiter, int dir)
-{
- struct ublk_io_iter iter;
-
- if (!ublk_advance_io_iter(req, &iter, offset))
- return 0;
-
- return ublk_copy_io_pages(&iter, uiter, dir);
-}
-
static inline bool ublk_need_map_req(const struct request *req)
{
return ublk_rq_has_data(req) && req_op(req) == REQ_OP_WRITE;
}
--
2.45.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH v2 1/2] ublk: use copy_{to,from}_iter() for user copy
2025-11-05 20:28 ` [PATCH v2 1/2] ublk: use copy_{to,from}_iter() for " Caleb Sander Mateos
@ 2025-11-06 4:22 ` Ming Lei
0 siblings, 0 replies; 6+ messages in thread
From: Ming Lei @ 2025-11-06 4:22 UTC (permalink / raw)
To: Caleb Sander Mateos; +Cc: Jens Axboe, linux-block, linux-kernel
On Wed, Nov 05, 2025 at 01:28:22PM -0700, Caleb Sander Mateos wrote:
> ublk_copy_user_pages()/ublk_copy_io_pages() currently uses
> iov_iter_get_pages2() to extract the pages from the iov_iter and
> memcpy()s between the bvec_iter and the iov_iter's pages one at a time.
> Switch to using copy_to_iter()/copy_from_iter() instead. This avoids the
> user page reference count increments and decrements and needing to split
> the memcpy() at user page boundaries. It also simplifies the code
> considerably.
> Ming reports a 40% throughput improvement when issuing I/O to the
> selftests null ublk server with zero-copy disabled.
>
> Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Thanks,
Ming
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v2 2/2] ublk: use rq_for_each_bvec() for user copy
2025-11-05 20:28 ` [PATCH v2 2/2] ublk: use rq_for_each_bvec() " Caleb Sander Mateos
@ 2025-11-06 4:27 ` Ming Lei
2025-11-06 16:49 ` Caleb Sander Mateos
0 siblings, 1 reply; 6+ messages in thread
From: Ming Lei @ 2025-11-06 4:27 UTC (permalink / raw)
To: Caleb Sander Mateos; +Cc: Jens Axboe, linux-block, linux-kernel
On Wed, Nov 05, 2025 at 01:28:23PM -0700, Caleb Sander Mateos wrote:
> ublk_advance_io_iter() and ublk_copy_io_pages() currently open-code the
> iteration over request's bvecs. Switch to the rq_for_each_bvec() macro
> provided by blk-mq to avoid reaching into the bio internals and simplify
> the code. Unlike bio_iter_iovec(), rq_for_each_bvec() can return
> multi-page bvecs. So switch from copy_{to,from}_iter() to
> copy_page_{to,from}_iter() to map and copy each page in the bvec.
>
> Suggested-by: Ming Lei <ming.lei@redhat.com>
> Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
> ---
> drivers/block/ublk_drv.c | 78 ++++++++++++----------------------------
> 1 file changed, 23 insertions(+), 55 deletions(-)
>
> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> index 40eee3e15a4c..929d40fe0250 100644
> --- a/drivers/block/ublk_drv.c
> +++ b/drivers/block/ublk_drv.c
> @@ -911,81 +911,49 @@ static const struct block_device_operations ub_fops = {
> .open = ublk_open,
> .free_disk = ublk_free_disk,
> .report_zones = ublk_report_zones,
> };
>
> -struct ublk_io_iter {
> - struct bio *bio;
> - struct bvec_iter iter;
> -};
> -
> -/* return how many bytes are copied */
> -static size_t ublk_copy_io_pages(struct ublk_io_iter *data,
> - struct iov_iter *uiter, int dir)
> +/*
> + * Copy data between request pages and io_iter, and 'offset'
> + * is the start point of linear offset of request.
> + */
> +static size_t ublk_copy_user_pages(const struct request *req,
> + unsigned offset, struct iov_iter *uiter, int dir)
> {
> + struct req_iterator iter;
> + struct bio_vec bv;
> size_t done = 0;
>
> - for (;;) {
> - struct bio_vec bv = bio_iter_iovec(data->bio, data->iter);
> - void *bv_buf = bvec_kmap_local(&bv);
> + rq_for_each_bvec(bv, req, iter) {
> size_t copied;
>
> + if (offset >= bv.bv_len) {
> + offset -= bv.bv_len;
> + continue;
> + }
> +
> + bv.bv_offset += offset;
> + bv.bv_len -= offset;
> + bv.bv_page += bv.bv_offset / PAGE_SIZE;
> + bv.bv_offset %= PAGE_SIZE;
The above two lines are not needed because copy_page_*_iter() handles
it already.
> if (dir == ITER_DEST)
> - copied = copy_to_iter(bv_buf, bv.bv_len, uiter);
> + copied = copy_page_to_iter(
> + bv.bv_page, bv.bv_offset, bv.bv_len, uiter);
WARN in page_copy_sane() is triggered because copy_page_*_iter() requires
all pages belong to one same compound page.
One quick fix is to replace rq_for_each_bvec() with rq_for_each_segment().
thanks,
Ming
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v2 2/2] ublk: use rq_for_each_bvec() for user copy
2025-11-06 4:27 ` Ming Lei
@ 2025-11-06 16:49 ` Caleb Sander Mateos
0 siblings, 0 replies; 6+ messages in thread
From: Caleb Sander Mateos @ 2025-11-06 16:49 UTC (permalink / raw)
To: Ming Lei; +Cc: Jens Axboe, linux-block, linux-kernel
On Wed, Nov 5, 2025 at 8:27 PM Ming Lei <ming.lei@redhat.com> wrote:
>
> On Wed, Nov 05, 2025 at 01:28:23PM -0700, Caleb Sander Mateos wrote:
> > ublk_advance_io_iter() and ublk_copy_io_pages() currently open-code the
> > iteration over request's bvecs. Switch to the rq_for_each_bvec() macro
> > provided by blk-mq to avoid reaching into the bio internals and simplify
> > the code. Unlike bio_iter_iovec(), rq_for_each_bvec() can return
> > multi-page bvecs. So switch from copy_{to,from}_iter() to
> > copy_page_{to,from}_iter() to map and copy each page in the bvec.
> >
> > Suggested-by: Ming Lei <ming.lei@redhat.com>
> > Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
> > ---
> > drivers/block/ublk_drv.c | 78 ++++++++++++----------------------------
> > 1 file changed, 23 insertions(+), 55 deletions(-)
> >
> > diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> > index 40eee3e15a4c..929d40fe0250 100644
> > --- a/drivers/block/ublk_drv.c
> > +++ b/drivers/block/ublk_drv.c
> > @@ -911,81 +911,49 @@ static const struct block_device_operations ub_fops = {
> > .open = ublk_open,
> > .free_disk = ublk_free_disk,
> > .report_zones = ublk_report_zones,
> > };
> >
> > -struct ublk_io_iter {
> > - struct bio *bio;
> > - struct bvec_iter iter;
> > -};
> > -
> > -/* return how many bytes are copied */
> > -static size_t ublk_copy_io_pages(struct ublk_io_iter *data,
> > - struct iov_iter *uiter, int dir)
> > +/*
> > + * Copy data between request pages and io_iter, and 'offset'
> > + * is the start point of linear offset of request.
> > + */
> > +static size_t ublk_copy_user_pages(const struct request *req,
> > + unsigned offset, struct iov_iter *uiter, int dir)
> > {
> > + struct req_iterator iter;
> > + struct bio_vec bv;
> > size_t done = 0;
> >
> > - for (;;) {
> > - struct bio_vec bv = bio_iter_iovec(data->bio, data->iter);
> > - void *bv_buf = bvec_kmap_local(&bv);
> > + rq_for_each_bvec(bv, req, iter) {
> > size_t copied;
> >
> > + if (offset >= bv.bv_len) {
> > + offset -= bv.bv_len;
> > + continue;
> > + }
> > +
> > + bv.bv_offset += offset;
> > + bv.bv_len -= offset;
> > + bv.bv_page += bv.bv_offset / PAGE_SIZE;
> > + bv.bv_offset %= PAGE_SIZE;
>
> The above two lines are not needed because copy_page_*_iter() handles
> it already.
Yes, I realized that, but was trying to avoid the error in
page_copy_sane(). Though it sounds like updating the starting page
isn't sufficient, as the bvec may still span multiple pages.
>
> > if (dir == ITER_DEST)
> > - copied = copy_to_iter(bv_buf, bv.bv_len, uiter);
> > + copied = copy_page_to_iter(
> > + bv.bv_page, bv.bv_offset, bv.bv_len, uiter);
>
> WARN in page_copy_sane() is triggered because copy_page_*_iter() requires
> all pages belong to one same compound page.
>
> One quick fix is to replace rq_for_each_bvec() with rq_for_each_segment().
Sure, I can switch to rq_for_each_segment(). But then there's not much
point in using copy_page_{to,from}_iter(), right? It allows skipping
the bvec_kmap_local()/kunmap_local(), but incurs additional overhead
to support copying multiple base pages within a single compound page,
which rq_for_each_segment() will never produce.
Best,
Caleb
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2025-11-06 16:49 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-11-05 20:28 [PATCH v2 0/2] ublk: simplify user copy Caleb Sander Mateos
2025-11-05 20:28 ` [PATCH v2 1/2] ublk: use copy_{to,from}_iter() for " Caleb Sander Mateos
2025-11-06 4:22 ` Ming Lei
2025-11-05 20:28 ` [PATCH v2 2/2] ublk: use rq_for_each_bvec() " Caleb Sander Mateos
2025-11-06 4:27 ` Ming Lei
2025-11-06 16:49 ` Caleb Sander Mateos
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).