From: Hannes Reinecke <hare@suse.de>
To: Keith Busch <kbusch@meta.com>,
linux-block@vger.kernel.org, linux-nvme@lists.infradead.org
Cc: Keith Busch <kbusch@kernel.org>
Subject: Re: [PATCH 4/5] block: add support for vectored copies
Date: Thu, 22 May 2025 15:58:18 +0200 [thread overview]
Message-ID: <4fdbe560-d646-496c-be51-49ea49d47449@suse.de> (raw)
In-Reply-To: <20250521223107.709131-5-kbusch@meta.com>
On 5/22/25 00:31, Keith Busch wrote:
> From: Keith Busch <kbusch@kernel.org>
>
> Copy offload can be used to defrad or garbage collect data spread across
Defrag?
> the disk. Most storage protocols provide a way to specifiy multiple
> sources in a single copy commnd, so introduce kernel and user space
> interfaces to accomplish that.
>
> Signed-off-by: Keith Busch <kbusch@kernel.org>
> ---
> block/blk-lib.c | 50 ++++++++++++++++++++++++----------
> block/ioctl.c | 59 +++++++++++++++++++++++++++++++++++++++++
> include/linux/blkdev.h | 2 ++
> include/uapi/linux/fs.h | 14 ++++++++++
> 4 files changed, 111 insertions(+), 14 deletions(-)
>
Any specific reason why this is a different patch, and not folded into
patch 2? It really feels odd to continuously updating interfaces which
have been added with the same patchset...
> diff --git a/block/blk-lib.c b/block/blk-lib.c
> index a538acbaa2cd7..7513b876a5399 100644
> --- a/block/blk-lib.c
> +++ b/block/blk-lib.c
> @@ -424,26 +424,46 @@ static int __blkdev_copy(struct block_device *bdev, sector_t dst_sector,
> }
>
> static int blkdev_copy_offload(struct block_device *bdev, sector_t dst_sector,
> - sector_t src_sector, sector_t nr_sects, gfp_t gfp)
> + struct bio_vec *bv, int nr_vecs, gfp_t gfp)
> {
> + unsigned size = 0;
> struct bio *bio;
> - int ret;
> -
> - struct bio_vec bv = {
> - .bv_sector = src_sector,
> - .bv_sectors = nr_sects,
> - };
> + int ret, i;
>
> - bio = bio_alloc(bdev, 1, REQ_OP_COPY, gfp);
> - bio_add_copy_src(bio, &bv);
> + bio = bio_alloc(bdev, nr_vecs, REQ_OP_COPY, gfp);
> + for (i = 0; i < nr_vecs; i++) {
> + size += bv[i].bv_sectors << SECTOR_SHIFT;
> + bio_add_copy_src(bio, &bv[i]);
> + }
> bio->bi_iter.bi_sector = dst_sector;
> - bio->bi_iter.bi_size = nr_sects << SECTOR_SHIFT;
> + bio->bi_iter.bi_size = size;
>
> ret = submit_bio_wait(bio);
> bio_put(bio);
> return ret;
> +}
> +
> +/**
> + * blkdev_copy_range - copy range of sectors to a destination
> + * @dst_sector: start sector of the destination to copy to
> + * @bv: vector of source sectors
> + * @nr_vecs: number of source sector vectors
> + * @gfp: allocation flags to use
> + */
> +int blkdev_copy_range(struct block_device *bdev, sector_t dst_sector,
> + struct bio_vec *bv, int nr_vecs, gfp_t gfp)
> +{
> + int ret, i;
>
> + if (bdev_copy_sectors(bdev))
> + return blkdev_copy_offload(bdev, dst_sector, bv, nr_vecs, gfp);
> +
> + for (i = 0, ret = 0; i < nr_vecs && !ret; i++)
> + ret = __blkdev_copy(bdev, dst_sector, bv[i].bv_sector,
> + bv[i].bv_sectors, gfp);
> + return ret;
> }
> +EXPORT_SYMBOL_GPL(blkdev_copy_range);
>
> /**
> * blkdev_copy - copy source sectors to a destination on the same block device
> @@ -455,9 +475,11 @@ static int blkdev_copy_offload(struct block_device *bdev, sector_t dst_sector,
> int blkdev_copy(struct block_device *bdev, sector_t dst_sector,
> sector_t src_sector, sector_t nr_sects, gfp_t gfp)
> {
> - if (bdev_copy_sectors(bdev))
> - return blkdev_copy_offload(bdev, dst_sector, src_sector,
> - nr_sects, gfp);
> - return __blkdev_copy(bdev, dst_sector, src_sector, nr_sects, gfp);
> + struct bio_vec bv = {
> + .bv_sector = src_sector,
> + .bv_sectors = nr_sects,
> + };
> +
> + return blkdev_copy_range(bdev, dst_sector, &bv, 1, gfp);
> }
> EXPORT_SYMBOL_GPL(blkdev_copy);
> diff --git a/block/ioctl.c b/block/ioctl.c
> index 6f03c65867348..4b5095be19e1a 100644
> --- a/block/ioctl.c
> +++ b/block/ioctl.c
> @@ -241,6 +241,63 @@ static int blk_ioctl_copy(struct block_device *bdev, blk_mode_t mode,
> return blkdev_copy(bdev, dst, src, nr, GFP_KERNEL);
> }
>
> +static int blk_ioctl_copy_vec(struct block_device *bdev, blk_mode_t mode,
> + void __user *argp)
> +{
> + sector_t align = bdev_logical_block_size(bdev) >> SECTOR_SHIFT;
> + struct bio_vec *bv, fast_bv[UIO_FASTIOV];
> + struct copy_range cr;
> + int i, nr, ret;
> + __u64 dst;
> +
> + if (!(mode & BLK_OPEN_WRITE))
> + return -EBADF;
> + if (copy_from_user(&cr, argp, sizeof(cr)))
> + return -EFAULT;
> + if (!(IS_ALIGNED(cr.dst_sector, align)))
> + return -EINVAL;
> +
> + nr = cr.nr_ranges;
> + if (nr <= UIO_FASTIOV) {
> + bv = fast_bv;
> + } else {
> + bv = kmalloc_array(nr, sizeof(*bv), GFP_KERNEL);
> + if (!bv)
> + return -ENOMEM;
> + }
> +
> + dst = cr.dst_sector;
> + for (i = 0; i < nr; i++) {
> + struct copy_source csrc;
> + __u64 nr_sects, src;
> +
> + if (copy_from_user(&csrc,
> + (void __user *)(cr.sources + i * sizeof(csrc)),
> + sizeof(csrc))) {
> + ret = -EFAULT;
> + goto out;
> + }
> +
> + nr_sects = csrc.nr_sectors;
> + src = csrc.src_sector;
> + if (!(IS_ALIGNED(src | nr_sects, align)) ||
> + (src < dst && src + nr_sects > dst) ||
> + (dst < src && dst + nr_sects > src)) {
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + bv[i].bv_sectors = nr_sects;
> + bv[i].bv_sector = src;
> + }
> +
> + ret = blkdev_copy_range(bdev, dst, bv, nr, GFP_KERNEL);
> +out:
> + if (bv != fast_bv)
> + kfree(bv);
> + return ret;
> +}
> +
> static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode,
> unsigned long arg)
> {
> @@ -605,6 +662,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode,
> return blk_ioctl_secure_erase(bdev, mode, argp);
> case BLKCPY:
> return blk_ioctl_copy(bdev, mode, argp);
> + case BLKCPY_VEC:
> + return blk_ioctl_copy_vec(bdev, mode, argp);
> case BLKZEROOUT:
> return blk_ioctl_zeroout(bdev, mode, arg);
> case BLKGETDISKSEQ:
And that makes it even worse; introducing two ioctls which basically do
the same thing (or where one is actually a special case of the other)
is probably not what we should be doing.
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
next prev parent reply other threads:[~2025-05-22 13:58 UTC|newest]
Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-21 22:31 [PATCH 0/5] block: another block copy offload Keith Busch
2025-05-21 22:31 ` [PATCH 1/5] block: new sector copy api Keith Busch
2025-05-22 10:02 ` Hannes Reinecke
2025-05-22 16:43 ` Keith Busch
2025-05-22 19:22 ` Bart Van Assche
2025-05-22 20:04 ` Keith Busch
2025-05-23 12:45 ` Christoph Hellwig
2025-05-23 17:02 ` Keith Busch
2025-05-26 5:18 ` Christoph Hellwig
2025-05-27 17:45 ` Keith Busch
2025-05-28 7:46 ` Christoph Hellwig
2025-05-28 22:41 ` Keith Busch
2025-06-02 4:58 ` Christoph Hellwig
2025-05-21 22:31 ` [PATCH 2/5] block: add support for copy offload Keith Busch
2025-05-22 13:49 ` Hannes Reinecke
2025-05-23 12:46 ` Christoph Hellwig
2025-05-23 13:26 ` Keith Busch
2025-05-23 13:37 ` Christoph Hellwig
2025-05-23 13:48 ` Keith Busch
2025-05-26 5:22 ` Christoph Hellwig
2025-05-27 21:33 ` Keith Busch
2025-05-28 7:47 ` Christoph Hellwig
2025-05-21 22:31 ` [PATCH 3/5] nvme: " Keith Busch
2025-05-22 0:47 ` Caleb Sander Mateos
2025-05-22 0:51 ` Caleb Sander Mateos
2025-05-22 3:23 ` Keith Busch
2025-05-22 3:41 ` Caleb Sander Mateos
2025-05-22 4:29 ` Keith Busch
2025-05-22 14:16 ` Caleb Sander Mateos
2025-05-23 12:49 ` Christoph Hellwig
2025-05-23 12:48 ` Christoph Hellwig
2025-05-22 13:54 ` Hannes Reinecke
2025-05-23 12:50 ` Christoph Hellwig
2025-05-23 14:22 ` Caleb Sander Mateos
2025-06-09 9:29 ` Niklas Cassel
2025-05-21 22:31 ` [PATCH 4/5] block: add support for vectored copies Keith Busch
2025-05-22 13:58 ` Hannes Reinecke [this message]
2025-05-22 16:36 ` Keith Busch
2025-05-21 22:31 ` [PATCH 5/5] nvmet: implement copy support for bdev backed target Keith Busch
2025-05-22 13:59 ` Hannes Reinecke
2025-05-23 13:18 ` Christoph Hellwig
2025-05-23 14:00 ` Keith Busch
2025-05-23 14:02 ` Christoph Hellwig
2025-05-22 15:52 ` [PATCH 0/5] block: another block copy offload Bart Van Assche
2025-05-23 12:53 ` Christoph Hellwig
2025-07-03 14:47 ` Niklas Cassel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4fdbe560-d646-496c-be51-49ea49d47449@suse.de \
--to=hare@suse.de \
--cc=kbusch@kernel.org \
--cc=kbusch@meta.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-nvme@lists.infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox