public inbox for linux-block@vger.kernel.org
 help / color / mirror / Atom feed
From: Nitesh Shetty <nj.shetty@samsung.com>
To: Jens Axboe <axboe@kernel.dk>, Jonathan Corbet <corbet@lwn.net>,
	Alasdair Kergon <agk@redhat.com>,
	Mike Snitzer <snitzer@kernel.org>,
	dm-devel@redhat.com, Keith Busch <kbusch@kernel.org>,
	Christoph Hellwig <hch@lst.de>, Sagi Grimberg <sagi@grimberg.me>,
	Chaitanya Kulkarni <kch@nvidia.com>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Christian Brauner <brauner@kernel.org>
Cc: martin.petersen@oracle.com, linux-scsi@vger.kernel.org,
	willy@infradead.org, hare@suse.de, djwong@kernel.org,
	bvanassche@acm.org, ming.lei@redhat.com, dlemoal@kernel.org,
	nitheshshetty@gmail.com, gost.dev@samsung.com,
	Nitesh Shetty <nj.shetty@samsung.com>,
	Vincent Fu <vincent.fu@samsung.com>,
	Anuj Gupta <anuj20.g@samsung.com>,
	linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-doc@vger.kernel.org, linux-nvme@lists.infradead.org,
	linux-fsdevel@vger.kernel.org
Subject: [PATCH v13 3/9] block: add emulation for copy
Date: Wed, 28 Jun 2023 00:06:17 +0530	[thread overview]
Message-ID: <20230627183629.26571-4-nj.shetty@samsung.com> (raw)
In-Reply-To: <20230627183629.26571-1-nj.shetty@samsung.com>

For the devices which does not support copy, copy emulation is added.
It is required for in-kernel users like fabrics, where file descriptor is
not available and hence they can't use copy_file_range.
Copy-emulation is implemented by reading from source into memory and
writing to the corresponding destination asynchronously.
Also emulation is used, if copy offload fails or partially completes.

Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
Signed-off-by: Vincent Fu <vincent.fu@samsung.com>
Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
---
 block/blk-lib.c           | 183 +++++++++++++++++++++++++++++++++++++-
 block/blk-map.c           |   4 +-
 include/linux/blk_types.h |   5 ++
 include/linux/blkdev.h    |   3 +
 4 files changed, 192 insertions(+), 3 deletions(-)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index 10c3eadd5bf6..09e0d5d51d03 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -234,6 +234,180 @@ static ssize_t __blkdev_copy_offload(
 	return blkdev_copy_wait_io_completion(cio);
 }
 
+static void *blkdev_copy_alloc_buf(sector_t req_size, sector_t *alloc_size,
+		gfp_t gfp_mask)
+{
+	int min_size = PAGE_SIZE;
+	void *buf;
+
+	while (req_size >= min_size) {
+		buf = kvmalloc(req_size, gfp_mask);
+		if (buf) {
+			*alloc_size = req_size;
+			return buf;
+		}
+		/* retry half the requested size */
+		req_size >>= 1;
+	}
+
+	return NULL;
+}
+
+static void blkdev_copy_emulate_write_endio(struct bio *bio)
+{
+	struct copy_ctx *ctx = bio->bi_private;
+	struct cio *cio = ctx->cio;
+	sector_t clen;
+
+	if (bio->bi_status) {
+		clen = (bio->bi_iter.bi_sector << SECTOR_SHIFT) - cio->pos_out;
+		cio->comp_len = min_t(sector_t, clen, cio->comp_len);
+	}
+	kfree(bvec_virt(&bio->bi_io_vec[0]));
+	bio_map_kern_endio(bio);
+	kfree(ctx);
+	if (atomic_dec_and_test(&cio->refcount)) {
+		if (cio->endio) {
+			cio->endio(cio->private, cio->comp_len);
+			kfree(cio);
+		} else
+			blk_wake_io_task(cio->waiter);
+	}
+}
+
+static void blkdev_copy_emulate_read_endio(struct bio *read_bio)
+{
+	struct copy_ctx *ctx = read_bio->bi_private;
+	struct cio *cio = ctx->cio;
+	sector_t clen;
+
+	if (read_bio->bi_status) {
+		clen = (read_bio->bi_iter.bi_sector << SECTOR_SHIFT) -
+						cio->pos_in;
+		cio->comp_len = min_t(sector_t, clen, cio->comp_len);
+		kfree(bvec_virt(&read_bio->bi_io_vec[0]));
+		bio_map_kern_endio(read_bio);
+		kfree(ctx);
+
+		if (atomic_dec_and_test(&cio->refcount)) {
+			if (cio->endio) {
+				cio->endio(cio->private, cio->comp_len);
+				kfree(cio);
+			} else
+				blk_wake_io_task(cio->waiter);
+		}
+	}
+	schedule_work(&ctx->dispatch_work);
+	kfree(read_bio);
+}
+
+static void blkdev_copy_dispatch_work(struct work_struct *work)
+{
+	struct copy_ctx *ctx = container_of(work, struct copy_ctx,
+			dispatch_work);
+
+	submit_bio(ctx->write_bio);
+}
+
+/*
+ * If native copy offload feature is absent, this function tries to emulate,
+ * by copying data from source to a temporary buffer and from buffer to
+ * destination device.
+ * Returns the length of bytes copied or error if encountered
+ */
+static ssize_t __blkdev_copy_emulate(
+		struct block_device *bdev_in, loff_t pos_in,
+		struct block_device *bdev_out, loff_t pos_out,
+		size_t len, cio_iodone_t endio, void *private, gfp_t gfp_mask)
+{
+	struct request_queue *in = bdev_get_queue(bdev_in);
+	struct request_queue *out = bdev_get_queue(bdev_out);
+	struct bio *read_bio, *write_bio;
+	void *buf = NULL;
+	struct copy_ctx *ctx;
+	struct cio *cio;
+	sector_t buf_len, req_len, rem = 0;
+	sector_t max_src_hw_len = min_t(unsigned int,
+			queue_max_hw_sectors(in),
+			queue_max_segments(in) << (PAGE_SHIFT - SECTOR_SHIFT))
+			<< SECTOR_SHIFT;
+	sector_t max_dst_hw_len = min_t(unsigned int,
+		queue_max_hw_sectors(out),
+			queue_max_segments(out) << (PAGE_SHIFT - SECTOR_SHIFT))
+			<< SECTOR_SHIFT;
+	sector_t max_hw_len = min_t(unsigned int,
+			max_src_hw_len, max_dst_hw_len);
+
+	cio = kzalloc(sizeof(struct cio), GFP_KERNEL);
+	if (!cio)
+		return -ENOMEM;
+	atomic_set(&cio->refcount, 0);
+	cio->pos_in = pos_in;
+	cio->pos_out = pos_out;
+	cio->waiter = current;
+	cio->endio = endio;
+	cio->private = private;
+
+	for (rem = len; rem > 0; rem -= buf_len) {
+		req_len = min_t(int, max_hw_len, rem);
+
+		buf = blkdev_copy_alloc_buf(req_len, &buf_len, gfp_mask);
+		if (!buf)
+			goto err_alloc_buf;
+
+		ctx = kzalloc(sizeof(struct copy_ctx), gfp_mask);
+		if (!ctx)
+			goto err_ctx;
+
+		read_bio = bio_map_kern(in, buf, buf_len, gfp_mask);
+		if (IS_ERR(read_bio))
+			goto err_read_bio;
+
+		write_bio = bio_map_kern(out, buf, buf_len, gfp_mask);
+		if (IS_ERR(write_bio))
+			goto err_write_bio;
+
+		ctx->cio = cio;
+		ctx->write_bio = write_bio;
+		INIT_WORK(&ctx->dispatch_work, blkdev_copy_dispatch_work);
+
+		read_bio->bi_iter.bi_sector = pos_in >> SECTOR_SHIFT;
+		read_bio->bi_iter.bi_size = buf_len;
+		read_bio->bi_opf = REQ_OP_READ | REQ_SYNC;
+		bio_set_dev(read_bio, bdev_in);
+		read_bio->bi_end_io = blkdev_copy_emulate_read_endio;
+		read_bio->bi_private = ctx;
+
+		write_bio->bi_iter.bi_size = buf_len;
+		write_bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
+		bio_set_dev(write_bio, bdev_out);
+		write_bio->bi_end_io = blkdev_copy_emulate_write_endio;
+		write_bio->bi_iter.bi_sector = pos_out >> SECTOR_SHIFT;
+		write_bio->bi_private = ctx;
+
+		atomic_inc(&cio->refcount);
+		submit_bio(read_bio);
+
+		pos_in += buf_len;
+		pos_out += buf_len;
+	}
+	return blkdev_copy_wait_io_completion(cio);
+
+err_write_bio:
+	bio_put(read_bio);
+err_read_bio:
+	kfree(ctx);
+err_ctx:
+	kvfree(buf);
+err_alloc_buf:
+	cio->comp_len -= min_t(sector_t, cio->comp_len, len - rem);
+	if (!atomic_read(&cio->refcount)) {
+		kfree(cio);
+		return -ENOMEM;
+	}
+	return blkdev_copy_wait_io_completion(cio);
+}
+
 static inline ssize_t blkdev_copy_sanity_check(
 	struct block_device *bdev_in, loff_t pos_in,
 	struct block_device *bdev_out, loff_t pos_out,
@@ -284,9 +458,16 @@ ssize_t blkdev_copy_offload(
 	if (ret)
 		return ret;
 
-	if (blk_queue_copy(q_in) && blk_queue_copy(q_out))
+	if (blk_queue_copy(q_in) && blk_queue_copy(q_out)) {
 		ret = __blkdev_copy_offload(bdev_in, pos_in, bdev_out, pos_out,
 			   len, endio, private, gfp_mask);
+		if (ret < 0)
+			ret = 0;
+	}
+
+	if (ret != len)
+		ret = __blkdev_copy_emulate(bdev_in, pos_in + ret, bdev_out,
+			 pos_out + ret, len - ret, endio, private, gfp_mask);
 
 	return ret;
 }
diff --git a/block/blk-map.c b/block/blk-map.c
index 44d74a30ddac..ceeb70a95fd1 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -363,7 +363,7 @@ static void bio_invalidate_vmalloc_pages(struct bio *bio)
 #endif
 }
 
-static void bio_map_kern_endio(struct bio *bio)
+void bio_map_kern_endio(struct bio *bio)
 {
 	bio_invalidate_vmalloc_pages(bio);
 	bio_uninit(bio);
@@ -380,7 +380,7 @@ static void bio_map_kern_endio(struct bio *bio)
  *	Map the kernel address into a bio suitable for io to a block
  *	device. Returns an error pointer in case of error.
  */
-static struct bio *bio_map_kern(struct request_queue *q, void *data,
+struct bio *bio_map_kern(struct request_queue *q, void *data,
 		unsigned int len, gfp_t gfp_mask)
 {
 	unsigned long kaddr = (unsigned long)data;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 336146798e56..f8c80940c7ad 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -562,4 +562,9 @@ struct cio {
 	atomic_t refcount;
 };
 
+struct copy_ctx {
+	struct cio *cio;
+	struct work_struct dispatch_work;
+	struct bio *write_bio;
+};
 #endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 963f5c97dec0..c176bf6173c5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1047,6 +1047,9 @@ ssize_t blkdev_copy_offload(
 		struct block_device *bdev_in, loff_t pos_in,
 		struct block_device *bdev_out, loff_t pos_out,
 		size_t len, cio_iodone_t end_io, void *private, gfp_t gfp_mask);
+struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
+		gfp_t gfp_mask);
+void bio_map_kern_endio(struct bio *bio);
 
 #define BLKDEV_ZERO_NOUNMAP	(1 << 0)  /* do not free blocks */
 #define BLKDEV_ZERO_NOFALLBACK	(1 << 1)  /* don't write explicit zeroes */
-- 
2.35.1.500.gb896f729e2


  parent reply	other threads:[~2023-06-28  8:59 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CGME20230627183950epcas5p1b924785633509f612ffa5d9616bfe447@epcas5p1.samsung.com>
2023-06-27 18:36 ` [PATCH v13 0/9] Implement copy offload support Nitesh Shetty
2023-06-27 18:36   ` [PATCH v13 1/9] block: Introduce queue limits for copy-offload support Nitesh Shetty
2023-06-28  6:40     ` Damien Le Moal
2023-06-28 15:35       ` Nitesh Shetty
2023-07-20  7:06     ` Christoph Hellwig
2023-07-20  7:58     ` Christoph Hellwig
2023-06-27 18:36   ` [PATCH v13 2/9] block: Add copy offload support infrastructure Nitesh Shetty
2023-06-28  6:45     ` Damien Le Moal
2023-06-28 16:03       ` Nitesh Shetty
2023-07-20  7:42     ` Christoph Hellwig
2023-07-27 10:29       ` Nitesh Shetty
2023-06-27 18:36   ` Nitesh Shetty [this message]
2023-06-28  6:50     ` [PATCH v13 3/9] block: add emulation for copy Damien Le Moal
2023-06-28 16:10       ` Nitesh Shetty
2023-06-29  8:33     ` Ming Lei
2023-06-30 11:22       ` Nitesh Shetty
2023-07-20  7:50     ` Christoph Hellwig
2023-08-01 13:07       ` Nitesh Shetty
2023-08-02  6:31         ` Kent Overstreet
2023-06-27 18:36   ` [PATCH v13 4/9] fs, block: copy_file_range for def_blk_ops for direct block device Nitesh Shetty
2023-06-28  6:51     ` Damien Le Moal
2023-06-28 16:39       ` Nitesh Shetty
2023-07-20  7:57     ` Christoph Hellwig
2023-07-24  5:46       ` Nitesh Shetty
2023-06-27 18:36   ` [PATCH v13 5/9] nvme: add copy offload support Nitesh Shetty
2023-07-20  8:00     ` Christoph Hellwig
2023-06-27 18:36   ` [PATCH v13 6/9] nvmet: add copy command support for bdev and file ns Nitesh Shetty
2023-06-27 18:36   ` [PATCH v13 7/9] dm: Add support for copy offload Nitesh Shetty
2023-06-27 18:36   ` [PATCH v13 8/9] dm: Enable copy offload for dm-linear target Nitesh Shetty
2023-06-27 18:36   ` [PATCH v13 9/9] null_blk: add support for copy offload Nitesh Shetty
2023-06-28 12:11     ` kernel test robot
2023-06-28 12:52     ` kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230627183629.26571-4-nj.shetty@samsung.com \
    --to=nj.shetty@samsung.com \
    --cc=agk@redhat.com \
    --cc=anuj20.g@samsung.com \
    --cc=axboe@kernel.dk \
    --cc=brauner@kernel.org \
    --cc=bvanassche@acm.org \
    --cc=corbet@lwn.net \
    --cc=djwong@kernel.org \
    --cc=dlemoal@kernel.org \
    --cc=dm-devel@redhat.com \
    --cc=gost.dev@samsung.com \
    --cc=hare@suse.de \
    --cc=hch@lst.de \
    --cc=kbusch@kernel.org \
    --cc=kch@nvidia.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=ming.lei@redhat.com \
    --cc=nitheshshetty@gmail.com \
    --cc=sagi@grimberg.me \
    --cc=snitzer@kernel.org \
    --cc=vincent.fu@samsung.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox