All of lore.kernel.org
 help / color / mirror / Atom feed
From: Bart Van Assche <bvanassche@acm.org>
To: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org, linux-scsi@vger.kernel.org,
	linux-nvme@lists.infradead.org, "Christoph Hellwig" <hch@lst.de>,
	"Nitesh Shetty" <nj.shetty@samsung.com>,
	"Bart Van Assche" <bvanassche@acm.org>,
	"Kanchan Joshi" <joshi.k@samsung.com>,
	"Javier González" <javier.gonz@samsung.com>,
	"Anuj Gupta" <anuj20.g@samsung.com>
Subject: [PATCH 08/12] nvme: Add copy offloading support
Date: Fri, 24 Apr 2026 15:41:57 -0700	[thread overview]
Message-ID: <20260424224201.1949243-9-bvanassche@acm.org> (raw)
In-Reply-To: <20260424224201.1949243-1-bvanassche@acm.org>

From: Nitesh Shetty <nj.shetty@samsung.com>

Add support for the NVMe Copy command. This command supports a single
destination range and up to 256 source ranges.

Add trace event support for nvme_copy_cmd.

Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
Signed-off-by: Javier González <javier.gonz@samsung.com>
Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
[ bvanassche: generalized Copy support from one to 256 source ranges; fixed
  an endianness issue in nvme_config_copy(); renamed rsvd91 into rsvd81 and
  verified the offset with pahole ]
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/nvme/host/constants.c |   1 +
 drivers/nvme/host/core.c      | 106 ++++++++++++++++++++++++++++++++++
 drivers/nvme/host/trace.c     |  19 ++++++
 include/linux/nvme.h          |  46 ++++++++++++++-
 4 files changed, 169 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c
index dc90df9e13a2..b80c7c7fb629 100644
--- a/drivers/nvme/host/constants.c
+++ b/drivers/nvme/host/constants.c
@@ -19,6 +19,7 @@ static const char * const nvme_ops[] = {
 	[nvme_cmd_resv_report] = "Reservation Report",
 	[nvme_cmd_resv_acquire] = "Reservation Acquire",
 	[nvme_cmd_resv_release] = "Reservation Release",
+	[nvme_cmd_copy] = "Copy Offload",
 	[nvme_cmd_zone_mgmt_send] = "Zone Management Send",
 	[nvme_cmd_zone_mgmt_recv] = "Zone Management Receive",
 	[nvme_cmd_zone_append] = "Zone Append",
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 1e33af94c24b..6f3c1fde112f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -6,6 +6,7 @@
 
 #include <linux/async.h>
 #include <linux/blkdev.h>
+#include <linux/blk-copy.h>
 #include <linux/blk-mq.h>
 #include <linux/blk-integrity.h>
 #include <linux/compat.h>
@@ -821,6 +822,87 @@ static inline void nvme_setup_flush(struct nvme_ns *ns,
 	cmnd->common.nsid = cpu_to_le32(ns->head->ns_id);
 }
 
+/*
+ * Translate REQ_OP_COPY_SRC and REQ_OP_COPY_DST bios into an NVMe Copy command.
+ * The NVMe copy command supports multiple source LBA ranges, a single
+ * destination LBA range, and also supports copying across NVMe namespaces. This
+ * implementation supports all these features except copying across NVMe
+ * namespaces.
+ */
+static inline blk_status_t nvme_setup_copy_offload(struct nvme_ns *ns,
+						   struct request *req,
+						   struct nvme_command *cmnd)
+{
+	const u32 nr_range = blk_copy_bio_count(req, REQ_OP_COPY_SRC);
+	struct nvme_ns *src_ns, *dst_ns;
+	struct bio *src_bio = NULL, *dst_bio;
+	struct nvme_copy_range *range;
+	u16 control = 0;
+	u64 dlba;
+
+	dst_bio = blk_first_copy_bio(req, REQ_OP_COPY_DST);
+
+	if (WARN_ON_ONCE(!dst_bio))
+		return BLK_STS_IOERR;
+
+	/* TO DO: derive dst_ns from dst_bio. */
+	dst_ns = ns;
+	dlba = nvme_sect_to_lba(dst_ns->head, dst_bio->bi_iter.bi_sector);
+
+	if (req->cmd_flags & REQ_FUA)
+		control |= NVME_RW_FUA;
+
+	if (req->cmd_flags & REQ_FAILFAST_DEV)
+		control |= NVME_RW_LR;
+
+	*cmnd = (typeof(*cmnd)){
+		.copy = {
+			.opcode = nvme_cmd_copy,
+			.nsid = cpu_to_le32(dst_ns->head->ns_id),
+			.control = cpu_to_le16(control),
+			.sdlba = cpu_to_le64(dlba),
+			.desfmt_prinfor = 2, /* DESFMT=2 */
+			.nr_range = nr_range - 1, /* 0's based */
+		}
+	};
+
+	range = kmalloc_array(nr_range, sizeof(*range),
+			      GFP_ATOMIC | __GFP_ZERO | __GFP_NOWARN);
+	if (!range)
+		return BLK_STS_RESOURCE;
+
+	for (unsigned int i = 0; i < nr_range; i++) {
+		u64 slba;
+		u32 nslb;
+
+		if (!src_bio)
+			src_bio = blk_first_copy_bio(req, REQ_OP_COPY_SRC);
+		else
+			src_bio = blk_next_copy_bio(src_bio);
+		if (WARN_ON_ONCE(!src_bio))
+			goto free_range;
+		/* TO DO: derive src_ns from src_bio. */
+		src_ns = ns;
+		slba = nvme_sect_to_lba(src_ns->head,
+					src_bio->bi_iter.bi_sector);
+		nslb = src_bio->bi_iter.bi_size >> src_ns->head->lba_shift;
+		range[i].nsid = cpu_to_le32(src_ns->head->ns_id); /* requires DESFMT=2 */
+		range[i].slba = cpu_to_le64(slba);
+		range[i].nlb = cpu_to_le16(nslb - 1);
+	}
+
+	req->special_vec.bv_page = virt_to_page(range);
+	req->special_vec.bv_offset = offset_in_page(range);
+	req->special_vec.bv_len = sizeof(*range) * nr_range;
+	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
+
+	return BLK_STS_OK;
+
+free_range:
+	kfree(range);
+	return BLK_STS_IOERR;
+}
+
 static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 		struct nvme_command *cmnd)
 {
@@ -1122,6 +1204,10 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
 	case REQ_OP_ZONE_APPEND:
 		ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_zone_append);
 		break;
+	case REQ_OP_COPY_DST:
+	case REQ_OP_COPY_SRC:
+		ret = nvme_setup_copy_offload(ns, req, cmd);
+		break;
 	default:
 		WARN_ON_ONCE(1);
 		return BLK_STS_IOERR;
@@ -1884,6 +1970,21 @@ static bool nvme_init_integrity(struct nvme_ns_head *head,
 	return true;
 }
 
+static void nvme_config_copy(struct nvme_ns *ns, struct nvme_id_ns *id,
+			     struct queue_limits *lim)
+{
+	struct nvme_ctrl *ctrl = ns->ctrl;
+
+	if (!(ctrl->oncs & NVME_CTRL_ONCS_COPY)) {
+		lim->max_copy_hw_sectors = 0;
+		return;
+	}
+	lim->max_copy_hw_sectors = nvme_lba_to_sect(ns->head,
+						    le16_to_cpu(id->mssrl));
+	lim->max_copy_src_segments = 256;
+	lim->max_copy_dst_segments = 1;
+}
+
 static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
 {
 	return uuid_equal(&a->uuid, &b->uuid) &&
@@ -2416,6 +2517,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 	if (!nvme_update_disk_info(ns, id, nvm, &lim))
 		capacity = 0;
 
+	nvme_config_copy(ns, id, &lim);
 	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
 	    ns->head->ids.csi == NVME_CSI_ZNS)
 		nvme_update_zone_info(ns, &lim, &zi);
@@ -2542,6 +2644,9 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
 		lim.physical_block_size = ns_lim->physical_block_size;
 		lim.io_min = ns_lim->io_min;
 		lim.io_opt = ns_lim->io_opt;
+		lim.max_copy_hw_sectors = UINT_MAX;
+		lim.max_copy_src_segments = U16_MAX;
+		lim.max_copy_dst_segments = U16_MAX;
 		queue_limits_stack_bdev(&lim, ns->disk->part0, 0,
 					ns->head->disk->disk_name);
 		if (unsupported)
@@ -5368,6 +5473,7 @@ static inline void _nvme_check_size(void)
 	BUILD_BUG_ON(sizeof(struct nvme_download_firmware) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_dsm_cmd) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_copy_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_write_zeroes_cmd) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_get_log_page_command) != 64);
diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c
index ad25ad1e4041..7096ade7740c 100644
--- a/drivers/nvme/host/trace.c
+++ b/drivers/nvme/host/trace.c
@@ -153,6 +153,23 @@ static const char *nvme_trace_read_write(struct trace_seq *p, u8 *cdw10)
 	return ret;
 }
 
+static const char *nvme_trace_copy(struct trace_seq *p, u8 *cdw10)
+{
+	const char *ret = trace_seq_buffer_ptr(p);
+	u64 sdlba = get_unaligned_le64(cdw10);
+	u8 nr_range = get_unaligned_le16(cdw10 + 8);
+	u16 control = get_unaligned_le16(cdw10 + 10);
+	u32 dsmgmt = get_unaligned_le32(cdw10 + 12);
+	u32 reftag = get_unaligned_le32(cdw10 + 16);
+
+	trace_seq_printf(p,
+		"sdlba=%llu, nr_range=%u, ctrl=0x%x, dsmgmt=%u, reftag=%u",
+		sdlba, nr_range, control, dsmgmt, reftag);
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+
 static const char *nvme_trace_dsm(struct trace_seq *p, u8 *cdw10)
 {
 	const char *ret = trace_seq_buffer_ptr(p);
@@ -386,6 +403,8 @@ const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p,
 		return nvme_trace_resv_rel(p, cdw10);
 	case nvme_cmd_resv_report:
 		return nvme_trace_resv_report(p, cdw10);
+	case nvme_cmd_copy:
+		return nvme_trace_copy(p, cdw10);
 	default:
 		return nvme_trace_common(p, cdw10);
 	}
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 041f30931a90..ead8e5128e3b 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -376,7 +376,7 @@ struct nvme_id_ctrl {
 	__u8			nvscc;
 	__u8			nwpc;
 	__le16			acwu;
-	__u8			rsvd534[2];
+	__le16			ocfs;
 	__le32			sgls;
 	__le32			mnan;
 	__u8			rsvd544[224];
@@ -404,6 +404,7 @@ enum {
 	NVME_CTRL_ONCS_WRITE_ZEROES		= 1 << 3,
 	NVME_CTRL_ONCS_RESERVATIONS		= 1 << 5,
 	NVME_CTRL_ONCS_TIMESTAMP		= 1 << 6,
+	NVME_CTRL_ONCS_COPY			= 1 << 8,
 	NVME_CTRL_VWC_PRESENT			= 1 << 0,
 	NVME_CTRL_OACS_SEC_SUPP                 = 1 << 0,
 	NVME_CTRL_OACS_NS_MNGT_SUPP		= 1 << 3,
@@ -458,7 +459,10 @@ struct nvme_id_ns {
 	__le16			npdg;
 	__le16			npda;
 	__le16			nows;
-	__u8			rsvd74[18];
+	__le16			mssrl;
+	__le32			mcl;
+	__u8			msrc;
+	__u8			rsvd81[11];
 	__le32			anagrpid;
 	__u8			rsvd96[3];
 	__u8			nsattr;
@@ -967,6 +971,7 @@ enum nvme_opcode {
 	nvme_cmd_resv_acquire	= 0x11,
 	nvme_cmd_io_mgmt_recv	= 0x12,
 	nvme_cmd_resv_release	= 0x15,
+	nvme_cmd_copy		= 0x19,
 	nvme_cmd_zone_mgmt_send	= 0x79,
 	nvme_cmd_zone_mgmt_recv	= 0x7a,
 	nvme_cmd_zone_append	= 0x7d,
@@ -991,7 +996,8 @@ enum nvme_opcode {
 		nvme_opcode_name(nvme_cmd_resv_release),	\
 		nvme_opcode_name(nvme_cmd_zone_mgmt_send),	\
 		nvme_opcode_name(nvme_cmd_zone_mgmt_recv),	\
-		nvme_opcode_name(nvme_cmd_zone_append))
+		nvme_opcode_name(nvme_cmd_zone_append),		\
+		nvme_opcode_name(nvme_cmd_copy))
 
 
 
@@ -1169,6 +1175,39 @@ struct nvme_dsm_range {
 	__le64			slba;
 };
 
+struct nvme_copy_command {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u64			rsvd2;
+	__le64			metadata;
+	union nvme_data_ptr	dptr;
+	__le64			sdlba;
+	__u8			nr_range;
+	__u8			desfmt_prinfor;
+	__le16			control;
+	__le16			rsvd13;
+	__le16			dspec;
+	__le32			ilbrt;
+	__le16			lbat;
+	__le16			lbatm;
+};
+
+struct nvme_copy_range {
+	__le32			nsid;	/* DESFMT=2 only */
+	__le32			rsvd1;
+	__le64			slba;
+	__le16			nlb;
+	__le16			rsvd18;
+	__le32			rsvd20;
+	__le32			eilbrt;
+	__le16			elbat;
+	__le16			elbatm;
+};
+
+static_assert(sizeof(struct nvme_copy_range) == 32);
+
 struct nvme_write_zeroes_cmd {
 	__u8			opcode;
 	__u8			flags;
@@ -2001,6 +2040,7 @@ struct nvme_command {
 		struct nvme_download_firmware dlfw;
 		struct nvme_format_cmd format;
 		struct nvme_dsm_cmd dsm;
+		struct nvme_copy_command copy;
 		struct nvme_write_zeroes_cmd write_zeroes;
 		struct nvme_zone_mgmt_send_cmd zms;
 		struct nvme_zone_mgmt_recv_cmd zmr;

  parent reply	other threads:[~2026-04-24 22:42 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-24 22:41 [PATCH 00/12] Block storage copy offloading Bart Van Assche
2026-04-24 22:41 ` [PATCH 01/12] block: Introduce queue limits for " Bart Van Assche
2026-04-24 22:41 ` [PATCH 02/12] block: Add the REQ_OP_COPY_{SRC,DST} operations Bart Van Assche
2026-04-24 22:41 ` [PATCH 03/12] block: Introduce blkdev_copy_offload() Bart Van Assche
2026-04-24 22:41 ` [PATCH 04/12] block: Add an onloaded copy implementation Bart Van Assche
2026-04-24 22:41 ` [PATCH 05/12] block: Introduce accessor functions for copy offload bios Bart Van Assche
2026-04-24 22:41 ` [PATCH 06/12] fs/read_write: Generalize generic_copy_file_checks() Bart Van Assche
2026-04-24 22:41 ` [PATCH 07/12] fs, block: Add copy_file_range() support for block devices Bart Van Assche
2026-04-24 22:41 ` Bart Van Assche [this message]
2026-04-24 22:41 ` [PATCH 09/12] nvmet: Support the Copy command Bart Van Assche
2026-04-24 22:41 ` [PATCH 10/12] dm: Add support for copy offloading Bart Van Assche
2026-04-24 22:42 ` [PATCH 11/12] dm-linear: Enable " Bart Van Assche
2026-04-24 22:42 ` [PATCH 12/12] null_blk: Add support for REQ_OP_COPY_* Bart Van Assche
2026-05-22 12:00 ` [PATCH 00/12] Block storage copy offloading Shin'ichiro Kawasaki
2026-05-22 16:22   ` Bart Van Assche

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260424224201.1949243-9-bvanassche@acm.org \
    --to=bvanassche@acm.org \
    --cc=anuj20.g@samsung.com \
    --cc=axboe@kernel.dk \
    --cc=hch@lst.de \
    --cc=javier.gonz@samsung.com \
    --cc=joshi.k@samsung.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=nj.shetty@samsung.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.