From: Bart Van Assche <bvanassche@acm.org>
To: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org, linux-scsi@vger.kernel.org,
linux-nvme@lists.infradead.org, "Christoph Hellwig" <hch@lst.de>,
"Nitesh Shetty" <nj.shetty@samsung.com>,
"Bart Van Assche" <bvanassche@acm.org>,
"Kanchan Joshi" <joshi.k@samsung.com>,
"Javier González" <javier.gonz@samsung.com>,
"Anuj Gupta" <anuj20.g@samsung.com>
Subject: [PATCH 08/12] nvme: Add copy offloading support
Date: Fri, 24 Apr 2026 15:41:57 -0700 [thread overview]
Message-ID: <20260424224201.1949243-9-bvanassche@acm.org> (raw)
In-Reply-To: <20260424224201.1949243-1-bvanassche@acm.org>
From: Nitesh Shetty <nj.shetty@samsung.com>
Add support for the NVMe Copy command. This command supports a single
destination range and up to 256 source ranges.
Add trace event support for nvme_copy_cmd.
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
Signed-off-by: Javier González <javier.gonz@samsung.com>
Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
[ bvanassche: generalized Copy support from one to 256 source ranges; fixed
an endianness issue in nvme_config_copy(); renamed rsvd91 into rsvd81 and
verified the offset with pahole ]
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
drivers/nvme/host/constants.c | 1 +
drivers/nvme/host/core.c | 106 ++++++++++++++++++++++++++++++++++
drivers/nvme/host/trace.c | 19 ++++++
include/linux/nvme.h | 46 ++++++++++++++-
4 files changed, 169 insertions(+), 3 deletions(-)
diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c
index dc90df9e13a2..b80c7c7fb629 100644
--- a/drivers/nvme/host/constants.c
+++ b/drivers/nvme/host/constants.c
@@ -19,6 +19,7 @@ static const char * const nvme_ops[] = {
[nvme_cmd_resv_report] = "Reservation Report",
[nvme_cmd_resv_acquire] = "Reservation Acquire",
[nvme_cmd_resv_release] = "Reservation Release",
+ [nvme_cmd_copy] = "Copy Offload",
[nvme_cmd_zone_mgmt_send] = "Zone Management Send",
[nvme_cmd_zone_mgmt_recv] = "Zone Management Receive",
[nvme_cmd_zone_append] = "Zone Append",
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 1e33af94c24b..6f3c1fde112f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -6,6 +6,7 @@
#include <linux/async.h>
#include <linux/blkdev.h>
+#include <linux/blk-copy.h>
#include <linux/blk-mq.h>
#include <linux/blk-integrity.h>
#include <linux/compat.h>
@@ -821,6 +822,87 @@ static inline void nvme_setup_flush(struct nvme_ns *ns,
cmnd->common.nsid = cpu_to_le32(ns->head->ns_id);
}
+/*
+ * Translate the REQ_OP_COPY_SRC and REQ_OP_COPY_DST bios attached to @req into
+ * a single NVMe Copy command.
+ *
+ * The NVMe Copy command supports multiple source LBA ranges, a single
+ * destination LBA range, and copying across NVMe namespaces. This
+ * implementation supports all these features except copying across NVMe
+ * namespaces: both the source and the destination namespace are @ns.
+ *
+ * @ns:   namespace used for the destination and for every source range.
+ * @req:  request carrying one REQ_OP_COPY_DST bio and one REQ_OP_COPY_SRC bio
+ *        per source range.
+ * @cmnd: command to fill in; it is overwritten completely.
+ *
+ * On success the array of source range descriptors is attached to @req as its
+ * special payload (RQF_SPECIAL_PAYLOAD is set) and BLK_STS_OK is returned.
+ * Returns BLK_STS_RESOURCE if the descriptor array cannot be allocated and
+ * BLK_STS_IOERR if the bios attached to @req cannot be encoded.
+ */
+static inline blk_status_t nvme_setup_copy_offload(struct nvme_ns *ns,
+						   struct request *req,
+						   struct nvme_command *cmnd)
+{
+	const u32 nr_range = blk_copy_bio_count(req, REQ_OP_COPY_SRC);
+	struct nvme_ns *src_ns, *dst_ns;
+	struct bio *src_bio = NULL, *dst_bio;
+	struct nvme_copy_range *range;
+	u16 control = 0;
+	u64 dlba;
+
+	/*
+	 * The range count is stored as an 8-bit 0's based value, so only 1 to
+	 * 256 source ranges can be encoded. Reject anything else instead of
+	 * letting "nr_range - 1" wrap around; a count of zero would
+	 * additionally result in a zero-sized payload allocation.
+	 */
+	if (WARN_ON_ONCE(!nr_range || nr_range > U8_MAX + 1))
+		return BLK_STS_IOERR;
+
+	dst_bio = blk_first_copy_bio(req, REQ_OP_COPY_DST);
+
+	if (WARN_ON_ONCE(!dst_bio))
+		return BLK_STS_IOERR;
+
+	/* TODO: derive dst_ns from dst_bio. */
+	dst_ns = ns;
+	dlba = nvme_sect_to_lba(dst_ns->head, dst_bio->bi_iter.bi_sector);
+
+	if (req->cmd_flags & REQ_FUA)
+		control |= NVME_RW_FUA;
+
+	if (req->cmd_flags & REQ_FAILFAST_DEV)
+		control |= NVME_RW_LR;
+
+	*cmnd = (typeof(*cmnd)){
+		.copy = {
+			.opcode		= nvme_cmd_copy,
+			.nsid		= cpu_to_le32(dst_ns->head->ns_id),
+			.control	= cpu_to_le16(control),
+			.sdlba		= cpu_to_le64(dlba),
+			.desfmt_prinfor	= 2, /* DESFMT=2 */
+			.nr_range	= nr_range - 1, /* 0's based */
+		}
+	};
+
+	/* Zeroed so that the descriptor fields not set below are sent as 0. */
+	range = kcalloc(nr_range, sizeof(*range), GFP_ATOMIC | __GFP_NOWARN);
+	if (!range)
+		return BLK_STS_RESOURCE;
+
+	for (unsigned int i = 0; i < nr_range; i++) {
+		u64 slba;
+		u32 nslb;
+
+		if (!src_bio)
+			src_bio = blk_first_copy_bio(req, REQ_OP_COPY_SRC);
+		else
+			src_bio = blk_next_copy_bio(src_bio);
+		/* Fewer source bios than blk_copy_bio_count() reported. */
+		if (WARN_ON_ONCE(!src_bio))
+			goto free_range;
+		/* TODO: derive src_ns from src_bio. */
+		src_ns = ns;
+		slba = nvme_sect_to_lba(src_ns->head,
+					src_bio->bi_iter.bi_sector);
+		nslb = src_bio->bi_iter.bi_size >> src_ns->head->lba_shift;
+		/*
+		 * nlb is a 16-bit 0's based field: an empty source range or
+		 * one longer than 65536 blocks cannot be represented.
+		 */
+		if (WARN_ON_ONCE(!nslb || nslb > U16_MAX + 1))
+			goto free_range;
+		range[i].nsid = cpu_to_le32(src_ns->head->ns_id); /* requires DESFMT=2 */
+		range[i].slba = cpu_to_le64(slba);
+		range[i].nlb = cpu_to_le16(nslb - 1);
+	}
+
+	/*
+	 * Hand the descriptor array to the request as its data payload.
+	 * NOTE(review): presumably released by the request cleanup path like
+	 * other special payloads - confirm.
+	 */
+	req->special_vec.bv_page = virt_to_page(range);
+	req->special_vec.bv_offset = offset_in_page(range);
+	req->special_vec.bv_len = sizeof(*range) * nr_range;
+	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
+
+	return BLK_STS_OK;
+
+free_range:
+	kfree(range);
+	return BLK_STS_IOERR;
+}
+
static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmnd)
{
@@ -1122,6 +1204,10 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
case REQ_OP_ZONE_APPEND:
ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_zone_append);
break;
+ case REQ_OP_COPY_DST:
+ case REQ_OP_COPY_SRC:
+ ret = nvme_setup_copy_offload(ns, req, cmd);
+ break;
default:
WARN_ON_ONCE(1);
return BLK_STS_IOERR;
@@ -1884,6 +1970,21 @@ static bool nvme_init_integrity(struct nvme_ns_head *head,
return true;
}
+/*
+ * Derive the copy offload queue limits of a namespace from the controller
+ * capabilities and the Identify Namespace data.
+ *
+ * @ns:  namespace being configured; its controller's ONCS field decides
+ *       whether the Copy command is available at all.
+ * @id:  Identify Namespace data providing MSSRL and MSRC.
+ * @lim: queue limits to update.
+ *
+ * If the controller does not support Copy, max_copy_hw_sectors is set to zero
+ * and the segment limits are left untouched.
+ */
+static void nvme_config_copy(struct nvme_ns *ns, struct nvme_id_ns *id,
+			     struct queue_limits *lim)
+{
+	struct nvme_ctrl *ctrl = ns->ctrl;
+
+	if (!(ctrl->oncs & NVME_CTRL_ONCS_COPY)) {
+		lim->max_copy_hw_sectors = 0;
+		return;
+	}
+	/*
+	 * NOTE(review): the total copy length limit (id->mcl) is not applied
+	 * here, and nvme_setup_copy_offload() always uses descriptor format 2
+	 * without the controller's OCFS field being consulted - confirm that
+	 * both are intended.
+	 */
+	lim->max_copy_hw_sectors = nvme_lba_to_sect(ns->head,
+						     le16_to_cpu(id->mssrl));
+	/*
+	 * MSRC is a 0's based count of the source ranges the namespace accepts
+	 * per Copy command (1..256). Honor it instead of assuming that every
+	 * namespace accepts the maximum of 256 ranges.
+	 */
+	lim->max_copy_src_segments = id->msrc + 1;
+	lim->max_copy_dst_segments = 1;
+}
+
static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
{
return uuid_equal(&a->uuid, &b->uuid) &&
@@ -2416,6 +2517,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
if (!nvme_update_disk_info(ns, id, nvm, &lim))
capacity = 0;
+ nvme_config_copy(ns, id, &lim);
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
ns->head->ids.csi == NVME_CSI_ZNS)
nvme_update_zone_info(ns, &lim, &zi);
@@ -2542,6 +2644,9 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
lim.physical_block_size = ns_lim->physical_block_size;
lim.io_min = ns_lim->io_min;
lim.io_opt = ns_lim->io_opt;
+ lim.max_copy_hw_sectors = UINT_MAX;
+ lim.max_copy_src_segments = U16_MAX;
+ lim.max_copy_dst_segments = U16_MAX;
queue_limits_stack_bdev(&lim, ns->disk->part0, 0,
ns->head->disk->disk_name);
if (unsupported)
@@ -5368,6 +5473,7 @@ static inline void _nvme_check_size(void)
BUILD_BUG_ON(sizeof(struct nvme_download_firmware) != 64);
BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
BUILD_BUG_ON(sizeof(struct nvme_dsm_cmd) != 64);
+ BUILD_BUG_ON(sizeof(struct nvme_copy_command) != 64);
BUILD_BUG_ON(sizeof(struct nvme_write_zeroes_cmd) != 64);
BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
BUILD_BUG_ON(sizeof(struct nvme_get_log_page_command) != 64);
diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c
index ad25ad1e4041..7096ade7740c 100644
--- a/drivers/nvme/host/trace.c
+++ b/drivers/nvme/host/trace.c
@@ -153,6 +153,23 @@ static const char *nvme_trace_read_write(struct trace_seq *p, u8 *cdw10)
return ret;
}
+/*
+ * Format the command-specific dwords of an NVMe Copy command for tracing.
+ * @cdw10 points at the first command-specific byte; the offsets used below
+ * are relative to it. Returns the start of the NUL-terminated text appended
+ * to @p.
+ */
+static const char *nvme_trace_copy(struct trace_seq *p, u8 *cdw10)
+{
+	const char *start = trace_seq_buffer_ptr(p);
+	u32 ref_tag = get_unaligned_le32(&cdw10[16]);
+	u32 ds_mgmt = get_unaligned_le32(&cdw10[12]);
+	u16 ctrl = get_unaligned_le16(&cdw10[10]);
+	/* Single byte: the raw range count as stored in the command. */
+	u8 ranges = cdw10[8];
+	u64 dst_lba = get_unaligned_le64(&cdw10[0]);
+
+	trace_seq_printf(p,
+			 "sdlba=%llu, nr_range=%u, ctrl=0x%x, dsmgmt=%u, reftag=%u",
+			 dst_lba, ranges, ctrl, ds_mgmt, ref_tag);
+	trace_seq_putc(p, 0);
+
+	return start;
+}
+
+
static const char *nvme_trace_dsm(struct trace_seq *p, u8 *cdw10)
{
const char *ret = trace_seq_buffer_ptr(p);
@@ -386,6 +403,8 @@ const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p,
return nvme_trace_resv_rel(p, cdw10);
case nvme_cmd_resv_report:
return nvme_trace_resv_report(p, cdw10);
+ case nvme_cmd_copy:
+ return nvme_trace_copy(p, cdw10);
default:
return nvme_trace_common(p, cdw10);
}
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 041f30931a90..ead8e5128e3b 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -376,7 +376,7 @@ struct nvme_id_ctrl {
__u8 nvscc;
__u8 nwpc;
__le16 acwu;
- __u8 rsvd534[2];
+ __le16 ocfs;
__le32 sgls;
__le32 mnan;
__u8 rsvd544[224];
@@ -404,6 +404,7 @@ enum {
NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3,
NVME_CTRL_ONCS_RESERVATIONS = 1 << 5,
NVME_CTRL_ONCS_TIMESTAMP = 1 << 6,
+ NVME_CTRL_ONCS_COPY = 1 << 8,
NVME_CTRL_VWC_PRESENT = 1 << 0,
NVME_CTRL_OACS_SEC_SUPP = 1 << 0,
NVME_CTRL_OACS_NS_MNGT_SUPP = 1 << 3,
@@ -458,7 +459,10 @@ struct nvme_id_ns {
__le16 npdg;
__le16 npda;
__le16 nows;
- __u8 rsvd74[18];
+ __le16 mssrl;
+ __le32 mcl;
+ __u8 msrc;
+ __u8 rsvd81[11];
__le32 anagrpid;
__u8 rsvd96[3];
__u8 nsattr;
@@ -967,6 +971,7 @@ enum nvme_opcode {
nvme_cmd_resv_acquire = 0x11,
nvme_cmd_io_mgmt_recv = 0x12,
nvme_cmd_resv_release = 0x15,
+ nvme_cmd_copy = 0x19,
nvme_cmd_zone_mgmt_send = 0x79,
nvme_cmd_zone_mgmt_recv = 0x7a,
nvme_cmd_zone_append = 0x7d,
@@ -991,7 +996,8 @@ enum nvme_opcode {
nvme_opcode_name(nvme_cmd_resv_release), \
nvme_opcode_name(nvme_cmd_zone_mgmt_send), \
nvme_opcode_name(nvme_cmd_zone_mgmt_recv), \
- nvme_opcode_name(nvme_cmd_zone_append))
+ nvme_opcode_name(nvme_cmd_zone_append), \
+ nvme_opcode_name(nvme_cmd_copy))
@@ -1169,6 +1175,39 @@ struct nvme_dsm_range {
__le64 slba;
};
+/*
+ * Submission queue entry layout of the NVMe Copy command (nvme_cmd_copy).
+ * The size must be exactly 64 bytes; _nvme_check_size() enforces this at
+ * build time.
+ */
+struct nvme_copy_command {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid; /* destination namespace ID */
+ __u64 rsvd2;
+ __le64 metadata;
+ union nvme_data_ptr dptr;
+ __le64 sdlba; /* starting destination LBA */
+ __u8 nr_range; /* number of source ranges, 0's based */
+ __u8 desfmt_prinfor; /* the host driver stores the descriptor format (DESFMT) here */
+ __le16 control; /* NVME_RW_FUA / NVME_RW_LR flags */
+ __le16 rsvd13;
+ __le16 dspec;
+ __le32 ilbrt;
+ __le16 lbat;
+ __le16 lbatm;
+};
+
+/*
+ * One source range descriptor of an NVMe Copy command. The host driver sends
+ * an array of these as the data payload of the command, one entry per source
+ * range, and only fills in nsid, slba and nlb; all other fields are sent as
+ * zero.
+ */
+struct nvme_copy_range {
+ __le32 nsid; /* DESFMT=2 only */
+ __le32 rsvd1;
+ __le64 slba; /* starting source LBA */
+ __le16 nlb; /* number of logical blocks, 0's based */
+ __le16 rsvd18;
+ __le32 rsvd20;
+ __le32 eilbrt;
+ __le16 elbat;
+ __le16 elbatm;
+};
+
+/* The descriptor is a wire format: its size must stay at 32 bytes. */
+static_assert(sizeof(struct nvme_copy_range) == 32);
+
struct nvme_write_zeroes_cmd {
__u8 opcode;
__u8 flags;
@@ -2001,6 +2040,7 @@ struct nvme_command {
struct nvme_download_firmware dlfw;
struct nvme_format_cmd format;
struct nvme_dsm_cmd dsm;
+ struct nvme_copy_command copy;
struct nvme_write_zeroes_cmd write_zeroes;
struct nvme_zone_mgmt_send_cmd zms;
struct nvme_zone_mgmt_recv_cmd zmr;
next prev parent reply other threads:[~2026-04-24 22:42 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-24 22:41 [PATCH 00/12] Block storage copy offloading Bart Van Assche
2026-04-24 22:41 ` [PATCH 01/12] block: Introduce queue limits for " Bart Van Assche
2026-04-24 22:41 ` [PATCH 02/12] block: Add the REQ_OP_COPY_{SRC,DST} operations Bart Van Assche
2026-04-24 22:41 ` [PATCH 03/12] block: Introduce blkdev_copy_offload() Bart Van Assche
2026-04-24 22:41 ` [PATCH 04/12] block: Add an onloaded copy implementation Bart Van Assche
2026-04-24 22:41 ` [PATCH 05/12] block: Introduce accessor functions for copy offload bios Bart Van Assche
2026-04-24 22:41 ` [PATCH 06/12] fs/read_write: Generalize generic_copy_file_checks() Bart Van Assche
2026-04-24 22:41 ` [PATCH 07/12] fs, block: Add copy_file_range() support for block devices Bart Van Assche
2026-04-24 22:41 ` Bart Van Assche [this message]
2026-04-24 22:41 ` [PATCH 09/12] nvmet: Support the Copy command Bart Van Assche
2026-04-24 22:41 ` [PATCH 10/12] dm: Add support for copy offloading Bart Van Assche
2026-04-24 22:42 ` [PATCH 11/12] dm-linear: Enable " Bart Van Assche
2026-04-24 22:42 ` [PATCH 12/12] null_blk: Add support for REQ_OP_COPY_* Bart Van Assche
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260424224201.1949243-9-bvanassche@acm.org \
--to=bvanassche@acm.org \
--cc=anuj20.g@samsung.com \
--cc=axboe@kernel.dk \
--cc=hch@lst.de \
--cc=javier.gonz@samsung.com \
--cc=joshi.k@samsung.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=linux-scsi@vger.kernel.org \
--cc=nj.shetty@samsung.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox