linux-nvme.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/7] nvmet: support of Fused operations
@ 2024-09-12  6:42 Dmitry Bogdanov
  2024-09-12  6:42 ` [PATCH 1/7] nvmet: add support of Compare command Dmitry Bogdanov
                   ` (6 more replies)
  0 siblings, 7 replies; 10+ messages in thread
From: Dmitry Bogdanov @ 2024-09-12  6:42 UTC (permalink / raw)
  To: Christoph Hellwig, Sagi Grimberg, Chaitanya Kulkarni, James Smart
  Cc: linux-nvme, linux, Dmitry Bogdanov

Hi,

This patchset introduces support for the Atomic Test & Set (ATS) primitive.
It's implemented by means of Compare And Write fused commands in nvme target.

That allows ESXi to use Linux NVME Target as VMFS datastore storage.

The first 3 patches add support for a standalone Compare command.
The 4th patch is a refactoring to have a common point in the command execution flow.
Patches 5 and 6 add support for Fused commands.
And the last patch allows sending fused commands via the nvme io-passthru tool
for testing purposes.

The patchset is based on nvme-6.12.

Dmitry Bogdanov (7):
  nvmet: add support of Compare command
  nvmet: bdev: add support of Compare command
  nvmet: file: add support of Compare command
  Revert "nvmet: Open code nvmet_req_execute()"
  nvmet: add support of fused operations
  nvmet: make fused commands be atomic
  nvme: allow send fused commands

 drivers/nvme/host/ioctl.c         |   6 +-
 drivers/nvme/target/admin-cmd.c   |   6 +-
 drivers/nvme/target/core.c        | 391 +++++++++++++++++++++++++++++-
 drivers/nvme/target/fc.c          |   6 +-
 drivers/nvme/target/io-cmd-bdev.c |  34 ++-
 drivers/nvme/target/io-cmd-file.c |  21 ++
 drivers/nvme/target/loop.c        |   2 +-
 drivers/nvme/target/nvmet.h       |  16 +-
 drivers/nvme/target/rdma.c        |   4 +-
 drivers/nvme/target/tcp.c         |   4 +-
 include/linux/nvme.h              |   2 +
 11 files changed, 473 insertions(+), 19 deletions(-)

-- 
2.25.1



^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/7] nvmet: add support of Compare command
  2024-09-12  6:42 [PATCH 0/7] nvmet: support of Fused operations Dmitry Bogdanov
@ 2024-09-12  6:42 ` Dmitry Bogdanov
  2024-09-12  6:42 ` [PATCH 2/7] nvmet: bdev: " Dmitry Bogdanov
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Dmitry Bogdanov @ 2024-09-12  6:42 UTC (permalink / raw)
  To: Christoph Hellwig, Sagi Grimberg, Chaitanya Kulkarni, James Smart
  Cc: linux-nvme, linux, Dmitry Bogdanov

Report that the Compare command is supported.
Add a common function for comparing the data from the command with the
data from the backstore.

Signed-off-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
---
 drivers/nvme/target/admin-cmd.c |  4 ++-
 drivers/nvme/target/core.c      | 55 +++++++++++++++++++++++++++++++++
 drivers/nvme/target/nvmet.h     |  2 ++
 3 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index d64480f01f4a..e555b9efebfb 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -176,6 +176,7 @@ static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log)
 	log->iocs[nvme_cmd_read] =
 	log->iocs[nvme_cmd_flush] =
 	log->iocs[nvme_cmd_dsm]	=
+	log->iocs[nvme_cmd_compare] =
 		cpu_to_le32(NVME_CMD_EFFECTS_CSUPP);
 	log->iocs[nvme_cmd_write] =
 	log->iocs[nvme_cmd_write_zeroes] =
@@ -433,7 +434,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 	id->nn = cpu_to_le32(NVMET_MAX_NAMESPACES);
 	id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES);
 	id->oncs = cpu_to_le16(NVME_CTRL_ONCS_DSM |
-			NVME_CTRL_ONCS_WRITE_ZEROES);
+			NVME_CTRL_ONCS_WRITE_ZEROES |
+			NVME_CTRL_ONCS_COMPARE);
 
 	/* XXX: don't report vwc if the underlying device is write through */
 	id->vwc = NVME_CTRL_VWC_PRESENT;
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index ed2424f8a396..b3194d66f7dc 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -751,6 +751,59 @@ static void nvmet_set_error(struct nvmet_req *req, u16 status)
 	req->cqe->status |= cpu_to_le16(1 << 14);
 }
 
+int nvmet_compare_sg(struct nvmet_req *req)
+{
+	unsigned int data_len = nvmet_rw_data_len(req);
+	unsigned int offset = 0;
+	struct scatterlist *sg;
+	unsigned int len;
+	u8 *datap = NULL;
+	u8 *buf = NULL;
+	int ret = 0;
+	int i;
+
+	buf = kmalloc(data_len, GFP_ATOMIC);
+	if (!buf)
+		return NVME_SC_INTERNAL;
+
+	/* Copy READ data to the buffer */
+	ret = nvmet_copy_from_sgl(req, 0, buf, data_len);
+	if (ret)
+		goto error;
+
+	/* Compare on-disk data with the data provided by the initiator */
+	for_each_sg(req->cmp_sg, sg, req->sg_cnt, i) {
+		datap = kmap_atomic(sg_page(sg));
+		if (!datap) {
+			ret = NVME_SC_INTERNAL;
+			goto error;
+		}
+
+		len = min_t(unsigned int, sg->length, data_len);
+
+		if (memcmp(datap, buf + offset, len) != 0) {
+			ret = NVME_SC_COMPARE_FAILED;
+			goto error;
+		}
+
+		kunmap_atomic(datap);
+
+		offset += len;
+		if (offset == data_len)
+			break;
+	}
+
+	kfree(buf);
+
+	return 0;
+error:
+	if (datap)
+		kunmap_atomic(datap);
+
+	kfree(buf);
+	return ret;
+}
+
 static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
 {
 	struct nvmet_ns *ns = req->ns;
@@ -887,6 +940,7 @@ static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
 		switch (req->cmd->common.opcode) {
 		case nvme_cmd_read:
 		case nvme_cmd_flush:
+		case nvme_cmd_compare:
 			break;
 		default:
 			return NVME_SC_NS_WRITE_PROTECTED;
@@ -953,6 +1007,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
 	req->sq = sq;
 	req->ops = ops;
 	req->sg = NULL;
+	req->cmp_sg = NULL;
 	req->metadata_sg = NULL;
 	req->sg_cnt = 0;
 	req->metadata_sg_cnt = 0;
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 190f55e6d753..8a26b8b4dc63 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -373,6 +373,7 @@ struct nvmet_req {
 	struct nvmet_ns		*ns;
 	struct scatterlist	*sg;
 	struct scatterlist	*metadata_sg;
+	struct scatterlist	*cmp_sg;
 	struct bio_vec		inline_bvec[NVMET_MAX_INLINE_BIOVEC];
 	union {
 		struct {
@@ -602,6 +603,7 @@ void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns);
 void nvmet_file_ns_revalidate(struct nvmet_ns *ns);
 bool nvmet_ns_revalidate(struct nvmet_ns *ns);
 u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts);
+int nvmet_compare_sg(struct nvmet_req *req);
 
 bool nvmet_bdev_zns_enable(struct nvmet_ns *ns);
 void nvmet_execute_identify_ctrl_zns(struct nvmet_req *req);
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/7] nvmet: bdev: add support of Compare command
  2024-09-12  6:42 [PATCH 0/7] nvmet: support of Fused operations Dmitry Bogdanov
  2024-09-12  6:42 ` [PATCH 1/7] nvmet: add support of Compare command Dmitry Bogdanov
@ 2024-09-12  6:42 ` Dmitry Bogdanov
  2024-09-12  6:42 ` [PATCH 3/7] nvmet: file: " Dmitry Bogdanov
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Dmitry Bogdanov @ 2024-09-12  6:42 UTC (permalink / raw)
  To: Christoph Hellwig, Sagi Grimberg, Chaitanya Kulkarni, James Smart
  Cc: linux-nvme, linux, Dmitry Bogdanov

Add handling of Compare command for block device backend.

Signed-off-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
---
 drivers/nvme/target/io-cmd-bdev.c | 34 ++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 0bda83d0fc3e..0bbe95585dcd 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -183,8 +183,19 @@ u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
 static void nvmet_bio_done(struct bio *bio)
 {
 	struct nvmet_req *req = bio->bi_private;
+	u16 status;
+
+	status = blk_to_nvme_status(req, bio->bi_status);
+
+	if (req->cmd->rw.opcode == nvme_cmd_compare) {
+		if (likely(!status))
+			status = nvmet_compare_sg(req);
+
+		sgl_free(req->sg);
+		req->sg = req->cmp_sg;
+	}
 
-	nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
+	nvmet_req_complete(req, status);
 	nvmet_req_bio_put(req, bio);
 }
 
@@ -262,14 +273,30 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 		return;
 	}
 
-	if (req->cmd->rw.opcode == nvme_cmd_write) {
+	switch (req->cmd->rw.opcode) {
+	case nvme_cmd_write:
 		opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
 		if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
 			opf |= REQ_FUA;
 		iter_flags = SG_MITER_TO_SG;
-	} else {
+		break;
+	case nvme_cmd_compare:
 		opf = REQ_OP_READ;
 		iter_flags = SG_MITER_FROM_SG;
+
+		req->cmp_sg = req->sg;
+		req->sg = sgl_alloc(nvmet_rw_data_len(req), GFP_KERNEL,
+					&req->sg_cnt);
+
+		if (unlikely(!req->sg || WARN_ON(sg_cnt != req->sg_cnt))) {
+			nvmet_req_complete(req, NVME_SC_INTERNAL);
+			return;
+		}
+		break;
+	default:
+		opf = REQ_OP_READ;
+		iter_flags = SG_MITER_FROM_SG;
+		break;
 	}
 
 	if (is_pci_p2pdma_page(sg_page(req->sg)))
@@ -458,6 +485,7 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
 	switch (req->cmd->common.opcode) {
 	case nvme_cmd_read:
 	case nvme_cmd_write:
+	case nvme_cmd_compare:
 		req->execute = nvmet_bdev_execute_rw;
 		if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
 			req->metadata_len = nvmet_rw_metadata_len(req);
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 3/7] nvmet: file: add support of Compare command
  2024-09-12  6:42 [PATCH 0/7] nvmet: support of Fused operations Dmitry Bogdanov
  2024-09-12  6:42 ` [PATCH 1/7] nvmet: add support of Compare command Dmitry Bogdanov
  2024-09-12  6:42 ` [PATCH 2/7] nvmet: bdev: " Dmitry Bogdanov
@ 2024-09-12  6:42 ` Dmitry Bogdanov
  2024-09-12  6:42 ` [PATCH 4/7] Revert "nvmet: Open code nvmet_req_execute()" Dmitry Bogdanov
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Dmitry Bogdanov @ 2024-09-12  6:42 UTC (permalink / raw)
  To: Christoph Hellwig, Sagi Grimberg, Chaitanya Kulkarni, James Smart
  Cc: linux-nvme, linux, Dmitry Bogdanov

Add handling of Compare command in file backend.

Signed-off-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
---
 drivers/nvme/target/io-cmd-file.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 2d068439b129..27ceb1120d6a 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -114,6 +114,15 @@ static void nvmet_file_io_done(struct kiocb *iocb, long ret)
 
 	if (unlikely(ret != req->transfer_len))
 		status = errno_to_nvme_status(req, ret);
+
+	if (req->cmd->rw.opcode == nvme_cmd_compare) {
+		if (likely(!status))
+			status = nvmet_compare_sg(req);
+
+		sgl_free(req->sg);
+		req->sg = req->cmp_sg;
+	}
+
 	nvmet_req_complete(req, status);
 }
 
@@ -227,6 +236,17 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
 		return;
 	}
 
+	if (req->cmd->rw.opcode == nvme_cmd_compare) {
+		req->cmp_sg = req->sg;
+		req->sg = sgl_alloc(nvmet_rw_data_len(req), GFP_KERNEL,
+					&req->sg_cnt);
+
+		if (unlikely(!req->sg || WARN_ON(nr_bvec != req->sg_cnt))) {
+			nvmet_req_complete(req, NVME_SC_INTERNAL);
+			return;
+		}
+	}
+
 	if (nr_bvec > NVMET_MAX_INLINE_BIOVEC)
 		req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
 				GFP_KERNEL);
@@ -365,6 +385,7 @@ u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
 	switch (req->cmd->common.opcode) {
 	case nvme_cmd_read:
 	case nvme_cmd_write:
+	case nvme_cmd_compare:
 		req->execute = nvmet_file_execute_rw;
 		return 0;
 	case nvme_cmd_flush:
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 4/7] Revert "nvmet: Open code nvmet_req_execute()"
  2024-09-12  6:42 [PATCH 0/7] nvmet: support of Fused operations Dmitry Bogdanov
                   ` (2 preceding siblings ...)
  2024-09-12  6:42 ` [PATCH 3/7] nvmet: file: " Dmitry Bogdanov
@ 2024-09-12  6:42 ` Dmitry Bogdanov
  2024-09-12  6:42 ` [PATCH 5/7] nvmet: add support of fused operations Dmitry Bogdanov
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Dmitry Bogdanov @ 2024-09-12  6:42 UTC (permalink / raw)
  To: Christoph Hellwig, Sagi Grimberg, Chaitanya Kulkarni, James Smart
  Cc: linux-nvme, linux, Dmitry Bogdanov

This reverts commit be3f3114ddd58d12f64b872247bb1bc46df56b36.

Common nvmet_req_execute() is needed for fused commands support.

Signed-off-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
---
 drivers/nvme/target/core.c  | 6 ++++++
 drivers/nvme/target/fc.c    | 4 ++--
 drivers/nvme/target/loop.c  | 2 +-
 drivers/nvme/target/nvmet.h | 1 +
 drivers/nvme/target/rdma.c  | 4 ++--
 drivers/nvme/target/tcp.c   | 4 ++--
 6 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index b3194d66f7dc..b7b2bf7b460c 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -1098,6 +1098,12 @@ bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
 	return true;
 }
 
+void nvmet_req_execute(struct nvmet_req *req)
+{
+	req->execute(req);
+}
+EXPORT_SYMBOL_GPL(nvmet_req_execute);
+
 static unsigned int nvmet_data_transfer_len(struct nvmet_req *req)
 {
 	return req->transfer_len - req->metadata_len;
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 3ef4beacde32..a77f0e05c174 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -2392,7 +2392,7 @@ nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod)
 		}
 
 		/* data transfer complete, resume with nvmet layer */
-		fod->req.execute(&fod->req);
+		nvmet_req_execute(&fod->req);
 		break;
 
 	case NVMET_FCOP_READDATA:
@@ -2603,7 +2603,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
 	 * can invoke the nvmet_layer now. If read data, cmd completion will
 	 * push the data
 	 */
-	fod->req.execute(&fod->req);
+	nvmet_req_execute(&fod->req);
 	return;
 
 transport_error:
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index e32790d8fc26..18c80ff6d7b1 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -125,7 +125,7 @@ static void nvme_loop_execute_work(struct work_struct *work)
 	struct nvme_loop_iod *iod =
 		container_of(work, struct nvme_loop_iod, work);
 
-	iod->req.execute(&iod->req);
+	nvmet_req_execute(&iod->req);
 }
 
 static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 8a26b8b4dc63..41e4eec9a251 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -480,6 +480,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
 void nvmet_req_uninit(struct nvmet_req *req);
 bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len);
 bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len);
+void nvmet_req_execute(struct nvmet_req *req);
 void nvmet_req_complete(struct nvmet_req *req, u16 status);
 int nvmet_req_alloc_sgls(struct nvmet_req *req);
 void nvmet_req_free_sgls(struct nvmet_req *req);
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 1b6264fa5803..69fd6d7063da 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -773,7 +773,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
 	if (unlikely(status))
 		nvmet_req_complete(&rsp->req, status);
 	else
-		rsp->req.execute(&rsp->req);
+		nvmet_req_execute(&rsp->req);
 }
 
 static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc)
@@ -958,7 +958,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp)
 				queue->cm_id->port_num, &rsp->read_cqe, NULL))
 			nvmet_req_complete(&rsp->req, NVME_SC_DATA_XFER_ERROR);
 	} else {
-		rsp->req.execute(&rsp->req);
+		nvmet_req_execute(&rsp->req);
 	}
 
 	return true;
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 5bff0d5464d1..ac6cd73b6e91 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -598,7 +598,7 @@ static void nvmet_tcp_execute_request(struct nvmet_tcp_cmd *cmd)
 	if (unlikely(cmd->flags & NVMET_TCP_F_INIT_FAILED))
 		nvmet_tcp_queue_response(&cmd->req);
 	else
-		cmd->req.execute(&cmd->req);
+		nvmet_req_execute(&cmd->req);
 }
 
 static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd)
@@ -1104,7 +1104,7 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
 		goto out;
 	}
 
-	queue->cmd->req.execute(&queue->cmd->req);
+	nvmet_req_execute(&queue->cmd->req);
 out:
 	nvmet_prepare_receive_pdu(queue);
 	return ret;
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 5/7] nvmet: add support of fused operations
  2024-09-12  6:42 [PATCH 0/7] nvmet: support of Fused operations Dmitry Bogdanov
                   ` (3 preceding siblings ...)
  2024-09-12  6:42 ` [PATCH 4/7] Revert "nvmet: Open code nvmet_req_execute()" Dmitry Bogdanov
@ 2024-09-12  6:42 ` Dmitry Bogdanov
  2024-09-12  6:42 ` [PATCH 6/7] nvmet: make fused commands be atomic Dmitry Bogdanov
  2024-09-12  6:42 ` [PATCH 7/7] nvme: allow send fused commands Dmitry Bogdanov
  6 siblings, 0 replies; 10+ messages in thread
From: Dmitry Bogdanov @ 2024-09-12  6:42 UTC (permalink / raw)
  To: Christoph Hellwig, Sagi Grimberg, Chaitanya Kulkarni, James Smart
  Cc: linux-nvme, linux, Dmitry Bogdanov

Add support for the Compare And Write fused operation.
There can be several different Fused pairs in flight.
Atomicity is guaranteed for just one logical block.

Signed-off-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
---
 drivers/nvme/target/admin-cmd.c |   2 +
 drivers/nvme/target/core.c      | 326 +++++++++++++++++++++++++++++++-
 drivers/nvme/target/fc.c        |   2 +-
 drivers/nvme/target/nvmet.h     |  12 +-
 include/linux/nvme.h            |   2 +
 5 files changed, 338 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index e555b9efebfb..b1ca1e371212 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -437,6 +437,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 			NVME_CTRL_ONCS_WRITE_ZEROES |
 			NVME_CTRL_ONCS_COMPARE);
 
+	id->fuses = cpu_to_le16(NVME_CTRL_FUSES_COMPARE_AND_WRITE);
+
 	/* XXX: don't report vwc if the underlying device is write through */
 	id->vwc = NVME_CTRL_VWC_PRESENT;
 
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index b7b2bf7b460c..b4fb66037e45 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -804,9 +804,98 @@ int nvmet_compare_sg(struct nvmet_req *req)
 	return ret;
 }
 
+static void nvmet_fused_failed(struct work_struct *w)
+{
+	struct nvmet_req *req = container_of(w, struct nvmet_req, fused_work);
+
+	nvmet_req_complete(req, NVME_SC_FUSED_FAIL);
+}
+
+static void nvmet_fused_succeeded(struct work_struct *w)
+{
+	struct nvmet_req *req = container_of(w, struct nvmet_req, fused_work);
+
+	req->execute(req);
+}
+
+static void __nvmet_fused_req_complete(struct nvmet_req *req,
+				       struct nvmet_req *fused_req,
+				       u16 status)
+{
+	struct nvmet_req *first_req, *second_req;
+	unsigned long flags;
+
+	if (req->cmd->common.flags & NVME_CMD_FUSE_FIRST) {
+		first_req = req;
+		second_req = fused_req;
+	} else {
+		first_req = fused_req;
+		second_req = req;
+	}
+
+	spin_lock_irqsave(&first_req->fused_lock, flags);
+	spin_lock(&second_req->fused_lock);
+
+	req->fused_state = NVMET_FUSED_STATE_COMPLETED;
+
+	switch (fused_req->fused_state) {
+	case NVMET_FUSED_STATE_INIT:
+		fallthrough;
+	case NVMET_FUSED_STATE_FAILED:
+		/* if second command is not yet executing then the first
+		 * was failed, so fail the second one
+		 */
+		WARN_ON(!status);
+		fused_req->fused_state = NVMET_FUSED_STATE_FAILED;
+		/* wait for fused request complete/execute */
+		spin_unlock(&second_req->fused_lock);
+		spin_unlock_irqrestore(&first_req->fused_lock, flags);
+		return;
+	case NVMET_FUSED_STATE_EXECUTING:
+		/* The second fused command is waiting for completion of the first
+		 * command. Continue handling of the second command depending on
+		 * the completion status.
+		 */
+		if (status)
+			INIT_WORK(&fused_req->fused_work, nvmet_fused_failed);
+		else
+			INIT_WORK(&fused_req->fused_work, nvmet_fused_succeeded);
+
+		queue_work(nvmet_wq, &fused_req->fused_work);
+
+		spin_unlock(&second_req->fused_lock);
+		spin_unlock_irqrestore(&first_req->fused_lock, flags);
+
+		return;
+	case NVMET_FUSED_STATE_COMPLETED:
+		/* both fused command completed - send responses */
+		spin_unlock(&second_req->fused_lock);
+		spin_unlock_irqrestore(&first_req->fused_lock, flags);
+
+		if (first_req->ns)
+			nvmet_put_namespace(first_req->ns);
+		first_req->ops->queue_response(first_req);
+
+		if (second_req->ns)
+			nvmet_put_namespace(second_req->ns);
+		second_req->ops->queue_response(second_req);
+
+		return;
+	}
+	/* must not be here */
+	WARN_ON(1);
+	spin_unlock(&second_req->fused_lock);
+	spin_unlock_irqrestore(&first_req->fused_lock, flags);
+
+	if (req->ns)
+		nvmet_put_namespace(req->ns);
+	req->ops->queue_response(req);
+}
+
 static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
 {
 	struct nvmet_ns *ns = req->ns;
+	unsigned long flags;
 
 	if (!req->sq->sqhd_disabled)
 		nvmet_update_sq_head(req);
@@ -818,6 +907,62 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
 
 	trace_nvmet_req_complete(req);
 
+	if (unlikely(req->cmd->common.flags & NVME_CMD_FUSE_FIRST)) {
+		struct nvmet_req *fused_req = NULL;
+
+		spin_lock_irqsave(&req->sq->fused_lock, flags);
+		if (req->sq->first_fused_req == req) {
+			req->sq->first_fused_req = NULL;
+			spin_unlock_irqrestore(&req->sq->fused_lock, flags);
+			/* There is no the second fused command yet, so just
+			 * complete this one. The second command will fail with
+			 * MISSING code because sq->first_fused_req is NULL..
+			 */
+			goto queue_resp;
+		}
+		/* This command is not the first fused waiting for the second
+		 * fused, either the second is received or there was no second
+		 * and this one is failed.
+		 * req->fused_pair is safe to read without spin_lock, because
+		 * sq->first_fused_req != req and req->fused_pair will not set.
+		 */
+		fused_req = req->fused_pair;
+		if (!fused_req) {
+			spin_unlock_irqrestore(&req->sq->fused_lock, flags);
+			/* There is no the second fused command yet, so just
+			 * complete this one.
+			 */
+			goto queue_resp;
+		}
+		spin_unlock_irqrestore(&req->sq->fused_lock, flags);
+
+		/* There is the second fused command */
+
+		__nvmet_fused_req_complete(req, fused_req, status);
+
+		return;
+	} else if (unlikely(req->cmd->common.flags & NVME_CMD_FUSE_SECOND)) {
+		struct nvmet_req *fused_req = NULL;
+
+		spin_lock_irqsave(&req->fused_lock, flags);
+		if (!req->fused_pair) {
+			/* There is no the first fused command, so just
+			 * complete this one
+			 */
+			spin_unlock_irqrestore(&req->fused_lock, flags);
+			goto queue_resp;
+		}
+		fused_req = req->fused_pair;
+		req->fused_state = NVMET_FUSED_STATE_COMPLETED;
+		spin_unlock_irqrestore(&req->fused_lock, flags);
+
+		__nvmet_fused_req_complete(req, fused_req, status);
+
+		/* unlock LBA */
+		return;
+	}
+
+queue_resp:
 	req->ops->queue_response(req);
 	if (ns)
 		nvmet_put_namespace(ns);
@@ -866,6 +1011,11 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
 	 */
 	if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq)
 		nvmet_async_events_failall(ctrl);
+
+	/* Complete the first fused command waiting for the second one */
+	if (sq->first_fused_req)
+		nvmet_req_complete(sq->first_fused_req, NVME_SC_FUSED_MISSING);
+
 	percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
 	wait_for_completion(&sq->confirm_done);
 	wait_for_completion(&sq->free_done);
@@ -915,6 +1065,7 @@ int nvmet_sq_init(struct nvmet_sq *sq)
 	init_completion(&sq->free_done);
 	init_completion(&sq->confirm_done);
 	nvmet_auth_sq_init(sq);
+	spin_lock_init(&sq->fused_lock);
 
 	return 0;
 }
@@ -950,6 +1101,107 @@ static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
 	return 0;
 }
 
+/* Fail the first fused command that does not have a fused pair */
+static void nvmet_fail_first_fused(struct nvmet_req *req)
+{
+	spin_lock(&req->fused_lock);
+	if (req->fused_state == NVMET_FUSED_STATE_INIT) {
+		/* command still collects a data, just set state */
+		req->fused_state = NVMET_FUSED_STATE_FAILED;
+		spin_unlock(&req->fused_lock);
+		return;
+	} else if (req->fused_state == NVMET_FUSED_STATE_EXECUTING) {
+		req->fused_state = NVMET_FUSED_STATE_FAILED;
+		spin_unlock(&req->fused_lock);
+		nvmet_req_complete(req, NVME_SC_FUSED_MISSING);
+		return;
+	}
+	spin_unlock(&req->fused_lock);
+}
+
+/* This function is called always in the same thread,
+ * but req->sq->first_fused_req and req->fused_* are used in different threads.
+ * Therefore locks are used to synchronize an access to them.
+ */
+static u16 nvmet_cmd_check_fused(struct nvmet_req *req)
+{
+	struct nvmet_req *first_req;
+
+	if (likely(!(req->cmd->common.flags & NVME_CMD_FUSE_MASK))) {
+		if (unlikely(req->sq->first_fused_req)) {
+			spin_lock(&req->sq->fused_lock);
+			first_req = req->sq->first_fused_req;
+			if (req->sq->first_fused_req) {
+				req->sq->first_fused_req = NULL;
+				spin_unlock(&req->sq->fused_lock);
+				nvmet_fail_first_fused(first_req);
+			} else {
+				spin_unlock(&req->sq->fused_lock);
+			}
+		}
+		return 0;
+	} else if ((req->cmd->common.flags & NVME_CMD_FUSE_MASK) == NVME_CMD_FUSE_MASK) {
+		return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
+	}
+
+	spin_lock(&req->sq->fused_lock);
+	first_req = req->sq->first_fused_req;
+	if (req->cmd->common.flags & NVME_CMD_FUSE_FIRST) {
+		if (first_req) {
+			req->sq->first_fused_req = NULL;
+			spin_unlock(&req->sq->fused_lock);
+
+			nvmet_fail_first_fused(first_req);
+
+			spin_lock(&req->sq->fused_lock);
+		}
+
+		if (req->cmd->common.opcode != nvme_cmd_compare) {
+			spin_unlock(&req->sq->fused_lock);
+			return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
+		}
+
+		/* only one logical block is supported for Compare And Write */
+		if (req->cmd->rw.length > 0) {
+			spin_unlock(&req->sq->fused_lock);
+			return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
+		}
+
+		req->sq->first_fused_req = req;
+	} else if (unlikely(req->cmd->common.flags & NVME_CMD_FUSE_SECOND)) {
+		if (!first_req) {
+			spin_unlock(&req->sq->fused_lock);
+			return NVME_SC_FUSED_MISSING;
+		}
+
+		if (req->cmd->common.opcode != nvme_cmd_write) {
+			req->sq->first_fused_req = NULL;
+			spin_unlock(&req->sq->fused_lock);
+
+			nvmet_fail_first_fused(first_req);
+			return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
+		}
+
+		if (req->cmd->rw.length > 0 ||
+		    req->cmd->rw.slba != first_req->cmd->rw.slba) {
+			req->sq->first_fused_req = NULL;
+			spin_unlock(&req->sq->fused_lock);
+
+			nvmet_fail_first_fused(first_req);
+			return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
+		}
+
+		req->fused_pair = first_req;
+		first_req->fused_pair = req;
+		req->sq->first_fused_req = NULL;
+	}
+
+	req->fused_state = NVMET_FUSED_STATE_INIT;
+	spin_unlock(&req->sq->fused_lock);
+
+	return 0;
+}
+
 static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
 {
 	struct nvme_command *cmd = req->cmd;
@@ -1019,11 +1272,14 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
 	req->ns = NULL;
 	req->error_loc = NVMET_NO_ERROR_LOC;
 	req->error_slba = 0;
+	req->fused_pair = NULL;
+
+	spin_lock_init(&req->fused_lock);
 
-	/* no support for fused commands yet */
-	if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
+
+	status = nvmet_cmd_check_fused(req);
+	if (unlikely(status)) {
 		req->error_loc = offsetof(struct nvme_common_command, flags);
-		status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
 		goto fail;
 	}
 
@@ -1100,7 +1356,69 @@ bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
 
 void nvmet_req_execute(struct nvmet_req *req)
 {
-	req->execute(req);
+	if (likely(!(req->cmd->common.flags & NVME_CMD_FUSE_MASK))) {
+		req->execute(req);
+		return;
+	}
+
+	if (req->cmd->common.flags & NVME_CMD_FUSE_FIRST) {
+		spin_lock(&req->fused_lock);
+		if (req->fused_state == NVMET_FUSED_STATE_FAILED) {
+			spin_unlock(&req->fused_lock);
+			nvmet_req_complete(req, NVME_SC_FUSED_MISSING);
+			return;
+		}
+
+		req->fused_state = NVMET_FUSED_STATE_EXECUTING;
+
+		if (req->fused_pair) {
+			spin_lock(&req->fused_pair->fused_lock);
+			if (req->fused_pair->fused_state == NVMET_FUSED_STATE_EXECUTING) {
+				spin_unlock(&req->fused_pair->fused_lock);
+				spin_unlock(&req->fused_lock);
+				req->execute(req);
+				return;
+			}
+			spin_unlock(&req->fused_pair->fused_lock);
+		}
+		spin_unlock(&req->fused_lock);
+		/* wait for both fused command ready to execute */
+		return;
+	} else if (req->cmd->common.flags & NVME_CMD_FUSE_SECOND) {
+		struct nvmet_req *first_req = NULL;
+
+		spin_lock(&req->fused_lock);
+		if (!req->fused_pair) {
+			spin_unlock(&req->fused_lock);
+			nvmet_req_complete(req, NVME_SC_FUSED_MISSING);
+			return;
+		}
+
+		if (req->fused_state == NVMET_FUSED_STATE_FAILED) {
+			spin_unlock(&req->fused_lock);
+			nvmet_req_complete(req, NVME_SC_FUSED_MISSING);
+			return;
+		}
+		first_req = req->fused_pair;
+		spin_unlock(&req->fused_lock);
+
+		/* take locks in the same order */
+		spin_lock(&first_req->fused_lock);
+		spin_lock(&req->fused_lock);
+		req->fused_state = NVMET_FUSED_STATE_EXECUTING;
+
+		if (first_req->fused_state == NVMET_FUSED_STATE_EXECUTING) {
+			spin_unlock(&req->fused_lock);
+			spin_unlock(&first_req->fused_lock);
+			/* both fused command are ready to execute */
+			first_req->execute(first_req);
+			return;
+		}
+
+		spin_unlock(&req->fused_lock);
+		spin_unlock(&first_req->fused_lock);
+		/* wait for both fused command ready to execute */
+	}
 }
 EXPORT_SYMBOL_GPL(nvmet_req_execute);
 
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index a77f0e05c174..804c2d0be6bc 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -2186,7 +2186,7 @@ nvmet_fc_prep_fcp_rsp(struct nvmet_fc_tgtport *tgtport,
 	    nvme_is_fabrics((struct nvme_command *) sqe) ||
 	    xfr_length != fod->req.transfer_len ||
 	    (le16_to_cpu(cqe->status) & 0xFFFE) || cqewd[0] || cqewd[1] ||
-	    (sqe->flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND)) ||
+	    (sqe->flags & NVME_CMD_FUSE_MASK) ||
 	    queue_90percent_full(fod->queue, le16_to_cpu(cqe->sq_head)))
 		send_ersp = true;
 
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 41e4eec9a251..1278b81c85a7 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -102,6 +102,7 @@ struct nvmet_cq {
 	u16			size;
 };
 
+struct nvmet_req;
 struct nvmet_sq {
 	struct nvmet_ctrl	*ctrl;
 	struct percpu_ref	ref;
@@ -124,6 +125,8 @@ struct nvmet_sq {
 #endif
 	struct completion	free_done;
 	struct completion	confirm_done;
+	struct nvmet_req	*first_fused_req;
+	spinlock_t		fused_lock;
 };
 
 struct nvmet_ana_group {
@@ -340,7 +343,6 @@ struct nvmet_subsys_link {
 	struct nvmet_subsys	*subsys;
 };
 
-struct nvmet_req;
 struct nvmet_fabrics_ops {
 	struct module *owner;
 	unsigned int type;
@@ -374,6 +376,14 @@ struct nvmet_req {
 	struct scatterlist	*sg;
 	struct scatterlist	*metadata_sg;
 	struct scatterlist	*cmp_sg;
+	struct nvmet_req	*fused_pair;
+	struct work_struct	fused_work;
+	spinlock_t		fused_lock;
+	int			fused_state;
+#define NVMET_FUSED_STATE_INIT		0
+#define NVMET_FUSED_STATE_EXECUTING	1
+#define NVMET_FUSED_STATE_COMPLETED	2
+#define NVMET_FUSED_STATE_FAILED	3
 	struct bio_vec		inline_bvec[NVMET_MAX_INLINE_BIOVEC];
 	union {
 		struct {
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index b58d9405d65e..af6fbff74dcc 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -374,6 +374,7 @@ enum {
 	NVME_CTRL_ONCS_WRITE_ZEROES		= 1 << 3,
 	NVME_CTRL_ONCS_RESERVATIONS		= 1 << 5,
 	NVME_CTRL_ONCS_TIMESTAMP		= 1 << 6,
+	NVME_CTRL_FUSES_COMPARE_AND_WRITE	= 1 << 0,
 	NVME_CTRL_VWC_PRESENT			= 1 << 0,
 	NVME_CTRL_OACS_SEC_SUPP                 = 1 << 0,
 	NVME_CTRL_OACS_NS_MNGT_SUPP		= 1 << 3,
@@ -949,6 +950,7 @@ union nvme_data_ptr {
 enum {
 	NVME_CMD_FUSE_FIRST	= (1 << 0),
 	NVME_CMD_FUSE_SECOND	= (1 << 1),
+	NVME_CMD_FUSE_MASK	= NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND,
 
 	NVME_CMD_SGL_METABUF	= (1 << 6),
 	NVME_CMD_SGL_METASEG	= (1 << 7),
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 6/7] nvmet: make fused commands be atomic
  2024-09-12  6:42 [PATCH 0/7] nvmet: support of Fused operations Dmitry Bogdanov
                   ` (4 preceding siblings ...)
  2024-09-12  6:42 ` [PATCH 5/7] nvmet: add support of fused operations Dmitry Bogdanov
@ 2024-09-12  6:42 ` Dmitry Bogdanov
  2024-09-12  6:42 ` [PATCH 7/7] nvme: allow send fused commands Dmitry Bogdanov
  6 siblings, 0 replies; 10+ messages in thread
From: Dmitry Bogdanov @ 2024-09-12  6:42 UTC (permalink / raw)
  To: Christoph Hellwig, Sagi Grimberg, Chaitanya Kulkarni, James Smart
  Cc: linux-nvme, linux, Dmitry Bogdanov

Ensure that the fused commands are executed in an atomic manner: only one
fused pair may be in flight per namespace at a time.

Signed-off-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
---
 drivers/nvme/target/core.c  | 6 ++++++
 drivers/nvme/target/nvmet.h | 1 +
 2 files changed, 7 insertions(+)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index d5cc760ba2f4..1ab6d8f7a7b1 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -705,6 +705,7 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
 	uuid_gen(&ns->uuid);
 	ns->buffered_io = false;
 	ns->csi = NVME_CSI_NVM;
+	sema_init(&ns->caw_lock, 1);
 
 	return ns;
 }
@@ -872,6 +873,8 @@ static void __nvmet_fused_req_complete(struct nvmet_req *req,
 		spin_unlock(&second_req->fused_lock);
 		spin_unlock_irqrestore(&first_req->fused_lock, flags);
 
+		up(&req->ns->caw_lock);
+
 		if (first_req->ns)
 			nvmet_put_namespace(first_req->ns);
 		first_req->ops->queue_response(first_req);
@@ -1376,6 +1379,8 @@ void nvmet_req_execute(struct nvmet_req *req)
 			if (req->fused_pair->fused_state == NVMET_FUSED_STATE_EXECUTING) {
 				spin_unlock(&req->fused_pair->fused_lock);
 				spin_unlock(&req->fused_lock);
+
+				down(&req->ns->caw_lock);
 				req->execute(req);
 				return;
 			}
@@ -1411,6 +1416,7 @@ void nvmet_req_execute(struct nvmet_req *req)
 			spin_unlock(&req->fused_lock);
 			spin_unlock(&first_req->fused_lock);
 			/* both fused command are ready to execute */
+			down(&req->ns->caw_lock);
 			first_req->execute(first_req);
 			return;
 		}
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 1278b81c85a7..1dc9814c7a8c 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -85,6 +85,7 @@ struct nvmet_ns {
 	int			pi_type;
 	int			metadata_size;
 	u8			csi;
+	struct semaphore	caw_lock;
 };
 
 static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 7/7] nvme: allow send fused commands
  2024-09-12  6:42 [PATCH 0/7] nvmet: support of Fused operations Dmitry Bogdanov
                   ` (5 preceding siblings ...)
  2024-09-12  6:42 ` [PATCH 6/7] nvmet: make fused commands be atomic Dmitry Bogdanov
@ 2024-09-12  6:42 ` Dmitry Bogdanov
  2024-09-12 16:36   ` Caleb Sander
  6 siblings, 1 reply; 10+ messages in thread
From: Dmitry Bogdanov @ 2024-09-12  6:42 UTC (permalink / raw)
  To: Christoph Hellwig, Sagi Grimberg, Chaitanya Kulkarni, James Smart
  Cc: linux-nvme, linux, Dmitry Bogdanov

Allow a user to pass fused flags for the commands.

Signed-off-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
---
 drivers/nvme/host/ioctl.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 1d769c842fbf..affedac90f04 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -220,7 +220,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 
 	if (copy_from_user(&io, uio, sizeof(io)))
 		return -EFAULT;
-	if (io.flags)
+	if (io.flags & 0xFC)
 		return -EINVAL;
 
 	switch (io.opcode) {
@@ -299,7 +299,7 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 
 	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
 		return -EFAULT;
-	if (cmd.flags)
+	if (cmd.flags & 0xFC)
 		return -EINVAL;
 	if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid))
 		return -EINVAL;
@@ -346,7 +346,7 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 
 	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
 		return -EFAULT;
-	if (cmd.flags)
+	if (cmd.flags & 0xFC)
 		return -EINVAL;
 	if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid))
 		return -EINVAL;
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 7/7] nvme: allow send fused commands
  2024-09-12  6:42 ` [PATCH 7/7] nvme: allow send fused commands Dmitry Bogdanov
@ 2024-09-12 16:36   ` Caleb Sander
  2024-09-13 11:20     ` Dmitry Bogdanov
  0 siblings, 1 reply; 10+ messages in thread
From: Caleb Sander @ 2024-09-12 16:36 UTC (permalink / raw)
  To: Dmitry Bogdanov
  Cc: Christoph Hellwig, Sagi Grimberg, Chaitanya Kulkarni, James Smart,
	linux-nvme, linux

There is prior discussion about exactly this change at
https://github.com/linux-nvme/nvme-cli/issues/318. The concerns raised
there still appear valid 6 years later. The main issue is that this
interface requires 2 separate ioctls/io_uring operations to submit a
Fused Compare and Write. That means the 2 commands could be submitted
to different I/O queues, which wouldn't work. Not to mention that 2
threads are required in order to submit the commands concurrently if
the blocking ioctls are used. Additionally, another application using
the NVMe controller concurrently could submit a command in between the
fused commands, which would also break the Fused Compare and Write.

FYI there was a set of patches sent to the mailing list
https://lore.kernel.org/linux-nvme/20210105224939.1336-1-clay.mayers@kioxia.com/T/#u
that attempted to implement a new passthru ioctl for submitting fused
commands. But it adds memory overhead to each NVMe request and there
are a lot of corner cases to consider around error handling.

As an implementer of Fused Compare and Write on the target side, I can
say the concept of "fused commands" is pretty horrible to work with.
It breaks the model where each NVMe command is an independent
operation and requires a ton of additional error propagation logic.
Not to mention that the NSID, SLBA, and NLB parameters are now
duplicated between the 2 commands... As far as I can tell, the only
reason to make the operation 2 commands is to allow a separate PRP/SGL
for each command. I guess this avoids the memcpy that would be
required if a single concatenated data buffer was used? But at what
cost???

On Wed, Sep 11, 2024 at 11:45 PM Dmitry Bogdanov <d.bogdanov@yadro.com> wrote:
>
> Allow a user to pass fused flags for the commands.
>
> Signed-off-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
> ---
>  drivers/nvme/host/ioctl.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
> index 1d769c842fbf..affedac90f04 100644
> --- a/drivers/nvme/host/ioctl.c
> +++ b/drivers/nvme/host/ioctl.c
> @@ -220,7 +220,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
>
>         if (copy_from_user(&io, uio, sizeof(io)))
>                 return -EFAULT;
> -       if (io.flags)
> +       if (io.flags & 0xFC)
>                 return -EINVAL;
>
>         switch (io.opcode) {
> @@ -299,7 +299,7 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
>
>         if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
>                 return -EFAULT;
> -       if (cmd.flags)
> +       if (cmd.flags & 0xFC)
>                 return -EINVAL;
>         if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid))
>                 return -EINVAL;
> @@ -346,7 +346,7 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
>
>         if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
>                 return -EFAULT;
> -       if (cmd.flags)
> +       if (cmd.flags & 0xFC)
>                 return -EINVAL;
>         if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid))
>                 return -EINVAL;
> --
> 2.25.1
>
>


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 7/7] nvme: allow send fused commands
  2024-09-12 16:36   ` Caleb Sander
@ 2024-09-13 11:20     ` Dmitry Bogdanov
  0 siblings, 0 replies; 10+ messages in thread
From: Dmitry Bogdanov @ 2024-09-13 11:20 UTC (permalink / raw)
  To: Caleb Sander
  Cc: Christoph Hellwig, Sagi Grimberg, Chaitanya Kulkarni, James Smart,
	linux-nvme, linux

Hi,

> There is prior discussion about exactly this change at
> https://github.com/linux-nvme/nvme-cli/issues/318. The concerns raised
> there still appear valid 6 years later. The main issue is that this
> interface requires 2 separate ioctls/io_uring operations to submit a
> Fused Compare and Write. That means the 2 commands could be submitted
> to different I/O queues, which wouldn't work. Not to mention that 2
> threads are required in order to submit the commands concurrently if
> the blocking ioctls are used. Additionally, another application using
> the NVMe controller concurrently could submit a command in between the
> fused commands, which would also break the Fused Compare and Write.

This patch is not intended for production usage of Fused commands from the
host; it exists just for the purpose of testing the target side. Of course,
it has some limitations, such as working only with one IO queue and one
user, but all of that is under the user's control:

# nvme connect-all -i 1 -t tcp 
# nvme io-passthru --o 0x5 -n 1 -f 0x1 -w --cdw10=0 --cdw12=0x0 -l 4096 -i ./4k0 /dev/nvme0n1 & nvme io-passthru --o 0x1 -n 1 -f 0x2 -w --cdw10=0 --cdw12=0x0 -l 4096 -i ./4k1 /dev/nvme0n1
NVMe command result:00000000
NVMe command result:00000000

And the patch provides enough kernel functionality to test the target side.

> 
> FYI there was a set of patches sent to the mailing list
> https://lore.kernel.org/linux-nvme/20210105224939.1336-1-clay.mayers@kioxia.com/T/#u
> that attempted to implement a new passthru ioctl for submitting fused
> commands. But it adds memory overhead to each NVMe request and there
> are a lot of corner cases to consider around error handling.
> 
> As an implementer of Fused Compare and Write on the target side, I can
> say the concept of "fused commands" is pretty horrible to work with.
> It breaks the model where each NVMe command is an independent
> operation and requires a ton of additional error propagation logic.
> Not to mention that the NSID, SLBA, and NLB parameters are now
> duplicated between the 2 commands... As far as I can tell, the only
> reason to make the operation 2 commands is to allow a separate PRP/SGL
> for each command. I guess this avoids the memcpy that would be
> required if a single concatenated data buffer was used? But at what
> cost???
> 
> On Wed, Sep 11, 2024 at 11:45 PM Dmitry Bogdanov <d.bogdanov@yadro.com> wrote:
> >
> > Allow a user to pass fused flags for the commands.
> >
> > Signed-off-by: Dmitry Bogdanov <d.bogdanov@yadro.com>


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2024-09-13 11:21 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-09-12  6:42 [PATCH 0/7] nvmet: support of Fused operations Dmitry Bogdanov
2024-09-12  6:42 ` [PATCH 1/7] nvmet: add support of Compare command Dmitry Bogdanov
2024-09-12  6:42 ` [PATCH 2/7] nvmet: bdev: " Dmitry Bogdanov
2024-09-12  6:42 ` [PATCH 3/7] nvmet: file: " Dmitry Bogdanov
2024-09-12  6:42 ` [PATCH 4/7] Revert "nvmet: Open code nvmet_req_execute()" Dmitry Bogdanov
2024-09-12  6:42 ` [PATCH 5/7] nvmet: add support of fused operations Dmitry Bogdanov
2024-09-12  6:42 ` [PATCH 6/7] nvmet: make fused commands be atomic Dmitry Bogdanov
2024-09-12  6:42 ` [PATCH 7/7] nvme: allow send fused commands Dmitry Bogdanov
2024-09-12 16:36   ` Caleb Sander
2024-09-13 11:20     ` Dmitry Bogdanov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).