From: Klaus Jensen <its@irrelevant.dk>
To: qemu-devel@nongnu.org, Peter Maydell <peter.maydell@linaro.org>
Cc: Fam Zheng <fam@euphon.net>, Kevin Wolf <kwolf@redhat.com>,
qemu-block@nongnu.org, Klaus Jensen <k.jensen@samsung.com>,
Gollu Appalanaidu <anaidu.gollu@samsung.com>,
Max Reitz <mreitz@redhat.com>, Keith Busch <kbusch@kernel.org>,
Stefan Hajnoczi <stefanha@redhat.com>,
Klaus Jensen <its@irrelevant.dk>
Subject: [PULL v2 16/38] hw/block/nvme: add broadcast nsid support for flush command
Date: Tue, 9 Mar 2021 12:44:50 +0100
Message-ID: <20210309114512.536489-17-its@irrelevant.dk>
In-Reply-To: <20210309114512.536489-1-its@irrelevant.dk>
From: Gollu Appalanaidu <anaidu.gollu@samsung.com>
Add support for using the broadcast nsid to issue a flush on all
namespaces through a single command.
Signed-off-by: Gollu Appalanaidu <anaidu.gollu@samsung.com>
Reviewed-by: Klaus Jensen <k.jensen@samsung.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
include/block/nvme.h | 8 +++
hw/block/nvme.c | 124 +++++++++++++++++++++++++++++++++++++++---
hw/block/trace-events | 2 +
3 files changed, 127 insertions(+), 7 deletions(-)
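For reference, a host can tell whether the controller honours the broadcast NSID for Flush by checking bits 2:1 of the VWC field in the Identify Controller data structure; with this patch the device reports that combination as supported. A minimal host-side sketch of such a check follows; the macro and helper names are made up for illustration and are not part of QEMU or of any host library:

    #include <stdbool.h>
    #include <stdint.h>

    /* Identify Controller VWC field: bit 0 reports a volatile write
     * cache, bits 2:1 describe Flush behaviour when NSID is set to
     * 0xFFFFFFFF; 11b means the broadcast NSID is supported, which is
     * the value this device now reports. */
    #define VWC_PRESENT              (1 << 0)
    #define VWC_BROADCAST_MASK       (3 << 1)
    #define VWC_BROADCAST_SUPPORTED  (3 << 1)

    static bool vwc_broadcast_flush_supported(uint8_t vwc)
    {
        return (vwc & VWC_BROADCAST_MASK) == VWC_BROADCAST_SUPPORTED;
    }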
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 9f8eb3988c0e..b23f3ae2279f 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -1062,6 +1062,14 @@ enum NvmeIdCtrlOcfs {
NVME_OCFS_COPY_FORMAT_0 = 1 << 0,
};
+enum NvmeIdCtrlVwc {
+ NVME_VWC_PRESENT = 1 << 0,
+ NVME_VWC_NSID_BROADCAST_NO_SUPPORT = 0 << 1,
+ NVME_VWC_NSID_BROADCAST_RESERVED = 1 << 1,
+ NVME_VWC_NSID_BROADCAST_CTRL_SPEC = 2 << 1,
+ NVME_VWC_NSID_BROADCAST_SUPPORT = 3 << 1,
+};
+
enum NvmeIdCtrlFrmw {
NVME_FRMW_SLOT1_RO = 1 << 0,
};
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index a54ef34ce5e7..db1a3aabd8e8 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1466,6 +1466,41 @@ static void nvme_rw_cb(void *opaque, int ret)
nvme_enqueue_req_completion(nvme_cq(req), req);
}
+struct nvme_aio_flush_ctx {
+ NvmeRequest *req;
+ NvmeNamespace *ns;
+ BlockAcctCookie acct;
+};
+
+static void nvme_aio_flush_cb(void *opaque, int ret)
+{
+ struct nvme_aio_flush_ctx *ctx = opaque;
+ NvmeRequest *req = ctx->req;
+ uintptr_t *num_flushes = (uintptr_t *)&req->opaque;
+
+ BlockBackend *blk = ctx->ns->blkconf.blk;
+ BlockAcctCookie *acct = &ctx->acct;
+ BlockAcctStats *stats = blk_get_stats(blk);
+
+ trace_pci_nvme_aio_flush_cb(nvme_cid(req), blk_name(blk));
+
+ if (!ret) {
+ block_acct_done(stats, acct);
+ } else {
+ block_acct_failed(stats, acct);
+ nvme_aio_err(req, ret);
+ }
+
+ (*num_flushes)--;
+ g_free(ctx);
+
+ if (*num_flushes) {
+ return;
+ }
+
+ nvme_enqueue_req_completion(nvme_cq(req), req);
+}
+
static void nvme_aio_discard_cb(void *opaque, int ret)
{
NvmeRequest *req = opaque;
@@ -1949,10 +1984,56 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
{
- block_acct_start(blk_get_stats(req->ns->blkconf.blk), &req->acct, 0,
- BLOCK_ACCT_FLUSH);
- req->aiocb = blk_aio_flush(req->ns->blkconf.blk, nvme_rw_cb, req);
- return NVME_NO_COMPLETE;
+ uint32_t nsid = le32_to_cpu(req->cmd.nsid);
+ uintptr_t *num_flushes = (uintptr_t *)&req->opaque;
+ uint16_t status;
+ struct nvme_aio_flush_ctx *ctx;
+ NvmeNamespace *ns;
+
+ trace_pci_nvme_flush(nvme_cid(req), nsid);
+
+ if (nsid != NVME_NSID_BROADCAST) {
+ req->ns = nvme_ns(n, nsid);
+ if (unlikely(!req->ns)) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ block_acct_start(blk_get_stats(req->ns->blkconf.blk), &req->acct, 0,
+ BLOCK_ACCT_FLUSH);
+ req->aiocb = blk_aio_flush(req->ns->blkconf.blk, nvme_rw_cb, req);
+ return NVME_NO_COMPLETE;
+ }
+
+ /* 1-initialize; see comment in nvme_dsm */
+ *num_flushes = 1;
+
+ for (int i = 1; i <= n->num_namespaces; i++) {
+ ns = nvme_ns(n, i);
+ if (!ns) {
+ continue;
+ }
+
+ ctx = g_new(struct nvme_aio_flush_ctx, 1);
+ ctx->req = req;
+ ctx->ns = ns;
+
+ (*num_flushes)++;
+
+ block_acct_start(blk_get_stats(ns->blkconf.blk), &ctx->acct, 0,
+ BLOCK_ACCT_FLUSH);
+ blk_aio_flush(ns->blkconf.blk, nvme_aio_flush_cb, ctx);
+ }
+
+ /* account for the 1-initialization */
+ (*num_flushes)--;
+
+ if (*num_flushes) {
+ status = NVME_NO_COMPLETE;
+ } else {
+ status = req->status;
+ }
+
+ return status;
}
static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
@@ -2608,6 +2689,29 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
return NVME_INVALID_NSID | NVME_DNR;
}
+ /*
+ * In the base NVM command set, Flush may apply to all namespaces
+ * (indicated by NSID being set to 0xFFFFFFFF). But if that feature is
+ * used along with TP 4056 (Namespace Types), the semantics become
+ * ambiguous.
+ *
+ * If NSID is indeed set to 0xFFFFFFFF, we simply cannot associate the
+ * opcode with a specific command since we cannot determine a unique I/O
+ * command set. Opcode 0x0 may have completely different semantics in
+ * some other command set - does an NSID of 0xFFFFFFFF then mean "for all
+ * namespaces, apply whatever command set specific command uses the 0x0
+ * opcode"? Or does it mean "for all namespaces, apply that command if,
+ * and only if, it allows NSID to be 0xFFFFFFFF"?
+ *
+ * Luckily, for now, we do not have to care about this since the device
+ * only supports namespace types that include the NVM Flush command (NVM
+ * and Zoned), so always do an NVM Flush.
+ */
+ if (req->cmd.opcode == NVME_CMD_FLUSH) {
+ return nvme_flush(n, req);
+ }
+
req->ns = nvme_ns(n, nsid);
if (unlikely(!req->ns)) {
return NVME_INVALID_FIELD | NVME_DNR;
@@ -2619,8 +2723,6 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
}
switch (req->cmd.opcode) {
- case NVME_CMD_FLUSH:
- return nvme_flush(n, req);
case NVME_CMD_WRITE_ZEROES:
return nvme_write_zeroes(n, req);
case NVME_CMD_ZONE_APPEND:
@@ -4750,7 +4852,15 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
NVME_ONCS_FEATURES | NVME_ONCS_DSM |
NVME_ONCS_COMPARE | NVME_ONCS_COPY);
- id->vwc = (0x2 << 1) | 0x1;
+ /*
+ * NOTE: If this device ever supports a command set that does NOT use 0x0
+ * as a Flush-equivalent operation, support for the broadcast NSID in Flush
+ * should probably be removed.
+ *
+ * See comment in nvme_io_cmd.
+ */
+ id->vwc = NVME_VWC_NSID_BROADCAST_SUPPORT | NVME_VWC_PRESENT;
+
id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0);
id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN |
NVME_CTRL_SGLS_BITBUCKET);
diff --git a/hw/block/trace-events b/hw/block/trace-events
index 4b5ee04024f4..b04f7a3e1890 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -40,6 +40,7 @@ pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2,
pci_nvme_map_sgl(uint16_t cid, uint8_t typ, uint64_t len) "cid %"PRIu16" type 0x%"PRIx8" len %"PRIu64""
pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
+pci_nvme_flush(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32""
pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
@@ -55,6 +56,7 @@ pci_nvme_compare_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_aio_copy_in_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_aio_zone_reset_cb(uint16_t cid, uint64_t zslba) "cid %"PRIu16" zslba 0x%"PRIx64""
+pci_nvme_aio_flush_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""
--
2.30.1
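As a side note on the broadcast path in nvme_flush() above: it relies on a small reference-counting trick (the "1-initialize" mentioned in the code). The in-flight counter starts at 1 so that an early callback cannot complete the request while flushes are still being submitted; the extra count is dropped once the loop has finished, and only then may the request complete. A generic, self-contained sketch of that pattern, with made-up names, might look like this:

    #include <assert.h>

    struct request {
        unsigned int pending;   /* outstanding asynchronous operations */
    };

    /* Stand-in for nvme_enqueue_req_completion(). */
    static void complete_request(struct request *req)
    {
        (void)req;
    }

    /* Called from each completion callback. */
    static void aio_done(struct request *req)
    {
        assert(req->pending > 0);
        if (--req->pending == 0) {
            complete_request(req);
        }
    }

    static void submit_all(struct request *req, int nr_ops)
    {
        req->pending = 1;                   /* 1-initialize */
        for (int i = 0; i < nr_ops; i++) {
            req->pending++;
            /* kick off asynchronous operation i here; its completion
             * callback ends by calling aio_done(req) */
        }
        aio_done(req);                      /* drop the 1-initialization */
    }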