From: Klaus Jensen <its@irrelevant.dk>
To: qemu-devel@nongnu.org
Cc: Kevin Wolf <kwolf@redhat.com>,
qemu-block@nongnu.org, Klaus Jensen <k.jensen@samsung.com>,
Gollu Appalanaidu <anaidu.gollu@samsung.com>,
Max Reitz <mreitz@redhat.com>, Klaus Jensen <its@irrelevant.dk>,
Keith Busch <kbusch@kernel.org>
Subject: [PATCH 2/2] hw/block/nvme: add write uncorrectable command
Date: Wed, 10 Feb 2021 08:06:46 +0100 [thread overview]
Message-ID: <20210210070646.730110-3-its@irrelevant.dk> (raw)
In-Reply-To: <20210210070646.730110-1-its@irrelevant.dk>
From: Gollu Appalanaidu <anaidu.gollu@samsung.com>
Add support for marking blocks invalid with the Write Uncorrectable
command. Block status is tracked in a (non-persistent) bitmap that is
checked on all reads and written to on all writes. This is potentially
expensive, so keep Write Uncorrectable disabled by default.
Signed-off-by: Gollu Appalanaidu <anaidu.gollu@samsung.com>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
docs/specs/nvme.txt | 3 ++
hw/block/nvme-ns.h | 2 ++
hw/block/nvme.h | 1 +
hw/block/nvme-ns.c | 2 ++
hw/block/nvme.c | 65 +++++++++++++++++++++++++++++++++++++------
hw/block/trace-events | 1 +
6 files changed, 66 insertions(+), 8 deletions(-)
diff --git a/docs/specs/nvme.txt b/docs/specs/nvme.txt
index 56d393884e7a..88f9cc278d4c 100644
--- a/docs/specs/nvme.txt
+++ b/docs/specs/nvme.txt
@@ -19,5 +19,8 @@ Known issues
* The accounting numbers in the SMART/Health are reset across power cycles
+* Marking blocks invalid with the Write Uncorrectable command is not
+ persisted across power cycles.
+
* Interrupt Coalescing is not supported and is disabled by default in volation
of the specification.
diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index 7af6884862b5..15fa422ded03 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -72,6 +72,8 @@ typedef struct NvmeNamespace {
struct {
uint32_t err_rec;
} features;
+
+ unsigned long *uncorrectable;
} NvmeNamespace;
static inline uint32_t nvme_nsid(NvmeNamespace *ns)
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 98082b2dfba3..9b8f85b9cf16 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -68,6 +68,7 @@ static inline const char *nvme_io_opc_str(uint8_t opc)
case NVME_CMD_FLUSH: return "NVME_NVM_CMD_FLUSH";
case NVME_CMD_WRITE: return "NVME_NVM_CMD_WRITE";
case NVME_CMD_READ: return "NVME_NVM_CMD_READ";
+ case NVME_CMD_WRITE_UNCOR: return "NVME_CMD_WRITE_UNCOR";
case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE";
case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES";
case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM";
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index ade46e2f3739..742bbc4b4b62 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -72,6 +72,8 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
id_ns->mcl = cpu_to_le32(ns->params.mcl);
id_ns->msrc = ns->params.msrc;
+ ns->uncorrectable = bitmap_new(id_ns->nsze);
+
return 0;
}
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index e5f6666725d7..56048046c193 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1112,6 +1112,20 @@ static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba,
return NVME_SUCCESS;
}
+static inline uint16_t nvme_check_uncor(NvmeNamespace *ns, uint64_t slba,
+ uint32_t nlb)
+{
+ uint64_t elba = nlb + slba;
+
+ if (ns->uncorrectable) {
+ if (find_next_bit(ns->uncorrectable, elba, slba) < elba) {
+ return NVME_UNRECOVERED_READ | NVME_DNR;
+ }
+ }
+
+ return NVME_SUCCESS;
+}
+
static void nvme_aio_err(NvmeRequest *req, int ret)
{
uint16_t status = NVME_SUCCESS;
@@ -1423,14 +1437,24 @@ static void nvme_rw_cb(void *opaque, int ret)
BlockAcctCookie *acct = &req->acct;
BlockAcctStats *stats = blk_get_stats(blk);
+ bool is_write = nvme_is_write(req);
+
trace_pci_nvme_rw_cb(nvme_cid(req), blk_name(blk));
- if (ns->params.zoned && nvme_is_write(req)) {
+ if (ns->params.zoned && is_write) {
nvme_finalize_zoned_write(ns, req);
}
if (!ret) {
block_acct_done(stats, acct);
+
+ if (is_write) {
+ NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+ uint64_t slba = le64_to_cpu(rw->slba);
+ uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
+
+ bitmap_clear(ns->uncorrectable, slba, nlb);
+ }
} else {
block_acct_failed(stats, acct);
nvme_aio_err(req, ret);
@@ -1521,13 +1545,13 @@ static void nvme_copy_cb(void *opaque, int ret)
{
NvmeRequest *req = opaque;
NvmeNamespace *ns = req->ns;
+ NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
+ uint64_t sdlba = le64_to_cpu(copy->sdlba);
struct nvme_copy_ctx *ctx = req->opaque;
trace_pci_nvme_copy_cb(nvme_cid(req));
if (ns->params.zoned) {
- NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
- uint64_t sdlba = le64_to_cpu(copy->sdlba);
NvmeZone *zone = nvme_get_zone_by_slba(ns, sdlba);
__nvme_advance_zone_wp(ns, zone, ctx->nlb);
@@ -1535,6 +1559,7 @@ static void nvme_copy_cb(void *opaque, int ret)
if (!ret) {
block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct);
+ bitmap_clear(ns->uncorrectable, sdlba, ctx->nlb);
} else {
block_acct_failed(blk_get_stats(ns->blkconf.blk), &req->acct);
nvme_aio_err(req, ret);
@@ -1953,6 +1978,12 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
goto invalid;
}
+ status = nvme_check_uncor(ns, slba, nlb);
+ if (status) {
+ trace_pci_nvme_err_unrecoverable_read(slba, nlb);
+ return status;
+ }
+
if (ns->params.zoned) {
status = nvme_check_zone_read(ns, slba, nlb);
if (status) {
@@ -1992,7 +2023,7 @@ invalid:
}
static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
- bool wrz)
+ bool wrz, bool uncor)
{
NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
NvmeNamespace *ns = req->ns;
@@ -2008,7 +2039,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
trace_pci_nvme_write(nvme_cid(req), nvme_io_opc_str(rw->opcode),
nvme_nsid(ns), nlb, data_size, slba);
- if (!wrz) {
+ if (!wrz && !uncor) {
status = nvme_check_mdts(n, data_size);
if (status) {
trace_pci_nvme_err_mdts(nvme_cid(req), data_size);
@@ -2055,6 +2086,11 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
zone->w_ptr += nlb;
}
+ if (uncor) {
+ bitmap_set(ns->uncorrectable, slba, nlb);
+ return NVME_SUCCESS;
+ }
+
data_offset = nvme_l2b(ns, slba);
if (!wrz) {
@@ -2087,17 +2123,22 @@ invalid:
static inline uint16_t nvme_write(NvmeCtrl *n, NvmeRequest *req)
{
- return nvme_do_write(n, req, false, false);
+ return nvme_do_write(n, req, false, false, false);
}
static inline uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
{
- return nvme_do_write(n, req, false, true);
+ return nvme_do_write(n, req, false, true, false);
}
static inline uint16_t nvme_zone_append(NvmeCtrl *n, NvmeRequest *req)
{
- return nvme_do_write(n, req, true, false);
+ return nvme_do_write(n, req, true, false, false);
+}
+
+static inline uint16_t nvme_write_uncor(NvmeCtrl *n, NvmeRequest *req)
+{
+ return nvme_do_write(n, req, false, false, true);
}
static uint16_t nvme_get_mgmt_zone_slba_idx(NvmeNamespace *ns, NvmeCmd *c,
@@ -2596,6 +2637,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
return nvme_flush(n, req);
case NVME_CMD_WRITE_ZEROES:
return nvme_write_zeroes(n, req);
+ case NVME_CMD_WRITE_UNCOR:
+ return nvme_write_uncor(n, req);
case NVME_CMD_ZONE_APPEND:
return nvme_zone_append(n, req);
case NVME_CMD_WRITE:
@@ -4514,6 +4557,11 @@ static void nvme_init_cse_iocs(NvmeCtrl *n)
n->iocs.nvm[NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC;
n->iocs.nvm[NVME_CMD_READ] = NVME_CMD_EFF_CSUPP;
+ if (oncs & NVME_ONCS_WRITE_UNCORR) {
+ n->iocs.nvm[NVME_CMD_WRITE_UNCOR] = NVME_CMD_EFF_CSUPP |
+ NVME_CMD_EFF_LBCC;
+ }
+
if (oncs & NVME_ONCS_WRITE_ZEROES) {
n->iocs.nvm[NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP |
NVME_CMD_EFF_LBCC;
@@ -4853,6 +4901,7 @@ static void nvme_exit(PCIDevice *pci_dev)
}
nvme_ns_cleanup(ns);
+ g_free(ns->uncorrectable);
}
g_free(n->cq);
diff --git a/hw/block/trace-events b/hw/block/trace-events
index 4b5ee04024f4..f30ef220c26a 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -128,6 +128,7 @@ pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PR
pci_nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8""
pci_nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8""
pci_nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64""
+pci_nvme_err_unrecoverable_read(uint64_t start, uint32_t nlb) "islba 0x%"PRIx64" nlb %"PRIu32""
pci_nvme_err_invalid_log_page_offset(uint64_t ofs, uint64_t size) "must be <= %"PRIu64", got %"PRIu64""
pci_nvme_err_cmb_invalid_cba(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64""
pci_nvme_err_cmb_not_enabled(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64""
--
2.30.0
next prev parent reply other threads:[~2021-02-10 7:12 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-02-10 7:06 [PATCH 0/2] hw/block/nvme: oncs and write uncorrectable support Klaus Jensen
2021-02-10 7:06 ` [PATCH 1/2] hw/block/nvme: add oncs device parameter Klaus Jensen
2021-02-10 11:06 ` Minwoo Im
2021-02-10 7:06 ` Klaus Jensen [this message]
2021-02-10 11:14 ` [PATCH 2/2] hw/block/nvme: add write uncorrectable command Minwoo Im
2021-02-10 11:42 ` Klaus Jensen
2021-02-10 15:28 ` Minwoo Im
2021-02-11 3:37 ` Keith Busch
2021-02-11 8:43 ` Klaus Jensen
2021-02-11 15:37 ` Keith Busch
2021-02-11 17:54 ` Klaus Jensen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210210070646.730110-3-its@irrelevant.dk \
--to=its@irrelevant.dk \
--cc=anaidu.gollu@samsung.com \
--cc=k.jensen@samsung.com \
--cc=kbusch@kernel.org \
--cc=kwolf@redhat.com \
--cc=mreitz@redhat.com \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).