From: Klaus Jensen <its@irrelevant.dk>
To: qemu-devel@nongnu.org
Cc: Kevin Wolf <kwolf@redhat.com>,
qemu-block@nongnu.org, Klaus Jensen <k.jensen@samsung.com>,
Gollu Appalanaidu <anaidu.gollu@samsung.com>,
Max Reitz <mreitz@redhat.com>, Klaus Jensen <its@irrelevant.dk>,
Keith Busch <kbusch@kernel.org>
Subject: [PATCH 2/2] hw/block/nvme: add write uncorrectable command
Date: Wed, 10 Feb 2021 08:06:46 +0100 [thread overview]
Message-ID: <20210210070646.730110-3-its@irrelevant.dk> (raw)
In-Reply-To: <20210210070646.730110-1-its@irrelevant.dk>
From: Gollu Appalanaidu <anaidu.gollu@samsung.com>
Add support for marking blocks invalid with the Write Uncorrectable
command. Block status is tracked in a (non-persistent) bitmap that is
checked on all reads and written to on all writes. This is potentially
expensive, so keep Write Uncorrectable disabled by default.
Signed-off-by: Gollu Appalanaidu <anaidu.gollu@samsung.com>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
docs/specs/nvme.txt | 3 ++
hw/block/nvme-ns.h | 2 ++
hw/block/nvme.h | 1 +
hw/block/nvme-ns.c | 2 ++
hw/block/nvme.c | 65 +++++++++++++++++++++++++++++++++++++------
hw/block/trace-events | 1 +
6 files changed, 66 insertions(+), 8 deletions(-)
diff --git a/docs/specs/nvme.txt b/docs/specs/nvme.txt
index 56d393884e7a..88f9cc278d4c 100644
--- a/docs/specs/nvme.txt
+++ b/docs/specs/nvme.txt
@@ -19,5 +19,8 @@ Known issues
* The accounting numbers in the SMART/Health are reset across power cycles
+* Marking blocks invalid with the Write Uncorrectable command is not persisted
+ across power cycles.
+
* Interrupt Coalescing is not supported and is disabled by default in volation
of the specification.
diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index 7af6884862b5..15fa422ded03 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -72,6 +72,8 @@ typedef struct NvmeNamespace {
struct {
uint32_t err_rec;
} features;
+
+ unsigned long *uncorrectable;
} NvmeNamespace;
static inline uint32_t nvme_nsid(NvmeNamespace *ns)
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 98082b2dfba3..9b8f85b9cf16 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -68,6 +68,7 @@ static inline const char *nvme_io_opc_str(uint8_t opc)
case NVME_CMD_FLUSH: return "NVME_NVM_CMD_FLUSH";
case NVME_CMD_WRITE: return "NVME_NVM_CMD_WRITE";
case NVME_CMD_READ: return "NVME_NVM_CMD_READ";
+ case NVME_CMD_WRITE_UNCOR: return "NVME_CMD_WRITE_UNCOR";
case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE";
case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES";
case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM";
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index ade46e2f3739..742bbc4b4b62 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -72,6 +72,8 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
id_ns->mcl = cpu_to_le32(ns->params.mcl);
id_ns->msrc = ns->params.msrc;
+ ns->uncorrectable = bitmap_new(id_ns->nsze);
+
return 0;
}
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index e5f6666725d7..56048046c193 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1112,6 +1112,20 @@ static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba,
return NVME_SUCCESS;
}
+static inline uint16_t nvme_check_uncor(NvmeNamespace *ns, uint64_t slba,
+ uint32_t nlb)
+{
+ uint64_t elba = nlb + slba;
+
+ if (ns->uncorrectable) {
+ if (find_next_bit(ns->uncorrectable, elba, slba) < elba) {
+ return NVME_UNRECOVERED_READ | NVME_DNR;
+ }
+ }
+
+ return NVME_SUCCESS;
+}
+
static void nvme_aio_err(NvmeRequest *req, int ret)
{
uint16_t status = NVME_SUCCESS;
@@ -1423,14 +1437,24 @@ static void nvme_rw_cb(void *opaque, int ret)
BlockAcctCookie *acct = &req->acct;
BlockAcctStats *stats = blk_get_stats(blk);
+ bool is_write = nvme_is_write(req);
+
trace_pci_nvme_rw_cb(nvme_cid(req), blk_name(blk));
- if (ns->params.zoned && nvme_is_write(req)) {
+ if (ns->params.zoned && is_write) {
nvme_finalize_zoned_write(ns, req);
}
if (!ret) {
block_acct_done(stats, acct);
+
+ if (is_write) {
+ NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+ uint64_t slba = le64_to_cpu(rw->slba);
+ uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
+
+ bitmap_clear(ns->uncorrectable, slba, nlb);
+ }
} else {
block_acct_failed(stats, acct);
nvme_aio_err(req, ret);
@@ -1521,13 +1545,13 @@ static void nvme_copy_cb(void *opaque, int ret)
{
NvmeRequest *req = opaque;
NvmeNamespace *ns = req->ns;
+ NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
+ uint64_t sdlba = le64_to_cpu(copy->sdlba);
struct nvme_copy_ctx *ctx = req->opaque;
trace_pci_nvme_copy_cb(nvme_cid(req));
if (ns->params.zoned) {
- NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
- uint64_t sdlba = le64_to_cpu(copy->sdlba);
NvmeZone *zone = nvme_get_zone_by_slba(ns, sdlba);
__nvme_advance_zone_wp(ns, zone, ctx->nlb);
@@ -1535,6 +1559,7 @@ static void nvme_copy_cb(void *opaque, int ret)
if (!ret) {
block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct);
+ bitmap_clear(ns->uncorrectable, sdlba, ctx->nlb);
} else {
block_acct_failed(blk_get_stats(ns->blkconf.blk), &req->acct);
nvme_aio_err(req, ret);
@@ -1953,6 +1978,12 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
goto invalid;
}
+ status = nvme_check_uncor(ns, slba, nlb);
+ if (status) {
+ trace_pci_nvme_err_unrecoverable_read(slba, nlb);
+ return status;
+ }
+
if (ns->params.zoned) {
status = nvme_check_zone_read(ns, slba, nlb);
if (status) {
@@ -1992,7 +2023,7 @@ invalid:
}
static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
- bool wrz)
+ bool wrz, bool uncor)
{
NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
NvmeNamespace *ns = req->ns;
@@ -2008,7 +2039,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
trace_pci_nvme_write(nvme_cid(req), nvme_io_opc_str(rw->opcode),
nvme_nsid(ns), nlb, data_size, slba);
- if (!wrz) {
+ if (!wrz && !uncor) {
status = nvme_check_mdts(n, data_size);
if (status) {
trace_pci_nvme_err_mdts(nvme_cid(req), data_size);
@@ -2055,6 +2086,11 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
zone->w_ptr += nlb;
}
+ if (uncor) {
+ bitmap_set(ns->uncorrectable, slba, nlb);
+ return NVME_SUCCESS;
+ }
+
data_offset = nvme_l2b(ns, slba);
if (!wrz) {
@@ -2087,17 +2123,22 @@ invalid:
static inline uint16_t nvme_write(NvmeCtrl *n, NvmeRequest *req)
{
- return nvme_do_write(n, req, false, false);
+ return nvme_do_write(n, req, false, false, false);
}
static inline uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
{
- return nvme_do_write(n, req, false, true);
+ return nvme_do_write(n, req, false, true, false);
}
static inline uint16_t nvme_zone_append(NvmeCtrl *n, NvmeRequest *req)
{
- return nvme_do_write(n, req, true, false);
+ return nvme_do_write(n, req, true, false, false);
+}
+
+static inline uint16_t nvme_write_uncor(NvmeCtrl *n, NvmeRequest *req)
+{
+ return nvme_do_write(n, req, false, false, true);
}
static uint16_t nvme_get_mgmt_zone_slba_idx(NvmeNamespace *ns, NvmeCmd *c,
@@ -2596,6 +2637,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
return nvme_flush(n, req);
case NVME_CMD_WRITE_ZEROES:
return nvme_write_zeroes(n, req);
+ case NVME_CMD_WRITE_UNCOR:
+ return nvme_write_uncor(n, req);
case NVME_CMD_ZONE_APPEND:
return nvme_zone_append(n, req);
case NVME_CMD_WRITE:
@@ -4514,6 +4557,11 @@ static void nvme_init_cse_iocs(NvmeCtrl *n)
n->iocs.nvm[NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC;
n->iocs.nvm[NVME_CMD_READ] = NVME_CMD_EFF_CSUPP;
+ if (oncs & NVME_ONCS_WRITE_UNCORR) {
+ n->iocs.nvm[NVME_CMD_WRITE_UNCOR] = NVME_CMD_EFF_CSUPP |
+ NVME_CMD_EFF_LBCC;
+ }
+
if (oncs & NVME_ONCS_WRITE_ZEROES) {
n->iocs.nvm[NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP |
NVME_CMD_EFF_LBCC;
@@ -4853,6 +4901,7 @@ static void nvme_exit(PCIDevice *pci_dev)
}
nvme_ns_cleanup(ns);
+ g_free(ns->uncorrectable);
}
g_free(n->cq);
diff --git a/hw/block/trace-events b/hw/block/trace-events
index 4b5ee04024f4..f30ef220c26a 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -128,6 +128,7 @@ pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PR
pci_nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8""
pci_nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8""
pci_nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64""
+pci_nvme_err_unrecoverable_read(uint64_t start, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32""
pci_nvme_err_invalid_log_page_offset(uint64_t ofs, uint64_t size) "must be <= %"PRIu64", got %"PRIu64""
pci_nvme_err_cmb_invalid_cba(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64""
pci_nvme_err_cmb_not_enabled(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64""
--
2.30.0
next prev parent reply other threads:[~2021-02-10 7:12 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-02-10 7:06 [PATCH 0/2] hw/block/nvme: oncs and write uncorrectable support Klaus Jensen
2021-02-10 7:06 ` [PATCH 1/2] hw/block/nvme: add oncs device parameter Klaus Jensen
2021-02-10 11:06 ` Minwoo Im
2021-02-10 7:06 ` Klaus Jensen [this message]
2021-02-10 11:14 ` [PATCH 2/2] hw/block/nvme: add write uncorrectable command Minwoo Im
2021-02-10 11:42 ` Klaus Jensen
2021-02-10 15:28 ` Minwoo Im
2021-02-11 3:37 ` Keith Busch
2021-02-11 8:43 ` Klaus Jensen
2021-02-11 15:37 ` Keith Busch
2021-02-11 17:54 ` Klaus Jensen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210210070646.730110-3-its@irrelevant.dk \
--to=its@irrelevant.dk \
--cc=anaidu.gollu@samsung.com \
--cc=k.jensen@samsung.com \
--cc=kbusch@kernel.org \
--cc=kwolf@redhat.com \
--cc=mreitz@redhat.com \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.