From: Klaus Jensen <its@irrelevant.dk>
To: qemu-devel@nongnu.org
Cc: Kevin Wolf <kwolf@redhat.com>,
qemu-block@nongnu.org, Dmitry Fomichev <dmitry.fomichev@wdc.com>,
Klaus Jensen <k.jensen@samsung.com>,
Max Reitz <mreitz@redhat.com>, Klaus Jensen <its@irrelevant.dk>,
Keith Busch <kbusch@kernel.org>
Subject: [PATCH 4/6] hw/block/nvme: zero out zones on reset
Date: Mon, 11 Jan 2021 13:32:21 +0100 [thread overview]
Message-ID: <20210111123223.76248-5-its@irrelevant.dk> (raw)
In-Reply-To: <20210111123223.76248-1-its@irrelevant.dk>
From: Klaus Jensen <k.jensen@samsung.com>
The zoned command set specification states that "All logical blocks in a
zone *shall* be marked as deallocated when [the zone is reset]". Because
the device guarantees that 0x00 is read from deallocated blocks, and we
cannot be sure that a discard will do anything, we have to issue a
pwrite_zeroes. Typically, though, this will be achieved with an efficient
unmap/discard operation.
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
hw/block/nvme.c | 150 +++++++++++++++++++++++++++++++-----------
hw/block/trace-events | 1 +
2 files changed, 113 insertions(+), 38 deletions(-)
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 7c2ec17ad7d9..b3658595fe1b 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1371,6 +1371,53 @@ static void nvme_aio_discard_cb(void *opaque, int ret)
nvme_enqueue_req_completion(nvme_cq(req), req);
}
+struct nvme_zone_reset_ctx {
+ NvmeRequest *req;
+ NvmeZone *zone;
+};
+
+static void nvme_aio_zone_reset_cb(void *opaque, int ret)
+{
+ struct nvme_zone_reset_ctx *ctx = opaque;
+ NvmeRequest *req = ctx->req;
+ NvmeNamespace *ns = req->ns;
+ NvmeZone *zone = ctx->zone;
+ uintptr_t *resets = (uintptr_t *)&req->opaque;
+
+ g_free(ctx);
+
+ trace_pci_nvme_aio_zone_reset_cb(nvme_cid(req), zone->d.zslba);
+
+ if (!ret) {
+ switch (nvme_get_zone_state(zone)) {
+ case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+ case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ nvme_aor_dec_open(ns);
+ /* fall through */
+ case NVME_ZONE_STATE_CLOSED:
+ nvme_aor_dec_active(ns);
+ /* fall through */
+ case NVME_ZONE_STATE_FULL:
+ zone->w_ptr = zone->d.zslba;
+ zone->d.wp = zone->w_ptr;
+ nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EMPTY);
+ /* fall through */
+ default:
+ break;
+ }
+ } else {
+ nvme_aio_err(req, ret);
+ }
+
+ (*resets)--;
+
+ if (*resets) {
+ return;
+ }
+
+ nvme_enqueue_req_completion(nvme_cq(req), req);
+}
+
struct nvme_compare_ctx {
QEMUIOVector iov;
uint8_t *bounce;
@@ -1735,7 +1782,8 @@ static uint16_t nvme_get_mgmt_zone_slba_idx(NvmeNamespace *ns, NvmeCmd *c,
return NVME_SUCCESS;
}
-typedef uint16_t (*op_handler_t)(NvmeNamespace *, NvmeZone *, NvmeZoneState);
+typedef uint16_t (*op_handler_t)(NvmeNamespace *, NvmeZone *, NvmeZoneState,
+ NvmeRequest *);
enum NvmeZoneProcessingMask {
NVME_PROC_CURRENT_ZONE = 0,
@@ -1746,7 +1794,7 @@ enum NvmeZoneProcessingMask {
};
static uint16_t nvme_open_zone(NvmeNamespace *ns, NvmeZone *zone,
- NvmeZoneState state)
+ NvmeZoneState state, NvmeRequest *req)
{
uint16_t status;
@@ -1779,7 +1827,7 @@ static uint16_t nvme_open_zone(NvmeNamespace *ns, NvmeZone *zone,
}
static uint16_t nvme_close_zone(NvmeNamespace *ns, NvmeZone *zone,
- NvmeZoneState state)
+ NvmeZoneState state, NvmeRequest *req)
{
switch (state) {
case NVME_ZONE_STATE_EXPLICITLY_OPEN:
@@ -1795,7 +1843,7 @@ static uint16_t nvme_close_zone(NvmeNamespace *ns, NvmeZone *zone,
}
static uint16_t nvme_finish_zone(NvmeNamespace *ns, NvmeZone *zone,
- NvmeZoneState state)
+ NvmeZoneState state, NvmeRequest *req)
{
switch (state) {
case NVME_ZONE_STATE_EXPLICITLY_OPEN:
@@ -1818,30 +1866,42 @@ static uint16_t nvme_finish_zone(NvmeNamespace *ns, NvmeZone *zone,
}
static uint16_t nvme_reset_zone(NvmeNamespace *ns, NvmeZone *zone,
- NvmeZoneState state)
+ NvmeZoneState state, NvmeRequest *req)
{
+ uintptr_t *resets = (uintptr_t *)&req->opaque;
+ struct nvme_zone_reset_ctx *ctx;
+
switch (state) {
- case NVME_ZONE_STATE_EXPLICITLY_OPEN:
- case NVME_ZONE_STATE_IMPLICITLY_OPEN:
- nvme_aor_dec_open(ns);
- /* fall through */
- case NVME_ZONE_STATE_CLOSED:
- nvme_aor_dec_active(ns);
- /* fall through */
- case NVME_ZONE_STATE_FULL:
- zone->w_ptr = zone->d.zslba;
- zone->d.wp = zone->w_ptr;
- nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EMPTY);
- /* fall through */
case NVME_ZONE_STATE_EMPTY:
return NVME_SUCCESS;
+ case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+ case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ case NVME_ZONE_STATE_CLOSED:
+ case NVME_ZONE_STATE_FULL:
+ break;
default:
return NVME_ZONE_INVAL_TRANSITION;
}
+
+ /*
+ * The zone reset aio callback needs to know the zone that is being reset
+ * in order to transition the zone on completion.
+ */
+ ctx = g_new(struct nvme_zone_reset_ctx, 1);
+ ctx->req = req;
+ ctx->zone = zone;
+
+ (*resets)++;
+
+ blk_aio_pwrite_zeroes(ns->blkconf.blk, nvme_l2b(ns, zone->d.zslba),
+ nvme_l2b(ns, ns->zone_size), BDRV_REQ_MAY_UNMAP,
+ nvme_aio_zone_reset_cb, ctx);
+
+ return NVME_NO_COMPLETE;
}
static uint16_t nvme_offline_zone(NvmeNamespace *ns, NvmeZone *zone,
- NvmeZoneState state)
+ NvmeZoneState state, NvmeRequest *req)
{
switch (state) {
case NVME_ZONE_STATE_READ_ONLY:
@@ -1875,7 +1935,7 @@ static uint16_t nvme_set_zd_ext(NvmeNamespace *ns, NvmeZone *zone)
static uint16_t nvme_bulk_proc_zone(NvmeNamespace *ns, NvmeZone *zone,
enum NvmeZoneProcessingMask proc_mask,
- op_handler_t op_hndlr)
+ op_handler_t op_hndlr, NvmeRequest *req)
{
uint16_t status = NVME_SUCCESS;
NvmeZoneState zs = nvme_get_zone_state(zone);
@@ -1900,7 +1960,7 @@ static uint16_t nvme_bulk_proc_zone(NvmeNamespace *ns, NvmeZone *zone,
}
if (proc_zone) {
- status = op_hndlr(ns, zone, zs);
+ status = op_hndlr(ns, zone, zs, req);
}
return status;
@@ -1908,42 +1968,46 @@ static uint16_t nvme_bulk_proc_zone(NvmeNamespace *ns, NvmeZone *zone,
static uint16_t nvme_do_zone_op(NvmeNamespace *ns, NvmeZone *zone,
enum NvmeZoneProcessingMask proc_mask,
- op_handler_t op_hndlr)
+ op_handler_t op_hndlr, NvmeRequest *req)
{
NvmeZone *next;
uint16_t status = NVME_SUCCESS;
int i;
if (!proc_mask) {
- status = op_hndlr(ns, zone, nvme_get_zone_state(zone));
+ status = op_hndlr(ns, zone, nvme_get_zone_state(zone), req);
} else {
if (proc_mask & NVME_PROC_CLOSED_ZONES) {
QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) {
- status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr);
- if (status != NVME_SUCCESS) {
+ status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr,
+ req);
+ if (status && status != NVME_NO_COMPLETE) {
goto out;
}
}
}
if (proc_mask & NVME_PROC_OPENED_ZONES) {
QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) {
- status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr);
- if (status != NVME_SUCCESS) {
+ status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr,
+ req);
+ if (status && status != NVME_NO_COMPLETE) {
goto out;
}
}
QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) {
- status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr);
- if (status != NVME_SUCCESS) {
+ status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr,
+ req);
+ if (status && status != NVME_NO_COMPLETE) {
goto out;
}
}
}
if (proc_mask & NVME_PROC_FULL_ZONES) {
QTAILQ_FOREACH_SAFE(zone, &ns->full_zones, entry, next) {
- status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr);
- if (status != NVME_SUCCESS) {
+ status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr,
+ req);
+ if (status && status != NVME_NO_COMPLETE) {
goto out;
}
}
@@ -1951,8 +2015,9 @@ static uint16_t nvme_do_zone_op(NvmeNamespace *ns, NvmeZone *zone,
if (proc_mask & NVME_PROC_READ_ONLY_ZONES) {
for (i = 0; i < ns->num_zones; i++, zone++) {
- status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr);
- if (status != NVME_SUCCESS) {
+ status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr,
+ req);
+ if (status && status != NVME_NO_COMPLETE) {
goto out;
}
}
@@ -1968,6 +2033,7 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
NvmeCmd *cmd = (NvmeCmd *)&req->cmd;
NvmeNamespace *ns = req->ns;
NvmeZone *zone;
+ uintptr_t *resets;
uint8_t *zd_ext;
uint32_t dw13 = le32_to_cpu(cmd->cdw13);
uint64_t slba = 0;
@@ -2002,7 +2068,7 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
proc_mask = NVME_PROC_CLOSED_ZONES;
}
trace_pci_nvme_open_zone(slba, zone_idx, all);
- status = nvme_do_zone_op(ns, zone, proc_mask, nvme_open_zone);
+ status = nvme_do_zone_op(ns, zone, proc_mask, nvme_open_zone, req);
break;
case NVME_ZONE_ACTION_CLOSE:
@@ -2010,7 +2076,7 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
proc_mask = NVME_PROC_OPENED_ZONES;
}
trace_pci_nvme_close_zone(slba, zone_idx, all);
- status = nvme_do_zone_op(ns, zone, proc_mask, nvme_close_zone);
+ status = nvme_do_zone_op(ns, zone, proc_mask, nvme_close_zone, req);
break;
case NVME_ZONE_ACTION_FINISH:
@@ -2018,24 +2084,32 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
proc_mask = NVME_PROC_OPENED_ZONES | NVME_PROC_CLOSED_ZONES;
}
trace_pci_nvme_finish_zone(slba, zone_idx, all);
- status = nvme_do_zone_op(ns, zone, proc_mask, nvme_finish_zone);
+ status = nvme_do_zone_op(ns, zone, proc_mask, nvme_finish_zone, req);
break;
case NVME_ZONE_ACTION_RESET:
+ resets = (uintptr_t *)&req->opaque;
+
if (all) {
proc_mask = NVME_PROC_OPENED_ZONES | NVME_PROC_CLOSED_ZONES |
NVME_PROC_FULL_ZONES;
}
trace_pci_nvme_reset_zone(slba, zone_idx, all);
- status = nvme_do_zone_op(ns, zone, proc_mask, nvme_reset_zone);
- break;
+
+ *resets = 1;
+
+ status = nvme_do_zone_op(ns, zone, proc_mask, nvme_reset_zone, req);
+
+ (*resets)--;
+
+ return *resets ? NVME_NO_COMPLETE : req->status;
case NVME_ZONE_ACTION_OFFLINE:
if (all) {
proc_mask = NVME_PROC_READ_ONLY_ZONES;
}
trace_pci_nvme_offline_zone(slba, zone_idx, all);
- status = nvme_do_zone_op(ns, zone, proc_mask, nvme_offline_zone);
+ status = nvme_do_zone_op(ns, zone, proc_mask, nvme_offline_zone, req);
break;
case NVME_ZONE_ACTION_SET_ZD_EXT:
diff --git a/hw/block/trace-events b/hw/block/trace-events
index deaacdae5097..78d76b0a71c1 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -49,6 +49,7 @@ pci_nvme_dsm_deallocate(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb
pci_nvme_compare(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32""
pci_nvme_compare_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16""
+pci_nvme_aio_zone_reset_cb(uint16_t cid, uint64_t zslba) "cid %"PRIu16" zslba 0x%"PRIx64""
pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""
--
2.30.0
next prev parent reply other threads:[~2021-01-11 12:54 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-01-11 12:32 [PATCH 0/6] hw/block/nvme: zoned misc fixes Klaus Jensen
2021-01-11 12:32 ` [PATCH 1/6] hw/block/nvme: fix shutdown/reset logic Klaus Jensen
2021-01-14 23:49 ` Keith Busch
2021-01-11 12:32 ` [PATCH 2/6] hw/block/nvme: merge implicitly/explicitly opened processing masks Klaus Jensen
2021-01-11 12:32 ` [PATCH 3/6] hw/block/nvme: enum style fix Klaus Jensen
2021-01-11 12:32 ` Klaus Jensen [this message]
2021-01-18 3:35 ` [PATCH 4/6] hw/block/nvme: zero out zones on reset Dmitry Fomichev
2021-01-11 12:32 ` [PATCH 5/6] hw/block/nvme: add missing string representations for commands Klaus Jensen
2021-01-11 12:32 ` [PATCH 6/6] hw/block/nvme: remove unnecessary check for append Klaus Jensen
2021-01-18 3:35 ` [PATCH 0/6] hw/block/nvme: zoned misc fixes Dmitry Fomichev
2021-01-18 5:58 ` Klaus Jensen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210111123223.76248-5-its@irrelevant.dk \
--to=its@irrelevant.dk \
--cc=dmitry.fomichev@wdc.com \
--cc=k.jensen@samsung.com \
--cc=kbusch@kernel.org \
--cc=kwolf@redhat.com \
--cc=mreitz@redhat.com \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).