From: Klaus Jensen <its@irrelevant.dk>
To: Peter Maydell <peter.maydell@linaro.org>, qemu-devel@nongnu.org
Cc: Fam Zheng <fam@euphon.net>, Kevin Wolf <kwolf@redhat.com>,
Eduardo Habkost <ehabkost@redhat.com>,
qemu-block@nongnu.org, Klaus Jensen <k.jensen@samsung.com>,
Max Reitz <mreitz@redhat.com>, Keith Busch <kbusch@kernel.org>,
Stefan Hajnoczi <stefanha@redhat.com>,
Klaus Jensen <its@irrelevant.dk>
Subject: [PULL 16/23] hw/nvme: reimplement zone reset to allow cancellation
Date: Tue, 29 Jun 2021 20:47:36 +0200 [thread overview]
Message-ID: <20210629184743.230173-17-its@irrelevant.dk> (raw)
In-Reply-To: <20210629184743.230173-1-its@irrelevant.dk>
From: Klaus Jensen <k.jensen@samsung.com>
Prior to this patch, the aios associated with zone reset were submitted
anonymously (no reference to the aiocb returned by the blk_aio call was
saved), so they could not be cancelled.
Fix this by resetting the zones one after another, saving a reference to
the aiocb for each reset.
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
---
hw/nvme/ctrl.c | 288 +++++++++++++++++++++++++------------------
hw/nvme/trace-events | 2 +-
2 files changed, 169 insertions(+), 121 deletions(-)
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index b0cc8c44d271..5b550ec1a1b4 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -1691,6 +1691,29 @@ static uint16_t nvme_zrm_close(NvmeNamespace *ns, NvmeZone *zone)
}
}
+static uint16_t nvme_zrm_reset(NvmeNamespace *ns, NvmeZone *zone)
+{
+ switch (nvme_get_zone_state(zone)) {
+ case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+ case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ nvme_aor_dec_open(ns);
+ /* fallthrough */
+ case NVME_ZONE_STATE_CLOSED:
+ nvme_aor_dec_active(ns);
+ /* fallthrough */
+ case NVME_ZONE_STATE_FULL:
+ zone->w_ptr = zone->d.zslba;
+ zone->d.wp = zone->w_ptr;
+ nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EMPTY);
+ /* fallthrough */
+ case NVME_ZONE_STATE_EMPTY:
+ return NVME_SUCCESS;
+
+ default:
+ return NVME_ZONE_INVAL_TRANSITION;
+ }
+}
+
static void nvme_zrm_auto_transition_zone(NvmeNamespace *ns)
{
NvmeZone *zone;
@@ -2020,79 +2043,6 @@ out:
nvme_verify_cb(ctx, ret);
}
-struct nvme_zone_reset_ctx {
- NvmeRequest *req;
- NvmeZone *zone;
-};
-
-static void nvme_aio_zone_reset_complete_cb(void *opaque, int ret)
-{
- struct nvme_zone_reset_ctx *ctx = opaque;
- NvmeRequest *req = ctx->req;
- NvmeNamespace *ns = req->ns;
- NvmeZone *zone = ctx->zone;
- uintptr_t *resets = (uintptr_t *)&req->opaque;
-
- if (ret) {
- nvme_aio_err(req, ret);
- goto out;
- }
-
- switch (nvme_get_zone_state(zone)) {
- case NVME_ZONE_STATE_EXPLICITLY_OPEN:
- case NVME_ZONE_STATE_IMPLICITLY_OPEN:
- nvme_aor_dec_open(ns);
- /* fall through */
- case NVME_ZONE_STATE_CLOSED:
- nvme_aor_dec_active(ns);
- /* fall through */
- case NVME_ZONE_STATE_FULL:
- zone->w_ptr = zone->d.zslba;
- zone->d.wp = zone->w_ptr;
- nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EMPTY);
- /* fall through */
- default:
- break;
- }
-
-out:
- g_free(ctx);
-
- (*resets)--;
-
- if (*resets) {
- return;
- }
-
- nvme_enqueue_req_completion(nvme_cq(req), req);
-}
-
-static void nvme_aio_zone_reset_cb(void *opaque, int ret)
-{
- struct nvme_zone_reset_ctx *ctx = opaque;
- NvmeRequest *req = ctx->req;
- NvmeNamespace *ns = req->ns;
- NvmeZone *zone = ctx->zone;
-
- trace_pci_nvme_aio_zone_reset_cb(nvme_cid(req), zone->d.zslba);
-
- if (ret) {
- goto out;
- }
-
- if (ns->lbaf.ms) {
- int64_t offset = nvme_moff(ns, zone->d.zslba);
-
- blk_aio_pwrite_zeroes(ns->blkconf.blk, offset,
- nvme_m2b(ns, ns->zone_size), BDRV_REQ_MAY_UNMAP,
- nvme_aio_zone_reset_complete_cb, ctx);
- return;
- }
-
-out:
- nvme_aio_zone_reset_complete_cb(opaque, ret);
-}
-
struct nvme_compare_ctx {
struct {
QEMUIOVector iov;
@@ -3395,41 +3345,6 @@ static uint16_t nvme_finish_zone(NvmeNamespace *ns, NvmeZone *zone,
return nvme_zrm_finish(ns, zone);
}
-static uint16_t nvme_reset_zone(NvmeNamespace *ns, NvmeZone *zone,
- NvmeZoneState state, NvmeRequest *req)
-{
- uintptr_t *resets = (uintptr_t *)&req->opaque;
- struct nvme_zone_reset_ctx *ctx;
-
- switch (state) {
- case NVME_ZONE_STATE_EMPTY:
- return NVME_SUCCESS;
- case NVME_ZONE_STATE_EXPLICITLY_OPEN:
- case NVME_ZONE_STATE_IMPLICITLY_OPEN:
- case NVME_ZONE_STATE_CLOSED:
- case NVME_ZONE_STATE_FULL:
- break;
- default:
- return NVME_ZONE_INVAL_TRANSITION;
- }
-
- /*
- * The zone reset aio callback needs to know the zone that is being reset
- * in order to transition the zone on completion.
- */
- ctx = g_new(struct nvme_zone_reset_ctx, 1);
- ctx->req = req;
- ctx->zone = zone;
-
- (*resets)++;
-
- blk_aio_pwrite_zeroes(ns->blkconf.blk, nvme_l2b(ns, zone->d.zslba),
- nvme_l2b(ns, ns->zone_size), BDRV_REQ_MAY_UNMAP,
- nvme_aio_zone_reset_cb, ctx);
-
- return NVME_NO_COMPLETE;
-}
-
static uint16_t nvme_offline_zone(NvmeNamespace *ns, NvmeZone *zone,
NvmeZoneState state, NvmeRequest *req)
{
@@ -3558,12 +3473,144 @@ out:
return status;
}
+typedef struct NvmeZoneResetAIOCB {
+ BlockAIOCB common;
+ BlockAIOCB *aiocb;
+ NvmeRequest *req;
+ QEMUBH *bh;
+ int ret;
+
+ bool all;
+ int idx;
+ NvmeZone *zone;
+} NvmeZoneResetAIOCB;
+
+static void nvme_zone_reset_cancel(BlockAIOCB *aiocb)
+{
+ NvmeZoneResetAIOCB *iocb = container_of(aiocb, NvmeZoneResetAIOCB, common);
+ NvmeRequest *req = iocb->req;
+ NvmeNamespace *ns = req->ns;
+
+ iocb->idx = ns->num_zones;
+
+ iocb->ret = -ECANCELED;
+
+ if (iocb->aiocb) {
+ blk_aio_cancel_async(iocb->aiocb);
+ iocb->aiocb = NULL;
+ }
+}
+
+static const AIOCBInfo nvme_zone_reset_aiocb_info = {
+ .aiocb_size = sizeof(NvmeZoneResetAIOCB),
+ .cancel_async = nvme_zone_reset_cancel,
+};
+
+static void nvme_zone_reset_bh(void *opaque)
+{
+ NvmeZoneResetAIOCB *iocb = opaque;
+
+ iocb->common.cb(iocb->common.opaque, iocb->ret);
+
+ qemu_bh_delete(iocb->bh);
+ iocb->bh = NULL;
+ qemu_aio_unref(iocb);
+}
+
+static void nvme_zone_reset_cb(void *opaque, int ret);
+
+static void nvme_zone_reset_epilogue_cb(void *opaque, int ret)
+{
+ NvmeZoneResetAIOCB *iocb = opaque;
+ NvmeRequest *req = iocb->req;
+ NvmeNamespace *ns = req->ns;
+ int64_t moff;
+ int count;
+
+ if (ret < 0) {
+ nvme_zone_reset_cb(iocb, ret);
+ return;
+ }
+
+ if (!ns->lbaf.ms) {
+ nvme_zone_reset_cb(iocb, 0);
+ return;
+ }
+
+ moff = nvme_moff(ns, iocb->zone->d.zslba);
+ count = nvme_m2b(ns, ns->zone_size);
+
+ iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, moff, count,
+ BDRV_REQ_MAY_UNMAP,
+ nvme_zone_reset_cb, iocb);
+ return;
+}
+
+static void nvme_zone_reset_cb(void *opaque, int ret)
+{
+ NvmeZoneResetAIOCB *iocb = opaque;
+ NvmeRequest *req = iocb->req;
+ NvmeNamespace *ns = req->ns;
+
+ if (ret < 0) {
+ iocb->ret = ret;
+ goto done;
+ }
+
+ if (iocb->zone) {
+ nvme_zrm_reset(ns, iocb->zone);
+
+ if (!iocb->all) {
+ goto done;
+ }
+ }
+
+ while (iocb->idx < ns->num_zones) {
+ NvmeZone *zone = &ns->zone_array[iocb->idx++];
+
+ switch (nvme_get_zone_state(zone)) {
+ case NVME_ZONE_STATE_EMPTY:
+ if (!iocb->all) {
+ goto done;
+ }
+
+ continue;
+
+ case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+ case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ case NVME_ZONE_STATE_CLOSED:
+ case NVME_ZONE_STATE_FULL:
+ iocb->zone = zone;
+ break;
+
+ default:
+ continue;
+ }
+
+ trace_pci_nvme_zns_zone_reset(zone->d.zslba);
+
+ iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk,
+ nvme_l2b(ns, zone->d.zslba),
+ nvme_l2b(ns, ns->zone_size),
+ BDRV_REQ_MAY_UNMAP,
+ nvme_zone_reset_epilogue_cb,
+ iocb);
+ return;
+ }
+
+done:
+ iocb->aiocb = NULL;
+ if (iocb->bh) {
+ qemu_bh_schedule(iocb->bh);
+ }
+}
+
static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
{
NvmeCmd *cmd = (NvmeCmd *)&req->cmd;
NvmeNamespace *ns = req->ns;
NvmeZone *zone;
- uintptr_t *resets;
+ NvmeZoneResetAIOCB *iocb;
uint8_t *zd_ext;
uint32_t dw13 = le32_to_cpu(cmd->cdw13);
uint64_t slba = 0;
@@ -3574,7 +3621,7 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
enum NvmeZoneProcessingMask proc_mask = NVME_PROC_CURRENT_ZONE;
action = dw13 & 0xff;
- all = dw13 & 0x100;
+ all = !!(dw13 & 0x100);
req->status = NVME_SUCCESS;
@@ -3618,21 +3665,22 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
break;
case NVME_ZONE_ACTION_RESET:
- resets = (uintptr_t *)&req->opaque;
-
- if (all) {
- proc_mask = NVME_PROC_OPENED_ZONES | NVME_PROC_CLOSED_ZONES |
- NVME_PROC_FULL_ZONES;
- }
trace_pci_nvme_reset_zone(slba, zone_idx, all);
- *resets = 1;
+ iocb = blk_aio_get(&nvme_zone_reset_aiocb_info, ns->blkconf.blk,
+ nvme_misc_cb, req);
- status = nvme_do_zone_op(ns, zone, proc_mask, nvme_reset_zone, req);
+ iocb->req = req;
+ iocb->bh = qemu_bh_new(nvme_zone_reset_bh, iocb);
+ iocb->ret = 0;
+ iocb->all = all;
+ iocb->idx = zone_idx;
+ iocb->zone = NULL;
- (*resets)--;
+ req->aiocb = &iocb->common;
+ nvme_zone_reset_cb(iocb, 0);
- return *resets ? NVME_NO_COMPLETE : req->status;
+ return NVME_NO_COMPLETE;
case NVME_ZONE_ACTION_OFFLINE:
if (all) {
diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events
index cd65f8b28895..dc00c2860db7 100644
--- a/hw/nvme/trace-events
+++ b/hw/nvme/trace-events
@@ -44,7 +44,6 @@ pci_nvme_compare_data_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_compare_mdata_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_aio_copy_in_cb(uint16_t cid) "cid %"PRIu16""
-pci_nvme_aio_zone_reset_cb(uint16_t cid, uint64_t zslba) "cid %"PRIu16" zslba 0x%"PRIx64""
pci_nvme_aio_flush_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
@@ -100,6 +99,7 @@ pci_nvme_open_zone(uint64_t slba, uint32_t zone_idx, int all) "open zone, slba=%
pci_nvme_close_zone(uint64_t slba, uint32_t zone_idx, int all) "close zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
pci_nvme_finish_zone(uint64_t slba, uint32_t zone_idx, int all) "finish zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
pci_nvme_reset_zone(uint64_t slba, uint32_t zone_idx, int all) "reset zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
+pci_nvme_zns_zone_reset(uint64_t zslba) "zslba 0x%"PRIx64""
pci_nvme_offline_zone(uint64_t slba, uint32_t zone_idx, int all) "offline zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
pci_nvme_set_descriptor_extension(uint64_t slba, uint32_t zone_idx) "set zone descriptor extension, slba=%"PRIu64", idx=%"PRIu32""
pci_nvme_zd_extension_set(uint32_t zone_idx) "set descriptor extension for zone_idx=%"PRIu32""
--
2.32.0
next prev parent reply other threads:[~2021-06-29 19:23 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-06-29 18:47 [PULL 00/23] hw/nvme patches Klaus Jensen
2021-06-29 18:47 ` [PULL 01/23] hw/nvme: fix style Klaus Jensen
2021-06-29 18:47 ` [PULL 02/23] hw/nvme: add identify namespace flbas/mc enums Klaus Jensen
2021-06-29 18:47 ` [PULL 03/23] hw/nvme: fix lbaf formats initialization Klaus Jensen
2021-06-29 18:47 ` [PULL 04/23] hw/nvme: add param to control auto zone transitioning to zone state closed Klaus Jensen
2021-06-29 18:47 ` [PULL 05/23] hw/nvme: fix csi field for cns 0x00 and 0x11 Klaus Jensen
2021-06-29 18:47 ` [PULL 06/23] hw/nvme: namespace parameter for EUI-64 Klaus Jensen
2021-08-09 10:18 ` Peter Maydell
2021-08-09 10:44 ` Klaus Jensen
2021-06-29 18:47 ` [PULL 07/23] hw/nvme: default for namespace EUI-64 Klaus Jensen
2021-06-29 18:47 ` [PULL 08/23] hw/nvme: reimplement flush to allow cancellation Klaus Jensen
2021-06-29 18:47 ` [PULL 09/23] hw/nvme: add nvme_block_status_all helper Klaus Jensen
2021-06-29 18:47 ` [PULL 10/23] hw/nvme: reimplement dsm to allow cancellation Klaus Jensen
2021-06-29 18:47 ` [PULL 11/23] hw/nvme: save reftag when generating pi Klaus Jensen
2021-06-29 18:47 ` [PULL 12/23] hw/nvme: remove assert from nvme_get_zone_by_slba Klaus Jensen
2021-06-29 18:47 ` [PULL 13/23] hw/nvme: use prinfo directly in nvme_check_prinfo and nvme_dif_check Klaus Jensen
2021-06-29 18:47 ` [PULL 14/23] hw/nvme: add dw0/1 to the req completion trace event Klaus Jensen
2021-06-29 18:47 ` [PULL 15/23] hw/nvme: reimplement the copy command to allow aio cancellation Klaus Jensen
2021-06-29 18:47 ` Klaus Jensen [this message]
2021-06-29 18:47 ` [PULL 17/23] hw/nvme: reimplement format nvm to allow cancellation Klaus Jensen
2021-06-29 18:47 ` [PULL 18/23] Partially revert "hw/block/nvme: drain namespaces on sq deletion" Klaus Jensen
2021-06-29 18:47 ` [PULL 19/23] hw/nvme: fix endianess conversion and add controller list Klaus Jensen
2021-06-29 18:47 ` [PULL 20/23] hw/nvme: documentation fix Klaus Jensen
2021-06-29 18:47 ` [PULL 21/23] hw/nvme: fix missing check for PMR capability Klaus Jensen
2021-06-29 18:47 ` [PULL 22/23] hw/nvme: fix pin-based interrupt behavior (again) Klaus Jensen
2021-06-29 18:47 ` [PULL 23/23] hw/nvme: add 'zoned.zasl' to documentation Klaus Jensen
2021-07-01 9:07 ` [PULL 00/23] hw/nvme patches Peter Maydell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210629184743.230173-17-its@irrelevant.dk \
--to=its@irrelevant.dk \
--cc=ehabkost@redhat.com \
--cc=fam@euphon.net \
--cc=k.jensen@samsung.com \
--cc=kbusch@kernel.org \
--cc=kwolf@redhat.com \
--cc=mreitz@redhat.com \
--cc=peter.maydell@linaro.org \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).