From: Sam Li <faithilikerun@gmail.com>
To: qemu-devel@nongnu.org
Cc: qemu-block@nongnu.org, "Klaus Jensen" <its@irrelevant.dk>,
"Markus Armbruster" <armbru@redhat.com>,
"Hanna Reitz" <hreitz@redhat.com>, "Peter Xu" <peterx@redhat.com>,
"David Hildenbrand" <david@redhat.com>,
dlemoal@kernel.org, "Keith Busch" <kbusch@kernel.org>,
"Philippe Mathieu-Daudé" <philmd@linaro.org>,
"Eric Blake" <eblake@redhat.com>,
hare@suse.de, "Kevin Wolf" <kwolf@redhat.com>,
stefanha@redhat.com, "Paolo Bonzini" <pbonzini@redhat.com>,
dmitry.fomichev@wdc.com, "Sam Li" <faithilikerun@gmail.com>
Subject: [RFC 5/5] hw/nvme: make ZDED persistent
Date: Wed, 16 Aug 2023 15:08:42 +0800 [thread overview]
Message-ID: <20230816070842.5423-2-faithilikerun@gmail.com> (raw)
In-Reply-To: <20230816070842.5423-1-faithilikerun@gmail.com>
Zone descriptor extension data (ZDED) is currently not persistent across
QEMU restarts. The zone descriptor extension valid bit (ZDEV) is part of
the zone attributes and is set to one when ZDED is associated with the
zone.
With a qcow2-ZNS file as the backing file, the NVMe ZNS device stores
the zone attributes in the eight bits that follow the zoned bit of each
zone's write pointer. The ZDED is stored as part of the zoned metadata,
just as the write pointers are.
Signed-off-by: Sam Li <faithilikerun@gmail.com>
---
block/qcow2.c | 44 +++++++++++++++++++++++++++++++++++-
hw/nvme/ctrl.c | 6 +----
include/block/block-common.h | 1 +
3 files changed, 45 insertions(+), 6 deletions(-)
diff --git a/block/qcow2.c b/block/qcow2.c
index 5a038792f1..ac5ecef559 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -25,6 +25,7 @@
#include "qemu/osdep.h"
#include "block/qdict.h"
+#include "block/nvme.h"
#include "sysemu/block-backend.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
@@ -214,6 +215,17 @@ static inline void qcow2_set_wp(uint64_t *wp, BlockZoneState zs)
*wp = addr;
}
+static inline void qcow2_set_za(uint64_t *wp, uint8_t za)
+{
+    /*
+     * A zone attribute is one byte wide. It lives in the field that
+     * follows the zoned bit of the write pointer entry, so shift it up
+     * by 51 and OR it in; bits that are already set in *wp are kept.
+     */
+    *wp |= (uint64_t)za << 51;
+}
+
/*
* File wp tracking: reset zone, finish zone and append zone can
* change the value of write pointer. All zone operations will change
@@ -308,7 +320,7 @@ static int qcow2_check_open(BlockDriverState *bs)
/*
* The zoned device has limited zone resources of open, closed, active
- * zones.
+ * zones. Check if we can manage a zone without exceeding those limits.
*/
static int qcow2_check_zone_resources(BlockDriverState *bs,
BlockZoneState zs)
@@ -4801,6 +4813,33 @@ unlock:
return ret;
}
+/*
+ * Flag zone @index as having valid zone descriptor extension data.
+ *
+ * Only a zone in state BLK_ZS_EMPTY is accepted. Once
+ * qcow2_check_zone_resources() passes, NVME_ZA_ZD_EXT_VALID is OR-ed into
+ * the zone's write pointer entry, the entry is written out via
+ * qcow2_write_wp_at() with BLK_ZO_CLOSE, and the closed-zone counter is
+ * bumped.
+ *
+ * s->wps->colock is held for the whole operation and released on every
+ * exit path.
+ *
+ * Returns the (non-negative) result of qcow2_write_wp_at() on success, a
+ * negative value if the resource check or the write fails, and
+ * NVME_ZONE_INVAL_TRANSITION if the zone is not empty.
+ */
+static int qcow2_zns_set_zded(BlockDriverState *bs, uint32_t index)
+{
+    BDRVQcow2State *s = bs->opaque;
+    uint64_t *wp = &s->wps->wp[index];
+    BlockZoneState zs;
+    int ret;
+
+    qemu_co_mutex_lock(&s->wps->colock);
+    zs = qcow2_get_zs(*wp);
+    if (zs != BLK_ZS_EMPTY) {
+        /*
+         * NOTE(review): this is an NVMe status code, unlike the negative
+         * values returned on the other failure paths - confirm that the
+         * caller handles it.
+         */
+        ret = NVME_ZONE_INVAL_TRANSITION;
+        goto unlock;
+    }
+
+    ret = qcow2_check_zone_resources(bs, zs);
+    if (ret < 0) {
+        goto unlock;
+    }
+
+    qcow2_set_za(wp, NVME_ZA_ZD_EXT_VALID);
+    ret = qcow2_write_wp_at(bs, wp, index, BLK_ZO_CLOSE);
+    if (ret < 0) {
+        error_report("Failed to set zone extension at 0x%" PRIx64 "", *wp);
+        goto unlock;
+    }
+    s->nr_zones_closed++;
+
+unlock:
+    /* Every path that took colock above must drop it before returning. */
+    qemu_co_mutex_unlock(&s->wps->colock);
+    return ret;
+}
+
static int coroutine_fn qcow2_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
int64_t offset, int64_t len)
{
@@ -4857,6 +4896,9 @@ static int coroutine_fn qcow2_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
case BLK_ZO_OFFLINE:
ret = qcow2_write_wp_at(bs, &wps->wp[index], index, BLK_ZO_OFFLINE);
break;
+ case BLK_ZO_SET_ZDED:
+ ret = qcow2_zns_set_zded(bs, index);
+ break;
default:
error_report("Unsupported zone op: 0x%x", op);
ret = -ENOTSUP;
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 3932b516ed..fcd774e3f7 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -3425,11 +3425,6 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
NvmeNamespace *ns = req->ns;
NvmeZoneMgmtAIOCB *iocb;
uint64_t slba = 0;
- uint64_t offset;
- BlockBackend *blk = ns->blkconf.blk;
- uint32_t zone_size = blk_get_zone_size(blk);
- uint64_t size = zone_size * blk_get_nr_zones(blk);
- int64_t len;
uint32_t zone_idx = 0;
uint16_t status;
uint8_t action = cmd->zsa;
@@ -3485,6 +3480,7 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
break;
case NVME_ZONE_ACTION_SET_ZD_EXT:
+ op = BLK_ZO_SET_ZDED;
int zd_ext_size = blk_get_zd_ext_size(blk);
trace_pci_nvme_set_descriptor_extension(slba, zone_idx);
if (all || !zd_ext_size) {
diff --git a/include/block/block-common.h b/include/block/block-common.h
index 0cbed607a8..b369e77607 100644
--- a/include/block/block-common.h
+++ b/include/block/block-common.h
@@ -84,6 +84,7 @@ typedef enum BlockZoneOp {
BLK_ZO_FINISH,
BLK_ZO_RESET,
BLK_ZO_OFFLINE,
+ BLK_ZO_SET_ZDED,
} BlockZoneOp;
typedef enum BlockZoneModel {
--
2.40.1
prev parent reply other threads:[~2023-08-16 7:09 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-08-16 7:08 [RFC 4/5] hw/nvme: refactor zone append writes using block layer APIs Sam Li
2023-08-16 7:08 ` Sam Li [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230816070842.5423-2-faithilikerun@gmail.com \
--to=faithilikerun@gmail.com \
--cc=armbru@redhat.com \
--cc=david@redhat.com \
--cc=dlemoal@kernel.org \
--cc=dmitry.fomichev@wdc.com \
--cc=eblake@redhat.com \
--cc=hare@suse.de \
--cc=hreitz@redhat.com \
--cc=its@irrelevant.dk \
--cc=kbusch@kernel.org \
--cc=kwolf@redhat.com \
--cc=pbonzini@redhat.com \
--cc=peterx@redhat.com \
--cc=philmd@linaro.org \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).