From: Clay Mayers <Clay.Mayers@kioxia.com>
To: Klaus Jensen <its@irrelevant.dk>
Cc: "qemu-devel@nongnu.org" <qemu-devel@nongnu.org>,
"Keith Busch" <kbusch@kernel.org>, "Fam Zheng" <fam@euphon.net>,
"Phlippe Mathieu-Daudé" <f4bug@amsat.org>
Subject: RE: [PATCH 2/4] hw/block/nvme: add zone descriptor changed log page
Date: Fri, 21 Oct 2022 15:24:17 +0000 [thread overview]
Message-ID: <9986c4bc157a4d1d961fd6c62a576de4@kioxia.com> (raw)
In-Reply-To: <Y1I7ftMSoKsN7u1T@cormorant.local>
> From: Klaus Jensen <its@irrelevant.dk>
> Sent: Thursday, October 20, 2022 11:26 PM
>
> On Okt 20 17:18, clay.mayers@kioxia.com wrote:
> > From: Clay Mayers <clay.mayers@kioxia.com>
> >
> > Zones marked with ZONE_FINISH_RECOMMENDED are added to the zone
> > descriptor changed log page. Once read with RAE cleared, they are
> > removed from the list.
> >
> > Zones stay in the list regardless of what other states the zones may
> > go through so applications must be aware of ABA issues where finish
> > may be recommended, the zone freed and re-opened and now the attribute
> > is now clear.
> >
> > Signed-off-by: Clay Mayers <clay.mayers@kioxia.com>
> > ---
> > hw/nvme/ctrl.c | 50 ++++++++++++++++++++++++++++++++++++++++++++
> > hw/nvme/ns.c | 6 ++++++
> > hw/nvme/nvme.h | 8 +++++++
> > hw/nvme/trace-events | 1 +
> > include/block/nvme.h | 8 +++++++
> > 5 files changed, 73 insertions(+)
> >
> > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> > index d7e9fae0b0..3ffd0fb469 100644
> > --- a/hw/nvme/ctrl.c
> > +++ b/hw/nvme/ctrl.c
> > @@ -1516,15 +1516,42 @@ static void nvme_clear_events(NvmeCtrl *n,
> uint8_t event_type)
> > }
> > }
> >
> > +static void nvme_zdc_list(NvmeNamespace *ns, NvmeZoneIdList *zlist, bool
> reset)
> > +{
> > + NvmeZdc *zdc;
> > + NvmeZdc *next;
> > + int index = 0;
> > +
> > + QTAILQ_FOREACH_SAFE(zdc, &ns->zdc_list, entry, next) {
> > + if (index >= ARRAY_SIZE(zlist->zids)) {
> > + break;
> > + }
>
> I could've sweared that a previous revision of the spec stated that this
> required `nzid` to be set to 0xffff and the log page be zeroed in this
> case.
>
> However, upon reading it again, that doesnt seem to be the case, so this
> seems to be just fine.
My reading was it only does that if the list can't be returned.
>
> > + zlist->zids[index++] = zdc->zone->d.zslba;
> > + if (reset) {
> > + QTAILQ_REMOVE(&ns->zdc_list, zdc, entry);
> > + zdc->zone->zdc_entry = NULL;
> > + g_free(zdc);
> > + }
> > + }
> > + zlist->nzid = cpu_to_le16(index);
> > +}
> > +
> > static void nvme_check_finish(NvmeNamespace *ns, NvmeZoneListHead
> *list)
> > {
> > int64_t now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> > NvmeZone *zone;
> > + NvmeZdc *zdc;
> >
> > QTAILQ_FOREACH(zone, list, entry) {
> > if (zone->finish_ms <= now) {
> > zone->finish_ms = INT64_MAX;
> > zone->d.za |= NVME_ZA_FINISH_RECOMMENDED;
> > + if (!zone->zdc_entry) {
> > + zdc = g_malloc0(sizeof(*zdc));
> > + zdc->zone = zone;
> > + zone->zdc_entry = zdc;
> > + QTAILQ_INSERT_TAIL(&ns->zdc_list, zdc, entry);
> > + }
> > } else if (zone->finish_ms != INT64_MAX) {
> > timer_mod_anticipate(ns->active_timer, zone->finish_ms);
> > }
> > @@ -4675,6 +4702,27 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n,
> uint8_t csi, uint32_t buf_len,
> > return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req);
> > }
> >
> > +static uint16_t nvme_changed_zones(NvmeCtrl *n, uint8_t rae, uint32_t
> buf_len,
> > + uint64_t off, NvmeRequest *req)
> > +{
> > + NvmeNamespace *ns;
> > + NvmeCmd *cmd = &req->cmd;
> > + uint32_t nsid = le32_to_cpu(cmd->nsid);
> > + NvmeZoneIdList zlist = { };
> > + uint32_t trans_len = MIN(sizeof(zlist) - off, buf_len);
>
> I believe this is unsafe if off >= sizeof(zlist).
Yep, I copied that from how most of the other log pages are
handled, but missed the check for off > sizeof(zlist).
>
> > +
> > + nsid = le32_to_cpu(cmd->nsid);
> > + trace_pci_nvme_changed_zones(nsid);
> > +
> > + ns = nvme_ns(n, nsid);
> > + if (!ns) {
> > + return NVME_INVALID_NSID | NVME_DNR;
> > + }
> > + nvme_zdc_list(ns, &zlist, !rae);
> > +
> > + return nvme_c2h(n, ((uint8_t *)&zlist) + off, trans_len, req);
> > +}
> > +
> > static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
> > {
> > NvmeCmd *cmd = &req->cmd;
> > @@ -4722,6 +4770,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n,
> NvmeRequest *req)
> > return nvme_changed_nslist(n, rae, len, off, req);
> > case NVME_LOG_CMD_EFFECTS:
> > return nvme_cmd_effects(n, csi, len, off, req);
> > + case NVME_LOG_CHANGED_ZONE:
> > + return nvme_changed_zones(n, rae, len, off, req);
> > default:
> > trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid);
> > return NVME_INVALID_FIELD | NVME_DNR;
> > diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
> > index b577f2d8e0..25cd490c99 100644
> > --- a/hw/nvme/ns.c
> > +++ b/hw/nvme/ns.c
> > @@ -240,6 +240,7 @@ static void
> nvme_ns_zoned_init_state(NvmeNamespace *ns)
> > QTAILQ_INIT(&ns->imp_open_zones);
> > QTAILQ_INIT(&ns->closed_zones);
> > QTAILQ_INIT(&ns->full_zones);
> > + QTAILQ_INIT(&ns->zdc_list);
> >
> > zone = ns->zone_array;
> > for (i = 0; i < ns->num_zones; i++, zone++) {
> > @@ -526,8 +527,13 @@ void nvme_ns_shutdown(NvmeNamespace *ns)
> >
> > void nvme_ns_cleanup(NvmeNamespace *ns)
> > {
> > + NvmeZdc *zdc;
> > +
> > if (ns->params.zoned) {
> > timer_free(ns->active_timer);
> > + while ((zdc = QTAILQ_FIRST(&ns->zdc_list))) {
> > + g_free(zdc);
> > + }
> > g_free(ns->id_ns_zoned);
> > g_free(ns->zone_array);
> > g_free(ns->zd_extensions);
> > diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
> > index 9a54dcdb32..ae65226150 100644
> > --- a/hw/nvme/nvme.h
> > +++ b/hw/nvme/nvme.h
> > @@ -32,6 +32,7 @@ QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES >
> NVME_NSID_BROADCAST - 1);
> >
> > typedef struct NvmeCtrl NvmeCtrl;
> > typedef struct NvmeNamespace NvmeNamespace;
> > +typedef struct NvmeZone NvmeZone;
> >
> > #define TYPE_NVME_BUS "nvme-bus"
> > OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)
> > @@ -90,10 +91,16 @@ static inline NvmeNamespace
> *nvme_subsys_ns(NvmeSubsystem *subsys,
> > #define NVME_NS(obj) \
> > OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)
> >
> > +typedef struct NvmeZdc {
> > + QTAILQ_ENTRY(NvmeZdc) entry;
> > + NvmeZone *zone;
> > +} NvmeZdc;
> > +
> > typedef struct NvmeZone {
> > NvmeZoneDescr d;
> > uint64_t w_ptr;
> > int64_t finish_ms;
> > + NvmeZdc *zdc_entry;
> > QTAILQ_ENTRY(NvmeZone) entry;
> > } NvmeZone;
> >
> > @@ -172,6 +179,7 @@ typedef struct NvmeNamespace {
> >
> > int64_t fto_ms;
> > QEMUTimer *active_timer;
> > + QTAILQ_HEAD(, NvmeZdc) zdc_list;
> >
> > NvmeNamespaceParams params;
> >
> > diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events
> > index fccb79f489..337927e607 100644
> > --- a/hw/nvme/trace-events
> > +++ b/hw/nvme/trace-events
> > @@ -64,6 +64,7 @@ pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32""
> > pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16",
> csi=0x%"PRIx8""
> > pci_nvme_identify_cmd_set(void) "identify i/o command set"
> > pci_nvme_identify_ns_descr_list(uint32_t ns) "nsid %"PRIu32""
> > +pci_nvme_changed_zones(uint32_t ns) "nsid %"PRIu32""
> > pci_nvme_get_log(uint16_t cid, uint8_t lid, uint8_t lsp, uint8_t rae, uint32_t
> len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx8" lsp 0x%"PRIx8" rae 0x%"PRIx8"
> len %"PRIu32" off %"PRIu64""
> > pci_nvme_getfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t sel,
> uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" sel
> 0x%"PRIx8" cdw11 0x%"PRIx32""
> > pci_nvme_setfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t save,
> uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" save
> 0x%"PRIx8" cdw11 0x%"PRIx32""
> > diff --git a/include/block/nvme.h b/include/block/nvme.h
> > index 8027b7126b..c747cc4948 100644
> > --- a/include/block/nvme.h
> > +++ b/include/block/nvme.h
> > @@ -1010,6 +1010,7 @@ enum NvmeLogIdentifier {
> > NVME_LOG_FW_SLOT_INFO = 0x03,
> > NVME_LOG_CHANGED_NSLIST = 0x04,
> > NVME_LOG_CMD_EFFECTS = 0x05,
> > + NVME_LOG_CHANGED_ZONE = 0xbf,
> > };
> >
> > typedef struct QEMU_PACKED NvmePSD {
> > @@ -1617,6 +1618,12 @@ typedef enum NvmeVirtualResourceType {
> > NVME_VIRT_RES_INTERRUPT = 0x01,
> > } NvmeVirtualResourceType;
> >
> > +typedef struct QEMU_PACKED NvmeZoneIdList {
> > + uint16_t nzid;
> > + uint16_t rsvd2[3];
> > + uint64_t zids[511];
> > +} NvmeZoneIdList;
> > +
> > static inline void _nvme_check_size(void)
> > {
> > QEMU_BUILD_BUG_ON(sizeof(NvmeBar) != 4096);
> > @@ -1655,5 +1662,6 @@ static inline void _nvme_check_size(void)
> > QEMU_BUILD_BUG_ON(sizeof(NvmePriCtrlCap) != 4096);
> > QEMU_BUILD_BUG_ON(sizeof(NvmeSecCtrlEntry) != 32);
> > QEMU_BUILD_BUG_ON(sizeof(NvmeSecCtrlList) != 4096);
> > + QEMU_BUILD_BUG_ON(sizeof(NvmeZoneIdList) != 4096);
> > }
> > #endif
> > --
> > 2.27.0
> >
>
> --
> One of us - No more doubt, silence or taboo about mental illness.
next prev parent reply other threads:[~2022-10-21 15:44 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-21 0:18 [PATCH 0/4] hw/block/nvme: Implement ZNS finish-zone ZDC AEN clay.mayers
2022-10-21 0:18 ` [PATCH 1/4] hw/block/nvme: add ZONE_FINISH_RECOMMENDED functionality clay.mayers
2022-10-21 6:36 ` Klaus Jensen
2022-10-21 0:18 ` [PATCH 2/4] hw/block/nvme: add zone descriptor changed log page clay.mayers
2022-10-21 6:26 ` Klaus Jensen
2022-10-21 15:24 ` Clay Mayers [this message]
2022-10-21 0:18 ` [PATCH 3/4] hw/block/nvme: supply dw1 for aen result clay.mayers
2022-10-21 5:59 ` Klaus Jensen
2022-10-21 15:25 ` Clay Mayers
2022-10-21 0:18 ` [PATCH 4/4] hw/block/nvme: add zone descriptor changed AEN clay.mayers
2022-10-21 6:41 ` Klaus Jensen
2022-10-21 16:39 ` Clay Mayers
2022-10-21 5:57 ` [PATCH 0/4] hw/block/nvme: Implement ZNS finish-zone ZDC AEN Klaus Jensen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=9986c4bc157a4d1d961fd6c62a576de4@kioxia.com \
--to=clay.mayers@kioxia.com \
--cc=f4bug@amsat.org \
--cc=fam@euphon.net \
--cc=its@irrelevant.dk \
--cc=kbusch@kernel.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).