qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Klaus Jensen <its@irrelevant.dk>
To: clay.mayers@kioxia.com
Cc: qemu-devel@nongnu.org, "Keith Busch" <kbusch@kernel.org>,
	"Fam Zheng" <fam@euphon.net>,
	"Phlippe Mathieu-Daudé" <f4bug@amsat.org>
Subject: Re: [PATCH 2/4] hw/block/nvme: add zone descriptor changed log page
Date: Fri, 21 Oct 2022 08:26:06 +0200	[thread overview]
Message-ID: <Y1I7ftMSoKsN7u1T@cormorant.local> (raw)
In-Reply-To: <20221021001835.942642-3-clay.mayers@kioxia.com>

[-- Attachment #1: Type: text/plain, Size: 8582 bytes --]

On Okt 20 17:18, clay.mayers@kioxia.com wrote:
> From: Clay Mayers <clay.mayers@kioxia.com>
> 
> Zones marked with ZONE_FINISH_RECOMMENDED are added to the zone
> descriptor changed log page.  Once read with RAE cleared, they are
> removed from the list.
> 
> Zones stay in the list regardless of what other states the zones may
> go through so applications must be aware of ABA issues where finish
> may be recommended, the zone freed and re-opened and now the attribute
> is now clear.
> 
> Signed-off-by: Clay Mayers <clay.mayers@kioxia.com>
> ---
>  hw/nvme/ctrl.c       | 50 ++++++++++++++++++++++++++++++++++++++++++++
>  hw/nvme/ns.c         |  6 ++++++
>  hw/nvme/nvme.h       |  8 +++++++
>  hw/nvme/trace-events |  1 +
>  include/block/nvme.h |  8 +++++++
>  5 files changed, 73 insertions(+)
> 
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index d7e9fae0b0..3ffd0fb469 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -1516,15 +1516,42 @@ static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type)
>      }
>  }
>  
> +static void nvme_zdc_list(NvmeNamespace *ns, NvmeZoneIdList *zlist, bool reset)
> +{
> +    NvmeZdc *zdc;
> +    NvmeZdc *next;
> +    int index = 0;
> +
> +    QTAILQ_FOREACH_SAFE(zdc, &ns->zdc_list, entry, next) {
> +        if (index >= ARRAY_SIZE(zlist->zids)) {
> +            break;
> +        }

I could've sweared that a previous revision of the spec stated that this
required `nzid` to be set to 0xffff and the log page be zeroed in this
case.

However, upon reading it again, that doesnt seem to be the case, so this
seems to be just fine.

> +        zlist->zids[index++] = zdc->zone->d.zslba;
> +        if (reset) {
> +            QTAILQ_REMOVE(&ns->zdc_list, zdc, entry);
> +            zdc->zone->zdc_entry = NULL;
> +            g_free(zdc);
> +        }
> +    }
> +    zlist->nzid = cpu_to_le16(index);
> +}
> +
>  static void nvme_check_finish(NvmeNamespace *ns, NvmeZoneListHead *list)
>  {
>      int64_t now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
>      NvmeZone *zone;
> +    NvmeZdc  *zdc;
>  
>      QTAILQ_FOREACH(zone, list, entry) {
>          if (zone->finish_ms <= now) {
>              zone->finish_ms = INT64_MAX;
>              zone->d.za |= NVME_ZA_FINISH_RECOMMENDED;
> +            if (!zone->zdc_entry) {
> +                zdc = g_malloc0(sizeof(*zdc));
> +                zdc->zone = zone;
> +                zone->zdc_entry = zdc;
> +                QTAILQ_INSERT_TAIL(&ns->zdc_list, zdc, entry);
> +            }
>          } else if (zone->finish_ms != INT64_MAX) {
>              timer_mod_anticipate(ns->active_timer, zone->finish_ms);
>          }
> @@ -4675,6 +4702,27 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len,
>      return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req);
>  }
>  
> +static uint16_t nvme_changed_zones(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
> +                                    uint64_t off, NvmeRequest *req)
> +{
> +    NvmeNamespace *ns;
> +    NvmeCmd *cmd = &req->cmd;
> +    uint32_t nsid = le32_to_cpu(cmd->nsid);
> +    NvmeZoneIdList zlist = { };
> +    uint32_t trans_len = MIN(sizeof(zlist) - off, buf_len);

I believe this is unsafe if off >= sizeof(zlist).

> +
> +    nsid = le32_to_cpu(cmd->nsid);
> +    trace_pci_nvme_changed_zones(nsid);
> +
> +    ns = nvme_ns(n, nsid);
> +    if (!ns) {
> +        return NVME_INVALID_NSID | NVME_DNR;
> +    }
> +    nvme_zdc_list(ns, &zlist, !rae);
> +
> +    return nvme_c2h(n, ((uint8_t *)&zlist) + off, trans_len, req);
> +}
> +
>  static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
>  {
>      NvmeCmd *cmd = &req->cmd;
> @@ -4722,6 +4770,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
>          return nvme_changed_nslist(n, rae, len, off, req);
>      case NVME_LOG_CMD_EFFECTS:
>          return nvme_cmd_effects(n, csi, len, off, req);
> +    case NVME_LOG_CHANGED_ZONE:
> +        return nvme_changed_zones(n, rae, len, off, req);
>      default:
>          trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid);
>          return NVME_INVALID_FIELD | NVME_DNR;
> diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
> index b577f2d8e0..25cd490c99 100644
> --- a/hw/nvme/ns.c
> +++ b/hw/nvme/ns.c
> @@ -240,6 +240,7 @@ static void nvme_ns_zoned_init_state(NvmeNamespace *ns)
>      QTAILQ_INIT(&ns->imp_open_zones);
>      QTAILQ_INIT(&ns->closed_zones);
>      QTAILQ_INIT(&ns->full_zones);
> +    QTAILQ_INIT(&ns->zdc_list);
>  
>      zone = ns->zone_array;
>      for (i = 0; i < ns->num_zones; i++, zone++) {
> @@ -526,8 +527,13 @@ void nvme_ns_shutdown(NvmeNamespace *ns)
>  
>  void nvme_ns_cleanup(NvmeNamespace *ns)
>  {
> +    NvmeZdc *zdc;
> +
>      if (ns->params.zoned) {
>          timer_free(ns->active_timer);
> +        while ((zdc = QTAILQ_FIRST(&ns->zdc_list))) {
> +            g_free(zdc);
> +        }
>          g_free(ns->id_ns_zoned);
>          g_free(ns->zone_array);
>          g_free(ns->zd_extensions);
> diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
> index 9a54dcdb32..ae65226150 100644
> --- a/hw/nvme/nvme.h
> +++ b/hw/nvme/nvme.h
> @@ -32,6 +32,7 @@ QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);
>  
>  typedef struct NvmeCtrl NvmeCtrl;
>  typedef struct NvmeNamespace NvmeNamespace;
> +typedef struct NvmeZone NvmeZone;
>  
>  #define TYPE_NVME_BUS "nvme-bus"
>  OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)
> @@ -90,10 +91,16 @@ static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
>  #define NVME_NS(obj) \
>      OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)
>  
> +typedef struct NvmeZdc {
> +    QTAILQ_ENTRY(NvmeZdc) entry;
> +    NvmeZone *zone;
> +} NvmeZdc;
> +
>  typedef struct NvmeZone {
>      NvmeZoneDescr   d;
>      uint64_t        w_ptr;
>      int64_t         finish_ms;
> +    NvmeZdc         *zdc_entry;
>      QTAILQ_ENTRY(NvmeZone) entry;
>  } NvmeZone;
>  
> @@ -172,6 +179,7 @@ typedef struct NvmeNamespace {
>  
>      int64_t         fto_ms;
>      QEMUTimer       *active_timer;
> +    QTAILQ_HEAD(, NvmeZdc) zdc_list;
>  
>      NvmeNamespaceParams params;
>  
> diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events
> index fccb79f489..337927e607 100644
> --- a/hw/nvme/trace-events
> +++ b/hw/nvme/trace-events
> @@ -64,6 +64,7 @@ pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32""
>  pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", csi=0x%"PRIx8""
>  pci_nvme_identify_cmd_set(void) "identify i/o command set"
>  pci_nvme_identify_ns_descr_list(uint32_t ns) "nsid %"PRIu32""
> +pci_nvme_changed_zones(uint32_t ns) "nsid %"PRIu32""
>  pci_nvme_get_log(uint16_t cid, uint8_t lid, uint8_t lsp, uint8_t rae, uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx8" lsp 0x%"PRIx8" rae 0x%"PRIx8" len %"PRIu32" off %"PRIu64""
>  pci_nvme_getfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t sel, uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" sel 0x%"PRIx8" cdw11 0x%"PRIx32""
>  pci_nvme_setfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t save, uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" save 0x%"PRIx8" cdw11 0x%"PRIx32""
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index 8027b7126b..c747cc4948 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -1010,6 +1010,7 @@ enum NvmeLogIdentifier {
>      NVME_LOG_FW_SLOT_INFO   = 0x03,
>      NVME_LOG_CHANGED_NSLIST = 0x04,
>      NVME_LOG_CMD_EFFECTS    = 0x05,
> +    NVME_LOG_CHANGED_ZONE   = 0xbf,
>  };
>  
>  typedef struct QEMU_PACKED NvmePSD {
> @@ -1617,6 +1618,12 @@ typedef enum NvmeVirtualResourceType {
>      NVME_VIRT_RES_INTERRUPT     = 0x01,
>  } NvmeVirtualResourceType;
>  
> +typedef struct QEMU_PACKED NvmeZoneIdList {
> +    uint16_t nzid;
> +    uint16_t rsvd2[3];
> +    uint64_t zids[511];
> +} NvmeZoneIdList;
> +
>  static inline void _nvme_check_size(void)
>  {
>      QEMU_BUILD_BUG_ON(sizeof(NvmeBar) != 4096);
> @@ -1655,5 +1662,6 @@ static inline void _nvme_check_size(void)
>      QEMU_BUILD_BUG_ON(sizeof(NvmePriCtrlCap) != 4096);
>      QEMU_BUILD_BUG_ON(sizeof(NvmeSecCtrlEntry) != 32);
>      QEMU_BUILD_BUG_ON(sizeof(NvmeSecCtrlList) != 4096);
> +    QEMU_BUILD_BUG_ON(sizeof(NvmeZoneIdList) != 4096);
>  }
>  #endif
> -- 
> 2.27.0
> 

-- 
One of us - No more doubt, silence or taboo about mental illness.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

  reply	other threads:[~2022-10-21  6:32 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-21  0:18 [PATCH 0/4] hw/block/nvme: Implement ZNS finish-zone ZDC AEN clay.mayers
2022-10-21  0:18 ` [PATCH 1/4] hw/block/nvme: add ZONE_FINISH_RECOMMENDED functionality clay.mayers
2022-10-21  6:36   ` Klaus Jensen
2022-10-21  0:18 ` [PATCH 2/4] hw/block/nvme: add zone descriptor changed log page clay.mayers
2022-10-21  6:26   ` Klaus Jensen [this message]
2022-10-21 15:24     ` Clay Mayers
2022-10-21  0:18 ` [PATCH 3/4] hw/block/nvme: supply dw1 for aen result clay.mayers
2022-10-21  5:59   ` Klaus Jensen
2022-10-21 15:25     ` Clay Mayers
2022-10-21  0:18 ` [PATCH 4/4] hw/block/nvme: add zone descriptor changed AEN clay.mayers
2022-10-21  6:41   ` Klaus Jensen
2022-10-21 16:39     ` Clay Mayers
2022-10-21  5:57 ` [PATCH 0/4] hw/block/nvme: Implement ZNS finish-zone ZDC AEN Klaus Jensen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Y1I7ftMSoKsN7u1T@cormorant.local \
    --to=its@irrelevant.dk \
    --cc=clay.mayers@kioxia.com \
    --cc=f4bug@amsat.org \
    --cc=fam@euphon.net \
    --cc=kbusch@kernel.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).