From: Klaus Jensen <its@irrelevant.dk>
To: qemu-block@nongnu.org
Cc: "Kevin Wolf" <kwolf@redhat.com>,
	"Niklas Cassel" <niklas.cassel@wdc.com>,
	"Damien Le Moal" <damien.lemoal@wdc.com>,
	"Dmitry Fomichev" <dmitry.fomichev@wdc.com>,
	"Klaus Jensen" <k.jensen@samsung.com>,
	qemu-devel@nongnu.org, "Max Reitz" <mreitz@redhat.com>,
	"Klaus Jensen" <its@irrelevant.dk>,
	"Keith Busch" <kbusch@kernel.org>,
	"Javier Gonzalez" <javier.gonz@samsung.com>,
	"Maxim Levitsky" <mlevitsk@redhat.com>,
	"Philippe Mathieu-Daudé" <philmd@redhat.com>,
	"Matias Bjorling" <matias.bjorling@wdc.com>
Subject: [PATCH 09/10] hw/block/nvme: allow zone excursions
Date: Tue, 30 Jun 2020 12:01:38 +0200	[thread overview]
Message-ID: <20200630100139.1483002-10-its@irrelevant.dk> (raw)
In-Reply-To: <20200630100139.1483002-1-its@irrelevant.dk>

Allow the controller to release active resources by transitioning zones to
the Full state (a Zone Active Excursion). Excursed zones get the Zone
Finished by Controller attribute set and are recorded in the per-namespace
Changed Zone List log page, and the host is notified with a Zone Descriptor
Changed asynchronous event; support for the event is advertised through the
ZDCN bit in the OAES field of the Identify Controller data structure.

Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
 hw/block/nvme-ns.h    |   2 +
 hw/block/nvme.c       | 171 ++++++++++++++++++++++++++++++++++++++----
 hw/block/trace-events |   4 +
 include/block/nvme.h  |  10 +++
 4 files changed, 174 insertions(+), 13 deletions(-)
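
For context, a minimal host-side sketch (not part of this patch) of how the
Changed Zone List log page added here might be consumed. The struct mirrors
the NvmeChangedZoneList layout added to include/block/nvme.h; fetching the
4096-byte log buffer (admin passthru ioctl, nvme-cli, ...) is assumed to
happen elsewhere and only the parsing is shown.

    #include <endian.h>
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define CHANGED_ZONE_LIST_MAX_IDS 511

    /* Mirrors the NvmeChangedZoneList layout added in include/block/nvme.h:
     * a 16-bit identifier count, 6 reserved bytes and up to 511 zone start
     * LBAs, 4096 bytes in total. */
    struct changed_zone_list {
        uint16_t num_ids;
        uint8_t  rsvd2[6];
        uint64_t ids[CHANGED_ZONE_LIST_MAX_IDS];
    };

    _Static_assert(sizeof(struct changed_zone_list) == 4096,
                   "changed zone list log page must be 4096 bytes");

    /* Parse a 4096-byte Changed Zone List log page buffer. Note that on the
     * device side reading the log also clears the list and the pending Zone
     * Descriptor Changed event. */
    static void dump_changed_zones(const void *buf, uint32_t nsid)
    {
        const struct changed_zone_list *log = buf;
        uint16_t num_ids = le16toh(log->num_ids);

        /* the device reports 0xffff when more than 511 zones changed */
        if (num_ids == 0xffff) {
            printf("nsid %"PRIu32": changed zone list overflowed (> %d zones)\n",
                   nsid, CHANGED_ZONE_LIST_MAX_IDS);
            return;
        }

        for (uint16_t i = 0; i < num_ids; i++) {
            printf("nsid %"PRIu32": zone at zslba 0x%"PRIx64" changed\n",
                   nsid, (uint64_t)le64toh(log->ids[i]));
        }
    }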

diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index 6d3a6dc07cd8..6acda5c2cf3f 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -75,6 +75,8 @@ typedef struct NvmeNamespace {
             QTAILQ_HEAD(, NvmeZone) lru_open;
             QTAILQ_HEAD(, NvmeZone) lru_active;
         } resources;
+
+        NvmeChangedZoneList changed_list;
     } zns;
 } NvmeNamespace;
 
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index f7b4618bc805..6db6daa62bc5 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -859,10 +859,11 @@ static void nvme_process_aers(void *opaque)
 
         req = n->aer_reqs[n->outstanding_aers];
 
-        result = (NvmeAerResult *) &req->cqe.dw0;
+        result = (NvmeAerResult *) &req->cqe.qw0;
         result->event_type = event->result.event_type;
         result->event_info = event->result.event_info;
         result->log_page = event->result.log_page;
+        result->nsid = event->result.nsid;
         g_free(event);
 
         req->status = NVME_SUCCESS;
@@ -874,8 +875,9 @@ static void nvme_process_aers(void *opaque)
     }
 }
 
-static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type,
-                               uint8_t event_info, uint8_t log_page)
+static void nvme_enqueue_event(NvmeCtrl *n, NvmeNamespace *ns,
+                               uint8_t event_type, uint8_t event_info,
+                               uint8_t log_page)
 {
     NvmeAsyncEvent *event;
 
@@ -893,6 +895,11 @@ static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type,
         .log_page   = log_page,
     };
 
+    if (event_info == NVME_AER_INFO_NOTICE_ZONE_DESCR_CHANGED) {
+        assert(ns);
+        event->result.nsid = ns->params.nsid;
+    }
+
     QTAILQ_INSERT_TAIL(&n->aer_queue, event, entry);
     n->aer_queued++;
 
@@ -1187,15 +1194,50 @@ static void nvme_update_zone_descr(NvmeNamespace *ns, NvmeRequest *req,
     nvme_req_add_aio(req, aio);
 }
 
+static void nvme_zone_changed(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone)
+{
+    uint16_t num_ids = le16_to_cpu(ns->zns.changed_list.num_ids);
+
+    trace_pci_nvme_zone_changed(ns->params.nsid, nvme_zslba(zone));
+
+    if (num_ids < NVME_CHANGED_ZONE_LIST_MAX_IDS) {
+        ns->zns.changed_list.ids[num_ids] = zone->zd.zslba;
+        ns->zns.changed_list.num_ids = cpu_to_le16(num_ids + 1);
+    } else {
+        memset(&ns->zns.changed_list, 0x0, sizeof(NvmeChangedZoneList));
+        ns->zns.changed_list.num_ids = cpu_to_le16(0xffff);
+    }
+
+    nvme_enqueue_event(n, ns, NVME_AER_TYPE_NOTICE,
+                       NVME_AER_INFO_NOTICE_ZONE_DESCR_CHANGED,
+                       NVME_LOG_CHANGED_ZONE_LIST);
+}
+
 static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns,
                                     NvmeZone *zone, NvmeZoneState to,
                                     NvmeRequest *req);
 
+static void nvme_zone_excursion(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone,
+    NvmeRequest *req)
+{
+    trace_pci_nvme_zone_excursion(ns->params.nsid, nvme_zslba(zone),
+                                  nvme_zs_str(zone));
+
+    assert(nvme_zrm_transition(n, ns, zone, NVME_ZS_ZSF, req) == NVME_SUCCESS);
+
+    NVME_ZA_SET_ZFC(zone->zd.za, 0x1);
+
+    nvme_zone_changed(n, ns, zone);
+
+    nvme_update_zone_info(ns, req, zone);
+}
+
 static uint16_t nvme_zrm_release_open(NvmeCtrl *n, NvmeNamespace *ns,
                                       NvmeRequest *req)
 {
     NvmeZone *candidate;
     NvmeZoneState zs;
+    uint16_t status;
 
     trace_pci_nvme_zone_zrm_release_open(nvme_cid(req), ns->params.nsid);
 
@@ -1216,12 +1258,73 @@ static uint16_t nvme_zrm_release_open(NvmeCtrl *n, NvmeNamespace *ns,
             continue;
         }
 
-        return nvme_zrm_transition(n, ns, candidate, NVME_ZS_ZSC, req);
+        status = nvme_zrm_transition(n, ns, candidate, NVME_ZS_ZSC, req);
+        if (status) {
+            return status;
+        }
+
+        nvme_update_zone_info(ns, req, candidate);
+        return NVME_SUCCESS;
     }
 
     return NVME_TOO_MANY_OPEN_ZONES;
 }
 
+static uint16_t nvme_zrm_release_active(NvmeCtrl *n, NvmeNamespace *ns,
+    NvmeRequest *req)
+{
+    NvmeIdNsZns *id_ns_zns = nvme_ns_id_zoned(ns);
+    NvmeZone *candidate = NULL;
+    NvmeZoneDescriptor *zd;
+    NvmeZoneState zs;
+
+    trace_pci_nvme_zone_zrm_release_active(nvme_cid(req), ns->params.nsid);
+
+    /* bail out if Zone Active Excursions are not permitted */
+    if (!(le16_to_cpu(id_ns_zns->zoc) & NVME_ID_NS_ZNS_ZOC_ZAE)) {
+        trace_pci_nvme_zone_zrm_excursion_not_allowed(nvme_cid(req),
+                                                      ns->params.nsid);
+        return NVME_TOO_MANY_ACTIVE_ZONES;
+    }
+
+    QTAILQ_FOREACH(candidate, &ns->zns.resources.lru_active, lru_entry) {
+        zd = &candidate->zd;
+        zs = nvme_zs(candidate);
+
+        trace_pci_nvme_zone_zrm_candidate(nvme_cid(req), ns->params.nsid,
+                                          nvme_zslba(candidate),
+                                          nvme_wp(candidate), zs);
+
+        goto out;
+    }
+
+    /*
+     * If all zone resources are tied up on open zones we have to transition
+     * one of those to full.
+     */
+    QTAILQ_FOREACH(candidate, &ns->zns.resources.lru_open, lru_entry) {
+        zd = &candidate->zd;
+        zs = nvme_zs(candidate);
+
+        trace_pci_nvme_zone_zrm_candidate(nvme_cid(req), ns->params.nsid,
+                                          nvme_zslba(candidate),
+                                          nvme_wp(candidate), zs);
+
+        /* the zone cannot be finished if it is currently writing */
+        if (candidate->wp_staging != le64_to_cpu(zd->wp)) {
+            continue;
+        }
+
+        break;
+    }
+
+    assert(candidate);
+
+out:
+    nvme_zone_excursion(n, ns, candidate, req);
+    return NVME_SUCCESS;
+}
+
 /*
  * nvme_zrm_transition validates zone state transitions under the constraint of
  * the Number of Active and Open Resources (NAR and NOR) limits as reported by
@@ -1253,8 +1356,10 @@ static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns,
 
         case NVME_ZS_ZSC:
             if (!ns->zns.resources.active) {
-                trace_pci_nvme_err_too_many_active_zones(nvme_cid(req));
-                return NVME_TOO_MANY_ACTIVE_ZONES;
+                status = nvme_zrm_release_active(n, ns, req);
+                if (status) {
+                    return status;
+                }
             }
 
             ns->zns.resources.active--;
@@ -1266,8 +1371,10 @@ static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns,
         case NVME_ZS_ZSIO:
         case NVME_ZS_ZSEO:
             if (!ns->zns.resources.active) {
-                trace_pci_nvme_err_too_many_active_zones(nvme_cid(req));
-                return NVME_TOO_MANY_ACTIVE_ZONES;
+                status = nvme_zrm_release_active(n, ns, req);
+                if (status) {
+                    return status;
+                }
             }
 
             if (!ns->zns.resources.open) {
@@ -2716,6 +2823,41 @@ static uint16_t nvme_effects_log(NvmeCtrl *n, uint32_t buf_len, uint64_t off,
                     DMA_DIRECTION_FROM_DEVICE, req);
 }
 
+static uint16_t nvme_changed_zone_info(NvmeCtrl *n, uint32_t buf_len,
+    uint64_t off, NvmeRequest *req)
+{
+    uint32_t nsid = le32_to_cpu(req->cmd.nsid);
+    NvmeNamespace *ns = nvme_ns(n, nsid);
+    uint32_t trans_len;
+    uint16_t status;
+
+    if (unlikely(!ns)) {
+        return NVME_INVALID_NSID | NVME_DNR;
+    }
+
+    if (!nvme_ns_zoned(ns)) {
+        return NVME_INVALID_LOG_ID | NVME_DNR;
+    }
+
+    if (off > 4096) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    trans_len = MIN(4096 - off, buf_len);
+
+    status = nvme_dma(n, (uint8_t *) &ns->zns.changed_list + off, trans_len,
+                      DMA_DIRECTION_FROM_DEVICE, req);
+    if (status) {
+        return status;
+    }
+
+    memset(&ns->zns.changed_list, 0x0, sizeof(NvmeChangedZoneList));
+
+    nvme_clear_events(n, NVME_AER_TYPE_NOTICE);
+
+    return NVME_SUCCESS;
+}
+
 static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
 {
     NvmeCmd *cmd = &req->cmd;
@@ -2761,6 +2903,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
         return nvme_fw_log_info(n, len, off, req);
     case NVME_LOG_EFFECTS:
         return nvme_effects_log(n, len, off, req);
+    case NVME_LOG_CHANGED_ZONE_LIST:
+        return nvme_changed_zone_info(n, len, off, req);
     default:
         trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid);
         return NVME_INVALID_FIELD | NVME_DNR;
@@ -3359,7 +3503,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
         if (((n->temperature >= n->features.temp_thresh_hi) ||
             (n->temperature <= n->features.temp_thresh_low)) &&
             NVME_AEC_SMART(n->features.async_config) & NVME_SMART_TEMPERATURE) {
-            nvme_enqueue_event(n, NVME_AER_TYPE_SMART,
+            nvme_enqueue_event(n, NULL, NVME_AER_TYPE_SMART,
                                NVME_AER_INFO_SMART_TEMP_THRESH,
                                NVME_LOG_SMART_INFO);
         }
@@ -3924,7 +4068,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
                            " sqid=%"PRIu32", ignoring", qid);
 
             if (n->outstanding_aers) {
-                nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+                nvme_enqueue_event(n, NULL, NVME_AER_TYPE_ERROR,
                                    NVME_AER_INFO_ERR_INVALID_DB_REGISTER,
                                    NVME_LOG_ERROR_INFO);
             }
@@ -3941,7 +4085,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
                            qid, new_head);
 
             if (n->outstanding_aers) {
-                nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+                nvme_enqueue_event(n, NULL, NVME_AER_TYPE_ERROR,
                                    NVME_AER_INFO_ERR_INVALID_DB_VALUE,
                                    NVME_LOG_ERROR_INFO);
             }
@@ -3978,7 +4122,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
                            " sqid=%"PRIu32", ignoring", qid);
 
             if (n->outstanding_aers) {
-                nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+                nvme_enqueue_event(n, NULL, NVME_AER_TYPE_ERROR,
                                    NVME_AER_INFO_ERR_INVALID_DB_REGISTER,
                                    NVME_LOG_ERROR_INFO);
             }
@@ -3995,7 +4139,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
                            qid, new_tail);
 
             if (n->outstanding_aers) {
-                nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+                nvme_enqueue_event(n, NULL, NVME_AER_TYPE_ERROR,
                                    NVME_AER_INFO_ERR_INVALID_DB_VALUE,
                                    NVME_LOG_ERROR_INFO);
             }
@@ -4286,6 +4430,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
     id->mdts = n->params.mdts;
     id->ver = cpu_to_le32(NVME_SPEC_VER);
     id->cntrltype = 0x1;
+    id->oaes = cpu_to_le32(NVME_OAES_ZDCN);
     id->oacs = cpu_to_le16(0);
 
     /*
diff --git a/hw/block/trace-events b/hw/block/trace-events
index 4b4f2ed7605f..c4c80644f782 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -101,6 +101,10 @@ pci_nvme_update_zone_descr(uint16_t cid, uint32_t nsid, uint64_t zslba) "cid %"P
 pci_nvme_zone_zrm_transition(uint16_t cid, uint32_t nsid, uint64_t zslba, uint8_t from, uint8_t to) "cid %"PRIu16" nsid %"PRIu32" zslba 0x%"PRIx64" from 0x%"PRIx8" to 0x%"PRIx8""
 pci_nvme_zone_zrm_candidate(uint16_t cid, uint32_t nsid, uint64_t zslba, uint64_t wp, uint8_t zc) "cid %"PRIu16" nsid %"PRIu32" zslba 0x%"PRIx64" wp 0x%"PRIx64" zc 0x%"PRIx8""
 pci_nvme_zone_zrm_release_open(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32""
+pci_nvme_zone_zrm_release_active(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32""
+pci_nvme_zone_zrm_excursion_not_allowed(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32""
+pci_nvme_zone_changed(uint32_t nsid, uint64_t zslba) "nsid %"PRIu32" zslba 0x%"PRIx64""
+pci_nvme_zone_excursion(uint32_t nsid, uint64_t zslba, const char *zc) "nsid %"PRIu32" zslba 0x%"PRIx64" zc \"%s\""
 pci_nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64""
 pci_nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64""
 pci_nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64""
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 68dac2582b06..688ee5496168 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -778,6 +778,7 @@ typedef struct NvmeDsmRange {
 enum NvmeAsyncEventRequest {
     NVME_AER_TYPE_ERROR                     = 0,
     NVME_AER_TYPE_SMART                     = 1,
+    NVME_AER_TYPE_NOTICE                    = 2,
     NVME_AER_TYPE_IO_SPECIFIC               = 6,
     NVME_AER_TYPE_VENDOR_SPECIFIC           = 7,
     NVME_AER_INFO_ERR_INVALID_DB_REGISTER   = 0,
@@ -993,6 +994,14 @@ typedef struct NvmeZoneDescriptor {
 #define NVME_ZS(zs) (((zs) >> 4) & 0xf)
 #define NVME_ZS_SET(zs, state) ((zs) = ((state) << 4))
 
+#define NVME_CHANGED_ZONE_LIST_MAX_IDS 511
+
+typedef struct NvmeChangedZoneList {
+    uint16_t num_ids;
+    uint8_t  rsvd2[6];
+    uint64_t ids[NVME_CHANGED_ZONE_LIST_MAX_IDS];
+} NvmeChangedZoneList;
+
 #define NVME_ZA_ZFC(za)  ((za) & (1 << 0))
 #define NVME_ZA_FZR(za)  ((za) & (1 << 1))
 #define NVME_ZA_RZR(za)  ((za) & (1 << 2))
@@ -1428,5 +1437,6 @@ static inline void _nvme_check_size(void)
     QEMU_BUILD_BUG_ON(sizeof(NvmeEffectsLog) != 4096);
     QEMU_BUILD_BUG_ON(sizeof(NvmeZoneDescriptor) != 64);
     QEMU_BUILD_BUG_ON(sizeof(NvmeLBAFE) != 16);
+    QEMU_BUILD_BUG_ON(sizeof(NvmeChangedZoneList) != 4096);
 }
 #endif
-- 
2.27.0



Thread overview: 24+ messages
2020-06-30 10:01 [PATCH 00/10] hw/block/nvme: namespace types and zoned namespaces Klaus Jensen
2020-06-30 10:01 ` [PATCH 01/10] hw/block/nvme: support I/O Command Sets Klaus Jensen
2020-06-30 10:01 ` [PATCH 02/10] hw/block/nvme: add zns specific fields and types Klaus Jensen
2020-06-30 10:01 ` [PATCH 03/10] hw/block/nvme: add basic read/write for zoned namespaces Klaus Jensen
2020-06-30 10:01 ` [PATCH 04/10] hw/block/nvme: add the zone management receive command Klaus Jensen
2020-06-30 10:01 ` [PATCH 05/10] hw/block/nvme: add the zone management send command Klaus Jensen
2020-06-30 10:01 ` [PATCH 06/10] hw/block/nvme: add the zone append command Klaus Jensen
2020-06-30 10:01 ` [PATCH 07/10] hw/block/nvme: track and enforce zone resources Klaus Jensen
2020-06-30 10:01 ` [PATCH 08/10] hw/block/nvme: allow open to close transitions by controller Klaus Jensen
2020-06-30 10:01 ` Klaus Jensen [this message]
2020-06-30 10:01 ` [PATCH 10/10] hw/block/nvme: support reset/finish recommended limits Klaus Jensen
2020-06-30 12:59 ` [PATCH 00/10] hw/block/nvme: namespace types and zoned namespaces Niklas Cassel
2020-06-30 14:09   ` Philippe Mathieu-Daudé
2020-06-30 15:42     ` Keith Busch
2020-06-30 20:36       ` Klaus Jensen
2020-07-01 10:34         ` nvme emulation merge process (was: Re: [PATCH 00/10] hw/block/nvme: namespace types and zoned namespaces) Kevin Wolf
2020-07-01 13:18           ` Klaus Jensen
2020-07-01 13:29             ` Maxim Levitsky
2020-07-01 13:57             ` Philippe Mathieu-Daudé
2020-07-01 14:21               ` Keith Busch
2020-07-02 20:29               ` nvme emulation merge process Andrzej Jakowski
2020-07-02 21:13                 ` Keith Busch
2020-06-30 20:29   ` [PATCH 00/10] hw/block/nvme: namespace types and zoned namespaces Klaus Jensen
2020-07-01  1:10     ` Dmitry Fomichev
