From: Klaus Jensen <its@irrelevant.dk>
To: qemu-block@nongnu.org
Cc: "Kevin Wolf" <kwolf@redhat.com>,
"Niklas Cassel" <niklas.cassel@wdc.com>,
"Damien Le Moal" <damien.lemoal@wdc.com>,
"Dmitry Fomichev" <dmitry.fomichev@wdc.com>,
"Klaus Jensen" <k.jensen@samsung.com>,
qemu-devel@nongnu.org, "Max Reitz" <mreitz@redhat.com>,
"Klaus Jensen" <its@irrelevant.dk>,
"Keith Busch" <kbusch@kernel.org>,
"Javier Gonzalez" <javier.gonz@samsung.com>,
"Maxim Levitsky" <mlevitsk@redhat.com>,
"Philippe Mathieu-Daudé" <philmd@redhat.com>,
"Matias Bjorling" <matias.bjorling@wdc.com>
Subject: [PATCH 10/10] hw/block/nvme: support reset/finish recommended limits
Date: Tue, 30 Jun 2020 12:01:39 +0200 [thread overview]
Message-ID: <20200630100139.1483002-11-its@irrelevant.dk> (raw)
In-Reply-To: <20200630100139.1483002-1-its@irrelevant.dk>
Add the rrl and frl device parameters. The parameters specify the number
of seconds before the device may perform an internal operation to
"clear" the Reset Zone Recommended and Finish Zone Recommended
attributes respectively.
When the attributes are set is governed by the rrld and frld parameters
(Reset/Finish Recommended Limit Delay). The Reset Zone Recommended Delay
starts when a zone becomes full. The Finish Zone Recommended Delay
starts when the zone is first activated. When the limits are reached,
the attributes are cleared again and the process is restarted.
If zone excursions are enabled (they are by default), when the Finish
Recommended Limit is reached, the device will finish the zone.
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
hw/block/nvme-ns.c | 105 ++++++++++++++++++++++++++++++++++++++++++
hw/block/nvme-ns.h | 13 ++++++
hw/block/nvme.c | 49 +++++++++++++-------
hw/block/nvme.h | 7 +++
hw/block/trace-events | 3 +-
5 files changed, 160 insertions(+), 17 deletions(-)
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index 3b9fa91c7af8..7f9b1d526197 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -25,6 +25,7 @@
#include "hw/qdev-properties.h"
#include "hw/qdev-core.h"
+#include "trace.h"
#include "nvme.h"
#include "nvme-ns.h"
@@ -48,6 +49,91 @@ const char *nvme_zs_to_str(NvmeZoneState zs)
return NULL;
}
+static void nvme_ns_process_timer(void *opaque)
+{
+ NvmeNamespace *ns = opaque;
+ BusState *s = qdev_get_parent_bus(&ns->parent_obj);
+ NvmeCtrl *n = NVME(s->parent);
+ NvmeZone *zone;
+
+ trace_pci_nvme_ns_process_timer(ns->params.nsid);
+
+ int64_t next_timer = INT64_MAX, now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+
+ QTAILQ_FOREACH(zone, &ns->zns.resources.lru_open, lru_entry) {
+ int64_t activated_ns = now - zone->stats.activated_ns;
+ if (activated_ns < ns->zns.frld_ns) {
+ next_timer = MIN(next_timer, zone->stats.activated_ns +
+ ns->zns.frld_ns);
+
+ break;
+ }
+
+ if (activated_ns < ns->zns.frld_ns + ns->zns.frl_ns) {
+ NVME_ZA_SET_FZR(zone->zd.za, 0x1);
+ nvme_zone_changed(n, ns, zone);
+
+ next_timer = MIN(next_timer, now + ns->zns.frl_ns);
+
+ continue;
+ }
+
+ if (zone->wp_staging != le64_to_cpu(zone->zd.wp)) {
+ next_timer = now + 500;
+ continue;
+ }
+
+ nvme_zone_excursion(n, ns, zone, NULL);
+ }
+
+ QTAILQ_FOREACH(zone, &ns->zns.resources.lru_active, lru_entry) {
+ int64_t activated_ns = now - zone->stats.activated_ns;
+ if (activated_ns < ns->zns.frld_ns) {
+ next_timer = MIN(next_timer, zone->stats.activated_ns +
+ ns->zns.frld_ns);
+
+ break;
+ }
+
+ if (activated_ns < ns->zns.frld_ns + ns->zns.frl_ns) {
+ NVME_ZA_SET_FZR(zone->zd.za, 0x1);
+ nvme_zone_changed(n, ns, zone);
+
+ next_timer = MIN(next_timer, now + ns->zns.frl_ns);
+
+ continue;
+ }
+
+ nvme_zone_excursion(n, ns, zone, NULL);
+ }
+
+ QTAILQ_FOREACH(zone, &ns->zns.lru_finished, lru_entry) {
+ int64_t finished_ns = now - zone->stats.finished_ns;
+ if (finished_ns < ns->zns.rrld_ns) {
+ next_timer = MIN(next_timer, zone->stats.finished_ns +
+ ns->zns.rrld_ns);
+
+ break;
+ }
+
+ if (finished_ns < ns->zns.rrld_ns + ns->zns.rrl_ns) {
+ NVME_ZA_SET_RZR(zone->zd.za, 0x1);
+ nvme_zone_changed(n, ns, zone);
+
+ next_timer = MIN(next_timer, now + ns->zns.rrl_ns);
+
+ nvme_zone_changed(n, ns, zone);
+ continue;
+ }
+
+ NVME_ZA_SET_RZR(zone->zd.za, 0x0);
+ }
+
+ if (next_timer != INT64_MAX) {
+ timer_mod(ns->zns.timer, next_timer);
+ }
+}
+
static int nvme_ns_blk_resize(BlockBackend *blk, size_t len, Error **errp)
{
Error *local_err = NULL;
@@ -262,6 +348,21 @@ static void nvme_ns_init_zoned(NvmeNamespace *ns)
id_ns->ncap = ns->zns.info.num_zones * ns->params.zns.zcap;
+ id_ns_zns->rrl = ns->params.zns.rrl;
+ id_ns_zns->frl = ns->params.zns.frl;
+
+ if (ns->params.zns.rrl || ns->params.zns.frl) {
+ ns->zns.rrl_ns = ns->params.zns.rrl * NANOSECONDS_PER_SECOND;
+ ns->zns.rrld_ns = ns->params.zns.rrld * NANOSECONDS_PER_SECOND;
+ ns->zns.frl_ns = ns->params.zns.frl * NANOSECONDS_PER_SECOND;
+ ns->zns.frld_ns = ns->params.zns.frld * NANOSECONDS_PER_SECOND;
+
+ ns->zns.timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+ nvme_ns_process_timer, ns);
+
+ QTAILQ_INIT(&ns->zns.lru_finished);
+ }
+
id_ns_zns->mar = cpu_to_le32(ns->params.zns.mar);
id_ns_zns->mor = cpu_to_le32(ns->params.zns.mor);
@@ -515,6 +616,10 @@ static Property nvme_ns_props[] = {
DEFINE_PROP_UINT16("zns.ozcs", NvmeNamespace, params.zns.ozcs, 0),
DEFINE_PROP_UINT32("zns.mar", NvmeNamespace, params.zns.mar, 0xffffffff),
DEFINE_PROP_UINT32("zns.mor", NvmeNamespace, params.zns.mor, 0xffffffff),
+ DEFINE_PROP_UINT32("zns.rrl", NvmeNamespace, params.zns.rrl, 0),
+ DEFINE_PROP_UINT32("zns.frl", NvmeNamespace, params.zns.frl, 0),
+ DEFINE_PROP_UINT32("zns.rrld", NvmeNamespace, params.zns.rrld, 0),
+ DEFINE_PROP_UINT32("zns.frld", NvmeNamespace, params.zns.frld, 0),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index 6acda5c2cf3f..f92045f19948 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -31,6 +31,10 @@ typedef struct NvmeNamespaceParams {
uint16_t ozcs;
uint32_t mar;
uint32_t mor;
+ uint32_t rrl;
+ uint32_t frl;
+ uint32_t rrld;
+ uint32_t frld;
} zns;
} NvmeNamespaceParams;
@@ -40,6 +44,11 @@ typedef struct NvmeZone {
uint64_t wp_staging;
+ struct {
+ int64_t activated_ns;
+ int64_t finished_ns;
+ } stats;
+
QTAILQ_ENTRY(NvmeZone) lru_entry;
} NvmeZone;
@@ -77,6 +86,10 @@ typedef struct NvmeNamespace {
} resources;
NvmeChangedZoneList changed_list;
+
+ QTAILQ_HEAD(, NvmeZone) lru_finished;
+ QEMUTimer *timer;
+ int64_t rrl_ns, rrld_ns, frl_ns, frld_ns;
} zns;
} NvmeNamespace;
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 6db6daa62bc5..f28373feb887 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -875,13 +875,13 @@ static void nvme_process_aers(void *opaque)
}
}
-static void nvme_enqueue_event(NvmeCtrl *n, NvmeNamespace *ns,
- uint8_t event_type, uint8_t event_info,
- uint8_t log_page)
+void nvme_enqueue_event(NvmeCtrl *n, NvmeNamespace *ns, uint8_t event_type,
+ uint8_t event_info, uint8_t log_page)
{
NvmeAsyncEvent *event;
- trace_pci_nvme_enqueue_event(event_type, event_info, log_page);
+ trace_pci_nvme_enqueue_event(ns ? ns->params.nsid : -1, event_type,
+ event_info, log_page);
if (n->aer_queued == n->params.aer_max_queued) {
trace_pci_nvme_enqueue_event_noqueue(n->aer_queued);
@@ -1194,7 +1194,7 @@ static void nvme_update_zone_descr(NvmeNamespace *ns, NvmeRequest *req,
nvme_req_add_aio(req, aio);
}
-static void nvme_zone_changed(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone)
+void nvme_zone_changed(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone)
{
uint16_t num_ids = le16_to_cpu(ns->zns.changed_list.num_ids);
@@ -1213,12 +1213,8 @@ static void nvme_zone_changed(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone)
NVME_LOG_CHANGED_ZONE_LIST);
}
-static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns,
- NvmeZone *zone, NvmeZoneState to,
- NvmeRequest *req);
-
-static void nvme_zone_excursion(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone,
- NvmeRequest *req)
+void nvme_zone_excursion(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone,
+ NvmeRequest *req)
{
trace_pci_nvme_zone_excursion(ns->params.nsid, nvme_zslba(zone),
nvme_zs_str(zone));
@@ -1226,6 +1222,7 @@ static void nvme_zone_excursion(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone,
assert(nvme_zrm_transition(n, ns, zone, NVME_ZS_ZSF, req) == NVME_SUCCESS);
NVME_ZA_SET_ZFC(zone->zd.za, 0x1);
+ NVME_ZA_SET_FZR(zone->zd.za, 0x0);
nvme_zone_changed(n, ns, zone);
@@ -1333,9 +1330,8 @@ out:
* The function does NOT change the Zone Attribute field; this must be done by
* the caller.
*/
-static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns,
- NvmeZone *zone, NvmeZoneState to,
- NvmeRequest *req)
+uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone,
+ NvmeZoneState to, NvmeRequest *req)
{
NvmeZoneState from = nvme_zs(zone);
uint16_t status;
@@ -1366,7 +1362,7 @@ static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns,
QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_active, zone, lru_entry);
- goto out;
+ goto activated;
case NVME_ZS_ZSIO:
case NVME_ZS_ZSEO:
@@ -1389,7 +1385,7 @@ static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns,
QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_open, zone, lru_entry);
- goto out;
+ goto activated;
default:
return NVME_INVALID_ZONE_STATE_TRANSITION | NVME_DNR;
@@ -1512,8 +1508,28 @@ static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns,
return NVME_INVALID_ZONE_STATE_TRANSITION | NVME_DNR;
}
+activated:
+ zone->stats.activated_ns = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+
+ if (ns->params.zns.frld && !timer_pending(ns->zns.timer)) {
+ int64_t next_timer = zone->stats.activated_ns + ns->zns.frld_ns;
+ timer_mod(ns->zns.timer, next_timer);
+ }
+
out:
nvme_zs_set(zone, to);
+
+ if (to == NVME_ZS_ZSF && ns->params.zns.rrld) {
+ QTAILQ_INSERT_TAIL(&ns->zns.lru_finished, zone, lru_entry);
+
+ zone->stats.finished_ns = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+
+ if (!timer_pending(ns->zns.timer)) {
+ int64_t next_timer = zone->stats.finished_ns + ns->zns.rrld_ns;
+ timer_mod(ns->zns.timer, next_timer);
+ }
+ }
+
return NVME_SUCCESS;
}
@@ -1979,6 +1995,7 @@ static uint16_t nvme_zone_mgmt_send_reset(NvmeCtrl *n, NvmeRequest *req,
case NVME_ZS_ZSRO:
assert(!nvme_zrm_transition(n, ns, zone, NVME_ZS_ZSO, req));
+
nvme_update_zone_info(ns, req, zone);
return NVME_NO_COMPLETE;
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 309fb1b94ecb..e51a38546080 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -318,5 +318,12 @@ static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
}
int nvme_register_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp);
+uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone,
+ NvmeZoneState to, NvmeRequest *req);
+void nvme_enqueue_event(NvmeCtrl *n, NvmeNamespace *ns, uint8_t event_type,
+ uint8_t event_info, uint8_t log_page);
+void nvme_zone_excursion(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone,
+ NvmeRequest *req);
+void nvme_zone_changed(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone);
#endif /* HW_NVME_H */
diff --git a/hw/block/trace-events b/hw/block/trace-events
index c4c80644f782..249487ae79fc 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -85,7 +85,7 @@ pci_nvme_aer(uint16_t cid) "cid %"PRIu16""
pci_nvme_aer_aerl_exceeded(void) "aerl exceeded"
pci_nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8""
pci_nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
-pci_nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
+pci_nvme_enqueue_event(uint32_t nsid, uint8_t typ, uint8_t info, uint8_t log_page) "nsid 0x%"PRIx32" type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
pci_nvme_enqueue_event_noqueue(int queued) "queued %d"
pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8""
pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs"
@@ -105,6 +105,7 @@ pci_nvme_zone_zrm_release_active(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsi
pci_nvme_zone_zrm_excursion_not_allowed(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32""
pci_nvme_zone_changed(uint32_t nsid, uint64_t zslba) "nsid %"PRIu32" zslba 0x%"PRIx64""
pci_nvme_zone_excursion(uint32_t nsid, uint64_t zslba, const char *zc) "nsid %"PRIu32" zslba 0x%"PRIx64" zc \"%s\""
+pci_nvme_ns_process_timer(uint32_t nsid) "nsid %"PRIu32""
pci_nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64""
pci_nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64""
pci_nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64""
--
2.27.0
next prev parent reply other threads:[~2020-06-30 10:10 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-06-30 10:01 [PATCH 00/10] hw/block/nvme: namespace types and zoned namespaces Klaus Jensen
2020-06-30 10:01 ` [PATCH 01/10] hw/block/nvme: support I/O Command Sets Klaus Jensen
2020-06-30 10:01 ` [PATCH 02/10] hw/block/nvme: add zns specific fields and types Klaus Jensen
2020-06-30 10:01 ` [PATCH 03/10] hw/block/nvme: add basic read/write for zoned namespaces Klaus Jensen
2020-06-30 10:01 ` [PATCH 04/10] hw/block/nvme: add the zone management receive command Klaus Jensen
2020-06-30 10:01 ` [PATCH 05/10] hw/block/nvme: add the zone management send command Klaus Jensen
2020-06-30 10:01 ` [PATCH 06/10] hw/block/nvme: add the zone append command Klaus Jensen
2020-06-30 10:01 ` [PATCH 07/10] hw/block/nvme: track and enforce zone resources Klaus Jensen
2020-06-30 10:01 ` [PATCH 08/10] hw/block/nvme: allow open to close transitions by controller Klaus Jensen
2020-06-30 10:01 ` [PATCH 09/10] hw/block/nvme: allow zone excursions Klaus Jensen
2020-06-30 10:01 ` Klaus Jensen [this message]
2020-06-30 12:59 ` [PATCH 00/10] hw/block/nvme: namespace types and zoned namespaces Niklas Cassel
2020-06-30 14:09 ` Philippe Mathieu-Daudé
2020-06-30 15:42 ` Keith Busch
2020-06-30 20:36 ` Klaus Jensen
2020-07-01 10:34 ` nvme emulation merge process (was: Re: [PATCH 00/10] hw/block/nvme: namespace types and zoned namespaces) Kevin Wolf
2020-07-01 13:18 ` Klaus Jensen
2020-07-01 13:29 ` Maxim Levitsky
2020-07-01 13:57 ` Philippe Mathieu-Daudé
2020-07-01 14:21 ` Keith Busch
2020-07-02 20:29 ` nvme emulation merge process Andrzej Jakowski
2020-07-02 21:13 ` Keith Busch
2020-06-30 20:29 ` [PATCH 00/10] hw/block/nvme: namespace types and zoned namespaces Klaus Jensen
2020-07-01 1:10 ` Dmitry Fomichev
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200630100139.1483002-11-its@irrelevant.dk \
--to=its@irrelevant.dk \
--cc=damien.lemoal@wdc.com \
--cc=dmitry.fomichev@wdc.com \
--cc=javier.gonz@samsung.com \
--cc=k.jensen@samsung.com \
--cc=kbusch@kernel.org \
--cc=kwolf@redhat.com \
--cc=matias.bjorling@wdc.com \
--cc=mlevitsk@redhat.com \
--cc=mreitz@redhat.com \
--cc=niklas.cassel@wdc.com \
--cc=philmd@redhat.com \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).