From: Klaus Jensen <its@irrelevant.dk>
To: qemu-devel@nongnu.org
Cc: Fam Zheng <fam@euphon.net>, Kevin Wolf <kwolf@redhat.com>,
qemu-block@nongnu.org, Klaus Jensen <k.jensen@samsung.com>,
Max Reitz <mreitz@redhat.com>, Keith Busch <kbusch@kernel.org>,
Stefan Hajnoczi <stefanha@redhat.com>,
Klaus Jensen <its@irrelevant.dk>
Subject: [PATCH v3 9/9] hw/block/nvme: allow open to close zone transitions by controller
Date: Wed, 14 Oct 2020 13:31:22 +0200 [thread overview]
Message-ID: <20201014113122.388849-10-its@irrelevant.dk> (raw)
In-Reply-To: <20201014113122.388849-1-its@irrelevant.dk>
From: Klaus Jensen <k.jensen@samsung.com>
Allow the controller to release open resources by transitioning
implicitly opened zones to closed; explicitly opened zones and zones
with in-flight writes are skipped. This is done using a naive "least
recently opened" strategy.
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
hw/block/nvme-ns.h | 5 ++++
hw/block/nvme-ns.c | 5 ++++
hw/block/nvme.c | 57 ++++++++++++++++++++++++++++++++++++++++---
hw/block/trace-events | 1 +
4 files changed, 65 insertions(+), 3 deletions(-)
diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index 3d0269eef6f0..5d8523c047d8 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -38,6 +38,8 @@ typedef struct NvmeZone {
uint8_t *zde;
uint64_t wp_staging;
+
+ QTAILQ_ENTRY(NvmeZone) lru_entry;
} NvmeZone;
typedef struct NvmeNamespace {
@@ -64,6 +66,9 @@ typedef struct NvmeNamespace {
struct {
uint32_t open;
uint32_t active;
+
+ QTAILQ_HEAD(, NvmeZone) lru_open;
+ QTAILQ_HEAD(, NvmeZone) lru_active;
} resources;
} zns;
} NvmeNamespace;
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index a01cc5eeb445..cb8b44a78450 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -135,6 +135,9 @@ void nvme_ns_zns_init_zone_state(NvmeNamespace *ns)
ns->zns.resources.open = ns->params.zns.mor != 0xffffffff ?
ns->params.zns.mor + 1 : ns->zns.num_zones;
+ QTAILQ_INIT(&ns->zns.resources.lru_open);
+ QTAILQ_INIT(&ns->zns.resources.lru_active);
+
for (int i = 0; i < ns->zns.num_zones; i++) {
NvmeZone *zone = &ns->zns.zones[i];
zone->zd = &ns->zns.zd[i];
@@ -158,6 +161,8 @@ void nvme_ns_zns_init_zone_state(NvmeNamespace *ns)
if (ns->zns.resources.active) {
ns->zns.resources.active--;
+ QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_active, zone,
+ lru_entry);
break;
}
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index cc637b3a68e9..1fab9d69261c 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1105,11 +1105,47 @@ static inline void nvme_zone_reset_wp(NvmeZone *zone)
zone->wp_staging = nvme_zslba(zone);
}
+static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone,
+ NvmeZoneState to);
+/* Free one open resource by closing the first eligible zone in lru_open. */
+static uint16_t nvme_zrm_release_open(NvmeNamespace *ns)
+{
+ NvmeZone *candidate;
+ NvmeZoneState zs;
+ uint16_t status;
+
+ trace_pci_nvme_zrm_release_open(ns->params.nsid);
+
+ QTAILQ_FOREACH(candidate, &ns->zns.resources.lru_open, lru_entry) {
+ zs = nvme_zs(candidate);
+
+ /* skip explicitly opened zones (NVME_ZS_ZSEO); they are never closed here */
+ if (zs == NVME_ZS_ZSEO) {
+ continue;
+ }
+
+ /* skip zones that have in-flight writes (wp_staging differs from the wp) */
+ if (candidate->wp_staging != nvme_wp(candidate)) {
+ continue;
+ }
+
+ status = nvme_zrm_transition(ns, candidate, NVME_ZS_ZSC);
+ if (status) {
+ return status;
+ }
+ /* one zone was transitioned to closed; do not look at further candidates */
+ return NVME_SUCCESS;
+ }
+ /* list was empty or every zone on it was skipped: nothing could be released */
+ return NVME_TOO_MANY_OPEN_ZONES;
+}
+
static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone,
NvmeZoneState to)
{
NvmeZoneState from = nvme_zs(zone);
NvmeZoneDescriptor *zd = zone->zd;
+ uint16_t status;
trace_pci_nvme_zrm_transition(ns->params.nsid, nvme_zslba(zone), from, to);
@@ -1131,6 +1167,7 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone,
}
ns->zns.resources.active--;
+ QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_active, zone, lru_entry);
break;
@@ -1141,11 +1178,15 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone,
}
if (!ns->zns.resources.open) {
- return NVME_TOO_MANY_OPEN_ZONES;
+ status = nvme_zrm_release_open(ns);
+ if (status) {
+ return status;
+ }
}
ns->zns.resources.active--;
ns->zns.resources.open--;
+ QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_open, zone, lru_entry);
break;
@@ -1172,11 +1213,15 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone,
case NVME_ZS_ZSF:
case NVME_ZS_ZSRO:
ns->zns.resources.active++;
+ ns->zns.resources.open++;
+ QTAILQ_REMOVE(&ns->zns.resources.lru_open, zone, lru_entry);
- /* fallthrough */
+ break;
case NVME_ZS_ZSC:
ns->zns.resources.open++;
+ QTAILQ_REMOVE(&ns->zns.resources.lru_open, zone, lru_entry);
+ QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_active, zone, lru_entry);
break;
@@ -1201,16 +1246,22 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone,
case NVME_ZS_ZSF:
case NVME_ZS_ZSRO:
ns->zns.resources.active++;
+ QTAILQ_REMOVE(&ns->zns.resources.lru_active, zone, lru_entry);
break;
case NVME_ZS_ZSIO:
case NVME_ZS_ZSEO:
if (!ns->zns.resources.open) {
- return NVME_TOO_MANY_OPEN_ZONES;
+ status = nvme_zrm_release_open(ns);
+ if (status) {
+ return status;
+ }
}
ns->zns.resources.open--;
+ QTAILQ_REMOVE(&ns->zns.resources.lru_active, zone, lru_entry);
+ QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_open, zone, lru_entry);
break;
diff --git a/hw/block/trace-events b/hw/block/trace-events
index 2363412a9375..0064fedf31ae 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -83,6 +83,7 @@ pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8""
pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs"
pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" status 0x%"PRIx16""
pci_nvme_zrm_transition(uint32_t nsid, uint64_t zslba, uint8_t from, uint8_t to) "nsid %"PRIu32" zslba 0x%"PRIx64" from 0x%"PRIx8" to 0x%"PRIx8""
+pci_nvme_zrm_release_open(uint32_t nsid) "nsid %"PRIu32""
pci_nvme_mmio_read(uint64_t addr) "addr 0x%"PRIx64""
pci_nvme_mmio_write(uint64_t addr, uint64_t data) "addr 0x%"PRIx64" data 0x%"PRIx64""
pci_nvme_mmio_doorbell_cq(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" new_head %"PRIu16""
--
2.28.0
prev parent reply other threads:[~2020-10-14 11:37 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-10-14 11:31 [PATCH v3 0/9] hw/block/nvme: zoned namespace command set Klaus Jensen
2020-10-14 11:31 ` [PATCH v3 1/9] hw/block/nvme: add commands supported and effects log page Klaus Jensen
2020-10-14 11:31 ` [PATCH v3 2/9] hw/block/nvme: add uuid namespace parameter Klaus Jensen
2020-10-14 11:31 ` [PATCH v3 3/9] hw/block/nvme: support namespace types Klaus Jensen
2020-10-14 11:31 ` [PATCH v3 4/9] hw/block/nvme: add basic read/write for zoned namespaces Klaus Jensen
2020-10-14 11:31 ` [PATCH v3 5/9] hw/block/nvme: add the zone management receive command Klaus Jensen
2020-10-14 11:31 ` [PATCH v3 6/9] hw/block/nvme: add the zone management send command Klaus Jensen
2020-10-14 11:31 ` [PATCH v3 7/9] hw/block/nvme: add the zone append command Klaus Jensen
2020-10-14 11:31 ` [PATCH v3 8/9] hw/block/nvme: track and enforce zone resources Klaus Jensen
2020-10-14 11:31 ` Klaus Jensen [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201014113122.388849-10-its@irrelevant.dk \
--to=its@irrelevant.dk \
--cc=fam@euphon.net \
--cc=k.jensen@samsung.com \
--cc=kbusch@kernel.org \
--cc=kwolf@redhat.com \
--cc=mreitz@redhat.com \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.