From: Klaus Jensen <its@irrelevant.dk>
To: Hannes Reinecke <hare@suse.de>
Cc: qemu-block@nongnu.org, "Klaus Jensen" <k.jensen@samsung.com>,
qemu-devel@nongnu.org, "Markus Armbruster" <armbru@redhat.com>,
"Stefan Hajnoczi" <stefanha@redhat.com>,
"Keith Busch" <kbusch@kernel.org>,
"Philippe Mathieu-Daudé" <philmd@redhat.com>
Subject: Re: [PATCH 4/4] hw/nvme: fix controller hot unplugging
Date: Wed, 7 Jul 2021 11:53:45 +0200 [thread overview]
Message-ID: <YOV5qZj8pcnH1aAE@apples.localdomain> (raw)
In-Reply-To: <f468bbdc-fa33-a7e6-a392-394a736a06be@suse.de>
[-- Attachment #1: Type: text/plain, Size: 7801 bytes --]
On Jul 7 09:49, Hannes Reinecke wrote:
>On 7/6/21 11:33 AM, Klaus Jensen wrote:
>>From: Klaus Jensen <k.jensen@samsung.com>
>>
>>Prior to this patch the nvme-ns devices are always children of the
>>NvmeBus owned by the NvmeCtrl. This causes the namespaces to be
>>unrealized when the parent device is removed. However, when subsystems
>>are involved, this is not what we want since the namespaces may be
>>attached to other controllers as well.
>>
>>This patch adds an additional NvmeBus on the subsystem device. When
>>nvme-ns devices are realized, if the parent controller device is linked
>>to a subsystem, the parent bus is set to the subsystem one instead. This
>>makes sure that namespaces are kept alive and not unrealized.
>>
>>Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
>>---
>> hw/nvme/nvme.h | 18 ++++++++++--------
>> hw/nvme/ctrl.c | 8 +++++---
>> hw/nvme/ns.c | 32 +++++++++++++++++++++++++-------
>> hw/nvme/subsys.c | 4 ++++
>> 4 files changed, 44 insertions(+), 18 deletions(-)
>>
>>diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
>>index c4065467d877..9401e212f9f7 100644
>>--- a/hw/nvme/nvme.h
>>+++ b/hw/nvme/nvme.h
>>@@ -33,12 +33,21 @@ QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);
>> typedef struct NvmeCtrl NvmeCtrl;
>> typedef struct NvmeNamespace NvmeNamespace;
>>+#define TYPE_NVME_BUS "nvme-bus"
>>+OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)
>>+
>>+typedef struct NvmeBus {
>>+ BusState parent_bus;
>>+ bool is_subsys;
>>+} NvmeBus;
>>+
>> #define TYPE_NVME_SUBSYS "nvme-subsys"
>> #define NVME_SUBSYS(obj) \
>> OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
>> typedef struct NvmeSubsystem {
>> DeviceState parent_obj;
>>+ NvmeBus bus;
>> uint8_t subnqn[256];
>> NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS];
>>@@ -365,13 +374,6 @@ typedef struct NvmeCQueue {
>> QTAILQ_HEAD(, NvmeRequest) req_list;
>> } NvmeCQueue;
>>-#define TYPE_NVME_BUS "nvme-bus"
>>-#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS)
>>-
>>-typedef struct NvmeBus {
>>- BusState parent_bus;
>>-} NvmeBus;
>>-
>> #define TYPE_NVME "nvme"
>> #define NVME(obj) \
>> OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
>>@@ -463,7 +465,7 @@ typedef struct NvmeCtrl {
>> static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
>> {
>>- if (!nsid || nsid > NVME_MAX_NAMESPACES) {
>>+ if (!n || !nsid || nsid > NVME_MAX_NAMESPACES) {
>> return NULL;
>> }
>>diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
>>index 90e3ee2b70ee..7c8fca36d9a5 100644
>>--- a/hw/nvme/ctrl.c
>>+++ b/hw/nvme/ctrl.c
>>@@ -6516,11 +6516,13 @@ static void nvme_exit(PCIDevice *pci_dev)
>> for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
>> ns = nvme_ns(n, i);
>>- if (!ns) {
>>- continue;
>>+ if (ns) {
>>+ ns->attached--;
>> }
>>+ }
>>- nvme_ns_cleanup(ns);
>>+ if (n->subsys) {
>>+ nvme_subsys_unregister_ctrl(n->subsys, n);
>> }
>> if (n->subsys) {
>>diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
>>index 3c4f5b8c714a..612a2786d75d 100644
>>--- a/hw/nvme/ns.c
>>+++ b/hw/nvme/ns.c
>>@@ -444,13 +444,29 @@ void nvme_ns_cleanup(NvmeNamespace *ns)
>> static void nvme_ns_realize(DeviceState *dev, Error **errp)
>> {
>> NvmeNamespace *ns = NVME_NS(dev);
>>- BusState *s = qdev_get_parent_bus(dev);
>>- NvmeCtrl *n = NVME(s->parent);
>>- NvmeSubsystem *subsys = n->subsys;
>>+ BusState *qbus = qdev_get_parent_bus(dev);
>>+ NvmeBus *bus = NVME_BUS(qbus);
>>+ NvmeCtrl *n = NULL;
>>+ NvmeSubsystem *subsys = NULL;
>> uint32_t nsid = ns->params.nsid;
>> int i;
>>- if (!n->subsys) {
>>+ if (bus->is_subsys) {
>>+ subsys = NVME_SUBSYS(qbus->parent);
>>+ } else {
>>+ n = NVME(qbus->parent);
>>+ subsys = n->subsys;
>>+ }
>>+
>>+ if (subsys) {
>>+ /*
>>+ * If this namespace belongs to a subsystem (through a link on the
>>+ * controller device), reparent the device.
>>+ */
>>+ if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) {
>>+ return;
>>+ }
>>+ } else {
>> if (ns->params.detached) {
>> error_setg(errp, "detached requires that the nvme device is "
>> "linked to an nvme-subsys device");
>>@@ -470,7 +486,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
>> if (!nsid) {
>> for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
>>- if (nvme_ns(n, i) || nvme_subsys_ns(subsys, i)) {
>>+ if (nvme_subsys_ns(subsys, i) || nvme_ns(n, i)) {
>> continue;
>> }
>>@@ -483,7 +499,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
>> return;
>> }
>> } else {
>>- if (nvme_ns(n, nsid) || nvme_subsys_ns(subsys, nsid)) {
>>+ if (nvme_subsys_ns(subsys, nsid) || nvme_ns(n, nsid)) {
>> error_setg(errp, "namespace id '%d' already allocated", nsid);
>> return;
>> }
>>@@ -509,7 +525,9 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
>> }
>> }
>>- nvme_attach_ns(n, ns);
>>+ if (n) {
>>+ nvme_attach_ns(n, ns);
>>+ }
>> }
>> static Property nvme_ns_props[] = {
>>diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c
>>index 92caa604a280..fb7c3a7c55fc 100644
>>--- a/hw/nvme/subsys.c
>>+++ b/hw/nvme/subsys.c
>>@@ -50,6 +50,10 @@ static void nvme_subsys_realize(DeviceState *dev, Error **errp)
>> {
>> NvmeSubsystem *subsys = NVME_SUBSYS(dev);
>>+ qbus_create_inplace(&subsys->bus, sizeof(NvmeBus), TYPE_NVME_BUS, dev,
>>+ dev->id);
>>+ subsys->bus.is_subsys = true;
>>+
>> nvme_subsys_setup(subsys);
>> }
>>
>Why not always create a subsystem, and distinguish between 'shared'
>and 'non-shared' subsystems instead of the ->subsys pointer?
>That way all namespaces can be children of the subsystem, we won't
>need any reparenting, and the whole thing will be more in-line with
>qdev, no?
>
Hi Hannes,
Thanks for your reviews and comments!
So, I have actually considered what you suggest.
The problem is that the nvme-ns device currently expects to plug into a
bus (an 'nvme-bus') and we cannot really get rid of that 'bus' parameter
without breaking compatibility. I experimented with removing the bus
from the nvme device and instead creating it in the nvme-subsys device.
My idea here was to implicitly always create an nvme-subsys if not
explicitly created by the user, but since nvme-subsys does not plug into
any bus itself, it is not exposed in the qtree and thus not reachable
(i.e., when realizing the nvme-ns device, it will complain that there
are no 'nvme-bus' available to plug into). To make this work, I believe
we'd have to have the nvme-subsys plug into the main system bus (or some
other bus rooted at main-system-bus), and I'd prefer not to do that
since this is already a flawed design and I think it would be more
intrusive than what my patch does.
I raised these issues on the mailinglist some time ago[1]. We didn't
really find a good solution, but I think the conclusion is that the
bus/device design of subsystems and namespaces is flawed. That's why I
am working on an "objectification" of the two devices as suggested by
Stefan[2] as a proper fix for this mess.
[1]: https://lore.kernel.org/qemu-devel/YJrKRsF4%2F38QheKn@apples.localdomain/T/#u
[2]: https://lore.kernel.org/qemu-devel/YKI9zh2AqX35S8wd@stefanha-x1.localdomain/
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
next prev parent reply other threads:[~2021-07-07 9:57 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-07-06 9:33 [PATCH 0/4] hw/nvme: fix controller hotplugging Klaus Jensen
2021-07-06 9:33 ` [PATCH 1/4] hw/nvme: remove NvmeCtrl parameter from ns setup/check functions Klaus Jensen
2021-07-07 7:45 ` Hannes Reinecke
2021-07-06 9:33 ` [PATCH 2/4] hw/nvme: mark nvme-subsys non-hotpluggable Klaus Jensen
2021-07-07 7:45 ` Hannes Reinecke
2021-07-06 9:33 ` [PATCH 3/4] hw/nvme: unregister controller with subsystem at exit Klaus Jensen
2021-07-07 7:46 ` Hannes Reinecke
2021-07-06 9:33 ` [PATCH 4/4] hw/nvme: fix controller hot unplugging Klaus Jensen
2021-07-07 7:49 ` Hannes Reinecke
2021-07-07 9:53 ` Klaus Jensen [this message]
2021-07-07 10:43 ` Hannes Reinecke
2021-07-07 15:41 ` Klaus Jensen
2021-07-07 16:14 ` Stefan Hajnoczi
2021-07-07 16:47 ` Klaus Jensen
2021-07-08 6:15 ` Hannes Reinecke
2021-07-07 10:02 ` Klaus Jensen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=YOV5qZj8pcnH1aAE@apples.localdomain \
--to=its@irrelevant.dk \
--cc=armbru@redhat.com \
--cc=hare@suse.de \
--cc=k.jensen@samsung.com \
--cc=kbusch@kernel.org \
--cc=philmd@redhat.com \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).