qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Klaus Jensen <its@irrelevant.dk>
To: Hannes Reinecke <hare@suse.de>
Cc: qemu-block@nongnu.org, "Klaus Jensen" <k.jensen@samsung.com>,
	qemu-devel@nongnu.org, "Markus Armbruster" <armbru@redhat.com>,
	"Stefan Hajnoczi" <stefanha@redhat.com>,
	"Keith Busch" <kbusch@kernel.org>,
	"Philippe Mathieu-Daudé" <philmd@redhat.com>
Subject: Re: [PATCH 4/4] hw/nvme: fix controller hot unplugging
Date: Wed, 7 Jul 2021 11:53:45 +0200	[thread overview]
Message-ID: <YOV5qZj8pcnH1aAE@apples.localdomain> (raw)
In-Reply-To: <f468bbdc-fa33-a7e6-a392-394a736a06be@suse.de>

[-- Attachment #1: Type: text/plain, Size: 7801 bytes --]

On Jul  7 09:49, Hannes Reinecke wrote:
>On 7/6/21 11:33 AM, Klaus Jensen wrote:
>>From: Klaus Jensen <k.jensen@samsung.com>
>>
>>Prior to this patch the nvme-ns devices are always children of the
>>NvmeBus owned by the NvmeCtrl. This causes the namespaces to be
>>unrealized when the parent device is removed. However, when subsystems
>>are involved, this is not what we want since the namespaces may be
>>attached to other controllers as well.
>>
>>This patch adds an additional NvmeBus on the subsystem device. When
>>nvme-ns devices are realized, if the parent controller device is linked
>>to a subsystem, the parent bus is set to the subsystem one instead. This
>>makes sure that namespaces are kept alive and not unrealized.
>>
>>Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
>>---
>>  hw/nvme/nvme.h   | 18 ++++++++++--------
>>  hw/nvme/ctrl.c   |  8 +++++---
>>  hw/nvme/ns.c     | 32 +++++++++++++++++++++++++-------
>>  hw/nvme/subsys.c |  4 ++++
>>  4 files changed, 44 insertions(+), 18 deletions(-)
>>
>>diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
>>index c4065467d877..9401e212f9f7 100644
>>--- a/hw/nvme/nvme.h
>>+++ b/hw/nvme/nvme.h
>>@@ -33,12 +33,21 @@ QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);
>>  typedef struct NvmeCtrl NvmeCtrl;
>>  typedef struct NvmeNamespace NvmeNamespace;
>>+#define TYPE_NVME_BUS "nvme-bus"
>>+OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)
>>+
>>+typedef struct NvmeBus {
>>+    BusState parent_bus;
>>+    bool     is_subsys;
>>+} NvmeBus;
>>+
>>  #define TYPE_NVME_SUBSYS "nvme-subsys"
>>  #define NVME_SUBSYS(obj) \
>>      OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
>>  typedef struct NvmeSubsystem {
>>      DeviceState parent_obj;
>>+    NvmeBus     bus;
>>      uint8_t     subnqn[256];
>>      NvmeCtrl      *ctrls[NVME_MAX_CONTROLLERS];
>>@@ -365,13 +374,6 @@ typedef struct NvmeCQueue {
>>      QTAILQ_HEAD(, NvmeRequest) req_list;
>>  } NvmeCQueue;
>>-#define TYPE_NVME_BUS "nvme-bus"
>>-#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS)
>>-
>>-typedef struct NvmeBus {
>>-    BusState parent_bus;
>>-} NvmeBus;
>>-
>>  #define TYPE_NVME "nvme"
>>  #define NVME(obj) \
>>          OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
>>@@ -463,7 +465,7 @@ typedef struct NvmeCtrl {
>>  static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
>>  {
>>-    if (!nsid || nsid > NVME_MAX_NAMESPACES) {
>>+    if (!n || !nsid || nsid > NVME_MAX_NAMESPACES) {
>>          return NULL;
>>      }
>>diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
>>index 90e3ee2b70ee..7c8fca36d9a5 100644
>>--- a/hw/nvme/ctrl.c
>>+++ b/hw/nvme/ctrl.c
>>@@ -6516,11 +6516,13 @@ static void nvme_exit(PCIDevice *pci_dev)
>>      for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
>>          ns = nvme_ns(n, i);
>>-        if (!ns) {
>>-            continue;
>>+        if (ns) {
>>+            ns->attached--;
>>          }
>>+    }
>>-        nvme_ns_cleanup(ns);
>>+    if (n->subsys) {
>>+        nvme_subsys_unregister_ctrl(n->subsys, n);
>>      }
>>      if (n->subsys) {
>>diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
>>index 3c4f5b8c714a..612a2786d75d 100644
>>--- a/hw/nvme/ns.c
>>+++ b/hw/nvme/ns.c
>>@@ -444,13 +444,29 @@ void nvme_ns_cleanup(NvmeNamespace *ns)
>>  static void nvme_ns_realize(DeviceState *dev, Error **errp)
>>  {
>>      NvmeNamespace *ns = NVME_NS(dev);
>>-    BusState *s = qdev_get_parent_bus(dev);
>>-    NvmeCtrl *n = NVME(s->parent);
>>-    NvmeSubsystem *subsys = n->subsys;
>>+    BusState *qbus = qdev_get_parent_bus(dev);
>>+    NvmeBus *bus = NVME_BUS(qbus);
>>+    NvmeCtrl *n = NULL;
>>+    NvmeSubsystem *subsys = NULL;
>>      uint32_t nsid = ns->params.nsid;
>>      int i;
>>-    if (!n->subsys) {
>>+    if (bus->is_subsys) {
>>+        subsys = NVME_SUBSYS(qbus->parent);
>>+    } else {
>>+        n = NVME(qbus->parent);
>>+        subsys = n->subsys;
>>+    }
>>+
>>+    if (subsys) {
>>+        /*
>>+         * If this namespace belongs to a subsystem (through a link on the
>>+         * controller device), reparent the device.
>>+         */
>>+        if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) {
>>+            return;
>>+        }
>>+    } else {
>>          if (ns->params.detached) {
>>              error_setg(errp, "detached requires that the nvme device is "
>>                         "linked to an nvme-subsys device");
>>@@ -470,7 +486,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
>>      if (!nsid) {
>>          for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
>>-            if (nvme_ns(n, i) || nvme_subsys_ns(subsys, i)) {
>>+            if (nvme_subsys_ns(subsys, i) || nvme_ns(n, i)) {
>>                  continue;
>>              }
>>@@ -483,7 +499,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
>>              return;
>>          }
>>      } else {
>>-        if (nvme_ns(n, nsid) || nvme_subsys_ns(subsys, nsid)) {
>>+        if (nvme_subsys_ns(subsys, nsid) || nvme_ns(n, nsid)) {
>>              error_setg(errp, "namespace id '%d' already allocated", nsid);
>>              return;
>>          }
>>@@ -509,7 +525,9 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
>>          }
>>      }
>>-    nvme_attach_ns(n, ns);
>>+    if (n) {
>>+        nvme_attach_ns(n, ns);
>>+    }
>>  }
>>  static Property nvme_ns_props[] = {
>>diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c
>>index 92caa604a280..fb7c3a7c55fc 100644
>>--- a/hw/nvme/subsys.c
>>+++ b/hw/nvme/subsys.c
>>@@ -50,6 +50,10 @@ static void nvme_subsys_realize(DeviceState *dev, Error **errp)
>>  {
>>      NvmeSubsystem *subsys = NVME_SUBSYS(dev);
>>+    qbus_create_inplace(&subsys->bus, sizeof(NvmeBus), TYPE_NVME_BUS, dev,
>>+                        dev->id);
>>+    subsys->bus.is_subsys = true;
>>+
>>      nvme_subsys_setup(subsys);
>>  }
>>
>Why not always create a subsystem, and distinguish between 'shared' 
>and 'non-shared' subsystems instead of the ->subsys pointer?
>That way all namespaces can be children of the subsystem, we won't 
>need any reparenting, and the whole thing will be more in-line with 
>qdev, no?
>

Hi Hannes,

Thanks for your reviews and comments!

So, I have actually considered what you suggest.

The problem is that the nvme-ns device currently expects to plug into a 
bus (an 'nvme-bus') and we cannot really get rid of that 'bus' parameter 
without breaking compatibility. I experimented with removing the bus 
from the nvme device and instead creating it in the nvme-subsys device. 
My idea here was to implicitly always create an nvme-subsys if not 
explicitly created by the user, but since nvme-subsys does not plug into 
any bus itself, it is not exposed in the qtree and thus not reachable 
(i.e., when realizing the nvme-ns device, it will complain that there 
are no 'nvme-bus' available to plug into). To make this work, I believe 
we'd have to have the nvme-subsys plug into the main system bus (or some 
other bus rooted at main-system-bus), and I'd prefer not to do that 
since this is already a flawed design and I think it would be more 
intrusive than what my patch does.

I raised these issues on the mailinglist some time ago[1]. We didn't 
really find a good solution, but I think the conclusion is that the 
bus/device design of subsystems and namespaces is flawed. That's why I 
am working on an "objectification" of the two devices as suggested by 
Stefan[2] as a proper fix for this mess.


   [1]: https://lore.kernel.org/qemu-devel/YJrKRsF4%2F38QheKn@apples.localdomain/T/#u
   [2]: https://lore.kernel.org/qemu-devel/YKI9zh2AqX35S8wd@stefanha-x1.localdomain/

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

  reply	other threads:[~2021-07-07  9:57 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-06  9:33 [PATCH 0/4] hw/nvme: fix controller hotplugging Klaus Jensen
2021-07-06  9:33 ` [PATCH 1/4] hw/nvme: remove NvmeCtrl parameter from ns setup/check functions Klaus Jensen
2021-07-07  7:45   ` Hannes Reinecke
2021-07-06  9:33 ` [PATCH 2/4] hw/nvme: mark nvme-subsys non-hotpluggable Klaus Jensen
2021-07-07  7:45   ` Hannes Reinecke
2021-07-06  9:33 ` [PATCH 3/4] hw/nvme: unregister controller with subsystem at exit Klaus Jensen
2021-07-07  7:46   ` Hannes Reinecke
2021-07-06  9:33 ` [PATCH 4/4] hw/nvme: fix controller hot unplugging Klaus Jensen
2021-07-07  7:49   ` Hannes Reinecke
2021-07-07  9:53     ` Klaus Jensen [this message]
2021-07-07 10:43       ` Hannes Reinecke
2021-07-07 15:41         ` Klaus Jensen
2021-07-07 16:14         ` Stefan Hajnoczi
2021-07-07 16:47           ` Klaus Jensen
2021-07-08  6:15           ` Hannes Reinecke
2021-07-07 10:02   ` Klaus Jensen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YOV5qZj8pcnH1aAE@apples.localdomain \
    --to=its@irrelevant.dk \
    --cc=armbru@redhat.com \
    --cc=hare@suse.de \
    --cc=k.jensen@samsung.com \
    --cc=kbusch@kernel.org \
    --cc=philmd@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).