Linux-NVME Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCHv2 0/5] nvme: fix initialization memleak
@ 2024-06-04 18:59 Keith Busch
  2024-06-04 18:59 ` [PATCHv2 1/5] nvme: apple: fix device reference counting Keith Busch
                   ` (7 more replies)
  0 siblings, 8 replies; 10+ messages in thread
From: Keith Busch @ 2024-06-04 18:59 UTC (permalink / raw)
  To: linux-nvme
  Cc: hch, sagi, mlombard, hare, shinichiro.kawasaki, james.smart,
	Keith Busch

From: Keith Busch <kbusch@kernel.org>

I ran more of the blktests on loop in addtion to pci, also did more
error injection at various points around the early initialization to
make sure the kmemleak really is gone.

Changes since v1:

  Use ERR_CAST() as appropriate (Christoph, Hannes)

  Fix apple cleanup error (me)

Keith Busch (5):
  nvme: apple: fix device reference counting
  nvme: tcp: split controller bringup handling
  nvme: rdma: split controller bringup handling
  nvme: fc: split controller bringup handling
  nvme: split device add from initialzation

 drivers/nvme/host/apple.c  | 32 +++++++++++++++++----
 drivers/nvme/host/core.c   | 58 +++++++++++++++++++++++---------------
 drivers/nvme/host/fc.c     | 48 ++++++++++++++++++++-----------
 drivers/nvme/host/nvme.h   |  1 +
 drivers/nvme/host/pci.c    |  5 ++++
 drivers/nvme/host/rdma.c   | 33 +++++++++++++++++-----
 drivers/nvme/host/tcp.c    | 30 ++++++++++++++++----
 drivers/nvme/target/loop.c |  5 ++++
 8 files changed, 155 insertions(+), 57 deletions(-)

-- 
2.43.0



^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCHv2 1/5] nvme: apple: fix device reference counting
  2024-06-04 18:59 [PATCHv2 0/5] nvme: fix initialization memleak Keith Busch
@ 2024-06-04 18:59 ` Keith Busch
  2024-06-04 18:59 ` [PATCHv2 2/5] nvme: tcp: split controller bringup handling Keith Busch
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Keith Busch @ 2024-06-04 18:59 UTC (permalink / raw)
  To: linux-nvme
  Cc: hch, sagi, mlombard, hare, shinichiro.kawasaki, james.smart,
	Keith Busch

From: Keith Busch <kbusch@kernel.org>

Drivers must call nvme_uninit_ctrl after a successful nvme_init_ctrl.
Split the allocation side out to make the error handling boundary easier
to navigate. The apple driver had been doing this wrong, leaking the
controller device memory on a tagset failure.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/host/apple.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index dd6ec0865141a..d3384aecc0d39 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -1388,7 +1388,7 @@ static void devm_apple_nvme_mempool_destroy(void *data)
 	mempool_destroy(data);
 }
 
-static int apple_nvme_probe(struct platform_device *pdev)
+static struct apple_nvme *apple_nvme_alloc(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct apple_nvme *anv;
@@ -1396,7 +1396,7 @@ static int apple_nvme_probe(struct platform_device *pdev)
 
 	anv = devm_kzalloc(dev, sizeof(*anv), GFP_KERNEL);
 	if (!anv)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	anv->dev = get_device(dev);
 	anv->adminq.is_adminq = true;
@@ -1516,10 +1516,26 @@ static int apple_nvme_probe(struct platform_device *pdev)
 		goto put_dev;
 	}
 
+	return anv;
+put_dev:
+	put_device(anv->dev);
+	return ERR_PTR(ret);
+}
+
+static int apple_nvme_probe(struct platform_device *pdev)
+{
+	struct apple_nvme *anv;
+	int ret;
+
+	anv = apple_nvme_alloc(pdev);
+	if (IS_ERR(anv))
+		return PTR_ERR(anv);
+
 	anv->ctrl.admin_q = blk_mq_alloc_queue(&anv->admin_tagset, NULL, NULL);
 	if (IS_ERR(anv->ctrl.admin_q)) {
 		ret = -ENOMEM;
-		goto put_dev;
+		anv->ctrl.admin_q = NULL;
+		goto out_uninit_ctrl;
 	}
 
 	nvme_reset_ctrl(&anv->ctrl);
@@ -1527,8 +1543,9 @@ static int apple_nvme_probe(struct platform_device *pdev)
 
 	return 0;
 
-put_dev:
-	put_device(anv->dev);
+out_uninit_ctrl:
+	nvme_uninit_ctrl(&dev->ctrl);
+	nvme_put_ctrl(&dev->ctrl);
 	return ret;
 }
 
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCHv2 2/5] nvme: tcp: split controller bringup handling
  2024-06-04 18:59 [PATCHv2 0/5] nvme: fix initialization memleak Keith Busch
  2024-06-04 18:59 ` [PATCHv2 1/5] nvme: apple: fix device reference counting Keith Busch
@ 2024-06-04 18:59 ` Keith Busch
  2024-06-04 18:59 ` [PATCHv2 3/5] nvme: rdma: " Keith Busch
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Keith Busch @ 2024-06-04 18:59 UTC (permalink / raw)
  To: linux-nvme
  Cc: hch, sagi, mlombard, hare, shinichiro.kawasaki, james.smart,
	Keith Busch

From: Keith Busch <kbusch@kernel.org>

Drivers must call nvme_uninit_ctrl after a successful nvme_init_ctrl.
Split the allocation side out to make the error handling boundary easier
to navigate. The nvme tcp driver's error handling had different returns
in the error goto label's, which harm readability.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/host/tcp.c | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 8b5e4327fe83b..5ee3bbc67f411 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2686,7 +2686,7 @@ nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts)
 	return found;
 }
 
-static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
+static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev,
 		struct nvmf_ctrl_options *opts)
 {
 	struct nvme_tcp_ctrl *ctrl;
@@ -2761,6 +2761,24 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
 	if (ret)
 		goto out_kfree_queues;
 
+	return ctrl;
+out_kfree_queues:
+	kfree(ctrl->queues);
+out_free_ctrl:
+	kfree(ctrl);
+	return ERR_PTR(ret);
+}
+
+static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
+		struct nvmf_ctrl_options *opts)
+{
+	struct nvme_tcp_ctrl *ctrl;
+	int ret;
+
+	ctrl = nvme_tcp_alloc_ctrl(dev, opts);
+	if (IS_ERR(ctrl))
+		return ERR_CAST(ctrl);
+
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
 		WARN_ON_ONCE(1);
 		ret = -EINTR;
@@ -2786,11 +2804,6 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
 	if (ret > 0)
 		ret = -EIO;
 	return ERR_PTR(ret);
-out_kfree_queues:
-	kfree(ctrl->queues);
-out_free_ctrl:
-	kfree(ctrl);
-	return ERR_PTR(ret);
 }
 
 static struct nvmf_transport_ops nvme_tcp_transport = {
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCHv2 3/5] nvme: rdma: split controller bringup handling
  2024-06-04 18:59 [PATCHv2 0/5] nvme: fix initialization memleak Keith Busch
  2024-06-04 18:59 ` [PATCHv2 1/5] nvme: apple: fix device reference counting Keith Busch
  2024-06-04 18:59 ` [PATCHv2 2/5] nvme: tcp: split controller bringup handling Keith Busch
@ 2024-06-04 18:59 ` Keith Busch
  2024-06-04 18:59 ` [PATCHv2 4/5] nvme: fc: " Keith Busch
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Keith Busch @ 2024-06-04 18:59 UTC (permalink / raw)
  To: linux-nvme
  Cc: hch, sagi, mlombard, hare, shinichiro.kawasaki, james.smart,
	Keith Busch

From: Keith Busch <kbusch@kernel.org>

Drivers must call nvme_uninit_ctrl after a successful nvme_init_ctrl.
Split the allocation side out to make the error handling boundary easier
to navigate. The nvme rdma driver's error handling had different returns
in the error goto label's, which harm readability.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/host/rdma.c | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 51a62b0c645a7..94d4f3dbac6b6 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -2237,12 +2237,11 @@ nvme_rdma_existing_controller(struct nvmf_ctrl_options *opts)
 	return found;
 }
 
-static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
+static struct nvme_rdma_ctrl *nvme_rdma_alloc_ctrl(struct device *dev,
 		struct nvmf_ctrl_options *opts)
 {
 	struct nvme_rdma_ctrl *ctrl;
 	int ret;
-	bool changed;
 
 	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
 	if (!ctrl)
@@ -2304,6 +2303,26 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	if (ret)
 		goto out_kfree_queues;
 
+	return ctrl;
+
+out_kfree_queues:
+	kfree(ctrl->queues);
+out_free_ctrl:
+	kfree(ctrl);
+	return ERR_PTR(ret);
+}
+
+static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
+		struct nvmf_ctrl_options *opts)
+{
+	struct nvme_rdma_ctrl *ctrl;
+	bool changed;
+	int ret;
+
+	ctrl = nvme_rdma_alloc_ctrl(dev, opts);
+	if (IS_ERR(ctrl))
+		return ERR_CAST(ctrl);
+
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
 	WARN_ON_ONCE(!changed);
 
@@ -2326,11 +2345,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	if (ret > 0)
 		ret = -EIO;
 	return ERR_PTR(ret);
-out_kfree_queues:
-	kfree(ctrl->queues);
-out_free_ctrl:
-	kfree(ctrl);
-	return ERR_PTR(ret);
 }
 
 static struct nvmf_transport_ops nvme_rdma_transport = {
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCHv2 4/5] nvme: fc: split controller bringup handling
  2024-06-04 18:59 [PATCHv2 0/5] nvme: fix initialization memleak Keith Busch
                   ` (2 preceding siblings ...)
  2024-06-04 18:59 ` [PATCHv2 3/5] nvme: rdma: " Keith Busch
@ 2024-06-04 18:59 ` Keith Busch
  2024-06-04 18:59 ` [PATCHv2 5/5] nvme: split device add from initialization Keith Busch
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Keith Busch @ 2024-06-04 18:59 UTC (permalink / raw)
  To: linux-nvme
  Cc: hch, sagi, mlombard, hare, shinichiro.kawasaki, james.smart,
	Keith Busch

From: Keith Busch <kbusch@kernel.org>

Drivers must call nvme_uninit_ctrl after a successful nvme_init_ctrl.
Split the allocation side out to make the error handling boundary easier
to navigate. The nvme fc driver's error handling had different returns
in the error goto label's, which harm readability.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/host/fc.c | 43 ++++++++++++++++++++++++++----------------
 1 file changed, 27 insertions(+), 16 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index f0b0813327491..c52446013388f 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3444,12 +3444,11 @@ nvme_fc_existing_controller(struct nvme_fc_rport *rport,
 	return found;
 }
 
-static struct nvme_ctrl *
-nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
+static struct nvme_fc_ctrl *
+nvme_fc_alloc_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	struct nvme_fc_lport *lport, struct nvme_fc_rport *rport)
 {
 	struct nvme_fc_ctrl *ctrl;
-	unsigned long flags;
 	int ret, idx, ctrl_loss_tmo;
 
 	if (!(rport->remoteport.port_role &
@@ -3538,7 +3537,31 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	if (lport->dev)
 		ctrl->ctrl.numa_node = dev_to_node(lport->dev);
 
-	/* at this point, teardown path changes to ref counting on nvme ctrl */
+	return ctrl;
+
+out_free_queues:
+	kfree(ctrl->queues);
+out_free_ida:
+	put_device(ctrl->dev);
+	ida_free(&nvme_fc_ctrl_cnt, ctrl->cnum);
+out_free_ctrl:
+	kfree(ctrl);
+out_fail:
+	/* exit via here doesn't follow ctlr ref points */
+	return ERR_PTR(ret);
+}
+
+static struct nvme_ctrl *
+nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
+	struct nvme_fc_lport *lport, struct nvme_fc_rport *rport)
+{
+	struct nvme_fc_ctrl *ctrl;
+	unsigned long flags;
+	int ret;
+
+	ctrl = nvme_fc_alloc_ctrl(dev, opts, lport, rport);
+	if (IS_ERR(ctrl))
+		return ERR_CAST(ctrl);
 
 	ret = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set,
 			&nvme_fc_admin_mq_ops,
@@ -3597,20 +3620,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	nvme_fc_rport_get(rport);
 
 	return ERR_PTR(-EIO);
-
-out_free_queues:
-	kfree(ctrl->queues);
-out_free_ida:
-	put_device(ctrl->dev);
-	ida_free(&nvme_fc_ctrl_cnt, ctrl->cnum);
-out_free_ctrl:
-	kfree(ctrl);
-out_fail:
-	/* exit via here doesn't follow ctlr ref points */
-	return ERR_PTR(ret);
 }
 
-
 struct nvmet_fc_traddr {
 	u64	nn;
 	u64	pn;
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCHv2 5/5] nvme: split device add from initialization
  2024-06-04 18:59 [PATCHv2 0/5] nvme: fix initialization memleak Keith Busch
                   ` (3 preceding siblings ...)
  2024-06-04 18:59 ` [PATCHv2 4/5] nvme: fc: " Keith Busch
@ 2024-06-04 18:59 ` Keith Busch
  2024-06-06 12:13   ` Yi Zhang
  2024-06-05  1:09 ` [PATCHv2 0/5] nvme: fix initialization memleak Chaitanya Kulkarni
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 10+ messages in thread
From: Keith Busch @ 2024-06-04 18:59 UTC (permalink / raw)
  To: linux-nvme
  Cc: hch, sagi, mlombard, hare, shinichiro.kawasaki, james.smart,
	Keith Busch

From: Keith Busch <kbusch@kernel.org>

Combining both creates an ambiguous cleanup scenario for the caller if
an error is returned: does the device reference need to be dropped or
did the error occur before the device was initialized? If an error
occurs after the device is added, then the existing cleanup routines
will leak memory.

Furthermore, the nvme core is taking it upon itself to free the device's
kobj.name under certain conditions rather than go the the core device
API. We shouldn't be peaking into these implementation details.

Split the device initialization from the addition to make it easier to
know the error handling actions, fix the existing memory leaks, and stop
the device layering violations.

Link: https://lore.kernel.org/linux-nvme/c4050a37-ecc9-462c-9772-65e25166f439@grimberg.me/
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/host/apple.c  |  5 ++++
 drivers/nvme/host/core.c   | 58 +++++++++++++++++++++++---------------
 drivers/nvme/host/fc.c     |  5 ++++
 drivers/nvme/host/nvme.h   |  1 +
 drivers/nvme/host/pci.c    |  5 ++++
 drivers/nvme/host/rdma.c   |  5 ++++
 drivers/nvme/host/tcp.c    |  5 ++++
 drivers/nvme/target/loop.c |  5 ++++
 8 files changed, 66 insertions(+), 23 deletions(-)

diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index d3384aecc0d39..9760c97bc8f19 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -1531,6 +1531,10 @@ static int apple_nvme_probe(struct platform_device *pdev)
 	if (IS_ERR(anv))
 		return PTR_ERR(anv);
 
+	result = nvme_add_ctrl(&anv->ctrl);
+	if (result)
+		goto out_put_ctrl;
+
 	anv->ctrl.admin_q = blk_mq_alloc_queue(&anv->admin_tagset, NULL, NULL);
 	if (IS_ERR(anv->ctrl.admin_q)) {
 		ret = -ENOMEM;
@@ -1545,6 +1549,7 @@ static int apple_nvme_probe(struct platform_device *pdev)
 
 out_uninit_ctrl:
 	nvme_uninit_ctrl(&dev->ctrl);
+out_put_ctrl:
 	nvme_put_ctrl(&dev->ctrl);
 	return ret;
 }
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f5d150c62955d..070cc4d76b6a4 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4611,6 +4611,9 @@ static void nvme_free_ctrl(struct device *dev)
  * Initialize a NVMe controller structures.  This needs to be called during
  * earliest initialization so that we have the initialized structured around
  * during probing.
+ *
+ * On success, the caller must use the nvme_put_ctrl() to release this when
+ * needed, which also invokes the ops->free_ctrl() callback.
  */
 int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 		const struct nvme_ctrl_ops *ops, unsigned long quirks)
@@ -4659,6 +4662,12 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 		goto out;
 	ctrl->instance = ret;
 
+	ret = nvme_auth_init_ctrl(ctrl);
+	if (ret)
+		goto out_release_instance;
+
+	nvme_mpath_init_ctrl(ctrl);
+
 	device_initialize(&ctrl->ctrl_device);
 	ctrl->device = &ctrl->ctrl_device;
 	ctrl->device->devt = MKDEV(MAJOR(nvme_ctrl_base_chr_devt),
@@ -4671,16 +4680,36 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 		ctrl->device->groups = nvme_dev_attr_groups;
 	ctrl->device->release = nvme_free_ctrl;
 	dev_set_drvdata(ctrl->device, ctrl);
+
+	return ret;
+
+out_release_instance:
+	ida_free(&nvme_instance_ida, ctrl->instance);
+out:
+	if (ctrl->discard_page)
+		__free_page(ctrl->discard_page);
+	cleanup_srcu_struct(&ctrl->srcu);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nvme_init_ctrl);
+
+/*
+ * On success, returns with an elevated controller reference and caller must
+ * use nvme_uninit_ctrl() to properly free resources associated with the ctrl.
+ */
+int nvme_add_ctrl(struct nvme_ctrl *ctrl)
+{
+	int ret;
+
 	ret = dev_set_name(ctrl->device, "nvme%d", ctrl->instance);
 	if (ret)
-		goto out_release_instance;
+		return ret;
 
-	nvme_get_ctrl(ctrl);
 	cdev_init(&ctrl->cdev, &nvme_dev_fops);
-	ctrl->cdev.owner = ops->module;
+	ctrl->cdev.owner = ctrl->ops->module;
 	ret = cdev_device_add(&ctrl->cdev, ctrl->device);
 	if (ret)
-		goto out_free_name;
+		return ret;
 
 	/*
 	 * Initialize latency tolerance controls.  The sysfs files won't
@@ -4691,28 +4720,11 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 		min(default_ps_max_latency_us, (unsigned long)S32_MAX));
 
 	nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device));
-	nvme_mpath_init_ctrl(ctrl);
-	ret = nvme_auth_init_ctrl(ctrl);
-	if (ret)
-		goto out_free_cdev;
+	nvme_get_ctrl(ctrl);
 
 	return 0;
-out_free_cdev:
-	nvme_fault_inject_fini(&ctrl->fault_inject);
-	dev_pm_qos_hide_latency_tolerance(ctrl->device);
-	cdev_device_del(&ctrl->cdev, ctrl->device);
-out_free_name:
-	nvme_put_ctrl(ctrl);
-	kfree_const(ctrl->device->kobj.name);
-out_release_instance:
-	ida_free(&nvme_instance_ida, ctrl->instance);
-out:
-	if (ctrl->discard_page)
-		__free_page(ctrl->discard_page);
-	cleanup_srcu_struct(&ctrl->srcu);
-	return ret;
 }
-EXPORT_SYMBOL_GPL(nvme_init_ctrl);
+EXPORT_SYMBOL_GPL(nvme_add_ctrl);
 
 /* let I/O to all namespaces fail in preparation for surprise removal */
 void nvme_mark_namespaces_dead(struct nvme_ctrl *ctrl)
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index c52446013388f..d5a383766b34d 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3563,6 +3563,10 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	if (IS_ERR(ctrl))
 		return ERR_CAST(ctrl);
 
+	ret = nvme_add_ctrl(&ctrl->ctrl);
+	if (ret)
+		goto out_put_ctrl;
+
 	ret = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set,
 			&nvme_fc_admin_mq_ops,
 			struct_size_t(struct nvme_fcp_op_w_sgl, priv,
@@ -3607,6 +3611,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	/* initiate nvme ctrl ref counting teardown */
 	nvme_uninit_ctrl(&ctrl->ctrl);
 
+out_put_ctrl:
 	/* Remove core ctrl ref. */
 	nvme_put_ctrl(&ctrl->ctrl);
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index f3a41133ac3f9..9693490680866 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -792,6 +792,7 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, bool shutdown);
 int nvme_enable_ctrl(struct nvme_ctrl *ctrl);
 int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 		const struct nvme_ctrl_ops *ops, unsigned long quirks);
+int nvme_add_ctrl(struct nvme_ctrl *ctrl);
 void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
 void nvme_start_ctrl(struct nvme_ctrl *ctrl);
 void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 102a9fb0c65ff..c92125b0238d4 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -3015,6 +3015,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (IS_ERR(dev))
 		return PTR_ERR(dev);
 
+	result = nvme_add_ctrl(&dev->ctrl);
+	if (result)
+		goto out_put_ctrl;
+
 	result = nvme_dev_map(dev);
 	if (result)
 		goto out_uninit_ctrl;
@@ -3101,6 +3105,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	nvme_dev_unmap(dev);
 out_uninit_ctrl:
 	nvme_uninit_ctrl(&dev->ctrl);
+out_put_ctrl:
 	nvme_put_ctrl(&dev->ctrl);
 	return result;
 }
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 94d4f3dbac6b6..5c44c7c5c688c 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -2323,6 +2323,10 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	if (IS_ERR(ctrl))
 		return ERR_CAST(ctrl);
 
+	ret = nvme_add_ctrl(&ctrl->ctrl);
+	if (ret)
+		goto out_put_ctrl;
+
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
 	WARN_ON_ONCE(!changed);
 
@@ -2341,6 +2345,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 
 out_uninit_ctrl:
 	nvme_uninit_ctrl(&ctrl->ctrl);
+out_put_ctrl:
 	nvme_put_ctrl(&ctrl->ctrl);
 	if (ret > 0)
 		ret = -EIO;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 5ee3bbc67f411..3be67c98c906e 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2779,6 +2779,10 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
 	if (IS_ERR(ctrl))
 		return ERR_CAST(ctrl);
 
+	ret = nvme_add_ctrl(&ctrl->ctrl);
+	if (ret)
+		goto out_put_ctrl;
+
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
 		WARN_ON_ONCE(1);
 		ret = -EINTR;
@@ -2800,6 +2804,7 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
 
 out_uninit_ctrl:
 	nvme_uninit_ctrl(&ctrl->ctrl);
+out_put_ctrl:
 	nvme_put_ctrl(&ctrl->ctrl);
 	if (ret > 0)
 		ret = -EIO;
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index e589915ddef85..e32790d8fc260 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -555,6 +555,10 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
 		goto out;
 	}
 
+	ret = nvme_add_ctrl(&ctrl->ctrl);
+	if (ret)
+		goto out_put_ctrl;
+
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
 		WARN_ON_ONCE(1);
 
@@ -611,6 +615,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
 	kfree(ctrl->queues);
 out_uninit_ctrl:
 	nvme_uninit_ctrl(&ctrl->ctrl);
+out_put_ctrl:
 	nvme_put_ctrl(&ctrl->ctrl);
 out:
 	if (ret > 0)
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCHv2 0/5] nvme: fix initialization memleak
  2024-06-04 18:59 [PATCHv2 0/5] nvme: fix initialization memleak Keith Busch
                   ` (4 preceding siblings ...)
  2024-06-04 18:59 ` [PATCHv2 5/5] nvme: split device add from initialization Keith Busch
@ 2024-06-05  1:09 ` Chaitanya Kulkarni
  2024-06-05  7:26 ` Christoph Hellwig
  2024-06-06  7:33 ` Sagi Grimberg
  7 siblings, 0 replies; 10+ messages in thread
From: Chaitanya Kulkarni @ 2024-06-05  1:09 UTC (permalink / raw)
  To: Keith Busch, linux-nvme@lists.infradead.org
  Cc: hch@lst.de, sagi@grimberg.me, mlombard@redhat.com, hare@suse.de,
	shinichiro.kawasaki@wdc.com, james.smart@broadcom.com,
	Keith Busch

On 6/4/24 11:59, Keith Busch wrote:
> From: Keith Busch <kbusch@kernel.org>
>
> I ran more of the blktests on loop in addtion to pci, also did more
> error injection at various points around the early initialization to
> make sure the kmemleak really is gone.
>

blktests with nvme-loop and nvme-tcp are passing at my end..

for this series, looks good...

Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>

-ck

blktests (master) # ./check nvme
nvme/002 (tr=loop) (create many subsystems and test discovery) [passed]
     runtime    ...  31.884s
nvme/003 (tr=loop) (test if we're sending keep-alives to a discovery 
controller) [passed]
     runtime    ...  11.271s
nvme/004 (tr=loop) (test nvme and nvmet UUID NS descriptors) [passed]
     runtime    ...  0.753s
nvme/005 (tr=loop) (reset local loopback target) [passed]
     runtime    ...  1.294s
nvme/006 (tr=loop bd=device) (create an NVMeOF target) [passed]
     runtime    ...  0.093s
nvme/006 (tr=loop bd=file) (create an NVMeOF target) [passed]
     runtime    ...  0.069s
nvme/008 (tr=loop bd=device) (create an NVMeOF host) [passed]
     runtime    ...  0.752s
nvme/008 (tr=loop bd=file) (create an NVMeOF host) [passed]
     runtime    ...  0.738s
nvme/010 (tr=loop bd=device) (run data verification fio job) [passed]
     runtime    ...  57.317s
nvme/010 (tr=loop bd=file) (run data verification fio job) [passed]
     runtime    ...  95.979s
nvme/012 (tr=loop bd=device) (run mkfs and data verification fio) [passed]
     runtime    ...  59.469s
nvme/012 (tr=loop bd=file) (run mkfs and data verification fio) [passed]
     runtime    ...  100.458s
nvme/014 (tr=loop bd=device) (flush a command from host) [passed]
     runtime    ...  9.122s
nvme/014 (tr=loop bd=file) (flush a command from host) [passed]
     runtime    ...  7.001s
nvme/016 (tr=loop) (create/delete many NVMeOF block device-backed ns and 
test discovery) [passed]
     runtime    ...  20.169s
nvme/017 (tr=loop) (create/delete many file-ns and test discovery) [passed]
     runtime    ...  21.960s
nvme/018 (tr=loop) (unit test NVMe-oF out of range access on a file 
backend) [passed]
     runtime    ...  0.742s
nvme/019 (tr=loop bd=device) (test NVMe DSM Discard command) [passed]
     runtime    ...  0.754s
nvme/019 (tr=loop bd=file) (test NVMe DSM Discard command) [passed]
     runtime    ...  0.726s
nvme/021 (tr=loop bd=device) (test NVMe list command) [passed]
     runtime    ...  0.755s
nvme/021 (tr=loop bd=file) (test NVMe list command) [passed]
     runtime    ...  0.724s
nvme/022 (tr=loop bd=device) (test NVMe reset command) [passed]
     runtime    ...  1.317s
nvme/022 (tr=loop bd=file) (test NVMe reset command) [passed]
     runtime    ...  1.299s
nvme/023 (tr=loop bd=device) (test NVMe smart-log command) [passed]
     runtime    ...  0.751s
nvme/023 (tr=loop bd=file) (test NVMe smart-log command) [passed]
     runtime    ...  0.712s
nvme/025 (tr=loop bd=device) (test NVMe effects-log) [passed]
     runtime    ...  0.746s
nvme/025 (tr=loop bd=file) (test NVMe effects-log) [passed]
     runtime    ...  0.715s
nvme/026 (tr=loop bd=device) (test NVMe ns-descs) [passed]
     runtime    ...  0.741s
nvme/026 (tr=loop bd=file) (test NVMe ns-descs) [passed]
     runtime    ...  0.714s
nvme/027 (tr=loop bd=device) (test NVMe ns-rescan command) [passed]
     runtime    ...  0.775s
nvme/027 (tr=loop bd=file) (test NVMe ns-rescan command) [passed]
     runtime    ...  0.761s
nvme/028 (tr=loop bd=device) (test NVMe list-subsys) [passed]
     runtime    ...  0.738s
nvme/028 (tr=loop bd=file) (test NVMe list-subsys) [passed]
     runtime    ...  0.710s
nvme/029 (tr=loop) (test userspace IO via nvme-cli read/write interface) 
[passed]
     runtime    ...  0.914s
nvme/030 (tr=loop) (ensure the discovery generation counter is updated 
appropriately) [passed]
     runtime    ...  0.363s
nvme/031 (tr=loop) (test deletion of NVMeOF controllers immediately 
after setup) [passed]
     runtime    ...  7.090s
nvme/038 (tr=loop) (test deletion of NVMeOF subsystem without enabling) 
[passed]
     runtime    ...  0.019s
nvme/040 (tr=loop) (test nvme fabrics controller reset/disconnect 
operation during I/O) [passed]
     runtime    ...  7.217s
nvme/041 (tr=loop) (Create authenticated connections) [passed]
     runtime    ...  2.216s
nvme/042 (tr=loop) (Test dhchap key types for authenticated connections) 
[passed]
     runtime    ...  7.456s
nvme/043 (tr=loop) (Test hash and DH group variations for authenticated 
connections) [passed]
     runtime  23.558s  ...  27.950s
nvme/044 (tr=loop) (Test bi-directional authentication) [passed]
     runtime    ...  4.652s
nvme/045 (tr=loop) (Test re-authentication) [passed]
     runtime    ...  2.523s
nvme/047 (tr=loop) (test different queue types for fabric transports) 
[not run]
     nvme_trtype=loop is not supported in this test
nvme/048 (tr=loop) (Test queue count changes on reconnect)   [not run]
     nvme_trtype=loop is not supported in this test
nvme/052 (tr=loop) (test nvmet concurrent ns enable/disable) [passed]
     runtime    ...  2.240s

blktests (master) # nvme_trtype=tcp ./check nvme
nvme/002 (tr=tcp) (create many subsystems and test discovery) [not run]
     nvme_trtype=tcp is not supported in this test
nvme/003 (tr=tcp) (test if we're sending keep-alives to a discovery 
controller) [passed]
     runtime  11.238s  ...  11.262s
nvme/004 (tr=tcp) (test nvme and nvmet UUID NS descriptors) [passed]
     runtime  0.246s  ...  0.261s
nvme/005 (tr=tcp) (reset local loopback target) [passed]
     runtime  0.311s  ...  0.311s
nvme/006 (tr=tcp bd=device) (create an NVMeOF target) [passed]
     runtime  0.087s  ...  0.090s
nvme/006 (tr=tcp bd=file) (create an NVMeOF target) [passed]
     runtime  0.074s  ...  0.072s
nvme/008 (tr=tcp bd=device) (create an NVMeOF host) [passed]
     runtime  0.245s  ...  0.259s
nvme/008 (tr=tcp bd=file) (create an NVMeOF host) [passed]
     runtime  0.206s  ...  0.192s
nvme/010 (tr=tcp bd=device) (run data verification fio job) [passed]
     runtime  64.322s  ...  62.326s
nvme/010 (tr=tcp bd=file) (run data verification fio job) [passed]
     runtime  97.964s  ...  95.985s
nvme/012 (tr=tcp bd=device) (run mkfs and data verification fio) [passed]
     runtime  68.434s  ...  63.920s
nvme/012 (tr=tcp bd=file) (run mkfs and data verification fio) [passed]
     runtime  103.278s  ...  105.927s
nvme/014 (tr=tcp bd=device) (flush a command from host) [passed]
     runtime  7.905s  ...  8.228s
nvme/014 (tr=tcp bd=file) (flush a command from host) [passed]
     runtime  6.517s  ...  6.434s
nvme/016 (tr=tcp) (create/delete many NVMeOF block device-backed ns and 
test discovery) [not run]
     nvme_trtype=tcp is not supported in this test
nvme/017 (tr=tcp) (create/delete many file-ns and test discovery) [not run]
     nvme_trtype=tcp is not supported in this test
nvme/018 (tr=tcp) (unit test NVMe-oF out of range access on a file 
backend) [passed]
     runtime  0.221s  ...  0.213s
nvme/019 (tr=tcp bd=device) (test NVMe DSM Discard command) [passed]
     runtime  0.255s  ...  0.250s
nvme/019 (tr=tcp bd=file) (test NVMe DSM Discard command) [passed]
     runtime  0.228s  ...  0.210s
nvme/021 (tr=tcp bd=device) (test NVMe list command) [passed]
     runtime  0.232s  ...  0.246s
nvme/021 (tr=tcp bd=file) (test NVMe list command) [passed]
     runtime  0.221s  ...  0.204s
nvme/022 (tr=tcp bd=device) (test NVMe reset command) [passed]
     runtime  0.308s  ...  0.320s
nvme/022 (tr=tcp bd=file) (test NVMe reset command) [passed]
     runtime  0.283s  ...  0.293s
nvme/023 (tr=tcp bd=device) (test NVMe smart-log command) [passed]
     runtime  0.232s  ...  0.236s
nvme/023 (tr=tcp bd=file) (test NVMe smart-log command) [passed]
     runtime  0.208s  ...  0.224s
nvme/025 (tr=tcp bd=device) (test NVMe effects-log) [passed]
     runtime  0.229s  ...  0.254s
nvme/025 (tr=tcp bd=file) (test NVMe effects-log) [passed]
     runtime  0.217s  ...  0.225s
nvme/026 (tr=tcp bd=device) (test NVMe ns-descs) [passed]
     runtime  0.258s  ...  0.239s
nvme/026 (tr=tcp bd=file) (test NVMe ns-descs) [passed]
     runtime  0.238s  ...  0.230s
nvme/027 (tr=tcp bd=device) (test NVMe ns-rescan command) [passed]
     runtime  0.284s  ...  0.277s
nvme/027 (tr=tcp bd=file) (test NVMe ns-rescan command) [passed]
     runtime  0.256s  ...  0.260s
nvme/028 (tr=tcp bd=device) (test NVMe list-subsys) [passed]
     runtime  0.247s  ...  0.246s
nvme/028 (tr=tcp bd=file) (test NVMe list-subsys) [passed]
     runtime  0.224s  ...  0.232s
nvme/029 (tr=tcp) (test userspace IO via nvme-cli read/write interface) 
[passed]
     runtime  0.429s  ...  0.406s
nvme/030 (tr=tcp) (ensure the discovery generation counter is updated 
appropriately) [passed]
     runtime  0.171s  ...  0.169s
nvme/031 (tr=tcp) (test deletion of NVMeOF controllers immediately after 
setup) [passed]
     runtime  2.120s  ...  2.121s
nvme/038 (tr=tcp) (test deletion of NVMeOF subsystem without enabling) 
[passed]
     runtime  0.022s  ...  0.022s
nvme/040 (tr=tcp) (test nvme fabrics controller reset/disconnect 
operation during I/O) [passed]
     runtime  6.259s  ...  6.289s
nvme/041 (tr=tcp) (Create authenticated connections) [passed]
     runtime  1.709s  ...  1.710s
nvme/042 (tr=tcp) (Test dhchap key types for authenticated connections) 
[passed]
     runtime  3.936s  ...  3.992s
nvme/043 (tr=tcp) (Test hash and DH group variations for authenticated 
connections) [passed]
     runtime  19.664s  ...  19.801s
nvme/044 (tr=tcp) (Test bi-directional authentication) [passed]
     runtime  3.577s  ...  3.609s
nvme/045 (tr=tcp) (Test re-authentication) [passed]
     runtime  2.014s  ...  1.440s
nvme/047 (tr=tcp) (test different queue types for fabric transports) 
[passed]
     runtime    ...  1.114s
nvme/048 (tr=tcp) (Test queue count changes on reconnect) [passed]
     runtime  5.300s  ...  6.308s
nvme/052 (tr=tcp) (test nvmet concurrent ns enable/disable) [passed]
     runtime  2.206s  ...  2.234s
blktests (master) #



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCHv2 0/5] nvme: fix initialization memleak
  2024-06-04 18:59 [PATCHv2 0/5] nvme: fix initialization memleak Keith Busch
                   ` (5 preceding siblings ...)
  2024-06-05  1:09 ` [PATCHv2 0/5] nvme: fix initialization memleak Chaitanya Kulkarni
@ 2024-06-05  7:26 ` Christoph Hellwig
  2024-06-06  7:33 ` Sagi Grimberg
  7 siblings, 0 replies; 10+ messages in thread
From: Christoph Hellwig @ 2024-06-05  7:26 UTC (permalink / raw)
  To: Keith Busch
  Cc: linux-nvme, hch, sagi, mlombard, hare, shinichiro.kawasaki,
	james.smart, Keith Busch

The whole series looks good to me:

Reviewed-by: Christoph Hellwig <hch@lst.de>



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCHv2 0/5] nvme: fix initialization memleak
  2024-06-04 18:59 [PATCHv2 0/5] nvme: fix initialization memleak Keith Busch
                   ` (6 preceding siblings ...)
  2024-06-05  7:26 ` Christoph Hellwig
@ 2024-06-06  7:33 ` Sagi Grimberg
  7 siblings, 0 replies; 10+ messages in thread
From: Sagi Grimberg @ 2024-06-06  7:33 UTC (permalink / raw)
  To: Keith Busch, linux-nvme
  Cc: hch, mlombard, hare, shinichiro.kawasaki, james.smart,
	Keith Busch

Looks good from here too. for the series,

Reviewed-by: Sagi Grimberg <sagi@grimberg.me>


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCHv2 5/5] nvme: split device add from initialization
  2024-06-04 18:59 ` [PATCHv2 5/5] nvme: split device add from initialization Keith Busch
@ 2024-06-06 12:13   ` Yi Zhang
  0 siblings, 0 replies; 10+ messages in thread
From: Yi Zhang @ 2024-06-06 12:13 UTC (permalink / raw)
  To: Keith Busch
  Cc: linux-nvme, hch, sagi, mlombard, hare, shinichiro.kawasaki,
	james.smart, Keith Busch

Verified the kmemleak reported was fixed by this patch, thanks.

Tested-by: Yi Zhang <yi.zhang@redhat.com>

On Wed, Jun 5, 2024 at 3:01 AM Keith Busch <kbusch@meta.com> wrote:
>
> From: Keith Busch <kbusch@kernel.org>
>
> Combining both creates an ambiguous cleanup scenario for the caller if
> an error is returned: does the device reference need to be dropped or
> did the error occur before the device was initialized? If an error
> occurs after the device is added, then the existing cleanup routines
> will leak memory.
>
> Furthermore, the nvme core is taking it upon itself to free the device's
> kobj.name under certain conditions rather than go the the core device
> API. We shouldn't be peaking into these implementation details.
>
> Split the device initialization from the addition to make it easier to
> know the error handling actions, fix the existing memory leaks, and stop
> the device layering violations.
>
> Link: https://lore.kernel.org/linux-nvme/c4050a37-ecc9-462c-9772-65e25166f439@grimberg.me/
> Signed-off-by: Keith Busch <kbusch@kernel.org>
> ---
>  drivers/nvme/host/apple.c  |  5 ++++
>  drivers/nvme/host/core.c   | 58 +++++++++++++++++++++++---------------
>  drivers/nvme/host/fc.c     |  5 ++++
>  drivers/nvme/host/nvme.h   |  1 +
>  drivers/nvme/host/pci.c    |  5 ++++
>  drivers/nvme/host/rdma.c   |  5 ++++
>  drivers/nvme/host/tcp.c    |  5 ++++
>  drivers/nvme/target/loop.c |  5 ++++
>  8 files changed, 66 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
> index d3384aecc0d39..9760c97bc8f19 100644
> --- a/drivers/nvme/host/apple.c
> +++ b/drivers/nvme/host/apple.c
> @@ -1531,6 +1531,10 @@ static int apple_nvme_probe(struct platform_device *pdev)
>         if (IS_ERR(anv))
>                 return PTR_ERR(anv);
>
> +       result = nvme_add_ctrl(&anv->ctrl);
> +       if (result)
> +               goto out_put_ctrl;
> +
>         anv->ctrl.admin_q = blk_mq_alloc_queue(&anv->admin_tagset, NULL, NULL);
>         if (IS_ERR(anv->ctrl.admin_q)) {
>                 ret = -ENOMEM;
> @@ -1545,6 +1549,7 @@ static int apple_nvme_probe(struct platform_device *pdev)
>
>  out_uninit_ctrl:
>         nvme_uninit_ctrl(&dev->ctrl);
> +out_put_ctrl:
>         nvme_put_ctrl(&dev->ctrl);
>         return ret;
>  }
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index f5d150c62955d..070cc4d76b6a4 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -4611,6 +4611,9 @@ static void nvme_free_ctrl(struct device *dev)
>   * Initialize a NVMe controller structures.  This needs to be called during
>   * earliest initialization so that we have the initialized structured around
>   * during probing.
> + *
> + * On success, the caller must use the nvme_put_ctrl() to release this when
> + * needed, which also invokes the ops->free_ctrl() callback.
>   */
>  int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
>                 const struct nvme_ctrl_ops *ops, unsigned long quirks)
> @@ -4659,6 +4662,12 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
>                 goto out;
>         ctrl->instance = ret;
>
> +       ret = nvme_auth_init_ctrl(ctrl);
> +       if (ret)
> +               goto out_release_instance;
> +
> +       nvme_mpath_init_ctrl(ctrl);
> +
>         device_initialize(&ctrl->ctrl_device);
>         ctrl->device = &ctrl->ctrl_device;
>         ctrl->device->devt = MKDEV(MAJOR(nvme_ctrl_base_chr_devt),
> @@ -4671,16 +4680,36 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
>                 ctrl->device->groups = nvme_dev_attr_groups;
>         ctrl->device->release = nvme_free_ctrl;
>         dev_set_drvdata(ctrl->device, ctrl);
> +
> +       return ret;
> +
> +out_release_instance:
> +       ida_free(&nvme_instance_ida, ctrl->instance);
> +out:
> +       if (ctrl->discard_page)
> +               __free_page(ctrl->discard_page);
> +       cleanup_srcu_struct(&ctrl->srcu);
> +       return ret;
> +}
> +EXPORT_SYMBOL_GPL(nvme_init_ctrl);
> +
> +/*
> + * On success, returns with an elevated controller reference and caller must
> + * use nvme_uninit_ctrl() to properly free resources associated with the ctrl.
> + */
> +int nvme_add_ctrl(struct nvme_ctrl *ctrl)
> +{
> +       int ret;
> +
>         ret = dev_set_name(ctrl->device, "nvme%d", ctrl->instance);
>         if (ret)
> -               goto out_release_instance;
> +               return ret;
>
> -       nvme_get_ctrl(ctrl);
>         cdev_init(&ctrl->cdev, &nvme_dev_fops);
> -       ctrl->cdev.owner = ops->module;
> +       ctrl->cdev.owner = ctrl->ops->module;
>         ret = cdev_device_add(&ctrl->cdev, ctrl->device);
>         if (ret)
> -               goto out_free_name;
> +               return ret;
>
>         /*
>          * Initialize latency tolerance controls.  The sysfs files won't
> @@ -4691,28 +4720,11 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
>                 min(default_ps_max_latency_us, (unsigned long)S32_MAX));
>
>         nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device));
> -       nvme_mpath_init_ctrl(ctrl);
> -       ret = nvme_auth_init_ctrl(ctrl);
> -       if (ret)
> -               goto out_free_cdev;
> +       nvme_get_ctrl(ctrl);
>
>         return 0;
> -out_free_cdev:
> -       nvme_fault_inject_fini(&ctrl->fault_inject);
> -       dev_pm_qos_hide_latency_tolerance(ctrl->device);
> -       cdev_device_del(&ctrl->cdev, ctrl->device);
> -out_free_name:
> -       nvme_put_ctrl(ctrl);
> -       kfree_const(ctrl->device->kobj.name);
> -out_release_instance:
> -       ida_free(&nvme_instance_ida, ctrl->instance);
> -out:
> -       if (ctrl->discard_page)
> -               __free_page(ctrl->discard_page);
> -       cleanup_srcu_struct(&ctrl->srcu);
> -       return ret;
>  }
> -EXPORT_SYMBOL_GPL(nvme_init_ctrl);
> +EXPORT_SYMBOL_GPL(nvme_add_ctrl);
>
>  /* let I/O to all namespaces fail in preparation for surprise removal */
>  void nvme_mark_namespaces_dead(struct nvme_ctrl *ctrl)
> diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
> index c52446013388f..d5a383766b34d 100644
> --- a/drivers/nvme/host/fc.c
> +++ b/drivers/nvme/host/fc.c
> @@ -3563,6 +3563,10 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
>         if (IS_ERR(ctrl))
>                 return ERR_CAST(ctrl);
>
> +       ret = nvme_add_ctrl(&ctrl->ctrl);
> +       if (ret)
> +               goto out_put_ctrl;
> +
>         ret = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set,
>                         &nvme_fc_admin_mq_ops,
>                         struct_size_t(struct nvme_fcp_op_w_sgl, priv,
> @@ -3607,6 +3611,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
>         /* initiate nvme ctrl ref counting teardown */
>         nvme_uninit_ctrl(&ctrl->ctrl);
>
> +out_put_ctrl:
>         /* Remove core ctrl ref. */
>         nvme_put_ctrl(&ctrl->ctrl);
>
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index f3a41133ac3f9..9693490680866 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -792,6 +792,7 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, bool shutdown);
>  int nvme_enable_ctrl(struct nvme_ctrl *ctrl);
>  int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
>                 const struct nvme_ctrl_ops *ops, unsigned long quirks);
> +int nvme_add_ctrl(struct nvme_ctrl *ctrl);
>  void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
>  void nvme_start_ctrl(struct nvme_ctrl *ctrl);
>  void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index 102a9fb0c65ff..c92125b0238d4 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -3015,6 +3015,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>         if (IS_ERR(dev))
>                 return PTR_ERR(dev);
>
> +       result = nvme_add_ctrl(&dev->ctrl);
> +       if (result)
> +               goto out_put_ctrl;
> +
>         result = nvme_dev_map(dev);
>         if (result)
>                 goto out_uninit_ctrl;
> @@ -3101,6 +3105,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>         nvme_dev_unmap(dev);
>  out_uninit_ctrl:
>         nvme_uninit_ctrl(&dev->ctrl);
> +out_put_ctrl:
>         nvme_put_ctrl(&dev->ctrl);
>         return result;
>  }
> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
> index 94d4f3dbac6b6..5c44c7c5c688c 100644
> --- a/drivers/nvme/host/rdma.c
> +++ b/drivers/nvme/host/rdma.c
> @@ -2323,6 +2323,10 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
>         if (IS_ERR(ctrl))
>                 return ERR_CAST(ctrl);
>
> +       ret = nvme_add_ctrl(&ctrl->ctrl);
> +       if (ret)
> +               goto out_put_ctrl;
> +
>         changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
>         WARN_ON_ONCE(!changed);
>
> @@ -2341,6 +2345,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
>
>  out_uninit_ctrl:
>         nvme_uninit_ctrl(&ctrl->ctrl);
> +out_put_ctrl:
>         nvme_put_ctrl(&ctrl->ctrl);
>         if (ret > 0)
>                 ret = -EIO;
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index 5ee3bbc67f411..3be67c98c906e 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -2779,6 +2779,10 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
>         if (IS_ERR(ctrl))
>                 return ERR_CAST(ctrl);
>
> +       ret = nvme_add_ctrl(&ctrl->ctrl);
> +       if (ret)
> +               goto out_put_ctrl;
> +
>         if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
>                 WARN_ON_ONCE(1);
>                 ret = -EINTR;
> @@ -2800,6 +2804,7 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
>
>  out_uninit_ctrl:
>         nvme_uninit_ctrl(&ctrl->ctrl);
> +out_put_ctrl:
>         nvme_put_ctrl(&ctrl->ctrl);
>         if (ret > 0)
>                 ret = -EIO;
> diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
> index e589915ddef85..e32790d8fc260 100644
> --- a/drivers/nvme/target/loop.c
> +++ b/drivers/nvme/target/loop.c
> @@ -555,6 +555,10 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
>                 goto out;
>         }
>
> +       ret = nvme_add_ctrl(&ctrl->ctrl);
> +       if (ret)
> +               goto out_put_ctrl;
> +
>         if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
>                 WARN_ON_ONCE(1);
>
> @@ -611,6 +615,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
>         kfree(ctrl->queues);
>  out_uninit_ctrl:
>         nvme_uninit_ctrl(&ctrl->ctrl);
> +out_put_ctrl:
>         nvme_put_ctrl(&ctrl->ctrl);
>  out:
>         if (ret > 0)
> --
> 2.43.0
>
>


--
Best Regards,
  Yi Zhang



^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2024-06-06 12:13 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-06-04 18:59 [PATCHv2 0/5] nvme: fix initialization memleak Keith Busch
2024-06-04 18:59 ` [PATCHv2 1/5] nvme: apple: fix device reference counting Keith Busch
2024-06-04 18:59 ` [PATCHv2 2/5] nvme: tcp: split controller bringup handling Keith Busch
2024-06-04 18:59 ` [PATCHv2 3/5] nvme: rdma: " Keith Busch
2024-06-04 18:59 ` [PATCHv2 4/5] nvme: fc: " Keith Busch
2024-06-04 18:59 ` [PATCHv2 5/5] nvme: split device add from initialization Keith Busch
2024-06-06 12:13   ` Yi Zhang
2024-06-05  1:09 ` [PATCHv2 0/5] nvme: fix initialization memleak Chaitanya Kulkarni
2024-06-05  7:26 ` Christoph Hellwig
2024-06-06  7:33 ` Sagi Grimberg

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox