* [PATCH] nvme: start keep-alive after admin queue setup
From: Hannes Reinecke @ 2023-10-17 13:45 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Sagi Grimberg, Keith Busch, linux-nvme, Hannes Reinecke

Setting up I/O queues can take quite some time on large and/or
busy setups, so the keep-alive timeout (KATO) might expire before
all I/O queues have been set up.
Fix this by starting the keep-alive timer right after the admin
queue is operational.
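
For reference, the keep-alive state this patch moves around is a
delayed work item armed at a fraction of KATO; a simplified sketch
of the core.c helpers (last-activity tracking and error handling
elided):

void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
{
	if (ctrl->kato == 0)	/* keep-alive disabled */
		return;
	/* ka_work sends a Keep Alive command and re-arms itself */
	queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ / 2);
}

void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
{
	if (ctrl->kato == 0)
		return;
	cancel_delayed_work_sync(&ctrl->ka_work);
}

Until the first helper has run the host sends no Keep Alive
commands, so an otherwise idle controller can hit its keep-alive
timeout while nvme_setup_io_queues() is still working.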

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/nvme/host/apple.c  |  4 +++-
 drivers/nvme/host/core.c   |  4 ++--
 drivers/nvme/host/fc.c     |  1 +
 drivers/nvme/host/pci.c    | 18 +++++++++++-------
 drivers/nvme/host/rdma.c   |  1 +
 drivers/nvme/host/tcp.c    |  1 +
 drivers/nvme/target/loop.c |  1 +
 7 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index 596bb11eeba5..a434eb2a7129 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -1123,7 +1123,7 @@ static void apple_nvme_reset_work(struct work_struct *work)
 	dev_dbg(anv->dev, "Creating IOCQ");
 	ret = apple_nvme_create_cq(anv);
 	if (ret)
-		goto out;
+		goto out_stop;
 	dev_dbg(anv->dev, "Creating IOSQ");
 	ret = apple_nvme_create_sq(anv);
 	if (ret)
@@ -1162,6 +1162,8 @@ static void apple_nvme_reset_work(struct work_struct *work)
 	apple_nvme_remove_sq(anv);
 out_remove_cq:
 	apple_nvme_remove_cq(anv);
+ out_stop:
+	nvme_stop_keep_alive(&anv->ctrl);
 out:
 	dev_warn(anv->ctrl.device, "Reset failure status: %d\n", ret);
 	nvme_change_ctrl_state(&anv->ctrl, NVME_CTRL_DELETING);
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 62612f87aafa..7c689752b360 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3200,6 +3200,8 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended)
 	clear_bit(NVME_CTRL_DIRTY_CAPABILITY, &ctrl->flags);
 	ctrl->identified = true;
 
+	nvme_start_keep_alive(ctrl);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(nvme_init_ctrl_finish);
@@ -4344,8 +4346,6 @@ EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
 
 void nvme_start_ctrl(struct nvme_ctrl *ctrl)
 {
-	nvme_start_keep_alive(ctrl);
-
 	nvme_enable_aen(ctrl);
 
 	/*
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index a15b37750d6e..b46b49d010d6 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3203,6 +3203,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	dev_warn(ctrl->ctrl.device,
 		"NVME-FC{%d}: create_assoc failed, assoc_id %llx ret %d\n",
 		ctrl->cnum, ctrl->association_id, ret);
+	nvme_stop_keep_alive(&ctrl->ctrl);
 	/* send a Disconnect(association) LS to fc-nvme target */
 	nvme_fc_xmt_disconnect_assoc(ctrl);
 	spin_lock_irqsave(&ctrl->lock, flags);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 60a08dfe8d75..97a86e7d8d27 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2731,11 +2731,11 @@ static void nvme_reset_work(struct work_struct *work)
 
 	result = nvme_setup_host_mem(dev);
 	if (result < 0)
-		goto out;
+		goto out_stop;
 
 	result = nvme_setup_io_queues(dev);
 	if (result)
-		goto out;
+		goto out_stop;
 
 	/*
 	 * Freeze and update the number of I/O queues as those might have
@@ -2764,7 +2764,7 @@ static void nvme_reset_work(struct work_struct *work)
 		dev_warn(dev->ctrl.device,
 			"failed to mark controller live state\n");
 		result = -ENODEV;
-		goto out;
+		goto out_stop;
 	}
 
 	nvme_start_ctrl(&dev->ctrl);
@@ -2772,6 +2772,8 @@ static void nvme_reset_work(struct work_struct *work)
 
  out_unlock:
 	mutex_unlock(&dev->shutdown_lock);
+ out_stop:
+	nvme_stop_keep_alive(&dev->ctrl);
  out:
 	/*
 	 * Set state to deleting now to avoid blocking nvme_wait_reset(), which
@@ -3021,17 +3023,17 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	result = nvme_init_ctrl_finish(&dev->ctrl, false);
 	if (result)
-		goto out_disable;
+		goto out_stop;
 
 	nvme_dbbuf_dma_alloc(dev);
 
 	result = nvme_setup_host_mem(dev);
 	if (result < 0)
-		goto out_disable;
+		goto out_stop;
 
 	result = nvme_setup_io_queues(dev);
 	if (result)
-		goto out_disable;
+		goto out_stop;
 
 	if (dev->online_queues > 1) {
 		nvme_alloc_io_tag_set(&dev->ctrl, &dev->tagset, &nvme_mq_ops,
@@ -3046,7 +3048,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		dev_warn(dev->ctrl.device,
 			"failed to mark controller live state\n");
 		result = -ENODEV;
-		goto out_disable;
+		goto out_stop;
 	}
 
 	pci_set_drvdata(pdev, dev);
@@ -3056,6 +3058,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	flush_work(&dev->ctrl.scan_work);
 	return 0;
 
+out_stop:
+	nvme_stop_keep_alive(&dev->ctrl);
 out_disable:
 	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
 	nvme_dev_disable(dev, true);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 337a624a537c..f51597c0730f 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1077,6 +1077,7 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
 		nvme_rdma_free_io_queues(ctrl);
 	}
 destroy_admin:
+	nvme_stop_keep_alive(&ctrl->ctrl);
 	nvme_quiesce_admin_queue(&ctrl->ctrl);
 	blk_sync_queue(ctrl->ctrl.admin_q);
 	nvme_rdma_stop_queue(&ctrl->queues[0]);
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 7eeaa71d1177..cd4e9a295b9e 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2288,6 +2288,7 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
 		nvme_tcp_destroy_io_queues(ctrl, new);
 	}
 destroy_admin:
+	nvme_stop_keep_alive(ctrl);
 	nvme_quiesce_admin_queue(ctrl);
 	blk_sync_queue(ctrl->admin_q);
 	nvme_tcp_stop_queue(ctrl, 0);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 48d5df054cd0..3bf469f33777 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -466,6 +466,7 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
 out_destroy_io:
 	nvme_loop_destroy_io_queues(ctrl);
 out_destroy_admin:
+	nvme_stop_keep_alive(&ctrl->ctrl);
 	nvme_loop_destroy_admin_queue(ctrl);
 out_disable:
 	dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
-- 
2.35.3




* Re: [PATCH] nvme: start keep-alive after admin queue setup
From: Christoph Hellwig @ 2023-10-18  5:45 UTC (permalink / raw)
  To: Hannes Reinecke; +Cc: Christoph Hellwig, Sagi Grimberg, Keith Busch, linux-nvme

On Tue, Oct 17, 2023 at 03:45:35PM +0200, Hannes Reinecke wrote:
> Setting up I/O queues can take quite some time on large and/or
> busy setups, so the keep-alive timeout (KATO) might expire before
> all I/O queues have been set up.
> Fix this by starting the keep-alive timer right after the admin
> queue is operational.

All these manual, unpaired nvme_stop_keep_alive calls look rather
unmaintainable.  What keeps us from doing it in
nvme_uninit_ctrl?
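
Something like the following, say (hypothetical sketch, not a
tested patch; it relies on nvme_stop_keep_alive being a safe no-op
when keep-alive was never armed, which cancel_delayed_work_sync on
the initialized ka_work should guarantee):

void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
{
	/* one unpaired stop in the common teardown path instead of
	 * a call in every transport's error unwinding
	 */
	nvme_stop_keep_alive(ctrl);

	/* ...existing nvme_uninit_ctrl teardown continues unchanged... */
}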




* Re: [PATCH] nvme: start keep-alive after admin queue setup
From: Hannes Reinecke @ 2023-10-18  7:31 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Sagi Grimberg, Keith Busch, linux-nvme

On 10/18/23 07:45, Christoph Hellwig wrote:
> On Tue, Oct 17, 2023 at 03:45:35PM +0200, Hannes Reinecke wrote:
>> Setting up I/O queues can take quite some time on large and/or
>> busy setups, so the keep-alive timeout (KATO) might expire before
>> all I/O queues have been set up.
>> Fix this by starting the keep-alive timer right after the admin
>> queue is operational.
> 
> All these manual, unpaired nvme_stop_keep_alive calls look rather
> unmaintainable.  What keeps us from doing it in
> nvme_uninit_ctrl?
> 
The problem here is the exit path of controller initialization:
it is per-transport, and each and every transport does it ever
so slightly differently.

I guess we could move starting and stopping the keep-alive into
nvme_quiesce_admin_queue()/nvme_unquiesce_admin_queue(), as
arguably quiescing should affect keep-alive, too.
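
Something along these lines (untested sketch against my reading of
core.c; the flag-guarded bodies are the existing helpers, only the
keep-alive calls would be new):

void nvme_quiesce_admin_queue(struct nvme_ctrl *ctrl)
{
	/* stop the timer before fencing the queue it submits on */
	nvme_stop_keep_alive(ctrl);
	if (!test_and_set_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->flags))
		blk_mq_quiesce_queue(ctrl->admin_q);
}

void nvme_unquiesce_admin_queue(struct nvme_ctrl *ctrl)
{
	if (test_and_clear_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->flags))
		blk_mq_unquiesce_queue(ctrl->admin_q);
	/* re-arm once the admin queue can take commands again */
	nvme_start_keep_alive(ctrl);
}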

Let me check.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Ivo Totev, Andrew
Myers, Andrew McDonald, Martje Boudien Moerman


