From: Ming Lei <ming.lei@redhat.com>
To: Keith Busch <keith.busch@intel.com>
Cc: Jens Axboe <axboe@kernel.dk>,
linux-block@vger.kernel.org, Ming Lei <ming.lei@redhat.com>,
James Smart <james.smart@broadcom.com>,
Jianchao Wang <jianchao.w.wang@oracle.com>,
Christoph Hellwig <hch@lst.de>, Sagi Grimberg <sagi@grimberg.me>,
linux-nvme@lists.infradead.org,
Laurence Oberman <loberman@redhat.com>
Subject: [PATCH V6 06/11] nvme: pci: freeze queue in nvme_dev_disable() in case of error recovery
Date: Wed, 16 May 2018 12:03:08 +0800 [thread overview]
Message-ID: <20180516040313.13596-7-ming.lei@redhat.com> (raw)
In-Reply-To: <20180516040313.13596-1-ming.lei@redhat.com>
When nvme_dev_disable() is used for error recovery, we should always
freeze the queues before shutting down the controller:
- the reset handler assumes the queues are frozen, and will wait_freeze &
unfreeze them explicitly; if the queues aren't frozen during nvme_dev_disable(),
the reset handler may wait forever even though there aren't any requests
allocated.
- this may avoid cancelling lots of requests during error recovery
This patch introduces the 'freeze_queue' parameter to fix this
issue.
Cc: James Smart <james.smart@broadcom.com>
Cc: Jianchao Wang <jianchao.w.wang@oracle.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: linux-nvme@lists.infradead.org
Cc: Laurence Oberman <loberman@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
drivers/nvme/host/pci.c | 47 ++++++++++++++++++++++++++++++++---------------
1 file changed, 32 insertions(+), 15 deletions(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 6413dad51107..365d1a5ee1eb 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -69,7 +69,8 @@ struct nvme_dev;
struct nvme_queue;
static void nvme_process_cq(struct nvme_queue *nvmeq);
-static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
+static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown, bool
+ freeze_queue);
/*
* Represents an NVM Express device. Each nvme_dev is a PCI function.
@@ -1206,7 +1207,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
*/
if (nvme_should_reset(dev, csts)) {
nvme_warn_reset(dev, csts);
- nvme_dev_disable(dev, false);
+ nvme_dev_disable(dev, false, true);
nvme_reset_ctrl(&dev->ctrl);
return BLK_EH_HANDLED;
}
@@ -1233,7 +1234,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
dev_warn(dev->ctrl.device,
"I/O %d QID %d timeout, disable controller\n",
req->tag, nvmeq->qid);
- nvme_dev_disable(dev, false);
+ nvme_dev_disable(dev, false, false);
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
return BLK_EH_HANDLED;
default:
@@ -1249,7 +1250,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
dev_warn(dev->ctrl.device,
"I/O %d QID %d timeout, reset controller\n",
req->tag, nvmeq->qid);
- nvme_dev_disable(dev, false);
+ nvme_dev_disable(dev, false, true);
nvme_reset_ctrl(&dev->ctrl);
/*
@@ -2254,19 +2255,35 @@ static void nvme_pci_disable(struct nvme_dev *dev)
}
}
-static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
+/*
+ * Resetting often follows nvme_dev_disable(), so queues need to be frozen
+ * before resetting.
+ */
+static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown, bool
+ freeze_queue)
{
int i;
bool dead = true;
struct pci_dev *pdev = to_pci_dev(dev->dev);
bool frozen = false;
+ /*
+ * 'freeze_queue' is only valid for non-shutdown; for shutdown we do
+ * an inline freeze & wait_freeze_timeout just to complete
+ * as many requests as possible before shutdown
+ */
+ if (shutdown)
+ freeze_queue = false;
+
+ if (freeze_queue)
+ nvme_start_freeze(&dev->ctrl);
+
mutex_lock(&dev->shutdown_lock);
if (pci_is_enabled(pdev)) {
u32 csts = readl(dev->bar + NVME_REG_CSTS);
- if (dev->ctrl.state == NVME_CTRL_LIVE ||
- dev->ctrl.state == NVME_CTRL_RESETTING) {
+ if (shutdown && (dev->ctrl.state == NVME_CTRL_LIVE ||
+ dev->ctrl.state == NVME_CTRL_RESETTING)) {
nvme_start_freeze(&dev->ctrl);
frozen = true;
}
@@ -2369,7 +2386,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
dev_warn(dev->ctrl.device, "Removing after probe failure status: %d\n", status);
nvme_get_ctrl(&dev->ctrl);
- nvme_dev_disable(dev, false);
+ nvme_dev_disable(dev, false, false);
if (!queue_work(nvme_wq, &dev->remove_work))
nvme_put_ctrl(&dev->ctrl);
}
@@ -2390,7 +2407,7 @@ static void nvme_reset_work(struct work_struct *work)
* moving on.
*/
if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
- nvme_dev_disable(dev, false);
+ nvme_dev_disable(dev, false, false);
/*
* Introduce CONNECTING state from nvme-fc/rdma transports to mark the
@@ -2639,7 +2656,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
static void nvme_reset_prepare(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
- nvme_dev_disable(dev, false);
+ nvme_dev_disable(dev, false, true);
}
static void nvme_reset_done(struct pci_dev *pdev)
@@ -2651,7 +2668,7 @@ static void nvme_reset_done(struct pci_dev *pdev)
static void nvme_shutdown(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
- nvme_dev_disable(dev, true);
+ nvme_dev_disable(dev, true, false);
}
/*
@@ -2670,13 +2687,13 @@ static void nvme_remove(struct pci_dev *pdev)
if (!pci_device_is_present(pdev)) {
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
- nvme_dev_disable(dev, false);
+ nvme_dev_disable(dev, false, false);
}
flush_work(&dev->ctrl.reset_work);
nvme_stop_ctrl(&dev->ctrl);
nvme_remove_namespaces(&dev->ctrl);
- nvme_dev_disable(dev, true);
+ nvme_dev_disable(dev, true, false);
nvme_free_host_mem(dev);
nvme_dev_remove_admin(dev);
nvme_free_queues(dev, 0);
@@ -2710,7 +2727,7 @@ static int nvme_suspend(struct device *dev)
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);
- nvme_dev_disable(ndev, true);
+ nvme_dev_disable(ndev, true, false);
return 0;
}
@@ -2742,7 +2759,7 @@ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
case pci_channel_io_frozen:
dev_warn(dev->ctrl.device,
"frozen state error detected, reset controller\n");
- nvme_dev_disable(dev, false);
+ nvme_dev_disable(dev, false, true);
return PCI_ERS_RESULT_NEED_RESET;
case pci_channel_io_perm_failure:
dev_warn(dev->ctrl.device,
--
2.9.5
WARNING: multiple messages have this Message-ID (diff)
From: ming.lei@redhat.com (Ming Lei)
Subject: [PATCH V6 06/11] nvme: pci: freeze queue in nvme_dev_disable() in case of error recovery
Date: Wed, 16 May 2018 12:03:08 +0800 [thread overview]
Message-ID: <20180516040313.13596-7-ming.lei@redhat.com> (raw)
In-Reply-To: <20180516040313.13596-1-ming.lei@redhat.com>
When nvme_dev_disable() is used for error recovery, we should always
freeze the queues before shutting down the controller:
- the reset handler assumes the queues are frozen, and will wait_freeze &
unfreeze them explicitly; if the queues aren't frozen during nvme_dev_disable(),
the reset handler may wait forever even though there aren't any requests
allocated.
- this may avoid cancelling lots of requests during error recovery
This patch introduces the 'freeze_queue' parameter to fix this
issue.
Cc: James Smart <james.smart at broadcom.com>
Cc: Jianchao Wang <jianchao.w.wang at oracle.com>
Cc: Christoph Hellwig <hch at lst.de>
Cc: Sagi Grimberg <sagi at grimberg.me>
Cc: linux-nvme at lists.infradead.org
Cc: Laurence Oberman <loberman at redhat.com>
Signed-off-by: Ming Lei <ming.lei at redhat.com>
---
drivers/nvme/host/pci.c | 47 ++++++++++++++++++++++++++++++++---------------
1 file changed, 32 insertions(+), 15 deletions(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 6413dad51107..365d1a5ee1eb 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -69,7 +69,8 @@ struct nvme_dev;
struct nvme_queue;
static void nvme_process_cq(struct nvme_queue *nvmeq);
-static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
+static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown, bool
+ freeze_queue);
/*
* Represents an NVM Express device. Each nvme_dev is a PCI function.
@@ -1206,7 +1207,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
*/
if (nvme_should_reset(dev, csts)) {
nvme_warn_reset(dev, csts);
- nvme_dev_disable(dev, false);
+ nvme_dev_disable(dev, false, true);
nvme_reset_ctrl(&dev->ctrl);
return BLK_EH_HANDLED;
}
@@ -1233,7 +1234,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
dev_warn(dev->ctrl.device,
"I/O %d QID %d timeout, disable controller\n",
req->tag, nvmeq->qid);
- nvme_dev_disable(dev, false);
+ nvme_dev_disable(dev, false, false);
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
return BLK_EH_HANDLED;
default:
@@ -1249,7 +1250,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
dev_warn(dev->ctrl.device,
"I/O %d QID %d timeout, reset controller\n",
req->tag, nvmeq->qid);
- nvme_dev_disable(dev, false);
+ nvme_dev_disable(dev, false, true);
nvme_reset_ctrl(&dev->ctrl);
/*
@@ -2254,19 +2255,35 @@ static void nvme_pci_disable(struct nvme_dev *dev)
}
}
-static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
+/*
+ * Resetting often follows nvme_dev_disable(), so queues need to be frozen
+ * before resetting.
+ */
+static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown, bool
+ freeze_queue)
{
int i;
bool dead = true;
struct pci_dev *pdev = to_pci_dev(dev->dev);
bool frozen = false;
+ /*
+ * 'freeze_queue' is only valid for non-shutdown; for shutdown we do
+ * an inline freeze & wait_freeze_timeout just to complete
+ * as many requests as possible before shutdown
+ */
+ if (shutdown)
+ freeze_queue = false;
+
+ if (freeze_queue)
+ nvme_start_freeze(&dev->ctrl);
+
mutex_lock(&dev->shutdown_lock);
if (pci_is_enabled(pdev)) {
u32 csts = readl(dev->bar + NVME_REG_CSTS);
- if (dev->ctrl.state == NVME_CTRL_LIVE ||
- dev->ctrl.state == NVME_CTRL_RESETTING) {
+ if (shutdown && (dev->ctrl.state == NVME_CTRL_LIVE ||
+ dev->ctrl.state == NVME_CTRL_RESETTING)) {
nvme_start_freeze(&dev->ctrl);
frozen = true;
}
@@ -2369,7 +2386,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
dev_warn(dev->ctrl.device, "Removing after probe failure status: %d\n", status);
nvme_get_ctrl(&dev->ctrl);
- nvme_dev_disable(dev, false);
+ nvme_dev_disable(dev, false, false);
if (!queue_work(nvme_wq, &dev->remove_work))
nvme_put_ctrl(&dev->ctrl);
}
@@ -2390,7 +2407,7 @@ static void nvme_reset_work(struct work_struct *work)
* moving on.
*/
if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
- nvme_dev_disable(dev, false);
+ nvme_dev_disable(dev, false, false);
/*
* Introduce CONNECTING state from nvme-fc/rdma transports to mark the
@@ -2639,7 +2656,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
static void nvme_reset_prepare(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
- nvme_dev_disable(dev, false);
+ nvme_dev_disable(dev, false, true);
}
static void nvme_reset_done(struct pci_dev *pdev)
@@ -2651,7 +2668,7 @@ static void nvme_reset_done(struct pci_dev *pdev)
static void nvme_shutdown(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
- nvme_dev_disable(dev, true);
+ nvme_dev_disable(dev, true, false);
}
/*
@@ -2670,13 +2687,13 @@ static void nvme_remove(struct pci_dev *pdev)
if (!pci_device_is_present(pdev)) {
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
- nvme_dev_disable(dev, false);
+ nvme_dev_disable(dev, false, false);
}
flush_work(&dev->ctrl.reset_work);
nvme_stop_ctrl(&dev->ctrl);
nvme_remove_namespaces(&dev->ctrl);
- nvme_dev_disable(dev, true);
+ nvme_dev_disable(dev, true, false);
nvme_free_host_mem(dev);
nvme_dev_remove_admin(dev);
nvme_free_queues(dev, 0);
@@ -2710,7 +2727,7 @@ static int nvme_suspend(struct device *dev)
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);
- nvme_dev_disable(ndev, true);
+ nvme_dev_disable(ndev, true, false);
return 0;
}
@@ -2742,7 +2759,7 @@ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
case pci_channel_io_frozen:
dev_warn(dev->ctrl.device,
"frozen state error detected, reset controller\n");
- nvme_dev_disable(dev, false);
+ nvme_dev_disable(dev, false, true);
return PCI_ERS_RESULT_NEED_RESET;
case pci_channel_io_perm_failure:
dev_warn(dev->ctrl.device,
--
2.9.5
next prev parent reply other threads:[~2018-05-16 4:03 UTC|newest]
Thread overview: 60+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-05-16 4:03 [PATCH V6 00/11] nvme: pci: fix & improve timeout handling Ming Lei
2018-05-16 4:03 ` Ming Lei
2018-05-16 4:03 ` [PATCH V6 01/11] block: introduce blk_quiesce_timeout() and blk_unquiesce_timeout() Ming Lei
2018-05-16 4:03 ` Ming Lei
2018-05-16 4:03 ` [PATCH V6 02/11] nvme: pci: cover timeout for admin commands running in EH Ming Lei
2018-05-16 4:03 ` Ming Lei
2018-05-24 15:39 ` Keith Busch
2018-05-24 15:39 ` Keith Busch
2018-05-16 4:03 ` [PATCH V6 03/11] nvme: pci: unquiesce admin queue after controller is shutdown Ming Lei
2018-05-16 4:03 ` Ming Lei
2018-05-16 4:03 ` [PATCH V6 04/11] nvme: pci: set nvmeq->cq_vector after alloc cq/sq Ming Lei
2018-05-16 4:03 ` Ming Lei
2018-05-16 4:03 ` [PATCH V6 05/11] nvme: pci: only wait freezing if queue is frozen Ming Lei
2018-05-16 4:03 ` Ming Lei
2018-05-16 4:03 ` Ming Lei [this message]
2018-05-16 4:03 ` [PATCH V6 06/11] nvme: pci: freeze queue in nvme_dev_disable() in case of error recovery Ming Lei
2018-05-16 4:03 ` [PATCH V6 07/11] nvme: pci: prepare for supporting error recovery from resetting context Ming Lei
2018-05-16 4:03 ` Ming Lei
2018-05-16 4:03 ` [PATCH V6 08/11] nvme: pci: move error handling out of nvme_reset_dev() Ming Lei
2018-05-16 4:03 ` Ming Lei
2018-05-16 4:03 ` [PATCH V6 09/11] nvme: pci: don't unfreeze queue until controller state updating succeeds Ming Lei
2018-05-16 4:03 ` Ming Lei
2018-05-16 4:03 ` [PATCH V6 10/11] nvme: core: introduce nvme_force_change_ctrl_state() Ming Lei
2018-05-16 4:03 ` Ming Lei
2018-05-16 4:03 ` [PATCH V6 11/11] nvme: pci: support nested EH Ming Lei
2018-05-16 4:03 ` Ming Lei
2018-05-16 14:12 ` Keith Busch
2018-05-16 14:12 ` Keith Busch
2018-05-16 23:10 ` Ming Lei
2018-05-16 23:10 ` Ming Lei
2018-05-17 2:20 ` Keith Busch
2018-05-17 2:20 ` Keith Busch
2018-05-17 8:41 ` Christoph Hellwig
2018-05-17 8:41 ` Christoph Hellwig
2018-05-17 14:20 ` Keith Busch
2018-05-17 14:20 ` Keith Busch
2018-05-17 14:20 ` Keith Busch
2018-05-17 14:23 ` Johannes Thumshirn
2018-05-17 14:23 ` Johannes Thumshirn
2018-05-17 14:23 ` Johannes Thumshirn
2018-05-18 16:28 ` Keith Busch
2018-05-18 16:28 ` Keith Busch
2018-05-18 16:28 ` Keith Busch
2018-05-22 7:35 ` Johannes Thumshirn
2018-05-22 7:35 ` Johannes Thumshirn
2018-05-22 7:35 ` Johannes Thumshirn
2018-05-18 0:20 ` Ming Lei
2018-05-18 0:20 ` Ming Lei
2018-05-18 1:01 ` Ming Lei
2018-05-18 1:01 ` Ming Lei
2018-05-18 13:57 ` Keith Busch
2018-05-18 13:57 ` Keith Busch
2018-05-18 16:58 ` Jens Axboe
2018-05-18 16:58 ` Jens Axboe
2018-05-18 22:26 ` Ming Lei
2018-05-18 22:26 ` Ming Lei
2018-05-18 23:45 ` Keith Busch
2018-05-18 23:45 ` Keith Busch
2018-05-18 23:51 ` Ming Lei
2018-05-18 23:51 ` Ming Lei
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180516040313.13596-7-ming.lei@redhat.com \
--to=ming.lei@redhat.com \
--cc=axboe@kernel.dk \
--cc=hch@lst.de \
--cc=james.smart@broadcom.com \
--cc=jianchao.w.wang@oracle.com \
--cc=keith.busch@intel.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=loberman@redhat.com \
--cc=sagi@grimberg.me \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.