linux-nvme.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
From: m@bjorling.me (Matias Bjorling)
Subject: [PATCH RFC 2/5] NVMe: Basic NVMe device hotplug support
Date: Mon, 30 Dec 2013 14:48:08 +0100	[thread overview]
Message-ID: <52C17998.60806@bjorling.me> (raw)
In-Reply-To: <1388399240-13828-3-git-send-email-santoshsy@gmail.com>

On 12/30/2013 11:27 AM, Santosh Y wrote:
> This patch provides basic hotplug support for NVMe.
> For NVMe hotplug to work the PCIe slot must be hotplug capable.
>
> When a NVMe device is surprise removed and inserted back the
> device may need some time to respond to host IO commands, and
> will return NVME_SC_NS_NOT_READY. In this case the requests
> will be requeued until the device responds to the IO commands
> with status response or until the commands timeout.
>
> Signed-off-by: Ravi Kumar <ravi.android at gmail.com>
> Signed-off-by: Santosh Y <santoshsy at gmail.com>
>
> diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
> index 86b9f37..f92ec96 100644
> --- a/drivers/block/Kconfig
> +++ b/drivers/block/Kconfig
> @@ -319,6 +319,14 @@ config BLK_DEV_NVME
>   	  To compile this driver as a module, choose M here: the
>   	  module will be called nvme.
>
> +config BLK_DEV_NVME_HP
> +	bool "Enable hotplug support"
> +	depends on BLK_DEV_NVME && HOTPLUG_PCI_PCIE
> +	default n
> +	help
> +	  If you say Y here, the driver will support hotplug feature.


> +	  Hotplug only works if the PCIe slot is hotplug capable.
> +
>   config BLK_DEV_SKD
>   	tristate "STEC S1120 Block Driver"
>   	depends on PCI
> diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
> index a523296..8a02135 100644
> --- a/drivers/block/nvme-core.c
> +++ b/drivers/block/nvme-core.c
> @@ -172,6 +172,13 @@ static int alloc_cmdid(struct nvme_queue *nvmeq, void *ctx,
>   	return cmdid;
>   }
>
> +static inline bool nvme_check_surprise_removal(struct nvme_dev *dev)
> +{
> +	if (readl(&dev->bar->csts) == -1)
> +		return true;
> +	return false;
> +}
> +
>   static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx,
>   				nvme_completion_fn handler, unsigned timeout)
>   {
> @@ -370,6 +377,19 @@ static void nvme_end_io_acct(struct bio *bio, unsigned long start_time)
>   	part_stat_unlock();
>   }
>
> +#ifdef CONFIG_BLK_DEV_NVME_HP

There is an excessive amount of ifdef/endif declarations. Instead of 
special-casing each chunk of hotplug code, what about refactoring it 
into its own nvme-hotplug.c file?

This will clean up the code path for hotplugging and won't interfere with 
the natural flow of the nvme core code. Another option is to just accept 
the complexity and not ifdef around the hotplug areas.


> +static void nvme_requeue_bio(struct nvme_dev *dev, struct bio *bio)
> +{
> +	struct nvme_queue *nvmeq = get_nvmeq(dev);
> +	if (bio_list_empty(&nvmeq->sq_cong))
> +		add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
> +	bio_list_add(&nvmeq->sq_cong, bio);
> +	put_nvmeq(nvmeq);
> +	wake_up_process(nvme_thread);
> +}
> +#endif
> +
> +
>   static void bio_completion(struct nvme_dev *dev, void *ctx,
>   						struct nvme_completion *cqe)
>   {
> @@ -383,10 +403,19 @@ static void bio_completion(struct nvme_dev *dev, void *ctx,
>   		nvme_end_io_acct(bio, iod->start_time);
>   	}
>   	nvme_free_iod(dev, iod);
> -	if (status)
> -		bio_endio(bio, -EIO);
> -	else
> +	if (status) {
> +#ifdef CONFIG_BLK_DEV_NVME_HP
> +		if ((status & 0xff) == NVME_SC_INVALID_NS) {
> +			bio_endio(bio, -ENODEV);
> +		} else if ((status & 0xff) == NVME_SC_NS_NOT_READY) {
> +			bio->bi_rw |= REQ_FAILFAST_DRIVER;
> +			nvme_requeue_bio(dev, bio);
> +		} else
> +#endif
> +			bio_endio(bio, -EIO);
> +	} else {
>   		bio_endio(bio, 0);
> +	}
>   }
>
>   /* length is in bytes.  gfp flags indicates whether we may sleep. */
> @@ -722,6 +751,10 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
>   	if ((bio->bi_rw & REQ_FLUSH) && !psegs)
>   		return nvme_submit_flush(nvmeq, ns, cmdid);
>
> +#ifdef CONFIG_BLK_DEV_NVME_HP
> +	if (bio->bi_rw & REQ_FAILFAST_DRIVER)
> +		mdelay(100);
> +#endif
>   	control = 0;
>   	if (bio->bi_rw & REQ_FUA)
>   		control |= NVME_RW_FUA;
> @@ -814,10 +847,26 @@ static int nvme_process_cq(struct nvme_queue *nvmeq)
>
>   static void nvme_make_request(struct request_queue *q, struct bio *bio)
>   {
> -	struct nvme_ns *ns = q->queuedata;
> -	struct nvme_queue *nvmeq = get_nvmeq(ns->dev);
> +	struct nvme_ns *ns = NULL;
> +	struct nvme_queue *nvmeq = NULL;
>   	int result = -EBUSY;
>
> +	if (likely(q && q->queuedata))
> +		ns = q->queuedata;
> +	if (unlikely(!ns)) {
> +		bio_endio(bio, -ENODEV);
> +		return;
> +	}
> +
> +#ifdef CONFIG_BLK_DEV_NVME_HP
> +	if (test_bit(NVME_HOT_REM, &ns->dev->hp_flag) ||
> +		!(bio->bi_bdev->bd_disk->flags & GENHD_FL_UP)) {
> +		bio_endio(bio, -ENODEV);
> +		return;
> +	}
> +#endif
> +	nvmeq = get_nvmeq(ns->dev);
> +
>   	if (!nvmeq) {
>   		put_nvmeq(NULL);
>   		bio_endio(bio, -EIO);
> @@ -1120,6 +1169,12 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout)
>   			.status = cpu_to_le16(NVME_SC_ABORT_REQ << 1),
>   		};
>
> +#ifdef CONFIG_BLK_DEV_NVME_HP
> +		if (test_bit(NVME_HOT_REM, &nvmeq->dev->hp_flag)) {
> +			cqe.status |= (NVME_SC_INVALID_NS << 1);
> +			info[cmdid].timeout = jiffies - NVME_IO_TIMEOUT;
> +		}
> +#endif
>   		if (timeout && !time_after(now, info[cmdid].timeout))
>   			continue;
>   		if (info[cmdid].ctx == CMD_CTX_CANCELLED)
> @@ -1205,7 +1260,7 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid)
>
>   	/* Don't tell the adapter to delete the admin queue.
>   	 * Don't tell a removed adapter to delete IO queues. */
> -	if (qid && readl(&dev->bar->csts) != -1) {
> +	if (qid && !nvme_check_surprise_removal(dev)) {
>   		adapter_delete_sq(dev, qid);
>   		adapter_delete_cq(dev, qid);
>   	}
> @@ -1724,6 +1779,13 @@ static void nvme_resubmit_bios(struct nvme_queue *nvmeq)
>   		struct bio *bio = bio_list_pop(&nvmeq->sq_cong);
>   		struct nvme_ns *ns = bio->bi_bdev->bd_disk->private_data;
>
> +#ifdef CONFIG_BLK_DEV_NVME_HP
> +		if (test_bit(NVME_HOT_REM, &ns->dev->hp_flag) ||
> +			!(bio->bi_bdev->bd_disk->flags & GENHD_FL_UP)) {
> +			bio_endio(bio, -ENODEV);
> +			continue;
> +		}
> +#endif
>   		if (bio_list_empty(&nvmeq->sq_cong))
>   			remove_wait_queue(&nvmeq->sq_full,
>   							&nvmeq->sq_cong_wait);
> @@ -1746,7 +1808,8 @@ static int nvme_kthread(void *data)
>   		spin_lock(&dev_list_lock);
>   		list_for_each_entry_safe(dev, next, &dev_list, node) {
>   			int i;
> -			if (readl(&dev->bar->csts) & NVME_CSTS_CFS &&
> +			if (!nvme_check_surprise_removal(dev) &&
> +				readl(&dev->bar->csts) & NVME_CSTS_CFS &&
>   							dev->initialized) {
>   				if (work_busy(&dev->reset_work))
>   					continue;
> @@ -2082,7 +2145,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
>   	dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
>   	if (!dev->bar)
>   		goto disable;
> -	if (readl(&dev->bar->csts) == -1) {
> +	if (nvme_check_surprise_removal(dev)) {
>   		result = -ENODEV;
>   		goto unmap;
>   	}
> @@ -2265,7 +2328,7 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
>   	list_del_init(&dev->node);
>   	spin_unlock(&dev_list_lock);
>
> -	if (!dev->bar || (dev->bar && readl(&dev->bar->csts) == -1)) {
> +	if (!dev->bar || (dev->bar && nvme_check_surprise_removal(dev))) {
>   		for (i = dev->queue_count - 1; i >= 0; i--) {
>   			struct nvme_queue *nvmeq = dev->queues[i];
>   			nvme_suspend_queue(nvmeq);
> @@ -2534,6 +2597,12 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>
>   	dev->initialized = 1;
>   	kref_init(&dev->kref);
> +
> +#ifdef CONFIG_BLK_DEV_NVME_HP
> +	if (!pdev->is_added)
> +		dev_info(&pdev->dev,
> +			"Device 0x%x is on-line\n", pdev->device);
> +#endif
>   	return 0;
>
>    remove:
> @@ -2556,6 +2625,16 @@ static void nvme_remove(struct pci_dev *pdev)
>   {
>   	struct nvme_dev *dev = pci_get_drvdata(pdev);
>
> +#ifdef CONFIG_BLK_DEV_NVME_HP
> +	if (!pdev || !dev)
> +		return;
> +	if (nvme_check_surprise_removal(dev)) {
> +		set_bit(NVME_HOT_REM, &dev->hp_flag);
> +		dev_info(&pdev->dev,
> +			"Surprise removal of device 0x%x\n", pdev->device);
> +	}
> +	pci_dev_get(pdev);
> +#endif
>   	pci_set_drvdata(pdev, NULL);
>   	flush_work(&dev->reset_work);
>   	misc_deregister(&dev->miscdev);
> @@ -2565,6 +2644,9 @@ static void nvme_remove(struct pci_dev *pdev)
>   	nvme_release_instance(dev);
>   	nvme_release_prp_pools(dev);
>   	kref_put(&dev->kref, nvme_free_dev);
> +#ifdef CONFIG_BLK_DEV_NVME_HP
> +	pci_dev_put(pdev);
> +#endif
>   }
>
>   /* These functions are yet to be implemented */
> diff --git a/include/linux/nvme.h b/include/linux/nvme.h
> index 69ae03f..4ef375e 100644
> --- a/include/linux/nvme.h
> +++ b/include/linux/nvme.h
> @@ -68,6 +68,12 @@ enum {
>
>   #define NVME_IO_TIMEOUT	(5 * HZ)
>
> +#ifdef CONFIG_BLK_DEV_NVME_HP
> +enum {
> +	NVME_HOT_REM,
> +};
> +#endif
> +
>   /*
>    * Represents an NVM Express device.  Each nvme_dev is a PCI function.
>    */
> @@ -97,6 +103,9 @@ struct nvme_dev {
>   	u16 oncs;
>   	u16 abort_limit;
>   	u8 initialized;
> +#ifdef CONFIG_BLK_DEV_NVME_HP
> +	unsigned long hp_flag;
> +#endif
>   };
>
>   /*
>

  parent reply	other threads:[~2013-12-30 13:48 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-12-30 10:27 [PATCH RFC 0/5] NVMe: Hotplug support Santosh Y
2013-12-30 10:27 ` [PATCH RFC 1/5] NVMe: Code cleanup and minor checkpatch correction Santosh Y
2013-12-30 13:32   ` Matthew Wilcox
2013-12-30 14:52   ` Keith Busch
2013-12-30 10:27 ` [PATCH RFC 2/5] NVMe: Basic NVMe device hotplug support Santosh Y
2013-12-30 13:46   ` Matthew Wilcox
2013-12-30 13:48   ` Matias Bjorling [this message]
2013-12-30 14:09   ` Matias Bjorling
2013-12-30 16:06   ` Keith Busch
2013-12-30 17:21   ` Keith Busch
2013-12-31  8:48     ` Ravi Kumar
2013-12-31 13:35   ` Matthew Wilcox
2013-12-31 17:17     ` Matthew Wilcox
2013-12-30 10:27 ` [PATCH RFC 3/5] NVMe: Asynchronous device scan support Santosh Y
2013-12-30 13:50   ` Matthew Wilcox
2013-12-30 15:55     ` Keith Busch
2014-03-28 14:02     ` Santosh Y
2014-03-28 16:29       ` Keith Busch
2014-04-11 13:59       ` Matthew Wilcox
2014-04-13 17:42         ` Matthew Wilcox
2013-12-30 10:27 ` [PATCH RFC 4/5] NVMe: Stale node cleanup based on reference count Santosh Y
2013-12-30 14:00   ` Matthew Wilcox
2013-12-30 10:27 ` [PATCH RFC 5/5] NVMe: Hotplug support during hibernate/sleep states Santosh Y

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=52C17998.60806@bjorling.me \
    --to=m@bjorling.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).