From: willy@linux.intel.com (Matthew Wilcox)
Subject: [PATCH RFC 2/5] NVMe: Basic NVMe device hotplug support
Date: Mon, 30 Dec 2013 08:46:07 -0500 [thread overview]
Message-ID: <20131230134607.GH4945@linux.intel.com> (raw)
In-Reply-To: <1388399240-13828-3-git-send-email-santoshsy@gmail.com>
On Mon, Dec 30, 2013@03:57:17PM +0530, Santosh Y wrote:
> +config BLK_DEV_NVME_HP
> + bool "Enable hotplug support"
> + depends on BLK_DEV_NVME && HOTPLUG_PCI_PCIE
> + default n
> + help
> + If you say Y here, the driver will support hotplug feature.
> + Hotplug only works if the PCIe slot is hotplug capable.
> +
No. There is no such thing as "enable hotplug support". All devices
are at least theoretically hotpluggable, and I'm not papering over bugs
with this kind of config option.
> @@ -383,10 +403,19 @@ static void bio_completion(struct nvme_dev *dev, void *ctx,
> nvme_end_io_acct(bio, iod->start_time);
> }
> nvme_free_iod(dev, iod);
> - if (status)
> - bio_endio(bio, -EIO);
> - else
> + if (status) {
> +#ifdef CONFIG_BLK_DEV_NVME_HP
> + if ((status & 0xff) == NVME_SC_INVALID_NS) {
> + bio_endio(bio, -ENODEV);
> + } else if ((status & 0xff) == NVME_SC_NS_NOT_READY) {
> + bio->bi_rw |= REQ_FAILFAST_DRIVER;
Umm.
__REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */
You seem to be using this to mean the exact opposite, "Do a retry".
> + nvme_requeue_bio(dev, bio);
> + } else
> +#endif
> + bio_endio(bio, -EIO);
> + } else {
> bio_endio(bio, 0);
> + }
> }
>
> /* length is in bytes. gfp flags indicates whether we may sleep. */
> @@ -722,6 +751,10 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
> if ((bio->bi_rw & REQ_FLUSH) && !psegs)
> return nvme_submit_flush(nvmeq, ns, cmdid);
>
> +#ifdef CONFIG_BLK_DEV_NVME_HP
> + if (bio->bi_rw & REQ_FAILFAST_DRIVER)
> + mdelay(100);
> +#endif
> control = 0;
> if (bio->bi_rw & REQ_FUA)
> control |= NVME_RW_FUA;
> @@ -814,10 +847,26 @@ static int nvme_process_cq(struct nvme_queue *nvmeq)
>
> static void nvme_make_request(struct request_queue *q, struct bio *bio)
> {
> - struct nvme_ns *ns = q->queuedata;
> - struct nvme_queue *nvmeq = get_nvmeq(ns->dev);
> + struct nvme_ns *ns = NULL;
> + struct nvme_queue *nvmeq = NULL;
> int result = -EBUSY;
>
> + if (likely(q && q->queuedata))
> + ns = q->queuedata;
> + if (unlikely(!ns)) {
> + bio_endio(bio, -ENODEV);
> + return;
> + }
This confuses me. You just checked that q->queuedata was != NULL, now
you're checking that it still wasn't NULL with no barriers or anything
between. What are you trying to guard against here?
> +#ifdef CONFIG_BLK_DEV_NVME_HP
> + if (test_bit(NVME_HOT_REM, &ns->dev->hp_flag) ||
> + !(bio->bi_bdev->bd_disk->flags & GENHD_FL_UP)) {
> + bio_endio(bio, -ENODEV);
> + return;
> + }
> +#endif
> + nvmeq = get_nvmeq(ns->dev);
> +
> if (!nvmeq) {
> put_nvmeq(NULL);
> bio_endio(bio, -EIO);
> @@ -1120,6 +1169,12 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout)
> .status = cpu_to_le16(NVME_SC_ABORT_REQ << 1),
> };
>
> +#ifdef CONFIG_BLK_DEV_NVME_HP
> + if (test_bit(NVME_HOT_REM, &nvmeq->dev->hp_flag)) {
> + cqe.status |= (NVME_SC_INVALID_NS << 1);
> + info[cmdid].timeout = jiffies - NVME_IO_TIMEOUT;
> + }
> +#endif
> if (timeout && !time_after(now, info[cmdid].timeout))
> continue;
> if (info[cmdid].ctx == CMD_CTX_CANCELLED)
> @@ -1205,7 +1260,7 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid)
>
> /* Don't tell the adapter to delete the admin queue.
> * Don't tell a removed adapter to delete IO queues. */
> - if (qid && readl(&dev->bar->csts) != -1) {
> + if (qid && !nvme_check_surprise_removal(dev)) {
This isn't really "check surprise removal", it's "nvme_is_present(dev)".
> adapter_delete_sq(dev, qid);
> adapter_delete_cq(dev, qid);
> }
> @@ -1724,6 +1779,13 @@ static void nvme_resubmit_bios(struct nvme_queue *nvmeq)
> struct bio *bio = bio_list_pop(&nvmeq->sq_cong);
> struct nvme_ns *ns = bio->bi_bdev->bd_disk->private_data;
>
> +#ifdef CONFIG_BLK_DEV_NVME_HP
> + if (test_bit(NVME_HOT_REM, &ns->dev->hp_flag) ||
> + !(bio->bi_bdev->bd_disk->flags & GENHD_FL_UP)) {
> + bio_endio(bio, -ENODEV);
> + continue;
> + }
> +#endif
> if (bio_list_empty(&nvmeq->sq_cong))
> remove_wait_queue(&nvmeq->sq_full,
> &nvmeq->sq_cong_wait);
> @@ -1746,7 +1808,8 @@ static int nvme_kthread(void *data)
> spin_lock(&dev_list_lock);
> list_for_each_entry_safe(dev, next, &dev_list, node) {
> int i;
> - if (readl(&dev->bar->csts) & NVME_CSTS_CFS &&
> + if (!nvme_check_surprise_removal(dev) &&
> + readl(&dev->bar->csts) & NVME_CSTS_CFS &&
> dev->initialized) {
> if (work_busy(&dev->reset_work))
> continue;
> @@ -2082,7 +2145,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
> dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
> if (!dev->bar)
> goto disable;
> - if (readl(&dev->bar->csts) == -1) {
> + if (nvme_check_surprise_removal(dev)) {
> result = -ENODEV;
> goto unmap;
> }
> @@ -2265,7 +2328,7 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
> list_del_init(&dev->node);
> spin_unlock(&dev_list_lock);
>
> - if (!dev->bar || (dev->bar && readl(&dev->bar->csts) == -1)) {
> + if (!dev->bar || (dev->bar && nvme_check_surprise_removal(dev))) {
> for (i = dev->queue_count - 1; i >= 0; i--) {
> struct nvme_queue *nvmeq = dev->queues[i];
> nvme_suspend_queue(nvmeq);
> @@ -2534,6 +2597,12 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>
> dev->initialized = 1;
> kref_init(&dev->kref);
> +
> +#ifdef CONFIG_BLK_DEV_NVME_HP
> + if (!pdev->is_added)
> + dev_info(&pdev->dev,
> + "Device 0x%x is on-line\n", pdev->device);
> +#endif
> return 0;
>
> remove:
> @@ -2556,6 +2625,16 @@ static void nvme_remove(struct pci_dev *pdev)
> {
> struct nvme_dev *dev = pci_get_drvdata(pdev);
>
> +#ifdef CONFIG_BLK_DEV_NVME_HP
> + if (!pdev || !dev)
> + return;
!pdev can't possibly happen, or we crashed on the previous line.
> + if (nvme_check_surprise_removal(dev)) {
> + set_bit(NVME_HOT_REM, &dev->hp_flag);
> + dev_info(&pdev->dev,
> + "Surprise removal of device 0x%x\n", pdev->device);
> + }
> + pci_dev_get(pdev);
> +#endif
> pci_set_drvdata(pdev, NULL);
> flush_work(&dev->reset_work);
> misc_deregister(&dev->miscdev);
> @@ -2565,6 +2644,9 @@ static void nvme_remove(struct pci_dev *pdev)
> nvme_release_instance(dev);
> nvme_release_prp_pools(dev);
> kref_put(&dev->kref, nvme_free_dev);
> +#ifdef CONFIG_BLK_DEV_NVME_HP
> + pci_dev_put(pdev);
I'm pretty sure you don't need to bump the refcount up and down during
this function. Look at the caller, pci_stop_dev(). That dereferences
the pci_dev after calling ->remove. Any bug you could possibly fix by
playing with the reference count here is only going to be hit there when
it sets is_added to 0.
next prev parent reply other threads:[~2013-12-30 13:46 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-12-30 10:27 [PATCH RFC 0/5] NVMe: Hotplug support Santosh Y
2013-12-30 10:27 ` [PATCH RFC 1/5] NVMe: Code cleanup and minor checkpatch correction Santosh Y
2013-12-30 13:32 ` Matthew Wilcox
2013-12-30 14:52 ` Keith Busch
2013-12-30 10:27 ` [PATCH RFC 2/5] NVMe: Basic NVMe device hotplug support Santosh Y
2013-12-30 13:46 ` Matthew Wilcox [this message]
2013-12-30 13:48 ` Matias Bjorling
2013-12-30 14:09 ` Matias Bjorling
2013-12-30 16:06 ` Keith Busch
2013-12-30 17:21 ` Keith Busch
2013-12-31 8:48 ` Ravi Kumar
2013-12-31 13:35 ` Matthew Wilcox
2013-12-31 17:17 ` Matthew Wilcox
2013-12-30 10:27 ` [PATCH RFC 3/5] NVMe: Asynchronous device scan support Santosh Y
2013-12-30 13:50 ` Matthew Wilcox
2013-12-30 15:55 ` Keith Busch
2014-03-28 14:02 ` Santosh Y
2014-03-28 16:29 ` Keith Busch
2014-04-11 13:59 ` Matthew Wilcox
2014-04-13 17:42 ` Matthew Wilcox
2013-12-30 10:27 ` [PATCH RFC 4/5] NVMe: Stale node cleanup based on reference count Santosh Y
2013-12-30 14:00 ` Matthew Wilcox
2013-12-30 10:27 ` [PATCH RFC 5/5] NVMe: Hotplug support during hibernate/sleep states Santosh Y
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20131230134607.GH4945@linux.intel.com \
--to=willy@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).