* [PATCH 1/4] nvme: Move nvme_freeze/unfreeze_queues to nvme core
2015-12-24 14:26 namespace list locking and ioctl fixes V2 Christoph Hellwig
@ 2015-12-24 14:26 ` Christoph Hellwig
2016-01-04 15:11 ` Keith Busch
2015-12-24 14:27 ` [PATCH 2/4] nvme: synchronize access to ctrl->namespaces Christoph Hellwig
` (2 subsequent siblings)
3 siblings, 1 reply; 11+ messages in thread
From: Christoph Hellwig @ 2015-12-24 14:26 UTC (permalink / raw)
From: Sagi Grimberg <sagig@mellanox.com>
Nothing pci specific about them and We'll need them exported
in other transports too.
Signed-off-by: Sagi Grimberg <sagig at mellanox.com>
Signed-off-by: Christoph Hellwig <hch at lst.de>
---
drivers/nvme/host/core.c | 28 ++++++++++++++++++++++++++++
drivers/nvme/host/nvme.h | 3 +++
drivers/nvme/host/pci.c | 32 ++------------------------------
3 files changed, 33 insertions(+), 30 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 1437ff3..2713005 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1328,6 +1328,34 @@ out:
return ret;
}
+void nvme_freeze_queues(struct nvme_ctrl *ctrl)
+{
+ struct nvme_ns *ns;
+
+ list_for_each_entry(ns, &ctrl->namespaces, list) {
+ blk_mq_freeze_queue_start(ns->queue);
+
+ spin_lock_irq(ns->queue->queue_lock);
+ queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
+ spin_unlock_irq(ns->queue->queue_lock);
+
+ blk_mq_cancel_requeue_work(ns->queue);
+ blk_mq_stop_hw_queues(ns->queue);
+ }
+}
+
+void nvme_unfreeze_queues(struct nvme_ctrl *ctrl)
+{
+ struct nvme_ns *ns;
+
+ list_for_each_entry(ns, &ctrl->namespaces, list) {
+ queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue);
+ blk_mq_unfreeze_queue(ns->queue);
+ blk_mq_start_stopped_hw_queues(ns->queue, true);
+ blk_mq_kick_requeue_list(ns->queue);
+ }
+}
+
int __init nvme_core_init(void)
{
int result;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index d88cf45..0da6747 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -237,6 +237,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl);
void nvme_scan_namespaces(struct nvme_ctrl *ctrl);
void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
+void nvme_freeze_queues(struct nvme_ctrl *ctrl);
+void nvme_unfreeze_queues(struct nvme_ctrl *ctrl);
+
struct request *nvme_alloc_request(struct request_queue *q,
struct nvme_command *cmd, unsigned int flags);
void nvme_requeue_req(struct request *req);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index b82bbea..a7e5499 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1903,34 +1903,6 @@ static void nvme_dev_list_remove(struct nvme_dev *dev)
kthread_stop(tmp);
}
-static void nvme_freeze_queues(struct nvme_dev *dev)
-{
- struct nvme_ns *ns;
-
- list_for_each_entry(ns, &dev->ctrl.namespaces, list) {
- blk_mq_freeze_queue_start(ns->queue);
-
- spin_lock_irq(ns->queue->queue_lock);
- queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
- spin_unlock_irq(ns->queue->queue_lock);
-
- blk_mq_cancel_requeue_work(ns->queue);
- blk_mq_stop_hw_queues(ns->queue);
- }
-}
-
-static void nvme_unfreeze_queues(struct nvme_dev *dev)
-{
- struct nvme_ns *ns;
-
- list_for_each_entry(ns, &dev->ctrl.namespaces, list) {
- queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue);
- blk_mq_unfreeze_queue(ns->queue);
- blk_mq_start_stopped_hw_queues(ns->queue, true);
- blk_mq_kick_requeue_list(ns->queue);
- }
-}
-
static void nvme_dev_shutdown(struct nvme_dev *dev)
{
int i;
@@ -1940,7 +1912,7 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
mutex_lock(&dev->shutdown_lock);
if (dev->bar) {
- nvme_freeze_queues(dev);
+ nvme_freeze_queues(&dev->ctrl);
csts = readl(dev->bar + NVME_REG_CSTS);
}
if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
@@ -2049,7 +2021,7 @@ static void nvme_reset_work(struct work_struct *work)
dev_warn(dev->dev, "IO queues not created\n");
nvme_remove_namespaces(&dev->ctrl);
} else {
- nvme_unfreeze_queues(dev);
+ nvme_unfreeze_queues(&dev->ctrl);
nvme_dev_add(dev);
}
--
1.9.1
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 2/4] nvme: synchronize access to ctrl->namespaces
2015-12-24 14:26 namespace list locking and ioctl fixes V2 Christoph Hellwig
2015-12-24 14:26 ` [PATCH 1/4] nvme: Move nvme_freeze/unfreeze_queues to nvme core Christoph Hellwig
@ 2015-12-24 14:27 ` Christoph Hellwig
2016-01-04 15:10 ` Keith Busch
2015-12-24 14:27 ` [PATCH 3/4] nvme: fixes for NVME_IOCTL_IO_CMD on the char device Christoph Hellwig
2015-12-24 14:27 ` [PATCH 4/4] nvme: make SG_IO support optional Christoph Hellwig
3 siblings, 1 reply; 11+ messages in thread
From: Christoph Hellwig @ 2015-12-24 14:27 UTC (permalink / raw)
Currently traversal and modification of ctrl->namespaces happens completely
unsynchronized, which can be fixed by the addition of a simple mutex.
Note: nvme_dev_ioctl will be handled in the next patch.
Signed-off-by: Christoph Hellwig <hch at lst.de>
Reviewed-by: Sagi Grimberg <sagig at mellanox.com>
---
drivers/nvme/host/core.c | 17 +++++++++++++++++
drivers/nvme/host/nvme.h | 1 +
2 files changed, 18 insertions(+)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 2713005..a928ad5 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1034,6 +1034,8 @@ static struct nvme_ns *nvme_find_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
struct nvme_ns *ns;
+ lockdep_assert_held(&ctrl->namespaces_mutex);
+
list_for_each_entry(ns, &ctrl->namespaces, list) {
if (ns->ns_id == nsid)
return ns;
@@ -1049,6 +1051,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
struct gendisk *disk;
int node = dev_to_node(ctrl->dev);
+ lockdep_assert_held(&ctrl->namespaces_mutex);
+
ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
if (!ns)
return;
@@ -1118,6 +1122,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
bool kill = nvme_io_incapable(ns->ctrl) &&
!blk_queue_dying(ns->queue);
+ lockdep_assert_held(&ns->ctrl->namespaces_mutex);
+
if (kill)
blk_set_queue_dying(ns->queue);
if (ns->disk->flags & GENHD_FL_UP) {
@@ -1188,6 +1194,8 @@ static void __nvme_scan_namespaces(struct nvme_ctrl *ctrl, unsigned nn)
struct nvme_ns *ns, *next;
unsigned i;
+ lockdep_assert_held(&ctrl->namespaces_mutex);
+
for (i = 1; i <= nn; i++)
nvme_validate_ns(ctrl, i);
@@ -1205,6 +1213,7 @@ void nvme_scan_namespaces(struct nvme_ctrl *ctrl)
if (nvme_identify_ctrl(ctrl, &id))
return;
+ mutex_lock(&ctrl->namespaces_mutex);
nn = le32_to_cpu(id->nn);
if (ctrl->vs >= NVME_VS(1, 1) &&
!(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
@@ -1214,6 +1223,7 @@ void nvme_scan_namespaces(struct nvme_ctrl *ctrl)
__nvme_scan_namespaces(ctrl, le32_to_cpup(&id->nn));
done:
list_sort(NULL, &ctrl->namespaces, ns_cmp);
+ mutex_unlock(&ctrl->namespaces_mutex);
kfree(id);
}
@@ -1221,8 +1231,10 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns, *next;
+ mutex_lock(&ctrl->namespaces_mutex);
list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
nvme_ns_remove(ns);
+ mutex_unlock(&ctrl->namespaces_mutex);
}
static DEFINE_IDA(nvme_instance_ida);
@@ -1290,6 +1302,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
int ret;
INIT_LIST_HEAD(&ctrl->namespaces);
+ mutex_init(&ctrl->namespaces_mutex);
kref_init(&ctrl->kref);
ctrl->dev = dev;
ctrl->ops = ops;
@@ -1332,6 +1345,7 @@ void nvme_freeze_queues(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
+ mutex_lock(&ctrl->namespaces_mutex);
list_for_each_entry(ns, &ctrl->namespaces, list) {
blk_mq_freeze_queue_start(ns->queue);
@@ -1342,18 +1356,21 @@ void nvme_freeze_queues(struct nvme_ctrl *ctrl)
blk_mq_cancel_requeue_work(ns->queue);
blk_mq_stop_hw_queues(ns->queue);
}
+ mutex_unlock(&ctrl->namespaces_mutex);
}
void nvme_unfreeze_queues(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
+ mutex_lock(&ctrl->namespaces_mutex);
list_for_each_entry(ns, &ctrl->namespaces, list) {
queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue);
blk_mq_unfreeze_queue(ns->queue);
blk_mq_start_stopped_hw_queues(ns->queue, true);
blk_mq_kick_requeue_list(ns->queue);
}
+ mutex_unlock(&ctrl->namespaces_mutex);
}
int __init nvme_core_init(void)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 0da6747..4437592 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -69,6 +69,7 @@ struct nvme_ctrl {
int instance;
struct blk_mq_tag_set *tagset;
struct list_head namespaces;
+ struct mutex namespaces_mutex;
struct device *device; /* char device */
struct list_head node;
--
1.9.1
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 3/4] nvme: fixes for NVME_IOCTL_IO_CMD on the char device
2015-12-24 14:26 namespace list locking and ioctl fixes V2 Christoph Hellwig
2015-12-24 14:26 ` [PATCH 1/4] nvme: Move nvme_freeze/unfreeze_queues to nvme core Christoph Hellwig
2015-12-24 14:27 ` [PATCH 2/4] nvme: synchronize access to ctrl->namespaces Christoph Hellwig
@ 2015-12-24 14:27 ` Christoph Hellwig
2016-01-04 15:08 ` Keith Busch
2015-12-24 14:27 ` [PATCH 4/4] nvme: make SG_IO support optional Christoph Hellwig
3 siblings, 1 reply; 11+ messages in thread
From: Christoph Hellwig @ 2015-12-24 14:27 UTC (permalink / raw)
Make sure we synchronize access to the namespaces list and grab a reference
to the namespace before doing I/O. Make sure to reject the ioctl if multiple
namespaces are present as it's entirely unsafe, and warn when using it even
with a single namespace.
Signed-off-by: Christoph Hellwig <hch at lst.de>
Reviewed-by: Sagi Grimberg <sagig at mellanox.com>
---
drivers/nvme/host/core.c | 39 ++++++++++++++++++++++++++++++++++-----
1 file changed, 34 insertions(+), 5 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index a928ad5..51f6fc8 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -922,21 +922,50 @@ static int nvme_dev_release(struct inode *inode, struct file *file)
return 0;
}
+static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
+{
+ struct nvme_ns *ns;
+ int ret;
+
+ mutex_lock(&ctrl->namespaces_mutex);
+ if (list_empty(&ctrl->namespaces)) {
+ ret = -ENOTTY;
+ goto out_unlock;
+ }
+
+ ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
+ if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
+ dev_warn(ctrl->dev,
+ "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ dev_warn(ctrl->dev,
+ "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
+ kref_get(&ns->kref);
+ mutex_unlock(&ctrl->namespaces_mutex);
+
+ ret = nvme_user_cmd(ctrl, ns, argp);
+ nvme_put_ns(ns);
+ return ret;
+
+out_unlock:
+ mutex_unlock(&ctrl->namespaces_mutex);
+ return ret;
+}
+
static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct nvme_ctrl *ctrl = file->private_data;
void __user *argp = (void __user *)arg;
- struct nvme_ns *ns;
switch (cmd) {
case NVME_IOCTL_ADMIN_CMD:
return nvme_user_cmd(ctrl, NULL, argp);
case NVME_IOCTL_IO_CMD:
- if (list_empty(&ctrl->namespaces))
- return -ENOTTY;
- ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
- return nvme_user_cmd(ctrl, ns, argp);
+ return nvme_dev_user_cmd(ctrl, argp);
case NVME_IOCTL_RESET:
dev_warn(ctrl->dev, "resetting controller\n");
return ctrl->ops->reset_ctrl(ctrl);
--
1.9.1
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 3/4] nvme: fixes for NVME_IOCTL_IO_CMD on the char device
2015-12-24 14:27 ` [PATCH 3/4] nvme: fixes for NVME_IOCTL_IO_CMD on the char device Christoph Hellwig
@ 2016-01-04 15:08 ` Keith Busch
0 siblings, 0 replies; 11+ messages in thread
From: Keith Busch @ 2016-01-04 15:08 UTC (permalink / raw)
On Thu, Dec 24, 2015@03:27:01PM +0100, Christoph Hellwig wrote:
> Make sure we synchronize access to the namespaces list and grab a reference
> to the namespace before doing I/O. Make sure to reject the ioctl if multiple
> namespaces are present as it's entirely unsafe, and warn when using it even
> with a single namespace.
>
> Signed-off-by: Christoph Hellwig <hch at lst.de>
> Reviewed-by: Sagi Grimberg <sagig at mellanox.com>
Okay, I accept the argument for allowing these to "hidden" namespaces
is not sufficiently justified.
Acked-by: Keith Busch <keith.busch at intel.com>
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 4/4] nvme: make SG_IO support optional
2015-12-24 14:26 namespace list locking and ioctl fixes V2 Christoph Hellwig
` (2 preceding siblings ...)
2015-12-24 14:27 ` [PATCH 3/4] nvme: fixes for NVME_IOCTL_IO_CMD on the char device Christoph Hellwig
@ 2015-12-24 14:27 ` Christoph Hellwig
2016-01-03 16:36 ` Sagi Grimberg
2016-01-04 15:19 ` Keith Busch
3 siblings, 2 replies; 11+ messages in thread
From: Christoph Hellwig @ 2015-12-24 14:27 UTC (permalink / raw)
Translation SCSI commands to NVMe commands is rather pointless in general
as applications must not expext to be able to use SCSI commands on a
generic block device.
Make the huge translation layer optional and hope no one will ever enable
it in the future.
Signed-off-by: Christoph Hellwig <hch at lst.de>
---
drivers/nvme/host/Kconfig | 11 +++++++++++
drivers/nvme/host/Makefile | 3 ++-
drivers/nvme/host/core.c | 2 ++
3 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 002a94a..5d62373 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -8,3 +8,14 @@ config BLK_DEV_NVME
To compile this driver as a module, choose M here: the
module will be called nvme.
+
+config BLK_DEV_NVME_SCSI
+ bool "SCSI emulation for NVMe device nodes"
+ depends on BLK_DEV_NVME
+ ---help---
+ This adds support for the SG_IO ioctl on the NVMe character
+ and block devices nodes, as well a a translation for a small
+ number of selected SCSI commands to NVMe commands to the NVMe
+ driver. If you don't know what this means you probably want
+ to say N here, and if you know what it means you probably
+ want to say N as well.
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index 3e26dc9..baf9f52 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -1,4 +1,5 @@
obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
-nvme-y += core.o pci.o scsi.o lightnvm.o
+nvme-y += core.o pci.o lightnvm.o
+nvme-$(CONFIG_BLK_DEV_NVME_SCSI) += scsi.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 51f6fc8..8da4a8a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -467,10 +467,12 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
case NVME_IOCTL_SUBMIT_IO:
return nvme_submit_io(ns, (void __user *)arg);
+#ifdef CONFIG_BLK_DEV_NVME_SCSI
case SG_GET_VERSION_NUM:
return nvme_sg_get_version_num((void __user *)arg);
case SG_IO:
return nvme_sg_io(ns, (void __user *)arg);
+#endif
default:
return -ENOTTY;
}
--
1.9.1
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 4/4] nvme: make SG_IO support optional
2015-12-24 14:27 ` [PATCH 4/4] nvme: make SG_IO support optional Christoph Hellwig
@ 2016-01-03 16:36 ` Sagi Grimberg
2016-01-04 15:19 ` Keith Busch
1 sibling, 0 replies; 11+ messages in thread
From: Sagi Grimberg @ 2016-01-03 16:36 UTC (permalink / raw)
To complete the set, looks fine,
Reviewed-by: Sagi Grimberg <sagig at mellanox.com>
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 4/4] nvme: make SG_IO support optional
2015-12-24 14:27 ` [PATCH 4/4] nvme: make SG_IO support optional Christoph Hellwig
2016-01-03 16:36 ` Sagi Grimberg
@ 2016-01-04 15:19 ` Keith Busch
2016-01-04 16:41 ` Christoph Hellwig
1 sibling, 1 reply; 11+ messages in thread
From: Keith Busch @ 2016-01-04 15:19 UTC (permalink / raw)
On Thu, Dec 24, 2015@03:27:02PM +0100, Christoph Hellwig wrote:
> +config BLK_DEV_NVME_SCSI
> + bool "SCSI emulation for NVMe device nodes"
> + depends on BLK_DEV_NVME
Can we make this "default y"? I strongly advocate for native NVMe over
SCSI translations too, but I know of users relying on this. It's been
built in for years and not aware of it breaking anything, so think it
should be opt-out.
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 4/4] nvme: make SG_IO support optional
2016-01-04 15:19 ` Keith Busch
@ 2016-01-04 16:41 ` Christoph Hellwig
0 siblings, 0 replies; 11+ messages in thread
From: Christoph Hellwig @ 2016-01-04 16:41 UTC (permalink / raw)
On Mon, Jan 04, 2016@03:19:25PM +0000, Keith Busch wrote:
> On Thu, Dec 24, 2015@03:27:02PM +0100, Christoph Hellwig wrote:
> > +config BLK_DEV_NVME_SCSI
> > + bool "SCSI emulation for NVMe device nodes"
> > + depends on BLK_DEV_NVME
>
> Can we make this "default y"? I strongly advocate for native NVMe over
> SCSI translations too, but I know of users relying on this. It's been
> built in for years and not aware of it breaking anything, so think it
> should be opt-out.
I'd really prefer not having to carry it around. But if any actual
breakage shows I'd be happy to default it to y ASAP.
^ permalink raw reply [flat|nested] 11+ messages in thread