* [PATCH 1/8] nvme: move set_queue_count to common code
From: Christoph Hellwig @ 2015-11-07 17:00 UTC
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/nvme/host/core.c | 17 +++++++++++++++++
drivers/nvme/host/nvme.h | 1 +
drivers/nvme/host/pci.c | 19 +------------------
3 files changed, 19 insertions(+), 18 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 3600a0c..a63a71a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -334,6 +334,23 @@ int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
return error;
}
+int nvme_set_queue_count(struct nvme_ctrl *ctrl, int count)
+{
+ int status;
+ u32 result;
+ u32 q_count = (count - 1) | ((count - 1) << 16);
+
+ status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, 0,
+ &result);
+ if (status < 0)
+ return status;
+ if (status > 0) {
+ dev_err(ctrl->dev, "Could not set queue count (%d)\n", status);
+ return 0;
+ }
+ return min(result & 0xffff, result >> 16) + 1;
+}
+
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
struct nvme_user_io io;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index d72bbe0..1692fda 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -227,6 +227,7 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
dma_addr_t dma_addr, u32 *result);
int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
dma_addr_t dma_addr, u32 *result);
+int nvme_set_queue_count(struct nvme_ctrl *ctrl, int count);
extern spinlock_t dev_list_lock;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 2ef408f..89d2442 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1464,23 +1464,6 @@ static void nvme_create_io_queues(struct nvme_dev *dev)
}
}
-static int set_queue_count(struct nvme_dev *dev, int count)
-{
- int status;
- u32 result;
- u32 q_count = (count - 1) | ((count - 1) << 16);
-
- status = nvme_set_features(&dev->ctrl, NVME_FEAT_NUM_QUEUES, q_count, 0,
- &result);
- if (status < 0)
- return status;
- if (status > 0) {
- dev_err(dev->dev, "Could not set queue count (%d)\n", status);
- return 0;
- }
- return min(result & 0xffff, result >> 16) + 1;
-}
-
static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
{
u64 szu, size, offset;
@@ -1545,7 +1528,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
int result, i, vecs, nr_io_queues, size;
nr_io_queues = num_possible_cpus();
- result = set_queue_count(dev, nr_io_queues);
+ result = nvme_set_queue_count(&dev->ctrl, nr_io_queues);
if (result <= 0)
return result;
if (result < nr_io_queues)
--
1.9.1
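The encoding here is worth spelling out: Set Features / Number of Queues takes zero's-based counts for submission and completion queues packed into the low and high 16 bits of dword11, and the completion result packs the allocated counts the same way, so the usable queue count is the minimum of the two halves plus one. A standalone sketch of that encode/decode (helper names are illustrative, not part of the patch; the kernel function additionally maps a negative status to an error and a positive NVMe status to zero queues):

#include <stdio.h>
#include <stdint.h>

/* Pack a requested queue count the way nvme_set_queue_count() does:
 * NSQR in bits 15:0 and NCQR in bits 31:16, both zero's-based. */
static uint32_t encode_q_count(int count)
{
	return (count - 1) | ((count - 1) << 16);
}

/* Unpack the completion dword0: NSQA in bits 15:0, NCQA in bits 31:16;
 * only min(NSQA, NCQA) + 1 queue pairs are actually usable. */
static int decode_q_result(uint32_t result)
{
	uint32_t sq = result & 0xffff, cq = result >> 16;

	return (sq < cq ? sq : cq) + 1;
}

int main(void)
{
	uint32_t res = 0x00030007;	/* controller granted 8 SQs but only 4 CQs */

	printf("dword11 = 0x%08x\n", encode_q_count(8));	/* 0x00070007 */
	printf("usable  = %d\n", decode_q_result(res));		/* 4 */
	return 0;
}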
* [PATCH 2/8] nvme: move enable/disable/shutdown_ctrl to common code
From: Christoph Hellwig @ 2015-11-07 17:00 UTC
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/nvme/host/core.c | 87 ++++++++++++++++++++++++++++++++++++++++++++
drivers/nvme/host/nvme.h | 4 +++
drivers/nvme/host/pci.c | 94 +++++-------------------------------------------
3 files changed, 100 insertions(+), 85 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index a63a71a..c061a6a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -14,6 +14,7 @@
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
+#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/hdreg.h>
#include <linux/kernel.h>
@@ -32,6 +33,10 @@
#define NVME_MINORS (1U << MINORBITS)
+static unsigned char shutdown_timeout = 5;
+module_param(shutdown_timeout, byte, 0644);
+MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
+
static int nvme_major;
module_param(nvme_major, int, 0);
@@ -351,6 +356,88 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int count)
return min(result & 0xffff, result >> 16) + 1;
}
+static int nvme_update_ctrl_config(struct nvme_ctrl *ctrl, u64 cap,
+ bool enabled)
+{
+ unsigned long timeout =
+ ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
+ u32 bit = enabled ? NVME_CSTS_RDY : 0, csts;
+ int error;
+
+ error = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
+ if (error)
+ return error;
+
+ while (!(error = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts))) {
+ if ((csts & NVME_CSTS_RDY) == bit)
+ break;
+
+ msleep(100);
+ if (fatal_signal_pending(current))
+ return -EINTR;
+ if (time_after(jiffies, timeout)) {
+ dev_err(ctrl->dev,
+ "Controller not ready; aborting %s\n", enabled ?
+ "initialisation" : "reset");
+ return -ENODEV;
+ }
+ }
+
+ return error;
+}
+
+/*
+ * If the device has been passed off to us in an enabled state, just clear
+ * the enabled bit. The spec says we should set the 'shutdown notification
+ * bits', but doing so may cause the device to complete commands to the
+ * admin queue ... and we don't know what memory that might be pointing at!
+ */
+int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
+{
+ ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
+ ctrl->ctrl_config &= ~NVME_CC_ENABLE;
+
+ return nvme_update_ctrl_config(ctrl, cap, false);
+}
+
+int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
+{
+ ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
+ ctrl->ctrl_config |= NVME_CC_ENABLE;
+
+ return nvme_update_ctrl_config(ctrl, cap, true);
+}
+
+int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
+{
+ unsigned long timeout = (shutdown_timeout * HZ) + jiffies;
+ u32 csts;
+ int error;
+
+ ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
+ ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;
+
+ error = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
+ if (error)
+ return error;
+
+ while (!(error = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts))) {
+ if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT)
+ break;
+
+ msleep(100);
+ if (fatal_signal_pending(current))
+ return -EINTR;
+ if (time_after(jiffies, timeout)) {
+ dev_err(ctrl->dev,
+ "Device shutdown incomplete; abort shutdown\n");
+ return -ENODEV;
+ }
+ }
+
+ return error;
+}
+
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
struct nvme_user_io io;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 1692fda..426cafc 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -52,6 +52,7 @@ struct nvme_ctrl {
char serial[20];
char model[40];
char firmware_rev[8];
+ u32 ctrl_config;
u32 max_hw_sectors;
u32 stripe_size;
u32 page_size;
@@ -228,6 +229,9 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
dma_addr_t dma_addr, u32 *result);
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int count);
+int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap);
+int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap);
+int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
extern spinlock_t dev_list_lock;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 89d2442..d50a2b7 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -48,7 +48,6 @@
#define NVME_NR_AEN_COMMANDS 1
#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion))
-#define SHUTDOWN_TIMEOUT (shutdown_timeout * HZ)
unsigned char admin_timeout = 60;
module_param(admin_timeout, byte, 0644);
@@ -58,10 +57,6 @@ unsigned char nvme_io_timeout = 30;
module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
-static unsigned char shutdown_timeout = 5;
-module_param(shutdown_timeout, byte, 0644);
-MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
-
static int use_threaded_interrupts;
module_param(use_threaded_interrupts, int, 0);
@@ -105,7 +100,6 @@ struct nvme_dev {
unsigned max_qid;
int q_depth;
u32 db_stride;
- u32 ctrl_config;
struct msix_entry *entry;
void __iomem *bar;
struct work_struct reset_work;
@@ -1192,77 +1186,6 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
return result;
}
-static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled)
-{
- unsigned long timeout;
- u32 bit = enabled ? NVME_CSTS_RDY : 0;
-
- timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
-
- while ((readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_RDY) != bit) {
- msleep(100);
- if (fatal_signal_pending(current))
- return -EINTR;
- if (time_after(jiffies, timeout)) {
- dev_err(dev->dev,
- "Device not ready; aborting %s\n", enabled ?
- "initialisation" : "reset");
- return -ENODEV;
- }
- }
-
- return 0;
-}
-
-/*
- * If the device has been passed off to us in an enabled state, just clear
- * the enabled bit. The spec says we should set the 'shutdown notification
- * bits', but doing so may cause the device to complete commands to the
- * admin queue ... and we don't know what memory that might be pointing at!
- */
-static int nvme_disable_ctrl(struct nvme_dev *dev, u64 cap)
-{
- dev->ctrl_config &= ~NVME_CC_SHN_MASK;
- dev->ctrl_config &= ~NVME_CC_ENABLE;
- writel(dev->ctrl_config, dev->bar + NVME_REG_CC);
-
- return nvme_wait_ready(dev, cap, false);
-}
-
-static int nvme_enable_ctrl(struct nvme_dev *dev, u64 cap)
-{
- dev->ctrl_config &= ~NVME_CC_SHN_MASK;
- dev->ctrl_config |= NVME_CC_ENABLE;
- writel(dev->ctrl_config, dev->bar + NVME_REG_CC);
-
- return nvme_wait_ready(dev, cap, true);
-}
-
-static int nvme_shutdown_ctrl(struct nvme_dev *dev)
-{
- unsigned long timeout;
-
- dev->ctrl_config &= ~NVME_CC_SHN_MASK;
- dev->ctrl_config |= NVME_CC_SHN_NORMAL;
-
- writel(dev->ctrl_config, dev->bar + NVME_REG_CC);
-
- timeout = SHUTDOWN_TIMEOUT + jiffies;
- while ((readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_SHST_MASK) !=
- NVME_CSTS_SHST_CMPLT) {
- msleep(100);
- if (fatal_signal_pending(current))
- return -EINTR;
- if (time_after(jiffies, timeout)) {
- dev_err(dev->dev,
- "Device shutdown incomplete; abort shutdown\n");
- return -ENODEV;
- }
- }
-
- return 0;
-}
-
static struct blk_mq_ops nvme_mq_admin_ops = {
.queue_rq = nvme_queue_rq,
.complete = nvme_complete_rq,
@@ -1353,7 +1276,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
(readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);
- result = nvme_disable_ctrl(dev, cap);
+ result = nvme_disable_ctrl(&dev->ctrl, cap);
if (result < 0)
return result;
@@ -1369,16 +1292,16 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
dev->page_size = 1 << page_shift;
- dev->ctrl_config = NVME_CC_CSS_NVM;
- dev->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
- dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
- dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
+ dev->ctrl.ctrl_config = NVME_CC_CSS_NVM;
+ dev->ctrl.ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
+ dev->ctrl.ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
+ dev->ctrl.ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
writel(aqa, dev->bar + NVME_REG_AQA);
writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);
- result = nvme_enable_ctrl(dev, cap);
+ result = nvme_enable_ctrl(&dev->ctrl, cap);
if (result)
goto free_nvmeq;
@@ -1764,7 +1687,8 @@ static void nvme_wait_dq(struct nvme_delq_ctx *dq, struct nvme_dev *dev)
* queues than admin tags.
*/
set_current_state(TASK_RUNNING);
- nvme_disable_ctrl(dev, readq(dev->bar + NVME_REG_CAP));
+ nvme_disable_ctrl(&dev->ctrl,
+ readq(dev->bar + NVME_REG_CAP));
nvme_clear_queue(dev->queues[0]);
flush_kthread_worker(dq->worker);
nvme_disable_queue(dev, 0);
@@ -1977,7 +1901,7 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
}
} else {
nvme_disable_io_queues(dev);
- nvme_shutdown_ctrl(dev);
+ nvme_shutdown_ctrl(&dev->ctrl);
nvme_disable_queue(dev, 0);
}
nvme_dev_unmap(dev);
--
1.9.1
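The key change is that the ready/shutdown polling no longer touches a PCIe BAR directly but goes through 32-bit register accessors on the controller ops, which is what makes these helpers reusable for a loopback or fabrics transport. A sketch of what the PCI side of that indirection might look like (the nvme_ctrl_ops definition is not part of this excerpt, so the struct layout and the to_nvme_dev() container helper are assumed):

/* Sketch only: an ops table with 32-bit register accessors, as implied
 * by the ctrl->ops->reg_read32/reg_write32 calls above. */
struct nvme_ctrl_ops {
	int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
	int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
};

/* PCIe implementation: plain MMIO against the mapped BAR. */
static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
{
	*val = readl(to_nvme_dev(ctrl)->bar + off);
	return 0;
}

static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
{
	writel(val, to_nvme_dev(ctrl)->bar + off);
	return 0;
}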
* [PATCH 3/8] nvme: move CC setup into nvme_enable_ctrl
From: Christoph Hellwig @ 2015-11-07 17:00 UTC
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/nvme/host/core.c | 7 +++++--
drivers/nvme/host/nvme.h | 2 +-
drivers/nvme/host/pci.c | 7 +------
3 files changed, 7 insertions(+), 9 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c061a6a..2254ce9 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -400,9 +400,12 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
return nvme_update_ctrl_config(ctrl, cap, false);
}
-int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
+int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap, unsigned page_shift)
{
- ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
+ ctrl->ctrl_config = NVME_CC_CSS_NVM;
+ ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
+ ctrl->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
+ ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
ctrl->ctrl_config |= NVME_CC_ENABLE;
return nvme_update_ctrl_config(ctrl, cap, true);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 426cafc..8cf6fa5 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -230,7 +230,7 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
dma_addr_t dma_addr, u32 *result);
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int count);
int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap);
-int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap);
+int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap, unsigned page_shift);
int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
extern spinlock_t dev_list_lock;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d50a2b7..45255e3 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1292,16 +1292,11 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
dev->page_size = 1 << page_shift;
- dev->ctrl.ctrl_config = NVME_CC_CSS_NVM;
- dev->ctrl.ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
- dev->ctrl.ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
- dev->ctrl.ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
-
writel(aqa, dev->bar + NVME_REG_AQA);
writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);
- result = nvme_enable_ctrl(&dev->ctrl, cap);
+ result = nvme_enable_ctrl(&dev->ctrl, cap, page_shift);
if (result)
goto free_nvmeq;
--
1.9.1
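For reference, the bits assembled here map onto the CC register layout from the spec: EN in bit 0, CSS in bits 6:4, MPS in bits 10:7 (page size is 2^(12+MPS)), AMS in bits 13:11, SHN in bits 15:14, and IOSQES/IOCQES in bits 19:16 and 23:20. A standalone computation for the usual 4KiB-page case, assuming the kernel's NVME_CC_* constants carry the values shown in the comments:

#include <stdio.h>
#include <stdint.h>

/* CC field encodings per the NVMe spec register layout. */
#define CC_EN		(1u << 0)
#define CC_CSS_NVM	(0u << 4)	/* NVM command set */
#define CC_MPS_SHIFT	7		/* page size = 2^(12 + MPS) */
#define CC_AMS_RR	(0u << 11)	/* round-robin arbitration */
#define CC_SHN_NONE	(0u << 14)	/* no shutdown notification */
#define CC_IOSQES	(6u << 16)	/* 2^6 = 64-byte SQ entries */
#define CC_IOCQES	(4u << 20)	/* 2^4 = 16-byte CQ entries */

int main(void)
{
	unsigned page_shift = 12;	/* 4KiB host pages */
	uint32_t cc = CC_CSS_NVM;

	cc |= (page_shift - 12) << CC_MPS_SHIFT;	/* MPS = 0 */
	cc |= CC_AMS_RR | CC_SHN_NONE;
	cc |= CC_IOSQES | CC_IOCQES;
	cc |= CC_EN;

	printf("CC = 0x%08x\n", cc);	/* prints CC = 0x00460001 */
	return 0;
}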
* [PATCH 4/8] nvme: move the timeout module parameters to common code
From: Christoph Hellwig @ 2015-11-07 17:00 UTC
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/nvme/host/core.c | 8 ++++++++
drivers/nvme/host/pci.c | 8 --------
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 2254ce9..6737aaf 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -33,6 +33,14 @@
#define NVME_MINORS (1U << MINORBITS)
+unsigned char admin_timeout = 60;
+module_param(admin_timeout, byte, 0644);
+MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
+
+unsigned char nvme_io_timeout = 30;
+module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
+MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
+
static unsigned char shutdown_timeout = 5;
module_param(shutdown_timeout, byte, 0644);
MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 45255e3..91522bb 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -49,14 +49,6 @@
#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion))
-unsigned char admin_timeout = 60;
-module_param(admin_timeout, byte, 0644);
-MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
-
-unsigned char nvme_io_timeout = 30;
-module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
-MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
-
static int use_threaded_interrupts;
module_param(use_threaded_interrupts, int, 0);
--
1.9.1
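The parameters stay non-static so other files (and, after patch 6, other modules) can keep deriving jiffies timeouts from them. A minimal sketch of a consumer, assuming the extern declarations live in nvme.h (they are not shown in this diff) and the helper name is hypothetical:

#include <linux/blk-mq.h>

extern unsigned char admin_timeout;	/* seconds, set via admin_timeout= */
extern unsigned char nvme_io_timeout;	/* seconds, set via io_timeout= */

/* Hypothetical helper: block layer timeouts are specified in jiffies. */
static void example_set_tagset_timeout(struct blk_mq_tag_set *set, bool admin)
{
	set->timeout = (admin ? admin_timeout : nvme_io_timeout) * HZ;
}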
* [PATCH 5/8] nvme: add segment limitations
From: Christoph Hellwig @ 2015-11-07 17:00 UTC
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/nvme/host/core.c | 11 +++++++++--
drivers/nvme/host/nvme.h | 2 ++
2 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 6737aaf..14ee152 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -982,9 +982,16 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
if (ctrl->max_hw_sectors) {
blk_queue_max_hw_sectors(ns->queue, ctrl->max_hw_sectors);
- blk_queue_max_segments(ns->queue,
- ((ctrl->max_hw_sectors << 9) / ctrl->page_size) + 1);
+
+ if (!ctrl->max_segments)
+ ctrl->max_segments = ((ctrl->max_hw_sectors << 9) /
+ ctrl->page_size) + 1;
}
+
+ if (ctrl->max_segments)
+ blk_queue_max_segments(ns->queue, ctrl->max_segments);
+ if (ctrl->max_segment_size)
+ blk_queue_max_segment_size(ns->queue, ctrl->max_segment_size);
if (ctrl->stripe_size)
blk_queue_chunk_sectors(ns->queue, ctrl->stripe_size >> 9);
if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 8cf6fa5..8377a3f 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -55,6 +55,8 @@ struct nvme_ctrl {
u32 ctrl_config;
u32 max_hw_sectors;
u32 stripe_size;
+ u32 max_segments;
+ u32 max_segment_size;
u32 page_size;
u16 oncs;
atomic_t abort_limit;
--
1.9.1
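The default preserves the old PRP-based math (one segment per controller page, plus one for an unaligned start), but a transport can now override both knobs before namespaces are scanned. A sketch of how a fabrics-style driver might cap them (the init function and the concrete values are hypothetical; the fields are the ones added above):

/* Hypothetical transport init: cap segments to a hardware SGE limit so
 * nvme_alloc_ns() applies them via blk_queue_max_segments() and
 * blk_queue_max_segment_size(). */
static void example_set_transport_limits(struct nvme_ctrl *ctrl)
{
	ctrl->max_hw_sectors = 1024;		/* 512KiB per command */
	ctrl->max_segments = 32;		/* e.g. RDMA device SGE limit */
	ctrl->max_segment_size = 64 * 1024;	/* 64KiB per segment */
}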
* [PATCH 6/8] nvme: export symbols needed for nvme-loop
From: Christoph Hellwig @ 2015-11-07 17:00 UTC
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/nvme/host/core.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 14ee152..400b1ea 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -36,10 +36,12 @@
unsigned char admin_timeout = 60;
module_param(admin_timeout, byte, 0644);
MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
+EXPORT_SYMBOL_GPL(admin_timeout);
unsigned char nvme_io_timeout = 30;
module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
+EXPORT_SYMBOL_GPL(nvme_io_timeout);
static unsigned char shutdown_timeout = 5;
module_param(shutdown_timeout, byte, 0644);
@@ -97,6 +99,7 @@ void nvme_requeue_req(struct request *req)
blk_mq_kick_requeue_list(req->q);
spin_unlock_irqrestore(req->q->queue_lock, flags);
}
+EXPORT_SYMBOL_GPL(nvme_requeue_req);
struct request *nvme_alloc_request(struct request_queue *q,
struct nvme_command *cmd, bool nowait)
@@ -324,6 +327,7 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
}
+EXPORT_SYMBOL_GPL(nvme_set_features);
int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
{
@@ -363,6 +367,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int count)
}
return min(result & 0xffff, result >> 16) + 1;
}
+EXPORT_SYMBOL_GPL(nvme_set_queue_count);
static int nvme_update_ctrl_config(struct nvme_ctrl *ctrl, u64 cap,
bool enabled)
@@ -407,6 +412,7 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
return nvme_update_ctrl_config(ctrl, cap, false);
}
+EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap, unsigned page_shift)
{
@@ -418,6 +424,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap, unsigned page_shift)
return nvme_update_ctrl_config(ctrl, cap, true);
}
+EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
{
@@ -448,6 +455,7 @@ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
return error;
}
+EXPORT_SYMBOL_GPL(nvme_shutdown_ctrl);
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
@@ -851,6 +859,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
kfree(id);
return 0;
}
+EXPORT_SYMBOL(nvme_init_identify);
static int nvme_dev_open(struct inode *inode, struct file *file)
{
@@ -1071,6 +1080,7 @@ void nvme_scan_namespaces(struct nvme_ctrl *ctrl)
__nvme_scan_namespaces(ctrl, le32_to_cpup(&id->nn));
kfree(id);
}
+EXPORT_SYMBOL_GPL(nvme_scan_namespaces);
void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
{
@@ -1079,6 +1089,7 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
nvme_ns_remove(ns);
}
+EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
static DEFINE_IDA(nvme_instance_ida);
@@ -1118,6 +1129,7 @@ void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
list_del(&ctrl->node);
spin_unlock(&dev_list_lock);
}
+EXPORT_SYMBOL_GPL(nvme_uninit_ctrl);
static void nvme_free_ctrl(struct kref *kref)
{
@@ -1133,6 +1145,7 @@ void nvme_put_ctrl(struct nvme_ctrl *ctrl)
{
kref_put(&ctrl->kref, nvme_free_ctrl);
}
+EXPORT_SYMBOL_GPL(nvme_put_ctrl);
/*
* Initialize a NVMe controller structures. This needs to be called during
@@ -1184,6 +1197,7 @@ out_release_instance:
out:
return ret;
}
+EXPORT_SYMBOL_GPL(nvme_init_ctrl);
int __init nvme_core_init(void)
{
--
1.9.1
* [PATCH 7/8] nvmet: add a generic NVMe target
From: Christoph Hellwig @ 2015-11-07 17:00 UTC
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/nvme/Kconfig | 1 +
drivers/nvme/Makefile | 1 +
drivers/nvme/target/Kconfig | 4 +
drivers/nvme/target/Makefile | 4 +
drivers/nvme/target/admin-cmd.c | 353 +++++++++++++++++++++++++++++++
drivers/nvme/target/configfs.c | 205 ++++++++++++++++++
drivers/nvme/target/core.c | 454 ++++++++++++++++++++++++++++++++++++++++
drivers/nvme/target/io-cmd.c | 114 ++++++++++
drivers/nvme/target/nvmet.h | 172 +++++++++++++++
9 files changed, 1308 insertions(+)
create mode 100644 drivers/nvme/target/Kconfig
create mode 100644 drivers/nvme/target/Makefile
create mode 100644 drivers/nvme/target/admin-cmd.c
create mode 100644 drivers/nvme/target/configfs.c
create mode 100644 drivers/nvme/target/core.c
create mode 100644 drivers/nvme/target/io-cmd.c
create mode 100644 drivers/nvme/target/nvmet.h
diff --git a/drivers/nvme/Kconfig b/drivers/nvme/Kconfig
index a39d943..b7c78a5 100644
--- a/drivers/nvme/Kconfig
+++ b/drivers/nvme/Kconfig
@@ -1 +1,2 @@
source "drivers/nvme/host/Kconfig"
+source "drivers/nvme/target/Kconfig"
diff --git a/drivers/nvme/Makefile b/drivers/nvme/Makefile
index 9421e82..0096a7f 100644
--- a/drivers/nvme/Makefile
+++ b/drivers/nvme/Makefile
@@ -1,2 +1,3 @@
obj-y += host/
+obj-y += target/
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
new file mode 100644
index 0000000..9a3d742
--- /dev/null
+++ b/drivers/nvme/target/Kconfig
@@ -0,0 +1,4 @@
+
+config NVME_TARGET
+ depends on BLOCK
+ tristate
diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile
new file mode 100644
index 0000000..9c236e4
--- /dev/null
+++ b/drivers/nvme/target/Makefile
@@ -0,0 +1,4 @@
+
+obj-$(CONFIG_NVME_TARGET) += nvmet.o
+
+nvmet-y += core.o configfs.o admin-cmd.o io-cmd.o
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
new file mode 100644
index 0000000..d9db0d4
--- /dev/null
+++ b/drivers/nvme/target/admin-cmd.c
@@ -0,0 +1,353 @@
+/*
+ * Copyright (c) 2015 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/blkdev.h>
+#include <linux/module.h>
+#include "nvmet.h"
+
+static void nvmet_execute_get_error_log(struct nvmet_req *req)
+{
+ void *buf;
+
+ /*
+ * We currently never set the More bit in the status field,
+ * so all error log entries are invalid and can be zeroed out.
+ * This is called a minimum viable implementation (TM) of this
+ * mandatory log page.
+ */
+ buf = kmap_atomic(sg_page(req->sg)) + req->sg->offset;
+ memset(buf, 0, req->data_len);
+ kunmap_atomic(buf);
+
+ nvmet_req_complete(req, 0);
+}
+
+static void nvmet_execute_get_smart_log(struct nvmet_req *req)
+{
+ struct nvme_smart_log *log;
+
+ /*
+ * XXX: fill out actual smart log
+ *
+ * We might have a hard time coming up with useful values for many
+ * of the fields, and even when we have useful data available
+ * (e.g. units or commands read/written) those aren't persistent
+ * over power loss.
+ */
+ log = kmap_atomic(sg_page(req->sg)) + req->sg->offset;
+ memset(log, 0, req->data_len);
+ kunmap_atomic(log);
+
+ nvmet_req_complete(req, 0);
+}
+
+static void nvmet_execute_get_fwslot_log(struct nvmet_req *req)
+{
+ void *buf;
+
+ /*
+ * We only support a single firmware slot which always is active,
+ * so we can zero out the whole firmware slot log and still claim
+ * to fully implement this mandatory log page.
+ */
+ buf = kmap_atomic(sg_page(req->sg)) + req->sg->offset;
+ memset(buf, 0, req->data_len);
+ kunmap_atomic(buf);
+
+ nvmet_req_complete(req, 0);
+}
+
+static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
+{
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ struct nvme_id_ctrl *id;
+
+ id = kmap_atomic(sg_page(req->sg)) + req->sg->offset;
+ memset(id, 0, sizeof(*id));
+
+ /* XXX: figure out how to assign real vendor IDs. */
+ id->vid = 0;
+ id->ssvid = 0;
+
+ /* XXX: figure out real serial / model / revision values */
+ memset(id->sn, ' ', sizeof(id->sn));
+ memset(id->mn, ' ', sizeof(id->mn));
+ memset(id->fr, ' ', sizeof(id->fr));
+ strcpy((char *)id->mn, "Fake NVMe");
+
+ id->rab = 6;
+
+ /* XXX: figure out a real IEEE OUI */
+ id->ieee[0] = 0x00;
+ id->ieee[1] = 0x02;
+ id->ieee[2] = 0xb3;
+
+ /* we may have multiple controllers attached to the subsystem */
+ id->mic = (1 << 1);
+
+ /* no limit on data transfer sizes for now */
+ id->mdts = 0;
+ id->cntlid = cpu_to_le16(ctrl->cntlid);
+ id->ver = cpu_to_le32(ctrl->subsys->ver);
+
+ /* XXX: figure out what to do about RTD3R/RTD3 */
+
+ id->oacs = 0;
+ id->acl = 3;
+ id->aerl = 3;
+
+ /* first slot is read-only, only one slot supported */
+ id->frmw = (1 << 0) | (1 << 1);
+ id->lpa = 1 << 0;
+#define NVMET_ERROR_LOG_SLOTS 128
+ id->elpe = NVMET_ERROR_LOG_SLOTS - 1;
+ id->npss = 0;
+
+ id->sqes = (0x6 << 4) | 0x6;
+ id->cqes = (0x4 << 4) | 0x4;
+ id->nn = cpu_to_le32(ctrl->subsys->max_nsid);
+
+ /* XXX: don't report vwc if the underlying device is write through */
+ id->vwc = NVME_CTRL_VWC_PRESENT;
+
+ /*
+ * We can't support atomic writes bigger than a LBA without support
+ * from the backend device.
+ */
+ id->awun = 0;
+ id->awupf = 0;
+
+ /*
+ * We support SGLs, but nothing fancy.
+ */
+ id->sgls = (1 << 0);
+
+ /*
+ * Meh, we don't really support any power state. Fake up the same
+ * values that qemu does.
+ */
+ id->psd[0].max_power = cpu_to_le16(0x9c4);
+ id->psd[0].entry_lat = cpu_to_le32(0x10);
+ id->psd[0].exit_lat = cpu_to_le32(0x4);
+
+ kunmap_atomic(id);
+
+ nvmet_req_complete(req, 0);
+}
+
+static void nvmet_execute_identify_ns(struct nvmet_req *req)
+{
+ struct nvmet_ns *ns;
+ struct nvme_id_ns *id;
+ u16 status = 0;
+
+ ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid);
+ if (!ns) {
+ status = NVME_SC_INVALID_NS | NVME_SC_DNR;
+ goto out;
+ }
+
+ id = kmap_atomic(sg_page(req->sg)) + req->sg->offset;
+ memset(id, 0, sizeof(*id));
+
+ /*
+ * nuse = ncap = nsze isn't always true, but we have no way to find
+ * that out from the underlying device.
+ */
+ id->ncap = id->nuse = id->nsze =
+ cpu_to_le64(ns->size >> ns->blksize_shift);
+
+ /*
+ * We just provide a single LBA format that matches what the
+ * underlying device reports.
+ */
+ id->nlbaf = 0;
+ id->flbas = 0;
+
+ /*
+ * Our namespace might always be shared. Not just with other
+ * controllers, but also with any other user of the block device.
+ */
+ id->nmic = (1 << 0);
+
+ /* XXX: provide a nguid value! */
+
+ id->lbaf[0].ds = ns->blksize_shift;
+
+ kunmap_atomic(id);
+
+ nvmet_put_namespace(ns);
+out:
+ nvmet_req_complete(req, status);
+}
+
+static void nvmet_execute_identify_nslist(struct nvmet_req *req)
+{
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ struct nvmet_ns *ns;
+ u32 min_nsid = le32_to_cpu(req->cmd->identify.nsid);
+ __le32 *list;
+ int i = 0;
+
+ list = kmap_atomic(sg_page(req->sg)) + req->sg->offset;
+ rcu_read_lock();
+ list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
+ if (ns->nsid <= min_nsid)
+ continue;
+ list[i++] = cpu_to_le32(ns->nsid);
+ if (i == req->data_len / sizeof(__le32))
+ goto out;
+ }
+
+ list[i] = 0;
+out:
+ rcu_read_unlock();
+ kunmap_atomic(list);
+
+ nvmet_req_complete(req, 0);
+}
+
+static void nvmet_execute_set_features(struct nvmet_req *req)
+{
+ struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
+ u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]);
+ u16 status = 0;
+
+ switch (cdw10 & 0xf) {
+ case NVME_FEAT_NUM_QUEUES:
+ nvmet_set_result(req,
+ subsys->max_qid | (subsys->max_qid << 16));
+ break;
+ default:
+ status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ break;
+ }
+
+ nvmet_req_complete(req, status);
+}
+
+static void nvmet_execute_get_features(struct nvmet_req *req)
+{
+ struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
+ u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]);
+ u16 status = 0;
+
+ switch (cdw10 & 0xf) {
+ /*
+ * These features are mandatory in the spec, but we don't
+ * have a useful way to implement them. We'll eventually
+ * need to come up with some fake values for these.
+ */
+#if 0
+ case NVME_FEAT_ARBITRATION:
+ break;
+ case NVME_FEAT_POWER_MGMT:
+ break;
+ case NVME_FEAT_TEMP_THRESH:
+ break;
+ case NVME_FEAT_ERR_RECOVERY:
+ break;
+ case NVME_FEAT_IRQ_COALESCE:
+ break;
+ case NVME_FEAT_IRQ_CONFIG:
+ break;
+ case NVME_FEAT_WRITE_ATOMIC:
+ break;
+ case NVME_FEAT_ASYNC_EVENT:
+ break;
+#endif
+ case NVME_FEAT_VOLATILE_WC:
+ nvmet_set_result(req, 1);
+ break;
+ case NVME_FEAT_NUM_QUEUES:
+ nvmet_set_result(req,
+ subsys->max_qid | (subsys->max_qid << 16));
+ break;
+ default:
+ status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ break;
+ }
+
+ nvmet_req_complete(req, status);
+}
+
+static inline u32 nvmet_get_log_page_len(struct nvme_command *cmd)
+{
+ u32 cdw10 = cmd->common.cdw10[0];
+
+ return ((cdw10 >> 16) & 0xff) * sizeof(u32);
+}
+
+int nvmet_parse_admin_cmd(struct nvmet_req *req)
+{
+ struct nvme_command *cmd = req->cmd;
+
+ req->ns = NULL;
+
+ switch (cmd->common.opcode) {
+ case nvme_admin_get_log_page:
+ req->data_len = nvmet_get_log_page_len(cmd);
+
+ switch (cmd->common.cdw10[0] & 0xf) {
+ case 0x01:
+ req->execute = nvmet_execute_get_error_log;
+ return 0;
+ case 0x02:
+ req->execute = nvmet_execute_get_smart_log;
+ return 0;
+ case 0x03:
+ req->execute = nvmet_execute_get_fwslot_log;
+ return 0;
+ }
+ break;
+ case nvme_admin_identify:
+ switch (cmd->identify.cns) {
+ case 0x00:
+ req->execute = nvmet_execute_identify_ns;
+ req->data_len = sizeof(struct nvme_id_ns);
+ return 0;
+ case 0x01:
+ req->execute = nvmet_execute_identify_ctrl;
+ req->data_len = sizeof(struct nvme_id_ctrl);
+ return 0;
+ case 0x02:
+ req->execute = nvmet_execute_identify_nslist;
+ req->data_len = 4096;
+ return 0;
+ }
+ break;
+#if 0
+ case nvme_admin_abort_cmd:
+ req->execute = nvmet_execute_abort;
+ req->data_len = 0;
+ return 0;
+#endif
+ case nvme_admin_set_features:
+ req->execute = nvmet_execute_set_features;
+ req->data_len = 0;
+ return 0;
+ case nvme_admin_get_features:
+ req->execute = nvmet_execute_get_features;
+ req->data_len = 0;
+ return 0;
+#if 0
+ case nvme_admin_async_event:
+ req->execute = nvmet_execute_aen;
+ req->data_len = 0;
+ return 0;
+#endif
+ }
+
+ pr_err("nvmet: unhandled cmd %d\n", cmd->common.opcode);
+ return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+}
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
new file mode 100644
index 0000000..7dcdc58e
--- /dev/null
+++ b/drivers/nvme/target/configfs.c
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2015 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+
+#include "nvmet.h"
+
+
+CONFIGFS_ATTR_STRUCT(nvmet_ns);
+CONFIGFS_ATTR_OPS(nvmet_ns);
+
+static ssize_t nvmet_ns_device_path_show(struct nvmet_ns *ns, char *page)
+{
+ return sprintf(page, "%s", ns->device_path);
+}
+
+static ssize_t nvmet_ns_device_path_store(struct nvmet_ns *ns, const char *page,
+ size_t count)
+{
+ int ret = nvmet_ns_enable(ns, page);
+
+ return ret ? ret : count;
+}
+
+static struct nvmet_ns_attribute nvmet_ns_attr_device_path = {
+ .attr = {
+ .ca_name = "device_path",
+ .ca_mode = S_IRUSR | S_IWUSR,
+ .ca_owner = THIS_MODULE,
+ },
+ .show = nvmet_ns_device_path_show,
+ .store = nvmet_ns_device_path_store,
+};
+
+static struct configfs_attribute *nvmet_ns_attrs[] = {
+ &nvmet_ns_attr_device_path.attr,
+ NULL,
+};
+
+static void nvmet_ns_release(struct config_item *item)
+{
+ struct nvmet_ns *ns = to_nvmet_ns(item);
+
+ nvmet_ns_free(ns);
+}
+
+static struct configfs_item_operations nvmet_ns_item_ops = {
+ .release = nvmet_ns_release,
+ .show_attribute = nvmet_ns_attr_show,
+ .store_attribute = nvmet_ns_attr_store,
+};
+
+static struct config_item_type nvmet_ns_type = {
+ .ct_item_ops = &nvmet_ns_item_ops,
+ .ct_attrs = nvmet_ns_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct config_group *nvmet_ns_make(struct config_group *group,
+ const char *name)
+{
+ struct nvmet_subsys *subsys = namespaces_to_subsys(&group->cg_item);
+ struct nvmet_ns *ns;
+ int ret;
+ u32 nsid;
+
+ ret = kstrtou32(name, 0, &nsid);
+ if (ret)
+ goto out;
+
+ ret = -EINVAL;
+ if (nsid == 0 || nsid == 0xffffffff)
+ goto out;
+
+ ret = -ENOMEM;
+ ns = nvmet_ns_alloc(subsys, nsid);
+ if (!ns)
+ goto out;
+ config_group_init_type_name(&ns->group, name, &nvmet_ns_type);
+
+ pr_info("adding nsid %d to subsystem %s\n", nsid, subsys->subsys_name);
+
+ return &ns->group;
+out:
+ return ERR_PTR(ret);
+}
+
+static struct configfs_group_operations nvmet_namespaces_group_ops = {
+ .make_group = nvmet_ns_make,
+};
+
+static struct config_item_type nvmet_namespaces_type = {
+ .ct_group_ops = &nvmet_namespaces_group_ops,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct config_item_type nvmet_controllers_type = {
+ .ct_owner = THIS_MODULE,
+};
+
+static void nvmet_subsys_release(struct config_item *item)
+{
+ struct nvmet_subsys *subsys = to_subsys(item);
+
+ nvmet_subsys_free(subsys);
+}
+
+static struct configfs_item_operations nvmet_subsys_item_ops = {
+ .release = nvmet_subsys_release,
+};
+
+static struct config_item_type nvmet_subsys_type = {
+ .ct_item_ops = &nvmet_subsys_item_ops,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct config_group *nvmet_subsys_make(struct config_group *group,
+ const char *name)
+{
+ struct nvmet_subsys *subsys;
+
+ subsys = nvmet_subsys_alloc(name);
+ if (!subsys)
+ return ERR_PTR(-ENOMEM);
+
+ config_group_init_type_name(&subsys->group, name, &nvmet_subsys_type);
+
+ config_group_init_type_name(&subsys->namespaces_group,
+ "namespaces", &nvmet_namespaces_type);
+ config_group_init_type_name(&subsys->controllers_group,
+ "controllers", &nvmet_controllers_type);
+
+ subsys->default_groups[0] = &subsys->namespaces_group;
+ subsys->default_groups[1] = &subsys->controllers_group;
+ subsys->default_groups[2] = NULL;
+
+ subsys->group.default_groups = subsys->default_groups;
+ return &subsys->group;
+}
+
+static struct configfs_group_operations nvmet_subsystems_group_ops = {
+ .make_group = nvmet_subsys_make,
+};
+
+static struct config_item_type nvmet_subsystems_type = {
+ .ct_group_ops = &nvmet_subsystems_group_ops,
+ .ct_owner = THIS_MODULE,
+};
+
+struct config_group nvmet_subsystems_group;
+
+struct config_group *nvmet_root_default_groups[] = {
+ &nvmet_subsystems_group,
+ NULL,
+};
+
+static struct config_item_type nvmet_root_type = {
+ .ct_owner = THIS_MODULE,
+};
+
+static struct configfs_subsystem nvmet_configfs_subsystem = {
+ .su_group = {
+ .cg_item = {
+ .ci_namebuf = "nvmet",
+ .ci_type = &nvmet_root_type,
+ },
+ .default_groups = nvmet_root_default_groups,
+ },
+};
+
+int __init nvmet_init_configfs(void)
+{
+ int ret;
+
+ config_group_init(&nvmet_configfs_subsystem.su_group);
+ mutex_init(&nvmet_configfs_subsystem.su_mutex);
+
+ config_group_init_type_name(&nvmet_subsystems_group,
+ "subsystems", &nvmet_subsystems_type);
+
+ ret = configfs_register_subsystem(&nvmet_configfs_subsystem);
+ if (ret) {
+ pr_err("configfs_register_subsystem: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+void __exit nvmet_exit_configfs(void)
+{
+ configfs_unregister_subsystem(&nvmet_configfs_subsystem);
+}
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
new file mode 100644
index 0000000..5c770bf
--- /dev/null
+++ b/drivers/nvme/target/core.c
@@ -0,0 +1,454 @@
+/*
+ * Copyright (c) 2015 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/blkdev.h>
+#include <linux/module.h>
+#include "nvmet.h"
+
+static DEFINE_MUTEX(nvmet_subsystem_mutex);
+static LIST_HEAD(nvmet_subsystems);
+
+static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
+ __le32 nsid)
+{
+ struct nvmet_ns *ns;
+
+ list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
+ if (ns->nsid == le32_to_cpu(nsid))
+ return ns;
+ }
+
+ return NULL;
+}
+
+struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
+{
+ struct nvmet_ns *ns;
+
+ rcu_read_lock();
+ ns = __nvmet_find_namespace(ctrl, nsid);
+ if (ns && !kref_get_unless_zero(&ns->ref))
+ ns = NULL;
+ rcu_read_unlock();
+
+ return ns;
+}
+
+static void nvmet_destroy_namespace(struct kref *ref)
+{
+ struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);
+
+ if (ns->bdev)
+ blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
+ kfree(ns->device_path);
+ kfree(ns);
+}
+
+void nvmet_put_namespace(struct nvmet_ns *ns)
+{
+ kref_put(&ns->ref, nvmet_destroy_namespace);
+}
+
+int nvmet_ns_enable(struct nvmet_ns *ns, const char *path)
+{
+ int ret;
+
+ mutex_lock(&ns->subsys->lock);
+ ret = -EBUSY;
+ if (ns->device_path)
+ goto out_unlock;
+
+ ret = -ENOMEM;
+ ns->device_path = kstrdup(path, GFP_KERNEL);
+ if (!ns->device_path)
+ goto out_unlock;
+
+ ns->bdev = blkdev_get_by_path(path, FMODE_READ|FMODE_WRITE, NULL);
+ if (IS_ERR(ns->bdev)) {
+ pr_err("nvmet: failed to open block device %s: (%ld)\n",
+ path, PTR_ERR(ns->bdev));
+ ret = PTR_ERR(ns->bdev);
+ goto out_free_device_path;
+ }
+
+ ns->size = i_size_read(ns->bdev->bd_inode);
+ ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
+
+ if (ns->nsid > ns->subsys->max_nsid)
+ ns->subsys->max_nsid = ns->nsid;
+
+ list_add_rcu(&ns->dev_link, &ns->subsys->namespaces);
+ mutex_unlock(&ns->subsys->lock);
+
+ return 0;
+
+out_free_device_path:
+ kfree(ns->device_path);
+ ns->device_path = NULL;
+out_unlock:
+ mutex_unlock(&ns->subsys->lock);
+ return ret;
+}
+
+void nvmet_ns_free(struct nvmet_ns *ns)
+{
+ struct nvmet_subsys *subsys = ns->subsys;
+
+ mutex_lock(&subsys->lock);
+ if (!list_empty(&ns->dev_link))
+ list_del_init(&ns->dev_link);
+ mutex_unlock(&subsys->lock);
+
+ nvmet_put_namespace(ns);
+}
+
+struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
+{
+ struct nvmet_ns *ns;
+
+ ns = kzalloc(sizeof(*ns), GFP_KERNEL);
+ if (!ns)
+ return NULL;
+
+ kref_init(&ns->ref);
+ ns->nsid = nsid;
+ ns->subsys = subsys;
+ return ns;
+}
+
+void nvmet_req_complete(struct nvmet_req *req, u16 status)
+{
+ if (status)
+ nvmet_set_status(req, status);
+
+ /* XXX: need to fill in something useful for sq_head */
+ req->rsp->sq_head = 0;
+ req->rsp->sq_id = cpu_to_le16(req->sq->qid);
+ req->rsp->command_id = req->cmd->common.command_id;
+
+ if (req->ns)
+ nvmet_put_namespace(req->ns);
+ req->queue_response(req);
+}
+EXPORT_SYMBOL_GPL(nvmet_req_complete);
+
+void nvmet_cq_init(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
+ u16 qid, u16 size)
+{
+ cq->qid = qid;
+ cq->size = size;
+
+ ctrl->cqs[qid] = cq;
+}
+EXPORT_SYMBOL_GPL(nvmet_cq_init);
+
+void nvmet_sq_init(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
+ u16 qid, u16 size)
+{
+ sq->ctrl = ctrl;
+ sq->qid = qid;
+ sq->size = size;
+
+ ctrl->sqs[qid] = sq;
+}
+EXPORT_SYMBOL_GPL(nvmet_sq_init);
+
+void nvmet_sq_destroy(struct nvmet_sq *sq)
+{
+ if (sq->ctrl)
+ nvmet_ctrl_put(sq->ctrl);
+}
+EXPORT_SYMBOL_GPL(nvmet_sq_destroy);
+
+u16 nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
+ struct nvmet_sq *sq,
+ void (*queue_response)(struct nvmet_req *req))
+{
+ u16 status;
+
+ req->cq = cq;
+ req->sq = sq;
+ req->queue_response = queue_response;
+ req->sg = NULL;
+ req->sg_cnt = 0;
+ req->rsp->status = 0;
+
+ if (unlikely(req->sq->qid == 0))
+ status = nvmet_parse_admin_cmd(req);
+ else
+ status = nvmet_parse_io_cmd(req);
+
+ if (status)
+ return status;
+
+ if (unlikely(!req->sq->ctrl)) {
+ pr_err("queue not connected!\n");
+ return NVME_SC_QID_INVALID | NVME_SC_DNR;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nvmet_req_init);
+
+static inline bool nvmet_cc_en(u32 cc)
+{
+ return cc & 0x1;
+}
+
+static inline u8 nvmet_cc_css(u32 cc)
+{
+ return (cc >> 4) & 0x7;
+}
+
+static inline u8 nvmet_cc_mps(u32 cc)
+{
+ return (cc >> 7) & 0xf;
+}
+
+static inline u8 nvmet_cc_ams(u32 cc)
+{
+ return (cc >> 11) & 0x7;
+}
+
+static inline u8 nvmet_cc_shn(u32 cc)
+{
+ return (cc >> 14) & 0x3;
+}
+
+static inline u8 nvmet_cc_iosqes(u32 cc)
+{
+ return (cc >> 16) & 0xf;
+}
+
+static inline u8 nvmet_cc_iocqes(u32 cc)
+{
+ return (cc >> 20) & 0xf;
+}
+
+static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
+{
+#if 0
+ nvmet_cc_iosqes(n->bar.cc) < NVME_CTRL_SQES_MIN(n->id_ctrl.sqes) ||
+ nvmet_cc_iosqes(n->bar.cc) > NVME_CTRL_SQES_MAX(n->id_ctrl.sqes) ||
+ nvmet_cc_iocqes(n->bar.cc) < NVME_CTRL_CQES_MIN(n->id_ctrl.cqes) ||
+ nvmet_cc_iocqes(n->bar.cc) > NVME_CTRL_CQES_MAX(n->id_ctrl.cqes) ||
+#endif
+ if (nvmet_cc_mps(ctrl->cc) != 0 ||
+ nvmet_cc_ams(ctrl->cc) != 0 ||
+ nvmet_cc_css(ctrl->cc) != 0) {
+ ctrl->csts = NVME_CSTS_CFS;
+ return;
+ }
+
+ ctrl->csts = NVME_CSTS_RDY;
+}
+
+static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
+{
+ /* XXX: tear down queues? */
+ ctrl->csts &= ~NVME_CSTS_RDY;
+ ctrl->cc = 0;
+}
+
+void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
+{
+ u32 old;
+
+ /* XXX: locking? */
+ old = ctrl->cc;
+ ctrl->cc = new;
+
+ if (nvmet_cc_en(new) && !nvmet_cc_en(old))
+ nvmet_start_ctrl(ctrl);
+ if (!nvmet_cc_en(new) && nvmet_cc_en(old))
+ nvmet_clear_ctrl(ctrl);
+ if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
+ nvmet_clear_ctrl(ctrl);
+ ctrl->csts |= NVME_CSTS_SHST_CMPLT;
+ }
+ if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
+ ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
+}
+EXPORT_SYMBOL_GPL(nvmet_update_cc);
+
+struct nvmet_ctrl *nvmet_ctrl_find_get(struct nvmet_subsys *subsys, u16 cntlid)
+{
+ struct nvmet_ctrl *ctrl;
+
+ lockdep_assert_held(&subsys->lock);
+
+ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+ if (ctrl->cntlid == cntlid) {
+ if (kref_get_unless_zero(&ctrl->ref))
+ return ctrl;
+ return NULL;
+ }
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(nvmet_ctrl_find_get);
+
+struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_subsys *subsys,
+ const char *subsys_name)
+{
+ struct nvmet_ctrl *ctrl;
+ int ret = -ENOMEM;
+
+ lockdep_assert_held(&subsys->lock);
+
+ ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
+ if (!ctrl)
+ goto out;
+
+ /* command sets supported: NVMe command set: */
+ ctrl->cap |= (1ULL << 37);
+ /* CC.EN timeout in 500msec units: */
+ ctrl->cap |= (15ULL << 24);
+ /* maximum queue entries supported: */
+ ctrl->cap |= NVMET_QUEUE_SIZE - 1;
+
+ memcpy(ctrl->subsys_name, subsys_name, NVMET_SUBSYS_NAME_LEN);
+
+ kref_init(&ctrl->ref);
+ ctrl->subsys = subsys;
+
+ ctrl->cqs = kcalloc(subsys->max_qid + 1,
+ sizeof(struct nvmet_queue *),
+ GFP_KERNEL);
+ if (!ctrl->cqs)
+ goto out_free_ctrl;
+
+ ctrl->sqs = kcalloc(subsys->max_qid + 1,
+ sizeof(struct nvmet_queue *),
+ GFP_KERNEL);
+ if (!ctrl->sqs)
+ goto out_free_cqs;
+
+ ctrl->cntlid = ida_simple_get(&subsys->cntlid_ida, 0, USHRT_MAX - 1,
+ GFP_KERNEL);
+ if (ctrl->cntlid < 0) {
+ ret = ctrl->cntlid;
+ goto out_free_sqs;
+ }
+
+ list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
+ return ctrl;
+
+out_free_sqs:
+ kfree(ctrl->sqs);
+out_free_cqs:
+ kfree(ctrl->cqs);
+out_free_ctrl:
+ kfree(ctrl);
+out:
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(nvmet_alloc_ctrl);
+
+static void nvmet_ctrl_free(struct kref *ref)
+{
+ struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
+ struct nvmet_subsys *subsys = ctrl->subsys;
+
+ mutex_lock(&ctrl->subsys->lock);
+ list_del(&ctrl->subsys_entry);
+ mutex_unlock(&ctrl->subsys->lock);
+
+ mutex_lock(&subsys->lock);
+ ida_simple_remove(&subsys->cntlid_ida, ctrl->cntlid);
+ mutex_unlock(&subsys->lock);
+
+ kfree(ctrl->sqs);
+ kfree(ctrl->cqs);
+ kfree(ctrl);
+}
+
+void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
+{
+ kref_put(&ctrl->ref, nvmet_ctrl_free);
+}
+
+struct nvmet_subsys *nvmet_find_subsys(char *subsys_name)
+{
+ struct nvmet_subsys *subsys;
+
+ mutex_lock(&nvmet_subsystem_mutex);
+ list_for_each_entry(subsys, &nvmet_subsystems, entry) {
+ if (!strncmp(subsys->subsys_name, subsys_name,
+ NVMET_SUBSYS_NAME_LEN)) {
+ /* XXX: need to start refcounting subsystems.. */
+ mutex_unlock(&nvmet_subsystem_mutex);
+ return subsys;
+ }
+ }
+ mutex_unlock(&nvmet_subsystem_mutex);
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(nvmet_find_subsys);
+
+struct nvmet_subsys *nvmet_subsys_alloc(const char *subsys_name)
+{
+ struct nvmet_subsys *subsys;
+
+ subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
+ if (!subsys)
+ return NULL;
+
+ subsys->subsys_name = kstrndup(subsys_name, NVMET_SUBSYS_NAME_LEN,
+ GFP_KERNEL);
+ if (!subsys->subsys_name) {
+ kfree(subsys);
+ return NULL;
+ }
+
+ mutex_init(&subsys->lock);
+ INIT_LIST_HEAD(&subsys->namespaces);
+ INIT_LIST_HEAD(&subsys->ctrls);
+
+ ida_init(&subsys->cntlid_ida);
+ subsys->max_qid = NVMET_NR_QUEUES;
+
+ mutex_lock(&nvmet_subsystem_mutex);
+ list_add_tail(&subsys->entry, &nvmet_subsystems);
+ mutex_unlock(&nvmet_subsystem_mutex);
+
+ return subsys;
+}
+
+void nvmet_subsys_free(struct nvmet_subsys *subsys)
+{
+ WARN_ON_ONCE(!list_empty(&subsys->namespaces));
+
+ mutex_lock(&nvmet_subsystem_mutex);
+ list_del(&subsys->entry);
+ mutex_unlock(&nvmet_subsystem_mutex);
+
+ kfree(subsys->subsys_name);
+ kfree(subsys);
+}
+
+static int __init nvmet_init(void)
+{
+ return nvmet_init_configfs();
+}
+
+static void __exit nvmet_exit(void)
+{
+ nvmet_exit_configfs();
+}
+
+module_init(nvmet_init);
+module_exit(nvmet_exit);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
new file mode 100644
index 0000000..2cf1811
--- /dev/null
+++ b/drivers/nvme/target/io-cmd.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2015 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/blkdev.h>
+#include <linux/module.h>
+#include "nvmet.h"
+
+static void nvmet_bio_done(struct bio *bio)
+{
+ nvmet_req_complete(bio->bi_private,
+ bio->bi_error ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+ bio_put(bio);
+}
+
+static void nvmet_execute_rw(struct nvmet_req *req)
+{
+ int sg_cnt = req->sg_cnt;
+ struct scatterlist *sg;
+ struct bio *bio;
+ sector_t sector;
+ int rw, i;
+
+ if (!req->sg_cnt) {
+ nvmet_req_complete(req, 0);
+ return;
+ }
+
+ if (req->cmd->rw.opcode == nvme_cmd_write) {
+ if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
+ rw = WRITE_FUA;
+ else
+ rw = WRITE;
+ } else {
+ rw = READ;
+ }
+
+ sector = le64_to_cpu(req->cmd->rw.slba);
+ sector <<= (req->ns->blksize_shift - 9);
+
+ bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
+ bio->bi_bdev = req->ns->bdev;
+ bio->bi_iter.bi_sector = sector;
+ bio->bi_private = req;
+ bio->bi_end_io = nvmet_bio_done;
+
+ for_each_sg(req->sg, sg, req->sg_cnt, i) {
+ if (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
+ != sg->length) {
+ struct bio *prev = bio;
+
+ bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
+ bio->bi_bdev = req->ns->bdev;
+ bio->bi_iter.bi_sector = sector;
+
+ bio_chain(bio, prev);
+ submit_bio(rw, prev);
+ }
+
+ sector += sg->length >> 9;
+ sg_cnt--;
+ }
+
+ submit_bio(rw, bio);
+}
+
+static void nvmet_execute_flush(struct nvmet_req *req)
+{
+ struct bio *bio = bio_alloc(GFP_KERNEL, 0);
+
+ bio->bi_bdev = req->ns->bdev;
+ bio->bi_private = req;
+ bio->bi_end_io = nvmet_bio_done;
+
+ submit_bio(WRITE_FLUSH, bio);
+}
+
+int nvmet_parse_io_cmd(struct nvmet_req *req)
+{
+ struct nvme_command *cmd = req->cmd;
+
+ req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
+ if (!req->ns)
+ return NVME_SC_INVALID_NS | NVME_SC_DNR;
+
+ switch (cmd->common.opcode) {
+ case nvme_cmd_read:
+ req->execute = nvmet_execute_rw;
+ req->data_len = ((u32)le16_to_cpu(cmd->rw.length) + 1) <<
+ req->ns->blksize_shift;
+ return 0;
+ case nvme_cmd_write:
+ req->execute = nvmet_execute_rw;
+ req->data_len = ((u32)le16_to_cpu(cmd->rw.length) + 1) <<
+ req->ns->blksize_shift;
+ return 0;
+ case nvme_cmd_flush:
+ req->execute = nvmet_execute_flush;
+ req->data_len = 0;
+ return 0;
+ default:
+ pr_err("nvmet: unhandled cmd %d\n", cmd->common.opcode);
+ return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ }
+}
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
new file mode 100644
index 0000000..9335584
--- /dev/null
+++ b/drivers/nvme/target/nvmet.h
@@ -0,0 +1,172 @@
+#ifndef _NVME_CMD_H
+#define _NVME_CMD_H
+
+#include <linux/dma-mapping.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/nvme.h>
+#include <linux/configfs.h>
+#include <linux/rcupdate.h>
+
+struct nvmet_ns {
+ struct list_head dev_link;
+ struct kref ref;
+ struct block_device *bdev;
+ u32 nsid;
+ u32 blksize_shift;
+ loff_t size;
+
+ struct nvmet_subsys *subsys;
+ const char *device_path;
+
+ struct config_group device_group;
+ struct config_group default_groups[2];
+ struct config_group group;
+ struct rcu_head rcu;
+};
+
+static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
+{
+ return container_of(to_config_group(item), struct nvmet_ns, group);
+}
+
+struct nvmet_cq {
+ u16 qid;
+ u16 size;
+};
+
+struct nvmet_sq {
+ struct nvmet_ctrl *ctrl;
+ u16 qid;
+ u16 size;
+};
+
+struct nvmet_ctrl {
+ struct nvmet_subsys *subsys;
+ struct nvmet_cq **cqs;
+ struct nvmet_sq **sqs;
+
+ u64 cap;
+ u32 cc;
+ u32 csts;
+
+ u16 cntlid;
+
+ struct list_head subsys_entry;
+ struct kref ref;
+#define NVMET_SUBSYS_NAME_LEN 256
+ char subsys_name[NVMET_SUBSYS_NAME_LEN];
+};
+
+struct nvmet_subsys {
+ struct mutex lock;
+
+ struct list_head namespaces;
+ unsigned int max_nsid;
+
+ struct list_head ctrls;
+ struct ida cntlid_ida;
+
+ u16 max_qid;
+
+ u64 ver;
+ char *subsys_name;
+
+ struct list_head entry;
+ struct config_group group;
+
+ struct config_group namespaces_group;
+ struct config_group controllers_group;
+ struct config_group *default_groups[3];
+};
+
+static inline struct nvmet_subsys *to_subsys(struct config_item *item)
+{
+ return container_of(to_config_group(item), struct nvmet_subsys, group);
+}
+
+static inline struct nvmet_subsys *namespaces_to_subsys(
+ struct config_item *item)
+{
+ return container_of(to_config_group(item), struct nvmet_subsys,
+ namespaces_group);
+}
+
+struct nvmet_req {
+ struct nvme_command *cmd;
+ struct nvme_completion *rsp;
+ struct nvmet_sq *sq;
+ struct nvmet_cq *cq;
+ struct nvmet_ns *ns;
+ struct scatterlist *sg;
+ int sg_cnt;
+ size_t data_len;
+
+ void (*execute)(struct nvmet_req *req);
+ void (*queue_response)(struct nvmet_req *req);
+};
+
+static inline void nvmet_set_status(struct nvmet_req *req, u16 status)
+{
+ req->rsp->status = cpu_to_le16(status << 1);
+}
+
+static inline void nvmet_set_result(struct nvmet_req *req, u32 result)
+{
+ req->rsp->result = cpu_to_le32(result);
+}
+
+static inline bool nvmet_is_write(struct nvmet_req *req)
+{
+ return req->cmd->common.opcode & 1;
+}
+
+/*
+ * NVMe command writes actually are DMA reads for us on the target side.
+ */
+static inline enum dma_data_direction
+nvmet_data_dir(struct nvmet_req *cmd)
+{
+ return nvmet_is_write(cmd) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+}
+
+int nvmet_parse_io_cmd(struct nvmet_req *req);
+int nvmet_parse_admin_cmd(struct nvmet_req *req);
+
+u16 nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
+ struct nvmet_sq *sq,
+ void (*queue_response)(struct nvmet_req *req));
+void nvmet_req_complete(struct nvmet_req *req, u16 status);
+
+void nvmet_cq_init(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid,
+ u16 size);
+void nvmet_sq_init(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, u16 qid,
+ u16 size);
+void nvmet_sq_destroy(struct nvmet_sq *sq);
+
+void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new);
+struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_subsys *subsys,
+ const char *subsys_name);
+struct nvmet_ctrl *nvmet_ctrl_find_get(struct nvmet_subsys *subsys, u16 cntlid);
+void nvmet_ctrl_put(struct nvmet_ctrl *ctrl);
+
+struct nvmet_subsys *nvmet_find_subsys(char *subsys_name);
+struct nvmet_subsys *nvmet_subsys_alloc(const char *subsys_name);
+void nvmet_subsys_free(struct nvmet_subsys *subsys);
+
+struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid);
+void nvmet_put_namespace(struct nvmet_ns *ns);
+int nvmet_ns_enable(struct nvmet_ns *ns, const char *path);
+struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid);
+void nvmet_ns_free(struct nvmet_ns *ns);
+
+#define NVMET_QUEUE_SIZE 1024
+#define NVMET_NR_QUEUES 64
+
+int __init nvmet_init_configfs(void);
+void __exit nvmet_exit_configfs(void);
+
+#endif /* _NVMET_H */
--
1.9.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH 7/8] nvmet: add a generic NVMe target
2015-11-07 17:00 ` [PATCH 7/8] nvmet: add a generic NVMe target Christoph Hellwig
@ 2015-11-08 10:26 ` Sagi Grimberg
2015-11-08 13:57 ` Christoph Hellwig
0 siblings, 1 reply; 24+ messages in thread
From: Sagi Grimberg @ 2015-11-08 10:26 UTC (permalink / raw)
> +void nvmet_ns_free(struct nvmet_ns *ns)
> +{
> + struct nvmet_subsys *subsys = ns->subsys;
> +
> + mutex_lock(&subsys->lock);
> + if (!list_empty(&ns->dev_link))
> + list_del_init(&ns->dev_link);
> + mutex_unlock(&subsys->lock);
> +
> + nvmet_put_namespace(ns);
> +}
> +
> +struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
> +{
> + struct nvmet_ns *ns;
> +
> + ns = kzalloc(sizeof(*ns), GFP_KERNEL);
> + if (!ns)
> + return NULL;
> +
This is also missing:
INIT_LIST_HEAD(&ns->dev_link);
Without it the list_empty() check in nvmet_ns_free() is bogus, and
configfs rmdir causes a NULL deref; see the sketch after the quoted code.
> + kref_init(&ns->ref);
> + ns->nsid = nsid;
> + ns->subsys = subsys;
> + return ns;
> +}
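Something like this is what I have in mind (a sketch based on the
quoted code, untested):

struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ns *ns;

	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		return NULL;

	/* keep the list_empty() check in nvmet_ns_free() valid */
	INIT_LIST_HEAD(&ns->dev_link);
	kref_init(&ns->ref);
	ns->nsid = nsid;
	ns->subsys = subsys;
	return ns;
}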
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH 8/8] nvme-loop: add a NVMe loopback device
2015-11-07 17:00 [RFC] generic NVMe target and NVMe loop driver Christoph Hellwig
` (6 preceding siblings ...)
2015-11-07 17:00 ` [PATCH 7/8] nvmet: add a generic NVMe target Christoph Hellwig
@ 2015-11-07 17:00 ` Christoph Hellwig
2015-11-08 10:54 ` Sagi Grimberg
2015-11-15 19:18 ` Sagi Grimberg
2015-11-16 7:30 ` [RFC] generic NVMe target and NVMe loop driver Nicholas A. Bellinger
8 siblings, 2 replies; 24+ messages in thread
From: Christoph Hellwig @ 2015-11-07 17:00 UTC (permalink / raw)
Usage:
------ snip ------
NAME="testsubsystem"
DEV="/dev/?da"
mkdir /sys/kernel/config/nvmet/subsystems/${NAME}
mkdir /sys/kernel/config/nvmet/subsystems/${NAME}/namespaces/1
echo -n ${DEV} > /sys/kernel/config/nvmet/subsystems/${NAME}/namespaces/1/device_path
echo "name=${NAME}" > /sys/class/nvme-loop/ctl/add_ctrl
------ snip ------
Signed-off-by: Christoph Hellwig <hch at lst.de>
---
drivers/nvme/target/Kconfig | 10 +
drivers/nvme/target/Makefile | 2 +
drivers/nvme/target/loop.c | 688 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 700 insertions(+)
create mode 100644 drivers/nvme/target/loop.c
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index 9a3d742..1bf92db 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -2,3 +2,13 @@
config NVME_TARGET
depends on BLOCK
tristate
+
+config NVME_TARGET_LOOP
+ tristate "NVMe loopback device support"
+ depends on BLK_DEV_NVME
+ select NVME_TARGET
+ help
+ This enables NVMe loopback device support, which can be useful
+ for testing NVMe host and target side features.
+
+ If unsure, say N.
diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile
index 9c236e4..21e9134 100644
--- a/drivers/nvme/target/Makefile
+++ b/drivers/nvme/target/Makefile
@@ -1,4 +1,6 @@
obj-$(CONFIG_NVME_TARGET) += nvmet.o
+obj-$(CONFIG_NVME_TARGET_LOOP) += nvme-loop.o
nvmet-y += core.o configfs.o admin-cmd.o io-cmd.o
+nvme-loop-y += loop.o
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
new file mode 100644
index 0000000..446bdff
--- /dev/null
+++ b/drivers/nvme/target/loop.c
@@ -0,0 +1,688 @@
+/*
+ * Copyright (c) 2015 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/scatterlist.h>
+#include <linux/delay.h>
+#include <linux/blk-mq.h>
+#include <linux/nvme.h>
+#include <linux/module.h>
+#include <linux/parser.h>
+#include <linux/t10-pi.h>
+#include "nvmet.h"
+#include "../host/nvme.h"
+
+#define NVME_LOOP_MAX_Q_DEPTH 1024
+#define NVME_LOOP_AQ_DEPTH 256
+
+#define NVME_LOOP_MAX_SEGMENTS 32
+
+struct nvme_loop_ctrl {
+ spinlock_t lock;
+ struct nvme_loop_queue *queues;
+ u32 queue_count;
+ size_t queue_size;
+
+ struct blk_mq_tag_set admin_tag_set;
+
+ u16 cntlid;
+ char *subsys_name;
+
+ struct list_head list;
+ u64 cap;
+ struct blk_mq_tag_set tag_set;
+ struct nvme_ctrl ctrl;
+
+ struct nvmet_ctrl *target_ctrl;
+};
+
+static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl)
+{
+ return container_of(ctrl, struct nvme_loop_ctrl, ctrl);
+}
+
+struct nvme_loop_queue {
+ struct nvmet_cq nvme_cq;
+ struct nvmet_sq nvme_sq;
+ struct nvme_loop_ctrl *ctrl;
+};
+
+struct nvme_loop_iod {
+ struct scatterlist sg[NVME_LOOP_MAX_SEGMENTS];
+ struct nvme_command cmd;
+ struct nvme_completion rsp;
+ struct nvmet_req req;
+ struct work_struct work;
+};
+
+static int nr_io_queues;
+module_param(nr_io_queues, int, 0444);
+MODULE_PARM_DESC(nr_io_queues,
+ "Number of I/O queues. Default is one per CPU");
+
+static LIST_HEAD(nvme_loop_ctrl_list);
+static DEFINE_MUTEX(nvme_loop_ctrl_mutex);
+
+static inline int nvme_loop_queue_idx(struct nvme_loop_queue *queue)
+{
+ return queue - queue->ctrl->queues;
+}
+
+static void nvme_loop_complete_rq(struct request *req)
+{
+ int error = 0;
+
+ if (unlikely(req->errors)) {
+ if (nvme_req_needs_retry(req, req->errors)) {
+ nvme_requeue_req(req);
+ return;
+ }
+
+ if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+ error = req->errors;
+ else
+ error = nvme_error_status(req->errors);
+ }
+
+ blk_mq_end_request(req, error);
+}
+
+static void nvme_loop_queue_response(struct nvmet_req *nvme_req)
+{
+ struct nvme_loop_iod *iod =
+ container_of(nvme_req, struct nvme_loop_iod, req);
+ struct nvme_completion *cqe = &iod->rsp;
+ struct request *req = blk_mq_rq_from_pdu(iod);
+
+ if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+ req->special = (void *)(uintptr_t)le32_to_cpu(cqe->result);
+ blk_mq_complete_request(req, le16_to_cpu(cqe->status) >> 1);
+}
+
+static void nvme_loop_execute_work(struct work_struct *work)
+{
+ struct nvme_loop_iod *iod =
+ container_of(work, struct nvme_loop_iod, work);
+
+ iod->req.execute(&iod->req);
+}
+
+static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
+{
+ struct nvme_ns *ns = hctx->queue->queuedata;
+ struct nvme_loop_queue *queue = hctx->driver_data;
+ struct request *req = bd->rq;
+ struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
+ int ret;
+
+ switch (req->cmd_type) {
+ case REQ_TYPE_FS:
+ if (req->cmd_flags & REQ_FLUSH)
+ nvme_setup_flush(ns, &iod->cmd);
+ else
+ nvme_setup_rw(ns, req, &iod->cmd);
+ break;
+ case REQ_TYPE_DRV_PRIV:
+ memcpy(&iod->cmd, req->cmd, sizeof(struct nvme_command));
+ break;
+ default:
+ return BLK_MQ_RQ_QUEUE_ERROR;
+ }
+
+ ret = nvmet_req_init(&iod->req, &queue->nvme_cq, &queue->nvme_sq,
+ nvme_loop_queue_response);
+ if (ret)
+ goto out_err;
+
+ if (blk_rq_bytes(req)) {
+ sg_init_table(iod->sg, req->nr_phys_segments);
+
+ iod->req.sg = iod->sg;
+ iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg);
+ BUG_ON(iod->req.sg_cnt > req->nr_phys_segments);
+ }
+
+ iod->cmd.common.command_id = req->tag;
+ blk_mq_start_request(req);
+
+ schedule_work(&iod->work);
+ return 0;
+out_err:
+ return BLK_MQ_RQ_QUEUE_ERROR;
+}
+
+static int __nvme_loop_init_request(struct nvme_loop_ctrl *ctrl,
+ struct request *req, unsigned int queue_idx)
+{
+ struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
+
+ BUG_ON(queue_idx >= ctrl->queue_count);
+
+ iod->req.cmd = &iod->cmd;
+ iod->req.rsp = &iod->rsp;
+ INIT_WORK(&iod->work, nvme_loop_execute_work);
+ return 0;
+}
+
+static int nvme_loop_init_request(void *data, struct request *req,
+ unsigned int hctx_idx, unsigned int rq_idx,
+ unsigned int numa_node)
+{
+ return __nvme_loop_init_request(data, req, hctx_idx + 1);
+}
+
+static int nvme_loop_init_admin_request(void *data, struct request *req,
+ unsigned int hctx_idx, unsigned int rq_idx,
+ unsigned int numa_node)
+{
+ return __nvme_loop_init_request(data, req, 0);
+}
+
+static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
+ unsigned int hctx_idx)
+{
+ struct nvme_loop_ctrl *ctrl = data;
+ struct nvme_loop_queue *queue = &ctrl->queues[hctx_idx + 1];
+
+ BUG_ON(hctx_idx >= ctrl->queue_count);
+
+ hctx->driver_data = queue;
+ return 0;
+}
+
+static int nvme_loop_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
+ unsigned int hctx_idx)
+{
+ struct nvme_loop_ctrl *ctrl = data;
+ struct nvme_loop_queue *queue = &ctrl->queues[0];
+
+ BUG_ON(hctx_idx != 0);
+
+ hctx->driver_data = queue;
+ return 0;
+}
+
+static struct blk_mq_ops nvme_loop_mq_ops = {
+ .queue_rq = nvme_loop_queue_rq,
+ .complete = nvme_loop_complete_rq,
+ .map_queue = blk_mq_map_queue,
+ .init_request = nvme_loop_init_request,
+ .init_hctx = nvme_loop_init_hctx,
+};
+
+static struct blk_mq_ops nvme_loop_admin_mq_ops = {
+ .queue_rq = nvme_loop_queue_rq,
+ .complete = nvme_loop_complete_rq,
+ .map_queue = blk_mq_map_queue,
+ .init_request = nvme_loop_init_admin_request,
+ .init_hctx = nvme_loop_init_admin_hctx,
+};
+
+static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl)
+{
+ blk_cleanup_queue(ctrl->ctrl.admin_q);
+ blk_mq_free_tag_set(&ctrl->admin_tag_set);
+ nvme_shutdown_ctrl(&ctrl->ctrl);
+ /* disconnect queue */
+}
+
+static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl)
+{
+ struct nvme_loop_ctrl *ctrl = to_loop_ctrl(nctrl);
+
+ list_del(&ctrl->list);
+#if 0
+ for (i = 1; i < ctrl->queue_count; i++)
+ /* disconnect queue */
+#endif
+ blk_mq_free_tag_set(&ctrl->tag_set);
+ nvme_loop_destroy_admin_queue(ctrl);
+ kfree(ctrl->queues);
+ kfree(ctrl->subsys_name);
+ kfree(ctrl);
+}
+
+static int nvme_loop_init_queue(struct nvme_loop_ctrl *ctrl, int idx,
+ size_t queue_size)
+{
+ struct nvme_loop_queue *queue;
+ struct nvmet_subsys *subsys;
+ struct nvmet_ctrl *target_ctrl = NULL;
+ u16 qid, cntlid;
+ int ret = 0;
+
+ queue = &ctrl->queues[idx];
+ queue->ctrl = ctrl;
+
+ qid = nvme_loop_queue_idx(queue);
+ cntlid = qid ? ctrl->cntlid : 0xffff;
+
+ subsys = nvmet_find_subsys(ctrl->subsys_name);
+ if (!subsys) {
+ pr_warn("connect request for invalid subsystem!\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&subsys->lock);
+ target_ctrl = nvmet_ctrl_find_get(subsys, cntlid);
+ if (target_ctrl) {
+ pr_info("adding queue %d to ctrl %d.\n",
+ qid, target_ctrl->cntlid);
+ } else {
+ BUG_ON(qid != 0);
+
+ target_ctrl = nvmet_alloc_ctrl(subsys, ctrl->subsys_name);
+ if (IS_ERR(target_ctrl)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ pr_info("creating controller %d.\n", target_ctrl->cntlid);
+ }
+
+ nvmet_cq_init(target_ctrl, &queue->nvme_cq, qid,
+ qid ? ctrl->queue_size : NVME_LOOP_AQ_DEPTH);
+ nvmet_sq_init(target_ctrl, &queue->nvme_sq, qid,
+ qid ? ctrl->queue_size : NVME_LOOP_AQ_DEPTH);
+ if (!qid)
+ ctrl->cntlid = target_ctrl->cntlid;
+
+ if (!ctrl->target_ctrl)
+ ctrl->target_ctrl = target_ctrl;
+
+out_unlock:
+ mutex_unlock(&subsys->lock);
+ return ret;
+}
+
+static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
+{
+ unsigned page_shift = PAGE_SHIFT;
+ unsigned dev_page_min, dev_page_max;
+ int error;
+
+ error = nvme_loop_init_queue(ctrl, 0, NVME_LOOP_AQ_DEPTH);
+ if (error) {
+ dev_err(ctrl->ctrl.dev,
+ "failed to initialize admin queue: %d\n", error);
+ return error;
+ }
+
+ error = ctrl->ctrl.ops->reg_read64(&ctrl->ctrl, NVME_REG_CAP,
+ &ctrl->cap);
+ if (error) {
+ dev_err(ctrl->ctrl.dev,
+ "prop_get NVME_REG_CAP failed\n");
+ return error;
+ }
+
+ dev_page_min = NVME_CAP_MPSMIN(ctrl->cap) + 12;
+ if (page_shift < dev_page_min) {
+ dev_err(ctrl->ctrl.dev,
+ "Minimum device page size (%u) too large for "
+ "host (%u)\n", 1 << dev_page_min, 1 << page_shift);
+ return -ENODEV;
+ }
+
+ dev_page_max = NVME_CAP_MPSMAX(ctrl->cap) + 12;
+ if (page_shift > dev_page_max) {
+ dev_info(ctrl->ctrl.dev,
+ "Device maximum page size (%u) smaller than "
+ "host (%u); enabling work-around\n",
+ 1 << dev_page_max, 1 << page_shift);
+ page_shift = dev_page_max;
+ }
+
+ ctrl->queue_size =
+ min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, NVME_LOOP_MAX_Q_DEPTH);
+
+ error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap, page_shift);
+ if (error)
+ return error;
+
+ memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
+ ctrl->admin_tag_set.ops = &nvme_loop_admin_mq_ops;
+ ctrl->admin_tag_set.queue_depth = NVME_LOOP_AQ_DEPTH;
+ ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
+ ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod);
+ ctrl->admin_tag_set.driver_data = ctrl;
+ ctrl->admin_tag_set.nr_hw_queues = 1;
+ ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
+
+ error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
+ if (error)
+ goto out_disable;
+
+ ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
+ if (IS_ERR(ctrl->ctrl.admin_q)) {
+ error = PTR_ERR(ctrl->ctrl.admin_q);
+ goto out_free_tagset;
+ }
+ ctrl->ctrl.admin_q->queuedata = ctrl;
+
+ return 0;
+
+out_free_tagset:
+ blk_mq_free_tag_set(&ctrl->admin_tag_set);
+out_disable:
+ nvme_shutdown_ctrl(&ctrl->ctrl);
+ return error;
+}
+
+static int nvme_loop_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
+{
+ struct nvmet_ctrl *target_ctrl = to_loop_ctrl(ctrl)->target_ctrl;
+
+ switch (off) {
+ case NVME_REG_VS:
+ *val = target_ctrl->subsys->ver;
+ return 0;
+ case NVME_REG_CSTS:
+ *val = target_ctrl->csts;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int nvme_loop_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
+{
+ struct nvmet_ctrl *target_ctrl = to_loop_ctrl(ctrl)->target_ctrl;
+
+ switch (off) {
+ case NVME_REG_CAP:
+ *val = target_ctrl->cap;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int nvme_loop_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
+{
+ struct nvmet_ctrl *target_ctrl = to_loop_ctrl(ctrl)->target_ctrl;
+
+ switch (off) {
+ case NVME_REG_CC:
+ nvmet_update_cc(target_ctrl, val);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static bool nvme_loop_io_incapable(struct nvme_ctrl *ctrl)
+{
+ /* XXX: */
+ return false;
+}
+
+static int nvme_loop_reset_ctrl(struct nvme_ctrl *ctrl)
+{
+ return -EIO;
+}
+
+static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
+ .reg_read32 = nvme_loop_reg_read32,
+ .reg_read64 = nvme_loop_reg_read64,
+ .reg_write32 = nvme_loop_reg_write32,
+ .io_incapable = nvme_loop_io_incapable,
+ .reset_ctrl = nvme_loop_reset_ctrl,
+ .free_ctrl = nvme_loop_free_ctrl,
+};
+
+enum {
+ NVME_OPT_ERR = 0,
+ NVME_OPT_NAME = 1 << 2,
+ NVME_OPT_REQUIRED =
+ NVME_OPT_NAME,
+};
+
+static const match_table_t opt_tokens = {
+ { NVME_OPT_NAME, "name=%s" },
+ { NVME_OPT_ERR, NULL }
+};
+
+static int nvme_loop_parse_options(const char *buf, struct nvme_loop_ctrl *ctrl)
+{
+ substring_t args[MAX_OPT_ARGS];
+ char *options, *p, *o;
+ int token, ret = 0;
+ unsigned opt_mask = 0;
+
+ o = options = kstrdup(buf, GFP_KERNEL);
+ if (!options)
+ return -ENOMEM;
+
+ while ((p = strsep(&options, ",\n")) != NULL) {
+ if (!*p)
+ continue;
+
+ token = match_token(p, opt_tokens, args);
+ opt_mask |= token;
+ switch (token) {
+ case NVME_OPT_NAME:
+ p = match_strdup(args);
+ if (!p) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ctrl->subsys_name = p;
+ break;
+ default:
+ pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
+ p);
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ if ((opt_mask & NVME_OPT_REQUIRED) != NVME_OPT_REQUIRED) {
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(opt_tokens); i++) {
+ if ((opt_tokens[i].token & NVME_OPT_REQUIRED) &&
+ !(opt_tokens[i].token & opt_mask)) {
+ pr_warn("nvmf: missing parameter '%s'\n",
+ opt_tokens[i].pattern);
+ }
+ }
+
+ ret = -EINVAL;
+ }
+
+out:
+ kfree(o);
+ return ret;
+}
+
+static ssize_t
+nvme_loop_create_ctrl(struct device *sysfs_dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct nvme_loop_ctrl *ctrl;
+ int ret, i;
+
+ ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
+ if (!ctrl)
+ return -ENOMEM;
+
+ ret = nvme_init_ctrl(&ctrl->ctrl, sysfs_dev, &nvme_loop_ctrl_ops,
+ 0 /* no vendor.. */,
+ 0 /* no quirks, we're perfect! */);
+ if (ret)
+ goto out_free_ctrl;
+
+ ret = nvme_loop_parse_options(buf, ctrl);
+ if (ret)
+ goto out_uninit_ctrl;
+
+ spin_lock_init(&ctrl->lock);
+
+ ret = -ENOMEM;
+ ctrl->queue_count = 1; /* admin queue */;
+ if (nr_io_queues > 0)
+ ctrl->queue_count += nr_io_queues;
+ else
+ ctrl->queue_count += num_possible_cpus();
+
+ ctrl->queues = kcalloc(ctrl->queue_count,
+ sizeof(*ctrl->queues), GFP_KERNEL);
+ if (!ctrl->queues)
+ goto out_uninit_ctrl;
+
+ ret = nvme_loop_configure_admin_queue(ctrl);
+ if (ret)
+ goto out_kfree_queues;
+
+ ret = nvme_set_queue_count(&ctrl->ctrl, ctrl->queue_count - 1);
+ if (ret <= 0) {
+ dev_err(ctrl->ctrl.dev,
+ "set_queue_count failed: %d\n", ret);
+ goto out_remove_admin_queue;
+ }
+
+ if (ret <= ctrl->queue_count)
+ ctrl->queue_count = ret + 1;
+
+ dev_info(ctrl->ctrl.dev,
+ "creating %d I/O queues.\n", ctrl->queue_count - 1);
+
+ for (i = 1; i < ctrl->queue_count; i++) {
+ ret = nvme_loop_init_queue(ctrl, i, ctrl->queue_size);
+ if (ret) {
+ dev_err(ctrl->ctrl.dev,
+ "failed to initialize I/O queue: %d\n", ret);
+ goto out_remove_admin_queue;
+ }
+ }
+
+ memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
+ ctrl->tag_set.ops = &nvme_loop_mq_ops;
+ ctrl->tag_set.queue_depth = ctrl->queue_size;
+ ctrl->tag_set.numa_node = NUMA_NO_NODE;
+ ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+ ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod);
+ ctrl->tag_set.driver_data = ctrl;
+ ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1;
+ ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
+ ctrl->ctrl.tagset = &ctrl->tag_set;
+
+ ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
+ if (ret)
+ goto out_free_queues;
+
+ ret = nvme_init_identify(&ctrl->ctrl);
+ if (ret)
+ goto out_free_tag_set;
+
+ ctrl->ctrl.max_segments = NVME_LOOP_MAX_SEGMENTS;
+
+ nvme_scan_namespaces(&ctrl->ctrl);
+
+ pr_info("new ctrl: \"%s\"\n", ctrl->subsys_name);
+
+ mutex_lock(&nvme_loop_ctrl_mutex);
+ list_add_tail(&ctrl->list, &nvme_loop_ctrl_list);
+ mutex_unlock(&nvme_loop_ctrl_mutex);
+ return count;
+
+out_free_tag_set:
+ blk_mq_free_tag_set(&ctrl->tag_set);
+out_free_queues:
+#if 0
+ for (i = 1; i < ctrl->queue_count; i++)
+ /* disconnect queue */
+#endif
+out_remove_admin_queue:
+ nvme_loop_destroy_admin_queue(ctrl);
+out_kfree_queues:
+ kfree(ctrl->queues);
+out_uninit_ctrl:
+ nvme_uninit_ctrl(&ctrl->ctrl);
+out_free_ctrl:
+ kfree(ctrl);
+ return ret;
+}
+
+static DEVICE_ATTR(add_ctrl, S_IWUSR, NULL, nvme_loop_create_ctrl);
+
+static void __nvme_loop_remove_ctrl(struct nvme_loop_ctrl *ctrl)
+{
+ nvme_remove_namespaces(&ctrl->ctrl);
+ nvme_uninit_ctrl(&ctrl->ctrl);
+ nvme_put_ctrl(&ctrl->ctrl);
+}
+
+static struct class *nvme_loop_class;
+static struct device *nvme_loop_device;
+
+static int __init nvme_loop_init_module(void)
+{
+ int ret = -ENOMEM;
+
+ nvme_loop_class = class_create(THIS_MODULE, "nvme-loop");
+ if (IS_ERR(nvme_loop_class)) {
+ pr_err("couldn't register class nvme-loop\n");
+ ret = PTR_ERR(nvme_loop_class);
+ goto out;
+ }
+
+ nvme_loop_device =
+ device_create(nvme_loop_class, NULL, MKDEV(0, 0), NULL, "ctl");
+ if (IS_ERR(nvme_loop_device)) {
+ pr_err("couldn't create nvme-loop device!\n");
+ ret = PTR_ERR(nvme_loop_device);
+ goto out_destroy_class;
+ }
+
+ ret = device_create_file(nvme_loop_device, &dev_attr_add_ctrl);
+ if (ret) {
+ pr_err("couldn't add device attr.\n");
+ goto out_destroy_device;
+ }
+
+ return 0;
+
+out_destroy_device:
+ device_destroy(nvme_loop_class, MKDEV(0, 0));
+out_destroy_class:
+ class_destroy(nvme_loop_class);
+out:
+ return ret;
+}
+
+static void __exit nvme_loop_cleanup_module(void)
+{
+ struct nvme_loop_ctrl *ctrl;
+
+ mutex_lock(&nvme_loop_ctrl_mutex);
+ while (!list_empty(&nvme_loop_ctrl_list)) {
+ ctrl = list_entry(nvme_loop_ctrl_list.next,
+ struct nvme_loop_ctrl, list);
+
+ if (!list_empty(&ctrl->list))
+ list_del(&ctrl->list);
+
+ __nvme_loop_remove_ctrl(ctrl);
+ }
+ mutex_unlock(&nvme_loop_ctrl_mutex);
+
+ device_destroy(nvme_loop_class, MKDEV(0, 0));
+ class_destroy(nvme_loop_class);
+}
+
+module_init(nvme_loop_init_module);
+module_exit(nvme_loop_cleanup_module);
+
+MODULE_LICENSE("GPL v2");
--
1.9.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH 8/8] nvme-loop: add a NVMe loopback device
2015-11-07 17:00 ` [PATCH 8/8] nvme-loop: add a NVMe loopback device Christoph Hellwig
@ 2015-11-08 10:54 ` Sagi Grimberg
2015-11-08 13:22 ` Sagi Grimberg
2015-11-15 19:18 ` Sagi Grimberg
1 sibling, 1 reply; 24+ messages in thread
From: Sagi Grimberg @ 2015-11-08 10:54 UTC (permalink / raw)
> +static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl)
> +{
> + struct nvme_loop_ctrl *ctrl = to_loop_ctrl(nctrl);
> +
> + list_del(&ctrl->list);
This should be list_del_init() so that the list_empty() check in
nvme_loop_cleanup_module() stays correct; unloading nvme-loop
with active controllers currently triggers a list corruption.
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH 8/8] nvme-loop: add a NVMe loopback device
2015-11-08 10:54 ` Sagi Grimberg
@ 2015-11-08 13:22 ` Sagi Grimberg
2015-11-08 13:56 ` Christoph Hellwig
0 siblings, 1 reply; 24+ messages in thread
From: Sagi Grimberg @ 2015-11-08 13:22 UTC (permalink / raw)
On 08/11/2015 12:54, Sagi Grimberg wrote:
>
>> +static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl)
>> +{
>> + struct nvme_loop_ctrl *ctrl = to_loop_ctrl(nctrl);
>> +
>> + list_del(&ctrl->list);
>
> This should be list_del_init so that cleanup_module
> list_empty() check will be correct. unloading nvme-loop
> with active controllers is getting a list corruption.
Actually this is wrong. What's wrong here is that ctrl->list
is being removed twice (once in nvme_loop_cleanup_module() and
then again here).
The following worked for me:
--
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index a0eac07..cf0f745 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -869,18 +869,11 @@ out:
static void __exit nvme_loop_cleanup_module(void)
{
- struct nvme_loop_ctrl *ctrl;
+ struct nvme_loop_ctrl *ctrl, *tmp;
mutex_lock(&nvme_loop_ctrl_mutex);
- while (!list_empty(&nvme_loop_ctrl_list)) {
- ctrl = list_entry(nvme_loop_ctrl_list.next,
- struct nvme_loop_ctrl, list);
-
- if (!list_empty(&ctrl->list))
- list_del(&ctrl->list);
-
+ list_for_each_entry_safe(ctrl, tmp, &nvme_loop_ctrl_list, list)
__nvme_loop_remove_ctrl(ctrl);
- }
mutex_unlock(&nvme_loop_ctrl_mutex);
device_destroy(nvme_loop_class, MKDEV(0, 0));
--
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH 8/8] nvme-loop: add a NVMe loopback device
2015-11-07 17:00 ` [PATCH 8/8] nvme-loop: add a NVMe loopback device Christoph Hellwig
2015-11-08 10:54 ` Sagi Grimberg
@ 2015-11-15 19:18 ` Sagi Grimberg
2015-11-16 8:29 ` Christoph Hellwig
1 sibling, 1 reply; 24+ messages in thread
From: Sagi Grimberg @ 2015-11-15 19:18 UTC (permalink / raw)
> + ctrl->queue_count = 1; /* admin queue */;
> + if (nr_io_queues > 0)
> + ctrl->queue_count += nr_io_queues;
> + else
> + ctrl->queue_count += num_possible_cpus();
This should really be num_online_cpus(); I get way more queues than
I need with num_possible_cpus()...
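As a sketch, with only the default changed:

	ctrl->queue_count = 1;	/* admin queue */
	if (nr_io_queues > 0)
		ctrl->queue_count += nr_io_queues;
	else
		ctrl->queue_count += num_online_cpus();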
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH 8/8] nvme-loop: add a NVMe loopback device
2015-11-15 19:18 ` Sagi Grimberg
@ 2015-11-16 8:29 ` Christoph Hellwig
2015-11-16 9:35 ` Sagi Grimberg
0 siblings, 1 reply; 24+ messages in thread
From: Christoph Hellwig @ 2015-11-16 8:29 UTC (permalink / raw)
On Sun, Nov 15, 2015@09:18:34PM +0200, Sagi Grimberg wrote:
>
>> + ctrl->queue_count = 1; /* admin queue */;
>> + if (nr_io_queues > 0)
>> + ctrl->queue_count += nr_io_queues;
>> + else
>> + ctrl->queue_count += num_possible_cpus();
>
> This should really be num_online_cpus(); I get way more queues than
> I need with num_possible_cpus()...
Maybe. But then we'd want a hot plug event. The whole number of
contexts thing is a bit of a mess.
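To illustrate the kind of plumbing that would be needed, a rough and
entirely hypothetical sketch using the hotcpu notifier interface (none
of these names exist in the series, and the callback body is an
assumption):

static int nvme_loop_cpu_notify(struct notifier_block *nb,
		unsigned long action, void *hcpu)
{
	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_ONLINE:
	case CPU_DEAD:
		/* hypothetical: grow/shrink the I/O queue set and remap
		 * the blk-mq hw contexts to the new online CPU mask */
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block nvme_loop_cpu_nb = {
	.notifier_call	= nvme_loop_cpu_notify,
};

/* registered with register_hotcpu_notifier(&nvme_loop_cpu_nb) at init */

And even then rebalancing the already allocated queues is the hard part.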
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH 8/8] nvme-loop: add a NVMe loopback device
2015-11-16 8:29 ` Christoph Hellwig
@ 2015-11-16 9:35 ` Sagi Grimberg
0 siblings, 0 replies; 24+ messages in thread
From: Sagi Grimberg @ 2015-11-16 9:35 UTC (permalink / raw)
On 16/11/2015 10:29, Christoph Hellwig wrote:
> On Sun, Nov 15, 2015@09:18:34PM +0200, Sagi Grimberg wrote:
>>
>>> + ctrl->queue_count = 1; /* admin queue */;
>>> + if (nr_io_queues > 0)
>>> + ctrl->queue_count += nr_io_queues;
>>> + else
>>> + ctrl->queue_count += num_possible_cpus();
>>
>> This should really be num_online_cpus(); I get way more queues than
>> I need with num_possible_cpus()...
>
> Maybe. But then we'd want a hot plug event. The whole number of
> contexts thing is a bit of a mess.
Or we can just live with more or fewer queues than CPUs... it is
already possible if the user used nr_io_queues != num_online_cpus()...
^ permalink raw reply [flat|nested] 24+ messages in thread
* [RFC] generic NVMe target and NVMe loop driver
2015-11-07 17:00 [RFC] generic NVMe target and NVMe loop driver Christoph Hellwig
` (7 preceding siblings ...)
2015-11-07 17:00 ` [PATCH 8/8] nvme-loop: add a NVMe loopback device Christoph Hellwig
@ 2015-11-16 7:30 ` Nicholas A. Bellinger
2015-11-16 8:08 ` Ming Lin
8 siblings, 1 reply; 24+ messages in thread
From: Nicholas A. Bellinger @ 2015-11-16 7:30 UTC (permalink / raw)
On Sat, 2015-11-07@18:00 +0100, Christoph Hellwig wrote:
> This series continues the NVMe host driver split and also starts adding a
> consumer for it. The consumer is mostly interesting for developers at this
> point as it's simply a 'loopback' NVMe device that ties the split NVMe
> driver frontend into the new generic NVMe target subsystem.
Very excited to see this code posted. 8-)
> This was developed for our NVMe over Fabrics prototype, but will also be useful for
> other work like Ming's virtio-nvme or even an implementation of traditional
> PCIe NVMe using vhost.
>
Wrt vhost-nvme, the WIP code (Dr. Hannes + Dave CC'ed) I'm currently
hacking on is here:
https://git.kernel.org/cgit/linux/kernel/git/nab/target-pending.git/log/?h=vhost-nvme-wip
Note it's still a week or two away (using rts-megasas as a reference)
from actually functioning across a modest number of queue resources, but
should at least give interested folks an idea of how things look so far.
> The biggest gap at this point is aborts and resets, which are still under
> heavy development. Once those are done and I've finished the configfs
> layout the code will hopefully be ready for inclusion in Linux 4.5.
>
> I've also written a python based shell to ease configuration of the target
> subsystem which I'll hopefully be able to release soon.
>
;)
> As this depends on a lot of pending NVMe patches, a git tree is also
> available:
>
> http://git.infradead.org/users/hch/block.git/shortlog/refs/heads/nvme-loop.2
>
So on vhost-nvme configfs side, my initial code will be using
target_core_fabric_configfs.c to process incoming nvme-hi frames
into existing backend struct se_device.
That said, I'm planning to rebase atop your WIP tree in the short-term
for the common nvme opcode + command set decoding bits that both
vhost-nvme and nvme-fabrics code are going to require.
--nab
^ permalink raw reply [flat|nested] 24+ messages in thread
* [RFC] generic NVMe target and NVMe loop driver
2015-11-16 7:30 ` [RFC] generic NVMe target and NVMe loop driver Nicholas A. Bellinger
@ 2015-11-16 8:08 ` Ming Lin
2015-11-16 8:29 ` Nicholas A. Bellinger
2015-11-16 8:32 ` Christoph Hellwig
0 siblings, 2 replies; 24+ messages in thread
From: Ming Lin @ 2015-11-16 8:08 UTC (permalink / raw)
On Sun, 2015-11-15@23:30 -0800, Nicholas A. Bellinger wrote:
> On Sat, 2015-11-07@18:00 +0100, Christoph Hellwig wrote:
> > This series continues the NVMe host driver split and also starts adding a
> > consumer for it. The consumer is mostly interesting for developers at this
> > point as it's simply a 'loopback' NVMe device that ties the split NVMe
> > driver frontend into the new generic NVMe target subsystem.
>
> Very excited to see this code posted. 8-)
>
> > This was developed for our NVMe over Fabrics prototype, but will also be useful for
> > other work like Ming's virtio-nvme or even an implementation of traditional
> > PCIe NVMe using vhost.
> >
>
> Wrt vhost-nvme, the WIP code (Dr. Hannes + Dave CC'ed) I'm currently
> hacking on is here:
>
> https://git.kernel.org/cgit/linux/kernel/git/nab/target-pending.git/log/?h=vhost-nvme-wip
>
> Note it's still a week or two away (using rts-megasas as a reference)
> from actually functioning across a modest number of queue resources, but
> should at least give interested folks an idea of how things look so far.
Hi Nic,
FYI,
I have done the vhost-nvme patches (based on our previous discussion) on
top of NVMe target.
I'll post kernel & qemu patches early this week.
But the tests I have done so far didn't show competitive performance
compared with vhost-scsi, maybe because MMIO emulation is slow.
I'm going to do another implementation with virtio-nvme/vhost-nvme.
With nvme-split, virtio-nvme now seems to make sense.
I ported virtio-nvme last month.
https://git.kernel.org/cgit/linux/kernel/git/mlin/linux.git/log/?h=nvme-split/virtio
Thanks,
Ming
>
> > The biggest gap at this point is aborts and resets, which are still under
> > heavy development. Once those are done and I've finished the configfs
> > layout the code will hopefully be ready for inclusion in Linux 4.5.
> >
> > I've also written a python based shell to ease configuration of the target
> > subsystem which I'll hopefully be able to release soon.
> >
>
> ;)
>
> > As this depends on a lot of pending NVMe patches, a git tree is also
> > available:
> >
> > http://git.infradead.org/users/hch/block.git/shortlog/refs/heads/nvme-loop.2
> >
>
> So on vhost-nvme configfs side, my initial code will be using
> target_core_fabric_configfs.c to process incoming nvme-hi frames
> into existing backend struct se_device.
>
> That said, I'm planning to rebase atop your WIP tree in the short-term
> for the common nvme opcode + command set decoding bits that both
> vhost-nvme and nvme-fabrics code are going to require.
>
> --nab
^ permalink raw reply [flat|nested] 24+ messages in thread
* [RFC] generic NVMe target and NVMe loop driver
2015-11-16 8:08 ` Ming Lin
@ 2015-11-16 8:29 ` Nicholas A. Bellinger
2015-11-17 5:53 ` Ming Lin
2015-11-16 8:32 ` Christoph Hellwig
1 sibling, 1 reply; 24+ messages in thread
From: Nicholas A. Bellinger @ 2015-11-16 8:29 UTC (permalink / raw)
On Mon, 2015-11-16@00:08 -0800, Ming Lin wrote:
> On Sun, 2015-11-15@23:30 -0800, Nicholas A. Bellinger wrote:
> > On Sat, 2015-11-07@18:00 +0100, Christoph Hellwig wrote:
> > > This series continues the NVMe host driver split and also starts adding a
> > > consumer for it. The consumer is mostly interesting for developers at this
> > > point as it's simply a 'loopback' NVMe device that ties the split NVMe
> > > driver frontend into the new generic NVMe target subsystem.
> >
> > Very excited to see this code posted. 8-)
> >
> > > This was developed for our NVMe over Fabrics prototype, but will also be useful for
> > > other work like Ming's virtio-nvme or even an implementation of traditional
> > > PCIe NVMe using vhost.
> > >
> >
> > Wrt vhost-nvme, the WIP code (Dr. Hannes + Dave CC'ed) I'm currently
> > hacking on is here:
> >
> > https://git.kernel.org/cgit/linux/kernel/git/nab/target-pending.git/log/?h=vhost-nvme-wip
> >
> > Note it's still a week or two away (using rts-megasas as a reference)
> > from actually functioning across a modest number of queue resources, but
> > should at least give interested folks an idea of how things look so far.
>
> Hi Nic,
>
> FYI,
>
> I have done the vhost-nvme patches (based on our previous discussion) on
> top of NVMe target.
>
> I'll post kernel & qemu patches early this week.
>
Great. Looking forward to seeing the prototype code.
> But the tests I have done so far didn't show competitive performance
> compared with vhost-scsi, maybe because MMIO emulation is slow.
>
Any interesting hot-spots showing up in the perf output?
> I'm going to do another implementation with virtio-nvme/vhost-nvme.
> With nvme-split, virtio-nvme now seems to make sense.
>
> I ported virtio-nvme last month.
> https://git.kernel.org/cgit/linux/kernel/git/mlin/linux.git/log/?h=nvme-split/virtio
Cool. :)
^ permalink raw reply [flat|nested] 24+ messages in thread
* [RFC] generic NVMe target and NVMe loop driver
2015-11-16 8:29 ` Nicholas A. Bellinger
@ 2015-11-17 5:53 ` Ming Lin
0 siblings, 0 replies; 24+ messages in thread
From: Ming Lin @ 2015-11-17 5:53 UTC (permalink / raw)
On Mon, 2015-11-16@00:29 -0800, Nicholas A. Bellinger wrote:
> On Mon, 2015-11-16@00:08 -0800, Ming Lin wrote:
> > On Sun, 2015-11-15@23:30 -0800, Nicholas A. Bellinger wrote:
> > > On Sat, 2015-11-07@18:00 +0100, Christoph Hellwig wrote:
> > > > This series continues the NVMe host driver split and also starts adding a
> > > > consumer for it. The consumer is mostly interesting for developers at this
> > > > point as it's simply a 'loopback' NVMe device that ties the split NVMe
> > > > driver frontend into the new generic NVMe target subsystem.
> > >
> > > Very excited to see this code posted. 8-)
> > >
> > > > This was developed for our NVMe over Fabrics prototype, but will also be useful for
> > > > other work like Ming's virtio-nvme or even an implementation of traditional
> > > > PCIe NVMe using vhost.
> > > >
> > >
> > > Wrt vhost-nvme, the WIP code (Dr. Hannes + Dave CC'ed) I'm currently
> > > hacking on is here:
> > >
> > > https://git.kernel.org/cgit/linux/kernel/git/nab/target-pending.git/log/?h=vhost-nvme-wip
> > >
> > > Note it's still a week or two away (using rts-megasas as a reference)
> > > from actually functioning across a modest number of queue resources, but
> > > should at least give interested folks an idea of how things look so far.
> >
> > Hi Nic,
> >
> > FYI,
> >
> > I have done the vhost-nvme patches (based on our previous discussion) on
> > top of NVMe target.
> >
> > I'll post kernel & qemu patches early this week.
> >
>
> Great. Looking forward to seeing the prototype code.
>
> > But the tests I have done so far didn't show competitive performance
> > compared with vhost-scsi, maybe because MMIO emulation is slow.
> >
>
> Any interesting hot-spots showing up in the perf output?
To ease development, I use nested KVM:
"vm_host" runs on bare metal and "vm_guest" runs on "vm_host".
I just integrated Google's extension into vhost-nvme:
https://github.com/rlnelson-git/linux-nvme.git
Amazingly, performance improves a lot.
I use a 256M /dev/ram0 on vm_host as the backend.
fio 4k read:
qemu-nvme: ~20M to ~30M
qemu-vhost-nvme + google ext: 80M to 200M (not very stable though)
(BTW, still waiting for my employer's approval to send out the patches)
PerfTop: 1039 irqs/sec kernel:99.8% exact: 0.0% [4000Hz cpu-clock], (all, 4 CPUs)
---------------------------------------------------------------------------------------------
36.93% [kernel] [k] _raw_spin_unlock_irq
20.98% [kernel] [k] vmx_handle_external_intr
10.10% [kernel] [k] _raw_spin_unlock_irqrestore
4.95% [kernel] [k] __mutex_unlock_slowpath
4.41% [kernel] [k] lock_acquire
4.15% [kernel] [k] lock_is_held
2.30% [kernel] [k] mutex_lock_nested
1.68% [kernel] [k] lock_release
1.14% [kernel] [k] put_compound_page
0.93% [kernel] [k] debug_lockdep_rcu_enabled
0.66% [kernel] [k] check_preemption_disabled
0.64% [kernel] [k] __schedule
0.62% [kernel] [k] lock_acquired
0.54% [kernel] [k] rcu_lockdep_current_cpu_online
0.54% [kernel] [k] preempt_count_sub
0.54% [kernel] [k] preempt_count_add
0.46% [kernel] [k] find_vma
0.45% [kernel] [k] vmcs_writel
0.40% [kernel] [k] ___might_sleep
0.38% [kernel] [k] rcu_note_context_switch
0.37% [kernel] [k] rcu_read_lock_sched_held
0.32% [kernel] [k] __rcu_is_watching
0.32% [kernel] [k] follow_trans_huge_pmd
0.31% [kernel] [k] debug_smp_processor_id
0.22% [kernel] [k] follow_page_mask
0.18% [kernel] [k] __get_user_pages
0.16% [kernel] [k] vmx_read_guest_seg_ar
0.16% [kernel] [k] nvmet_vhost_rw
0.15% [kernel] [k] kthread_should_stop
0.14% [kernel] [k] schedule
0.14% [kernel] [k] rcu_is_watching
0.12% [kernel] [k] nvmet_vhost_sq_thread
0.11% [kernel] [k] get_parent_ip
0.11% [kernel] [k] _raw_spin_lock_irqsave
^ permalink raw reply [flat|nested] 24+ messages in thread
* [RFC] generic NVMe target and NVMe loop driver
2015-11-16 8:08 ` Ming Lin
2015-11-16 8:29 ` Nicholas A. Bellinger
@ 2015-11-16 8:32 ` Christoph Hellwig
1 sibling, 0 replies; 24+ messages in thread
From: Christoph Hellwig @ 2015-11-16 8:32 UTC (permalink / raw)
On Mon, Nov 16, 2015@12:08:58AM -0800, Ming Lin wrote:
> But the tests I have done so far didn't show competitive performance
> compared with vhost-scsi, maybe because MMIO emulation is slow.
It is. Take a look at the Google patches for the NVMe event index
extension, which makes it behave a lot more like virtio.
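The heart of that extension is the same wrap-safe comparison virtio
uses for its event indexes; a sketch (the helper name is made up here,
and the doorbell buffer plumbing around it is omitted):

/*
 * Only ring the doorbell / raise the interrupt if the other side asked
 * to be notified for an index we have now moved past.  The subtraction
 * is wrap-safe in u16 arithmetic.
 */
static inline bool nvme_need_event(u16 event_idx, u16 new_idx, u16 old_idx)
{
	return (u16)(new_idx - event_idx - 1) < (u16)(new_idx - old_idx);
}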
^ permalink raw reply [flat|nested] 24+ messages in thread