All of lore.kernel.org
 help / color / mirror / Atom feed
* NVMe driver split for Linux 4.5
@ 2015-11-30  8:36 Christoph Hellwig
  2015-11-30  8:36 ` [PATCH 01/23] blk-mq: add a flags parameter to blk_mq_alloc_request Christoph Hellwig
                   ` (16 more replies)
  0 siblings, 17 replies; 18+ messages in thread
From: Christoph Hellwig @ 2015-11-30  8:36 UTC (permalink / raw)


This series contains the split of the NVMe driver into a common core part
and a PCIe specific frontend.
This is the nvme driver split to show how I want to get started.

To make testing easier I've pushed out a git tree with this and all
dependencies:

    git://git.infradead.org/users/hch/block.git nvme-split.6

or in gitweb:

    http://git.infradead.org/users/hch/block.git/shortlog/refs/heads/nvme-split.6

Changes since the previous post from the patchbomb:
  - use vendor ID from Identify data in the SCSI translation layer
  - add common enable/shutdown code from the NVMe loop series

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 01/23] blk-mq: add a flags parameter to blk_mq_alloc_request
  2015-11-30  8:36 NVMe driver split for Linux 4.5 Christoph Hellwig
@ 2015-11-30  8:36 ` Christoph Hellwig
  2015-11-30  8:36 ` [PATCH 02/23] nvme: move struct nvme_iod to pci.c Christoph Hellwig
                   ` (15 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2015-11-30  8:36 UTC (permalink / raw)


We already have the reserved flag, and a nowait flag awkwardly encoded as
a gfp_t.  Add a real flags argument to make the scheme more extensible and
allow for a nicer calling convention.

Signed-off-by: Christoph Hellwig <hch at lst.de>
---
 block/blk-core.c                  | 11 ++++++-----
 block/blk-mq-tag.c                | 11 +++++------
 block/blk-mq.c                    | 20 ++++++++------------
 block/blk-mq.h                    | 11 ++++-------
 drivers/block/mtip32xx/mtip32xx.c |  2 +-
 drivers/block/null_blk.c          |  2 +-
 drivers/nvme/host/lightnvm.c      |  2 +-
 drivers/nvme/host/pci.c           | 11 ++++++-----
 fs/block_dev.c                    |  4 ++--
 include/linux/blk-mq.h            |  8 +++++++-
 include/linux/blkdev.h            |  2 +-
 11 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index c88a946..5ec9960 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -630,7 +630,7 @@ struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(blk_alloc_queue);
 
-int blk_queue_enter(struct request_queue *q, gfp_t gfp)
+int blk_queue_enter(struct request_queue *q, bool nowait)
 {
 	while (true) {
 		int ret;
@@ -638,7 +638,7 @@ int blk_queue_enter(struct request_queue *q, gfp_t gfp)
 		if (percpu_ref_tryget_live(&q->q_usage_counter))
 			return 0;
 
-		if (!gfpflags_allow_blocking(gfp))
+		if (nowait)
 			return -EBUSY;
 
 		ret = wait_event_interruptible(q->mq_freeze_wq,
@@ -1276,7 +1276,9 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
 struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
 {
 	if (q->mq_ops)
-		return blk_mq_alloc_request(q, rw, gfp_mask, false);
+		return blk_mq_alloc_request(q, rw,
+			(gfp_mask & __GFP_DIRECT_RECLAIM) ?
+				0 : BLK_MQ_REQ_NOWAIT);
 	else
 		return blk_old_get_request(q, rw, gfp_mask);
 }
@@ -2044,8 +2046,7 @@ blk_qc_t generic_make_request(struct bio *bio)
 	do {
 		struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
-		if (likely(blk_queue_enter(q, __GFP_DIRECT_RECLAIM) == 0)) {
-
+		if (likely(blk_queue_enter(q, false) == 0)) {
 			ret = q->make_request_fn(q, bio);
 
 			blk_queue_exit(q);
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index a07ca34..abdbb47 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -268,7 +268,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
 	if (tag != -1)
 		return tag;
 
-	if (!gfpflags_allow_blocking(data->gfp))
+	if (data->flags & BLK_MQ_REQ_NOWAIT)
 		return -1;
 
 	bs = bt_wait_ptr(bt, hctx);
@@ -303,7 +303,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
 		data->ctx = blk_mq_get_ctx(data->q);
 		data->hctx = data->q->mq_ops->map_queue(data->q,
 				data->ctx->cpu);
-		if (data->reserved) {
+		if (data->flags & BLK_MQ_REQ_RESERVED) {
 			bt = &data->hctx->tags->breserved_tags;
 		} else {
 			last_tag = &data->ctx->last_tag;
@@ -349,10 +349,9 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
 
 unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 {
-	if (!data->reserved)
-		return __blk_mq_get_tag(data);
-
-	return __blk_mq_get_reserved_tag(data);
+	if (data->flags & BLK_MQ_REQ_RESERVED)
+		return __blk_mq_get_reserved_tag(data);
+	return __blk_mq_get_tag(data);
 }
 
 static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6d6f8fe..93a4e19 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -229,8 +229,8 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw)
 	return NULL;
 }
 
-struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
-		bool reserved)
+struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
+		unsigned int flags)
 {
 	struct blk_mq_ctx *ctx;
 	struct blk_mq_hw_ctx *hctx;
@@ -238,24 +238,22 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
 	struct blk_mq_alloc_data alloc_data;
 	int ret;
 
-	ret = blk_queue_enter(q, gfp);
+	ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
 	if (ret)
 		return ERR_PTR(ret);
 
 	ctx = blk_mq_get_ctx(q);
 	hctx = q->mq_ops->map_queue(q, ctx->cpu);
-	blk_mq_set_alloc_data(&alloc_data, q, gfp & ~__GFP_DIRECT_RECLAIM,
-			reserved, ctx, hctx);
+	blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
 
 	rq = __blk_mq_alloc_request(&alloc_data, rw);
-	if (!rq && (gfp & __GFP_DIRECT_RECLAIM)) {
+	if (!rq && !(flags & BLK_MQ_REQ_NOWAIT)) {
 		__blk_mq_run_hw_queue(hctx);
 		blk_mq_put_ctx(ctx);
 
 		ctx = blk_mq_get_ctx(q);
 		hctx = q->mq_ops->map_queue(q, ctx->cpu);
-		blk_mq_set_alloc_data(&alloc_data, q, gfp, reserved, ctx,
-				hctx);
+		blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
 		rq =  __blk_mq_alloc_request(&alloc_data, rw);
 		ctx = alloc_data.ctx;
 	}
@@ -1175,8 +1173,7 @@ static struct request *blk_mq_map_request(struct request_queue *q,
 		rw |= REQ_SYNC;
 
 	trace_block_getrq(q, bio, rw);
-	blk_mq_set_alloc_data(&alloc_data, q, GFP_ATOMIC, false, ctx,
-			hctx);
+	blk_mq_set_alloc_data(&alloc_data, q, BLK_MQ_REQ_NOWAIT, ctx, hctx);
 	rq = __blk_mq_alloc_request(&alloc_data, rw);
 	if (unlikely(!rq)) {
 		__blk_mq_run_hw_queue(hctx);
@@ -1185,8 +1182,7 @@ static struct request *blk_mq_map_request(struct request_queue *q,
 
 		ctx = blk_mq_get_ctx(q);
 		hctx = q->mq_ops->map_queue(q, ctx->cpu);
-		blk_mq_set_alloc_data(&alloc_data, q,
-				__GFP_RECLAIM|__GFP_HIGH, false, ctx, hctx);
+		blk_mq_set_alloc_data(&alloc_data, q, 0, ctx, hctx);
 		rq = __blk_mq_alloc_request(&alloc_data, rw);
 		ctx = alloc_data.ctx;
 		hctx = alloc_data.hctx;
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 713820b..eaede8e 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -96,8 +96,7 @@ static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
 struct blk_mq_alloc_data {
 	/* input parameter */
 	struct request_queue *q;
-	gfp_t gfp;
-	bool reserved;
+	unsigned int flags;
 
 	/* input & output parameter */
 	struct blk_mq_ctx *ctx;
@@ -105,13 +104,11 @@ struct blk_mq_alloc_data {
 };
 
 static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data,
-		struct request_queue *q, gfp_t gfp, bool reserved,
-		struct blk_mq_ctx *ctx,
-		struct blk_mq_hw_ctx *hctx)
+		struct request_queue *q, unsigned int flags,
+		struct blk_mq_ctx *ctx, struct blk_mq_hw_ctx *hctx)
 {
 	data->q = q;
-	data->gfp = gfp;
-	data->reserved = reserved;
+	data->flags = flags;
 	data->ctx = ctx;
 	data->hctx = hctx;
 }
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 3457ac8..10bd8d0 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -173,7 +173,7 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd)
 {
 	struct request *rq;
 
-	rq = blk_mq_alloc_request(dd->queue, 0, __GFP_RECLAIM, true);
+	rq = blk_mq_alloc_request(dd->queue, 0, BLK_MQ_REQ_RESERVED);
 	return blk_mq_rq_to_pdu(rq);
 }
 
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 5c8ba54..fa742dd 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -464,7 +464,7 @@ static int null_lnvm_submit_io(struct request_queue *q, struct nvm_rq *rqd)
 	struct request *rq;
 	struct bio *bio = rqd->bio;
 
-	rq = blk_mq_alloc_request(q, bio_rw(bio), GFP_KERNEL, 0);
+	rq = blk_mq_alloc_request(q, bio_rw(bio), 0);
 	if (IS_ERR(rq))
 		return -ENOMEM;
 
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 9202d1a..d5622f9 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -470,7 +470,7 @@ static int nvme_nvm_submit_io(struct request_queue *q, struct nvm_rq *rqd)
 	struct bio *bio = rqd->bio;
 	struct nvme_nvm_command *cmd;
 
-	rq = blk_mq_alloc_request(q, bio_rw(bio), GFP_KERNEL, 0);
+	rq = blk_mq_alloc_request(q, bio_rw(bio), 0);
 	if (IS_ERR(rq))
 		return -ENOMEM;
 
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index f3b53af..b8a0222 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1041,7 +1041,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 	struct request *req;
 	int ret;
 
-	req = blk_mq_alloc_request(q, write, GFP_KERNEL, false);
+	req = blk_mq_alloc_request(q, write, 0);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
@@ -1094,7 +1094,8 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev)
 	struct nvme_cmd_info *cmd_info;
 	struct request *req;
 
-	req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, true);
+	req = blk_mq_alloc_request(dev->admin_q, WRITE,
+			BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
@@ -1119,7 +1120,7 @@ static int nvme_submit_admin_async_cmd(struct nvme_dev *dev,
 	struct request *req;
 	struct nvme_cmd_info *cmd_rq;
 
-	req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_KERNEL, false);
+	req = blk_mq_alloc_request(dev->admin_q, WRITE, 0);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
@@ -1320,8 +1321,8 @@ static void nvme_abort_req(struct request *req)
 	if (!dev->abort_limit)
 		return;
 
-	abort_req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC,
-									false);
+	abort_req = blk_mq_alloc_request(dev->admin_q, WRITE,
+			BLK_MQ_REQ_NOWAIT);
 	if (IS_ERR(abort_req))
 		return;
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index c25639e..aa1a4598 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -395,7 +395,7 @@ int bdev_read_page(struct block_device *bdev, sector_t sector,
 	if (!ops->rw_page || bdev_get_integrity(bdev))
 		return result;
 
-	result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL);
+	result = blk_queue_enter(bdev->bd_queue, false);
 	if (result)
 		return result;
 	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
@@ -432,7 +432,7 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
 
 	if (!ops->rw_page || bdev_get_integrity(bdev))
 		return -EOPNOTSUPP;
-	result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL);
+	result = blk_queue_enter(bdev->bd_queue, false);
 	if (result)
 		return result;
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index daf17d7..7fc9296 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -188,8 +188,14 @@ void blk_mq_insert_request(struct request *, bool, bool, bool);
 void blk_mq_free_request(struct request *rq);
 void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *, struct request *rq);
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
+
+enum {
+	BLK_MQ_REQ_NOWAIT	= (1 << 0), /* return when out of requests */
+	BLK_MQ_REQ_RESERVED	= (1 << 1), /* allocate from reserved pool */
+};
+
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
-		gfp_t gfp, bool reserved);
+		unsigned int flags);
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
 struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c0d2b79..e711f29 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -794,7 +794,7 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 			 struct scsi_ioctl_command __user *);
 
-extern int blk_queue_enter(struct request_queue *q, gfp_t gfp);
+extern int blk_queue_enter(struct request_queue *q, bool nowait);
 extern void blk_queue_exit(struct request_queue *q);
 extern void blk_start_queue(struct request_queue *q);
 extern void blk_stop_queue(struct request_queue *q);
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 02/23] nvme: move struct nvme_iod to pci.c
  2015-11-30  8:36 NVMe driver split for Linux 4.5 Christoph Hellwig
  2015-11-30  8:36 ` [PATCH 01/23] blk-mq: add a flags parameter to blk_mq_alloc_request Christoph Hellwig
@ 2015-11-30  8:36 ` Christoph Hellwig
  2015-11-30  8:36 ` [PATCH 03/23] nvme: split command submission helpers out of pci.c Christoph Hellwig
                   ` (14 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2015-11-30  8:36 UTC (permalink / raw)


This structure is specific to the PCIe driver internals and should be moved
to pci.c.

Signed-off-by: Christoph Hellwig <hch at lst.de>
Acked-by: Keith Busch <keith.busch at intel.com>
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/nvme.h | 17 -----------------
 drivers/nvme/host/pci.c  | 17 +++++++++++++++++
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index fdb4e5b..2cead2c 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -94,23 +94,6 @@ struct nvme_ns {
 	u32 mode_select_block_len;
 };
 
-/*
- * The nvme_iod describes the data in an I/O, including the list of PRP
- * entries.  You can't see it in this data structure because C doesn't let
- * me express that.  Use nvme_alloc_iod to ensure there's enough space
- * allocated to store the PRP list.
- */
-struct nvme_iod {
-	unsigned long private;	/* For the use of the submitter of the I/O */
-	int npages;		/* In the PRP list. 0 means small pool in use */
-	int offset;		/* Of PRP list */
-	int nents;		/* Used in scatterlist */
-	int length;		/* Of data, in bytes */
-	dma_addr_t first_dma;
-	struct scatterlist meta_sg[1]; /* metadata requires single contiguous buffer */
-	struct scatterlist sg[0];
-};
-
 static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
 {
 	return (sector >> (ns->lba_shift - 9));
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index b8a0222..0f24d3c 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -130,6 +130,23 @@ struct nvme_queue {
 };
 
 /*
+ * The nvme_iod describes the data in an I/O, including the list of PRP
+ * entries.  You can't see it in this data structure because C doesn't let
+ * me express that.  Use nvme_alloc_iod to ensure there's enough space
+ * allocated to store the PRP list.
+ */
+struct nvme_iod {
+	unsigned long private;	/* For the use of the submitter of the I/O */
+	int npages;		/* In the PRP list. 0 means small pool in use */
+	int offset;		/* Of PRP list */
+	int nents;		/* Used in scatterlist */
+	int length;		/* Of data, in bytes */
+	dma_addr_t first_dma;
+	struct scatterlist meta_sg[1]; /* metadata requires single contiguous buffer */
+	struct scatterlist sg[0];
+};
+
+/*
  * Check we didin't inadvertently grow the command struct
  */
 static inline void _nvme_check_size(void)
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 03/23] nvme: split command submission helpers out of pci.c
  2015-11-30  8:36 NVMe driver split for Linux 4.5 Christoph Hellwig
  2015-11-30  8:36 ` [PATCH 01/23] blk-mq: add a flags parameter to blk_mq_alloc_request Christoph Hellwig
  2015-11-30  8:36 ` [PATCH 02/23] nvme: move struct nvme_iod to pci.c Christoph Hellwig
@ 2015-11-30  8:36 ` Christoph Hellwig
  2015-11-30  8:36 ` [PATCH 04/23] nvme: use offset instead of a struct for registers Christoph Hellwig
                   ` (13 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2015-11-30  8:36 UTC (permalink / raw)


Create a new core.c and start by adding the command submission helpers
to it, which are already abstracted away from the actual hardware queues
by the block layer.

Signed-off-by: Christoph Hellwig <hch at lst.de>
Acked-by: Keith Busch <keith.busch at intel.com>
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/Makefile |   2 +-
 drivers/nvme/host/core.c   | 173 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/nvme/host/nvme.h   |   3 +
 drivers/nvme/host/pci.c    | 155 +---------------------------------------
 4 files changed, 178 insertions(+), 155 deletions(-)
 create mode 100644 drivers/nvme/host/core.c

diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index 219dc206..3e26dc9 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -1,4 +1,4 @@
 
 obj-$(CONFIG_BLK_DEV_NVME)     += nvme.o
 
-nvme-y		+= pci.o scsi.o lightnvm.o
+nvme-y		+= core.o pci.o scsi.o lightnvm.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
new file mode 100644
index 0000000..ce938a4
--- /dev/null
+++ b/drivers/nvme/host/core.c
@@ -0,0 +1,173 @@
+/*
+ * NVM Express device driver
+ * Copyright (c) 2011-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "nvme.h"
+
+/*
+ * Returns 0 on success.  If the result is negative, it's a Linux error code;
+ * if the result is positive, it's an NVM Express status code
+ */
+int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+		void *buffer, void __user *ubuffer, unsigned bufflen,
+		u32 *result, unsigned timeout)
+{
+	bool write = cmd->common.opcode & 1;
+	struct bio *bio = NULL;
+	struct request *req;
+	int ret;
+
+	req = blk_mq_alloc_request(q, write, 0);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	req->cmd_type = REQ_TYPE_DRV_PRIV;
+	req->cmd_flags |= REQ_FAILFAST_DRIVER;
+	req->__data_len = 0;
+	req->__sector = (sector_t) -1;
+	req->bio = req->biotail = NULL;
+
+	req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+
+	req->cmd = (unsigned char *)cmd;
+	req->cmd_len = sizeof(struct nvme_command);
+	req->special = (void *)0;
+
+	if (buffer && bufflen) {
+		ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
+		if (ret)
+			goto out;
+	} else if (ubuffer && bufflen) {
+		ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
+				GFP_KERNEL);
+		if (ret)
+			goto out;
+		bio = req->bio;
+	}
+
+	blk_execute_rq(req->q, NULL, req, 0);
+	if (bio)
+		blk_rq_unmap_user(bio);
+	if (result)
+		*result = (u32)(uintptr_t)req->special;
+	ret = req->errors;
+ out:
+	blk_mq_free_request(req);
+	return ret;
+}
+
+int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+		void *buffer, unsigned bufflen)
+{
+	return __nvme_submit_sync_cmd(q, cmd, buffer, NULL, bufflen, NULL, 0);
+}
+
+int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id)
+{
+	struct nvme_command c = { };
+	int error;
+
+	/* gcc-4.4.4 (at least) has issues with initializers and anon unions */
+	c.identify.opcode = nvme_admin_identify;
+	c.identify.cns = cpu_to_le32(1);
+
+	*id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
+	if (!*id)
+		return -ENOMEM;
+
+	error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
+			sizeof(struct nvme_id_ctrl));
+	if (error)
+		kfree(*id);
+	return error;
+}
+
+int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
+		struct nvme_id_ns **id)
+{
+	struct nvme_command c = { };
+	int error;
+
+	/* gcc-4.4.4 (at least) has issues with initializers and anon unions */
+	c.identify.opcode = nvme_admin_identify,
+	c.identify.nsid = cpu_to_le32(nsid),
+
+	*id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
+	if (!*id)
+		return -ENOMEM;
+
+	error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
+			sizeof(struct nvme_id_ns));
+	if (error)
+		kfree(*id);
+	return error;
+}
+
+int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
+					dma_addr_t dma_addr, u32 *result)
+{
+	struct nvme_command c;
+
+	memset(&c, 0, sizeof(c));
+	c.features.opcode = nvme_admin_get_features;
+	c.features.nsid = cpu_to_le32(nsid);
+	c.features.prp1 = cpu_to_le64(dma_addr);
+	c.features.fid = cpu_to_le32(fid);
+
+	return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0,
+			result, 0);
+}
+
+int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
+					dma_addr_t dma_addr, u32 *result)
+{
+	struct nvme_command c;
+
+	memset(&c, 0, sizeof(c));
+	c.features.opcode = nvme_admin_set_features;
+	c.features.prp1 = cpu_to_le64(dma_addr);
+	c.features.fid = cpu_to_le32(fid);
+	c.features.dword11 = cpu_to_le32(dword11);
+
+	return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0,
+			result, 0);
+}
+
+int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log)
+{
+	struct nvme_command c = { };
+	int error;
+
+	c.common.opcode = nvme_admin_get_log_page,
+	c.common.nsid = cpu_to_le32(0xFFFFFFFF),
+	c.common.cdw10[0] = cpu_to_le32(
+			(((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
+			 NVME_LOG_SMART),
+
+	*log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
+	if (!*log)
+		return -ENOMEM;
+
+	error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
+			sizeof(struct nvme_smart_log));
+	if (error)
+		kfree(*log);
+	return error;
+}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 2cead2c..a53977c 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -22,6 +22,9 @@
 extern unsigned char nvme_io_timeout;
 #define NVME_IO_TIMEOUT	(nvme_io_timeout * HZ)
 
+extern unsigned char admin_timeout;
+#define ADMIN_TIMEOUT	(admin_timeout * HZ)
+
 enum {
 	NVME_NS_LBA		= 0,
 	NVME_NS_LIGHTNVM	= 1,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 0f24d3c..9963562 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -52,10 +52,9 @@
 #define NVME_AQ_DEPTH		256
 #define SQ_SIZE(depth)		(depth * sizeof(struct nvme_command))
 #define CQ_SIZE(depth)		(depth * sizeof(struct nvme_completion))
-#define ADMIN_TIMEOUT		(admin_timeout * HZ)
 #define SHUTDOWN_TIMEOUT	(shutdown_timeout * HZ)
 
-static unsigned char admin_timeout = 60;
+unsigned char admin_timeout = 60;
 module_param(admin_timeout, byte, 0644);
 MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
 
@@ -1045,65 +1044,6 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
 	return 0;
 }
 
-/*
- * Returns 0 on success.  If the result is negative, it's a Linux error code;
- * if the result is positive, it's an NVM Express status code
- */
-int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
-		void *buffer, void __user *ubuffer, unsigned bufflen,
-		u32 *result, unsigned timeout)
-{
-	bool write = cmd->common.opcode & 1;
-	struct bio *bio = NULL;
-	struct request *req;
-	int ret;
-
-	req = blk_mq_alloc_request(q, write, 0);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-
-	req->cmd_type = REQ_TYPE_DRV_PRIV;
-	req->cmd_flags |= REQ_FAILFAST_DRIVER;
-	req->__data_len = 0;
-	req->__sector = (sector_t) -1;
-	req->bio = req->biotail = NULL;
-
-	req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
-
-	req->cmd = (unsigned char *)cmd;
-	req->cmd_len = sizeof(struct nvme_command);
-	req->special = (void *)0;
-
-	if (buffer && bufflen) {
-		ret = blk_rq_map_kern(q, req, buffer, bufflen,
-				      __GFP_DIRECT_RECLAIM);
-		if (ret)
-			goto out;
-	} else if (ubuffer && bufflen) {
-		ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
-				      __GFP_DIRECT_RECLAIM);
-		if (ret)
-			goto out;
-		bio = req->bio;
-	}
-
-	blk_execute_rq(req->q, NULL, req, 0);
-	if (bio)
-		blk_rq_unmap_user(bio);
-	if (result)
-		*result = (u32)(uintptr_t)req->special;
-	ret = req->errors;
- out:
-	blk_mq_free_request(req);
-	return ret;
-}
-
-int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
-		void *buffer, unsigned bufflen)
-{
-	return __nvme_submit_sync_cmd(q, cmd, buffer, NULL, bufflen, NULL, 0);
-}
-
 static int nvme_submit_async_admin_req(struct nvme_dev *dev)
 {
 	struct nvme_queue *nvmeq = dev->queues[0];
@@ -1216,99 +1156,6 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
 	return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
 }
 
-int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id)
-{
-	struct nvme_command c = { };
-	int error;
-
-	/* gcc-4.4.4 (at least) has issues with initializers and anon unions */
-	c.identify.opcode = nvme_admin_identify;
-	c.identify.cns = cpu_to_le32(1);
-
-	*id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
-	if (!*id)
-		return -ENOMEM;
-
-	error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
-			sizeof(struct nvme_id_ctrl));
-	if (error)
-		kfree(*id);
-	return error;
-}
-
-int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
-		struct nvme_id_ns **id)
-{
-	struct nvme_command c = { };
-	int error;
-
-	/* gcc-4.4.4 (at least) has issues with initializers and anon unions */
-	c.identify.opcode = nvme_admin_identify,
-	c.identify.nsid = cpu_to_le32(nsid),
-
-	*id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
-	if (!*id)
-		return -ENOMEM;
-
-	error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
-			sizeof(struct nvme_id_ns));
-	if (error)
-		kfree(*id);
-	return error;
-}
-
-int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
-					dma_addr_t dma_addr, u32 *result)
-{
-	struct nvme_command c;
-
-	memset(&c, 0, sizeof(c));
-	c.features.opcode = nvme_admin_get_features;
-	c.features.nsid = cpu_to_le32(nsid);
-	c.features.prp1 = cpu_to_le64(dma_addr);
-	c.features.fid = cpu_to_le32(fid);
-
-	return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0,
-			result, 0);
-}
-
-int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
-					dma_addr_t dma_addr, u32 *result)
-{
-	struct nvme_command c;
-
-	memset(&c, 0, sizeof(c));
-	c.features.opcode = nvme_admin_set_features;
-	c.features.prp1 = cpu_to_le64(dma_addr);
-	c.features.fid = cpu_to_le32(fid);
-	c.features.dword11 = cpu_to_le32(dword11);
-
-	return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0,
-			result, 0);
-}
-
-int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log)
-{
-	struct nvme_command c = { };
-	int error;
-
-	c.common.opcode = nvme_admin_get_log_page,
-	c.common.nsid = cpu_to_le32(0xFFFFFFFF),
-	c.common.cdw10[0] = cpu_to_le32(
-			(((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
-			 NVME_LOG_SMART),
-
-	*log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
-	if (!*log)
-		return -ENOMEM;
-
-	error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
-			sizeof(struct nvme_smart_log));
-	if (error)
-		kfree(*log);
-	return error;
-}
-
 /**
  * nvme_abort_req - Attempt aborting a request
  *
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 04/23] nvme: use offset instead of a struct for registers
  2015-11-30  8:36 NVMe driver split for Linux 4.5 Christoph Hellwig
                   ` (2 preceding siblings ...)
  2015-11-30  8:36 ` [PATCH 03/23] nvme: split command submission helpers out of pci.c Christoph Hellwig
@ 2015-11-30  8:36 ` Christoph Hellwig
  2015-11-30  8:36 ` [PATCH 05/23] nvme: split nvme_trans_device_id_page Christoph Hellwig
                   ` (12 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2015-11-30  8:36 UTC (permalink / raw)


This makes life easier for future non-PCI drivers where access to the
registers might be more complicated.  Note that Linux drivers are
pretty evenly split between the two versions, and in fact the NVMe
driver already uses offsets for the doorbells.

Signed-off-by: Christoph Hellwig <hch at lst.de>
Acked-by: Keith Busch <keith.busch at intel.com>
[Fixed CMBSZ offset]
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/nvme.h |  2 +-
 drivers/nvme/host/pci.c  | 60 ++++++++++++++++++++++++++----------------------
 drivers/nvme/host/scsi.c |  6 ++---
 include/linux/nvme.h     | 27 +++++++++++-----------
 4 files changed, 49 insertions(+), 46 deletions(-)

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a53977c..66550b7 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -51,7 +51,7 @@ struct nvme_dev {
 	u32 db_stride;
 	u32 ctrl_config;
 	struct msix_entry *entry;
-	struct nvme_bar __iomem *bar;
+	void __iomem *bar;
 	struct list_head namespaces;
 	struct kref kref;
 	struct device *device;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 9963562..bfea7ec 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1322,7 +1322,7 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid)
 
 	/* Don't tell the adapter to delete the admin queue.
 	 * Don't tell a removed adapter to delete IO queues. */
-	if (qid && readl(&dev->bar->csts) != -1) {
+	if (qid && readl(dev->bar + NVME_REG_CSTS) != -1) {
 		adapter_delete_sq(dev, qid);
 		adapter_delete_cq(dev, qid);
 	}
@@ -1475,7 +1475,7 @@ static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled)
 
 	timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
 
-	while ((readl(&dev->bar->csts) & NVME_CSTS_RDY) != bit) {
+	while ((readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_RDY) != bit) {
 		msleep(100);
 		if (fatal_signal_pending(current))
 			return -EINTR;
@@ -1500,7 +1500,7 @@ static int nvme_disable_ctrl(struct nvme_dev *dev, u64 cap)
 {
 	dev->ctrl_config &= ~NVME_CC_SHN_MASK;
 	dev->ctrl_config &= ~NVME_CC_ENABLE;
-	writel(dev->ctrl_config, &dev->bar->cc);
+	writel(dev->ctrl_config, dev->bar + NVME_REG_CC);
 
 	return nvme_wait_ready(dev, cap, false);
 }
@@ -1509,7 +1509,7 @@ static int nvme_enable_ctrl(struct nvme_dev *dev, u64 cap)
 {
 	dev->ctrl_config &= ~NVME_CC_SHN_MASK;
 	dev->ctrl_config |= NVME_CC_ENABLE;
-	writel(dev->ctrl_config, &dev->bar->cc);
+	writel(dev->ctrl_config, dev->bar + NVME_REG_CC);
 
 	return nvme_wait_ready(dev, cap, true);
 }
@@ -1521,10 +1521,10 @@ static int nvme_shutdown_ctrl(struct nvme_dev *dev)
 	dev->ctrl_config &= ~NVME_CC_SHN_MASK;
 	dev->ctrl_config |= NVME_CC_SHN_NORMAL;
 
-	writel(dev->ctrl_config, &dev->bar->cc);
+	writel(dev->ctrl_config, dev->bar + NVME_REG_CC);
 
 	timeout = SHUTDOWN_TIMEOUT + jiffies;
-	while ((readl(&dev->bar->csts) & NVME_CSTS_SHST_MASK) !=
+	while ((readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_SHST_MASK) !=
 							NVME_CSTS_SHST_CMPLT) {
 		msleep(100);
 		if (fatal_signal_pending(current))
@@ -1600,7 +1600,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 {
 	int result;
 	u32 aqa;
-	u64 cap = lo_hi_readq(&dev->bar->cap);
+	u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
 	struct nvme_queue *nvmeq;
 	/*
 	 * default to a 4K page size, with the intention to update this
@@ -1618,11 +1618,12 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 		return -ENODEV;
 	}
 
-	dev->subsystem = readl(&dev->bar->vs) >= NVME_VS(1, 1) ?
+	dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1) ?
 						NVME_CAP_NSSRC(cap) : 0;
 
-	if (dev->subsystem && (readl(&dev->bar->csts) & NVME_CSTS_NSSRO))
-		writel(NVME_CSTS_NSSRO, &dev->bar->csts);
+	if (dev->subsystem &&
+	    (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
+		writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);
 
 	result = nvme_disable_ctrl(dev, cap);
 	if (result < 0)
@@ -1645,9 +1646,9 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
 	dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
 
-	writel(aqa, &dev->bar->aqa);
-	lo_hi_writeq(nvmeq->sq_dma_addr, &dev->bar->asq);
-	lo_hi_writeq(nvmeq->cq_dma_addr, &dev->bar->acq);
+	writel(aqa, dev->bar + NVME_REG_AQA);
+	lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
+	lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);
 
 	result = nvme_enable_ctrl(dev, cap);
 	if (result)
@@ -1789,7 +1790,7 @@ static int nvme_subsys_reset(struct nvme_dev *dev)
 	if (!dev->subsystem)
 		return -ENOTTY;
 
-	writel(0x4E564D65, &dev->bar->nssr); /* "NVMe" */
+	writel(0x4E564D65, dev->bar + NVME_REG_NSSR); /* "NVMe" */
 	return 0;
 }
 
@@ -2076,14 +2077,14 @@ static int nvme_kthread(void *data)
 		spin_lock(&dev_list_lock);
 		list_for_each_entry_safe(dev, next, &dev_list, node) {
 			int i;
-			u32 csts = readl(&dev->bar->csts);
+			u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
 			if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) ||
 							csts & NVME_CSTS_CFS) {
 				if (!__nvme_reset(dev)) {
 					dev_warn(dev->dev,
 						"Failed status: %x, reset controller\n",
-						readl(&dev->bar->csts));
+						readl(dev->bar + NVME_REG_CSTS));
 				}
 				continue;
 			}
@@ -2243,11 +2244,11 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
 	if (!use_cmb_sqes)
 		return NULL;
 
-	dev->cmbsz = readl(&dev->bar->cmbsz);
+	dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
 	if (!(NVME_CMB_SZ(dev->cmbsz)))
 		return NULL;
 
-	cmbloc = readl(&dev->bar->cmbloc);
+	cmbloc = readl(dev->bar + NVME_REG_CMBLOC);
 
 	szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz));
 	size = szu * NVME_CMB_SZ(dev->cmbsz);
@@ -2321,7 +2322,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 				return -ENOMEM;
 			size = db_bar_size(dev, nr_io_queues);
 		} while (1);
-		dev->dbs = ((void __iomem *)dev->bar) + 4096;
+		dev->dbs = dev->bar + 4096;
 		adminq->q_db = dev->dbs;
 	}
 
@@ -2397,8 +2398,9 @@ static struct nvme_ns *nvme_find_ns(struct nvme_dev *dev, unsigned nsid)
 
 static inline bool nvme_io_incapable(struct nvme_dev *dev)
 {
-	return (!dev->bar || readl(&dev->bar->csts) & NVME_CSTS_CFS ||
-							dev->online_queues < 2);
+	return (!dev->bar ||
+		readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_CFS ||
+		dev->online_queues < 2);
 }
 
 static void nvme_ns_remove(struct nvme_ns *ns)
@@ -2478,7 +2480,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 	int res;
 	struct nvme_id_ctrl *ctrl;
-	int shift = NVME_CAP_MPSMIN(lo_hi_readq(&dev->bar->cap)) + 12;
+	int shift = NVME_CAP_MPSMIN(lo_hi_readq(dev->bar + NVME_REG_CAP)) + 12;
 
 	res = nvme_identify_ctrl(dev, &ctrl);
 	if (res) {
@@ -2554,7 +2556,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
 	if (!dev->bar)
 		goto disable;
 
-	if (readl(&dev->bar->csts) == -1) {
+	if (readl(dev->bar + NVME_REG_CSTS) == -1) {
 		result = -ENODEV;
 		goto unmap;
 	}
@@ -2569,11 +2571,12 @@ static int nvme_dev_map(struct nvme_dev *dev)
 			goto unmap;
 	}
 
-	cap = lo_hi_readq(&dev->bar->cap);
+	cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
+
 	dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
 	dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
-	dev->dbs = ((void __iomem *)dev->bar) + 4096;
-	if (readl(&dev->bar->vs) >= NVME_VS(1, 2))
+	dev->dbs = dev->bar + 4096;
+	if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2))
 		dev->cmb = nvme_map_cmb(dev);
 
 	return 0;
@@ -2632,7 +2635,8 @@ static void nvme_wait_dq(struct nvme_delq_ctx *dq, struct nvme_dev *dev)
 			 * queues than admin tags.
 			 */
 			set_current_state(TASK_RUNNING);
-			nvme_disable_ctrl(dev, lo_hi_readq(&dev->bar->cap));
+			nvme_disable_ctrl(dev,
+				lo_hi_readq(dev->bar + NVME_REG_CAP));
 			nvme_clear_queue(dev->queues[0]);
 			flush_kthread_worker(dq->worker);
 			nvme_disable_queue(dev, 0);
@@ -2808,7 +2812,7 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
 
 	if (dev->bar) {
 		nvme_freeze_queues(dev);
-		csts = readl(&dev->bar->csts);
+		csts = readl(dev->bar + NVME_REG_CSTS);
 	}
 	if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
 		for (i = dev->queue_count - 1; i >= 0; i--) {
diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c
index c3d8d38..8586994 100644
--- a/drivers/nvme/host/scsi.c
+++ b/drivers/nvme/host/scsi.c
@@ -611,7 +611,7 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 
 	memset(inq_response, 0, alloc_len);
 	inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;    /* Page Code */
-	if (readl(&dev->bar->vs) >= NVME_VS(1, 1)) {
+	if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1)) {
 		struct nvme_id_ns *id_ns;
 		void *eui;
 		int len;
@@ -623,7 +623,7 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 
 		eui = id_ns->eui64;
 		len = sizeof(id_ns->eui64);
-		if (readl(&dev->bar->vs) >= NVME_VS(1, 2)) {
+		if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2)) {
 			if (bitmap_empty(eui, len * 8)) {
 				eui = id_ns->nguid;
 				len = sizeof(id_ns->nguid);
@@ -2297,7 +2297,7 @@ static int nvme_trans_test_unit_ready(struct nvme_ns *ns,
 {
 	struct nvme_dev *dev = ns->dev;
 
-	if (!(readl(&dev->bar->csts) & NVME_CSTS_RDY))
+	if (!(readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_RDY))
 		return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
 					    NOT_READY, SCSI_ASC_LUN_NOT_READY,
 					    SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 3af5f45..a55986f 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -17,20 +17,19 @@
 
 #include <linux/types.h>
 
-struct nvme_bar {
-	__u64			cap;	/* Controller Capabilities */
-	__u32			vs;	/* Version */
-	__u32			intms;	/* Interrupt Mask Set */
-	__u32			intmc;	/* Interrupt Mask Clear */
-	__u32			cc;	/* Controller Configuration */
-	__u32			rsvd1;	/* Reserved */
-	__u32			csts;	/* Controller Status */
-	__u32			nssr;	/* Subsystem Reset */
-	__u32			aqa;	/* Admin Queue Attributes */
-	__u64			asq;	/* Admin SQ Base Address */
-	__u64			acq;	/* Admin CQ Base Address */
-	__u32			cmbloc; /* Controller Memory Buffer Location */
-	__u32			cmbsz;  /* Controller Memory Buffer Size */
+enum {
+	NVME_REG_CAP	= 0x0000,	/* Controller Capabilities */
+	NVME_REG_VS	= 0x0008,	/* Version */
+	NVME_REG_INTMS	= 0x000c,	/* Interrupt Mask Set */
+	NVME_REG_INTMC	= 0x0010,	/* Interrupt Mask Clear */
+	NVME_REG_CC	= 0x0014,	/* Controller Configuration */
+	NVME_REG_CSTS	= 0x001c,	/* Controller Status */
+	NVME_REG_NSSR	= 0x0020,	/* NVM Subsystem Reset */
+	NVME_REG_AQA	= 0x0024,	/* Admin Queue Attributes */
+	NVME_REG_ASQ	= 0x0028,	/* Admin SQ Base Address */
+	NVME_REG_ACQ	= 0x0030,	/* Admin CQ Base Address */
+	NVME_REG_CMBLOC = 0x0038,	/* Controller Memory Buffer Location */
+	NVME_REG_CMBSZ	= 0x003c,	/* Controller Memory Buffer Size */
 };
 
 #define NVME_CAP_MQES(cap)	((cap) & 0xffff)
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 05/23] nvme: split nvme_trans_device_id_page
  2015-11-30  8:36 NVMe driver split for Linux 4.5 Christoph Hellwig
                   ` (3 preceding siblings ...)
  2015-11-30  8:36 ` [PATCH 04/23] nvme: use offset instead of a struct for registers Christoph Hellwig
@ 2015-11-30  8:36 ` Christoph Hellwig
  2015-11-30  8:36 ` [PATCH 06/23] nvme: use vendor ID from identify Christoph Hellwig
                   ` (11 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2015-11-30  8:36 UTC (permalink / raw)


Signed-off-by: Christoph Hellwig <hch at lst.de>
---
 drivers/nvme/host/scsi.c | 135 +++++++++++++++++++++++++++--------------------
 1 file changed, 79 insertions(+), 56 deletions(-)

diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c
index 8586994..b42cf44 100644
--- a/drivers/nvme/host/scsi.c
+++ b/drivers/nvme/host/scsi.c
@@ -600,70 +600,93 @@ static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
 	return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
 }
 
-static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-					u8 *inq_response, int alloc_len)
+static int nvme_fill_device_id_eui64(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+		u8 *inq_response, int alloc_len)
 {
-	struct nvme_dev *dev = ns->dev;
-	int res;
-	int nvme_sc;
-	int xfer_len;
-	__be32 tmp_id = cpu_to_be32(ns->ns_id);
+	struct nvme_id_ns *id_ns;
+	int nvme_sc, res;
+	size_t len;
+	void *eui;
 
-	memset(inq_response, 0, alloc_len);
-	inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;    /* Page Code */
-	if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1)) {
-		struct nvme_id_ns *id_ns;
-		void *eui;
-		int len;
+	nvme_sc = nvme_identify_ns(ns->dev, ns->ns_id, &id_ns);
+	res = nvme_trans_status_code(hdr, nvme_sc);
+	if (res)
+		return res;
 
-		nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
-		res = nvme_trans_status_code(hdr, nvme_sc);
-		if (res)
-			return res;
+	eui = id_ns->eui64;
+	len = sizeof(id_ns->eui64);
 
-		eui = id_ns->eui64;
-		len = sizeof(id_ns->eui64);
-		if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2)) {
-			if (bitmap_empty(eui, len * 8)) {
-				eui = id_ns->nguid;
-				len = sizeof(id_ns->nguid);
-			}
-		}
+	if (readl(ns->dev->bar + NVME_REG_VS) >= NVME_VS(1, 2)) {
 		if (bitmap_empty(eui, len * 8)) {
-			kfree(id_ns);
-			goto scsi_string;
+			eui = id_ns->nguid;
+			len = sizeof(id_ns->nguid);
 		}
+	}
 
-		inq_response[3] = 4 + len; /* Page Length */
-		/* Designation Descriptor start */
-		inq_response[4] = 0x01;    /* Proto ID=0h | Code set=1h */
-		inq_response[5] = 0x02;    /* PIV=0b | Asso=00b | Designator Type=2h */
-		inq_response[6] = 0x00;    /* Rsvd */
-		inq_response[7] = len;     /* Designator Length */
-		memcpy(&inq_response[8], eui, len);
-		kfree(id_ns);
-	} else {
- scsi_string:
-		if (alloc_len < 72) {
-			return nvme_trans_completion(hdr,
-					SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		}
-		inq_response[3] = 0x48;    /* Page Length */
-		/* Designation Descriptor start */
-		inq_response[4] = 0x03;    /* Proto ID=0h | Code set=3h */
-		inq_response[5] = 0x08;    /* PIV=0b | Asso=00b | Designator Type=8h */
-		inq_response[6] = 0x00;    /* Rsvd */
-		inq_response[7] = 0x44;    /* Designator Length */
-
-		sprintf(&inq_response[8], "%04x", to_pci_dev(dev->dev)->vendor);
-		memcpy(&inq_response[12], dev->model, sizeof(dev->model));
-		sprintf(&inq_response[52], "%04x", tmp_id);
-		memcpy(&inq_response[56], dev->serial, sizeof(dev->serial));
+	if (bitmap_empty(eui, len * 8)) {
+		res = -EOPNOTSUPP;
+		goto out_free_id;
 	}
-	xfer_len = alloc_len;
-	return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
+
+	memset(inq_response, 0, alloc_len);
+	inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
+	inq_response[3] = 4 + len; /* Page Length */
+
+	/* Designation Descriptor start */
+	inq_response[4] = 0x01;	/* Proto ID=0h | Code set=1h */
+	inq_response[5] = 0x02;	/* PIV=0b | Asso=00b | Designator Type=2h */
+	inq_response[6] = 0x00;	/* Rsvd */
+	inq_response[7] = len;	/* Designator Length */
+	memcpy(&inq_response[8], eui, len);
+
+	res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
+out_free_id:
+	kfree(id_ns);
+	return res;
+}
+
+static int nvme_fill_device_id_scsi_string(struct nvme_ns *ns,
+		struct sg_io_hdr *hdr, u8 *inq_response, int alloc_len)
+{
+	struct nvme_dev *dev = ns->dev;
+
+	if (alloc_len < 72) {
+		return nvme_trans_completion(hdr,
+				SAM_STAT_CHECK_CONDITION,
+				ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
+				SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+	}
+
+	memset(inq_response, 0, alloc_len);
+	inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
+	inq_response[3] = 0x48;	/* Page Length */
+
+	/* Designation Descriptor start */
+	inq_response[4] = 0x03;	/* Proto ID=0h | Code set=3h */
+	inq_response[5] = 0x08;	/* PIV=0b | Asso=00b | Designator Type=8h */
+	inq_response[6] = 0x00;	/* Rsvd */
+	inq_response[7] = 0x44;	/* Designator Length */
+
+	sprintf(&inq_response[8], "%04x", to_pci_dev(dev->dev)->vendor);
+	memcpy(&inq_response[12], dev->model, sizeof(dev->model));
+	sprintf(&inq_response[52], "%04x", cpu_to_be32(ns->ns_id));
+	memcpy(&inq_response[56], dev->serial, sizeof(dev->serial));
+
+	return nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
+}
+
+static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+					u8 *resp, int alloc_len)
+{
+	int res;
+
+	if (readl(ns->dev->bar + NVME_REG_VS) >= NVME_VS(1, 1)) {
+		res = nvme_fill_device_id_eui64(ns, hdr, resp, alloc_len);
+		if (res != -EOPNOTSUPP)
+			return res;
+	}
+
+	return nvme_fill_device_id_scsi_string(ns, hdr, resp, alloc_len);
 }
 
 static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 06/23] nvme: use vendor ID from identify
  2015-11-30  8:36 NVMe driver split for Linux 4.5 Christoph Hellwig
                   ` (4 preceding siblings ...)
  2015-11-30  8:36 ` [PATCH 05/23] nvme: split nvme_trans_device_id_page Christoph Hellwig
@ 2015-11-30  8:36 ` Christoph Hellwig
  2015-11-30  8:36 ` [PATCH 07/23] nvme: split a new struct nvme_ctrl out of struct nvme_dev Christoph Hellwig
                   ` (10 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2015-11-30  8:36 UTC (permalink / raw)


Use the vendor ID from the identify data instead of the PCI device to
make the SCSI translation layer independent from the PCI driver.  The NVMe
spec defines them as having the same value for current PCIe devices.

Signed-off-by: Christoph Hellwig <hch at lst.de>
---
 drivers/nvme/host/scsi.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c
index b42cf44..0bf90b6 100644
--- a/drivers/nvme/host/scsi.c
+++ b/drivers/nvme/host/scsi.c
@@ -649,6 +649,8 @@ static int nvme_fill_device_id_scsi_string(struct nvme_ns *ns,
 		struct sg_io_hdr *hdr, u8 *inq_response, int alloc_len)
 {
 	struct nvme_dev *dev = ns->dev;
+	struct nvme_id_ctrl *id_ctrl;
+	int nvme_sc, res;
 
 	if (alloc_len < 72) {
 		return nvme_trans_completion(hdr,
@@ -657,6 +659,11 @@ static int nvme_fill_device_id_scsi_string(struct nvme_ns *ns,
 				SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
 	}
 
+	nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
+	res = nvme_trans_status_code(hdr, nvme_sc);
+	if (res)
+		return res;
+
 	memset(inq_response, 0, alloc_len);
 	inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
 	inq_response[3] = 0x48;	/* Page Length */
@@ -667,12 +674,14 @@ static int nvme_fill_device_id_scsi_string(struct nvme_ns *ns,
 	inq_response[6] = 0x00;	/* Rsvd */
 	inq_response[7] = 0x44;	/* Designator Length */
 
-	sprintf(&inq_response[8], "%04x", to_pci_dev(dev->dev)->vendor);
+	sprintf(&inq_response[8], "%04x", le16_to_cpu(id_ctrl->vid));
 	memcpy(&inq_response[12], dev->model, sizeof(dev->model));
 	sprintf(&inq_response[52], "%04x", cpu_to_be32(ns->ns_id));
 	memcpy(&inq_response[56], dev->serial, sizeof(dev->serial));
 
-	return nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
+	res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
+	kfree(id_ctrl);
+	return res;
 }
 
 static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 07/23] nvme: split a new struct nvme_ctrl out of struct nvme_dev
  2015-11-30  8:36 NVMe driver split for Linux 4.5 Christoph Hellwig
                   ` (5 preceding siblings ...)
  2015-11-30  8:36 ` [PATCH 06/23] nvme: use vendor ID from identify Christoph Hellwig
@ 2015-11-30  8:36 ` Christoph Hellwig
  2015-11-30  8:36 ` [PATCH 08/23] nvme: simplify nvme_setup_prps calling convention Christoph Hellwig
                   ` (9 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2015-11-30  8:36 UTC (permalink / raw)


The new struct nvme_ctrl will be used by the common NVMe code that sits
on top of struct request_queue and the new nvme_ctrl_ops abstraction.
It only contains the bare minimum required, which consists of values
sampled during controller probe, the admin queue pointer and a second
struct device pointer at the moment, but more will follow later.  Only
values that are not used in the I/O fast path should be moved to
struct nvme_ctrl so that drivers can optimize their cache line usage
easily.  That's also the reason why we have two device pointers as
the struct device is used for DMA mapping purposes.

Signed-off-by: Christoph Hellwig <hch at lst.de>
Acked-by: Keith Busch <keith.busch at intel.com>
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/core.c |  10 +--
 drivers/nvme/host/nvme.h |  61 ++++++---------
 drivers/nvme/host/pci.c  | 190 +++++++++++++++++++++++++++++++----------------
 drivers/nvme/host/scsi.c |  89 ++++++++++------------
 4 files changed, 193 insertions(+), 157 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index ce938a4..ca54a34 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -79,7 +79,7 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 	return __nvme_submit_sync_cmd(q, cmd, buffer, NULL, bufflen, NULL, 0);
 }
 
-int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id)
+int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
 {
 	struct nvme_command c = { };
 	int error;
@@ -99,7 +99,7 @@ int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id)
 	return error;
 }
 
-int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
+int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
 		struct nvme_id_ns **id)
 {
 	struct nvme_command c = { };
@@ -120,7 +120,7 @@ int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
 	return error;
 }
 
-int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
+int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
 					dma_addr_t dma_addr, u32 *result)
 {
 	struct nvme_command c;
@@ -135,7 +135,7 @@ int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
 			result, 0);
 }
 
-int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
+int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
 					dma_addr_t dma_addr, u32 *result)
 {
 	struct nvme_command c;
@@ -150,7 +150,7 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
 			result, 0);
 }
 
-int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log)
+int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
 {
 	struct nvme_command c = { };
 	int error;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 66550b7..19583e1 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -30,46 +30,16 @@ enum {
 	NVME_NS_LIGHTNVM	= 1,
 };
 
-/*
- * Represents an NVM Express device.  Each nvme_dev is a PCI function.
- */
-struct nvme_dev {
-	struct list_head node;
-	struct nvme_queue **queues;
+struct nvme_ctrl {
+	const struct nvme_ctrl_ops *ops;
 	struct request_queue *admin_q;
-	struct blk_mq_tag_set tagset;
-	struct blk_mq_tag_set admin_tagset;
-	u32 __iomem *dbs;
 	struct device *dev;
-	struct dma_pool *prp_page_pool;
-	struct dma_pool *prp_small_pool;
 	int instance;
-	unsigned queue_count;
-	unsigned online_queues;
-	unsigned max_qid;
-	int q_depth;
-	u32 db_stride;
-	u32 ctrl_config;
-	struct msix_entry *entry;
-	void __iomem *bar;
-	struct list_head namespaces;
-	struct kref kref;
-	struct device *device;
-	struct work_struct reset_work;
-	struct work_struct probe_work;
-	struct work_struct scan_work;
+
 	char name[12];
 	char serial[20];
 	char model[40];
 	char firmware_rev[8];
-	bool subsystem;
-	u32 max_hw_sectors;
-	u32 stripe_size;
-	u32 page_size;
-	void __iomem *cmb;
-	dma_addr_t cmb_dma_addr;
-	u64 cmb_size;
-	u32 cmbsz;
 	u16 oncs;
 	u16 abort_limit;
 	u8 event_limit;
@@ -82,7 +52,7 @@ struct nvme_dev {
 struct nvme_ns {
 	struct list_head list;
 
-	struct nvme_dev *dev;
+	struct nvme_ctrl *ctrl;
 	struct request_queue *queue;
 	struct gendisk *disk;
 	struct kref kref;
@@ -97,6 +67,19 @@ struct nvme_ns {
 	u32 mode_select_block_len;
 };
 
+struct nvme_ctrl_ops {
+	int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
+};
+
+static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl)
+{
+	u32 val = 0;
+
+	if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &val))
+		return false;
+	return val & NVME_CSTS_RDY;
+}
+
 static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
 {
 	return (sector >> (ns->lba_shift - 9));
@@ -107,13 +90,13 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 		void *buffer, void __user *ubuffer, unsigned bufflen,
 		u32 *result, unsigned timeout);
-int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id);
-int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
+int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id);
+int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
 		struct nvme_id_ns **id);
-int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log);
-int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
+int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log);
+int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
 			dma_addr_t dma_addr, u32 *result);
-int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
+int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
 			dma_addr_t dma_addr, u32 *result);
 
 struct sg_io_hdr;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index bfea7ec..8a564f4 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -87,6 +87,9 @@ static wait_queue_head_t nvme_kthread_wait;
 
 static struct class *nvme_class;
 
+struct nvme_dev;
+struct nvme_queue;
+
 static int __nvme_reset(struct nvme_dev *dev);
 static int nvme_reset(struct nvme_dev *dev);
 static void nvme_process_cq(struct nvme_queue *nvmeq);
@@ -102,6 +105,49 @@ struct async_cmd_info {
 };
 
 /*
+ * Represents an NVM Express device.  Each nvme_dev is a PCI function.
+ */
+struct nvme_dev {
+	struct list_head node;
+	struct nvme_queue **queues;
+	struct blk_mq_tag_set tagset;
+	struct blk_mq_tag_set admin_tagset;
+	u32 __iomem *dbs;
+	struct device *dev;
+	struct dma_pool *prp_page_pool;
+	struct dma_pool *prp_small_pool;
+	unsigned queue_count;
+	unsigned online_queues;
+	unsigned max_qid;
+	int q_depth;
+	u32 db_stride;
+	u32 ctrl_config;
+	struct msix_entry *entry;
+	void __iomem *bar;
+	struct list_head namespaces;
+	struct kref kref;
+	struct device *device;
+	struct work_struct reset_work;
+	struct work_struct probe_work;
+	struct work_struct scan_work;
+	bool subsystem;
+	u32 max_hw_sectors;
+	u32 stripe_size;
+	u32 page_size;
+	void __iomem *cmb;
+	dma_addr_t cmb_dma_addr;
+	u64 cmb_size;
+	u32 cmbsz;
+
+	struct nvme_ctrl ctrl;
+};
+
+static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
+{
+	return container_of(ctrl, struct nvme_dev, ctrl);
+}
+
+/*
  * An NVM Express queue.  Each device has at least two (one for admin
  * commands and one for I/O commands).
  */
@@ -333,7 +379,7 @@ static void async_req_completion(struct nvme_queue *nvmeq, void *ctx,
 	u16 status = le16_to_cpup(&cqe->status) >> 1;
 
 	if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ)
-		++nvmeq->dev->event_limit;
+		++nvmeq->dev->ctrl.event_limit;
 	if (status != NVME_SC_SUCCESS)
 		return;
 
@@ -357,7 +403,7 @@ static void abort_completion(struct nvme_queue *nvmeq, void *ctx,
 	blk_mq_free_request(req);
 
 	dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result);
-	++nvmeq->dev->abort_limit;
+	++nvmeq->dev->ctrl.abort_limit;
 }
 
 static void async_completion(struct nvme_queue *nvmeq, void *ctx,
@@ -1051,7 +1097,7 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev)
 	struct nvme_cmd_info *cmd_info;
 	struct request *req;
 
-	req = blk_mq_alloc_request(dev->admin_q, WRITE,
+	req = blk_mq_alloc_request(dev->ctrl.admin_q, WRITE,
 			BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
@@ -1077,7 +1123,7 @@ static int nvme_submit_admin_async_cmd(struct nvme_dev *dev,
 	struct request *req;
 	struct nvme_cmd_info *cmd_rq;
 
-	req = blk_mq_alloc_request(dev->admin_q, WRITE, 0);
+	req = blk_mq_alloc_request(dev->ctrl.admin_q, WRITE, 0);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
@@ -1101,7 +1147,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
 	c.delete_queue.opcode = opcode;
 	c.delete_queue.qid = cpu_to_le16(id);
 
-	return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
+	return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
 }
 
 static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
@@ -1122,7 +1168,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
 	c.create_cq.cq_flags = cpu_to_le16(flags);
 	c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);
 
-	return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
+	return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
 }
 
 static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
@@ -1143,7 +1189,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
 	c.create_sq.sq_flags = cpu_to_le16(flags);
 	c.create_sq.cqid = cpu_to_le16(qid);
 
-	return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
+	return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
 }
 
 static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
@@ -1182,10 +1228,10 @@ static void nvme_abort_req(struct request *req)
 		return;
 	}
 
-	if (!dev->abort_limit)
+	if (!dev->ctrl.abort_limit)
 		return;
 
-	abort_req = blk_mq_alloc_request(dev->admin_q, WRITE,
+	abort_req = blk_mq_alloc_request(dev->ctrl.admin_q, WRITE,
 			BLK_MQ_REQ_NOWAIT);
 	if (IS_ERR(abort_req))
 		return;
@@ -1199,7 +1245,7 @@ static void nvme_abort_req(struct request *req)
 	cmd.abort.sqid = cpu_to_le16(nvmeq->qid);
 	cmd.abort.command_id = abort_req->tag;
 
-	--dev->abort_limit;
+	--dev->ctrl.abort_limit;
 	cmd_rq->aborted = 1;
 
 	dev_warn(nvmeq->q_dmadev, "Aborting I/O %d QID %d\n", req->tag,
@@ -1294,8 +1340,8 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 	nvmeq->cq_vector = -1;
 	spin_unlock_irq(&nvmeq->q_lock);
 
-	if (!nvmeq->qid && nvmeq->dev->admin_q)
-		blk_mq_freeze_queue_start(nvmeq->dev->admin_q);
+	if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
+		blk_mq_freeze_queue_start(nvmeq->dev->ctrl.admin_q);
 
 	irq_set_affinity_hint(vector, NULL);
 	free_irq(vector, nvmeq);
@@ -1391,7 +1437,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 	nvmeq->q_dmadev = dev->dev;
 	nvmeq->dev = dev;
 	snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d",
-			dev->instance, qid);
+			dev->ctrl.instance, qid);
 	spin_lock_init(&nvmeq->q_lock);
 	nvmeq->cq_head = 0;
 	nvmeq->cq_phase = 1;
@@ -1559,15 +1605,15 @@ static struct blk_mq_ops nvme_mq_ops = {
 
 static void nvme_dev_remove_admin(struct nvme_dev *dev)
 {
-	if (dev->admin_q && !blk_queue_dying(dev->admin_q)) {
-		blk_cleanup_queue(dev->admin_q);
+	if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) {
+		blk_cleanup_queue(dev->ctrl.admin_q);
 		blk_mq_free_tag_set(&dev->admin_tagset);
 	}
 }
 
 static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 {
-	if (!dev->admin_q) {
+	if (!dev->ctrl.admin_q) {
 		dev->admin_tagset.ops = &nvme_mq_admin_ops;
 		dev->admin_tagset.nr_hw_queues = 1;
 		dev->admin_tagset.queue_depth = NVME_AQ_DEPTH - 1;
@@ -1580,18 +1626,18 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 		if (blk_mq_alloc_tag_set(&dev->admin_tagset))
 			return -ENOMEM;
 
-		dev->admin_q = blk_mq_init_queue(&dev->admin_tagset);
-		if (IS_ERR(dev->admin_q)) {
+		dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset);
+		if (IS_ERR(dev->ctrl.admin_q)) {
 			blk_mq_free_tag_set(&dev->admin_tagset);
 			return -ENOMEM;
 		}
-		if (!blk_get_queue(dev->admin_q)) {
+		if (!blk_get_queue(dev->ctrl.admin_q)) {
 			nvme_dev_remove_admin(dev);
-			dev->admin_q = NULL;
+			dev->ctrl.admin_q = NULL;
 			return -ENODEV;
 		}
 	} else
-		blk_mq_unfreeze_queue(dev->admin_q);
+		blk_mq_unfreeze_queue(dev->ctrl.admin_q);
 
 	return 0;
 }
@@ -1670,7 +1716,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 
 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 {
-	struct nvme_dev *dev = ns->dev;
+	struct nvme_dev *dev = to_nvme_dev(ns->ctrl);
 	struct nvme_user_io io;
 	struct nvme_command c;
 	unsigned length, meta_len;
@@ -1745,7 +1791,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	return status;
 }
 
-static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
+static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 			struct nvme_passthru_cmd __user *ucmd)
 {
 	struct nvme_passthru_cmd cmd;
@@ -1774,7 +1820,7 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
 	if (cmd.timeout_ms)
 		timeout = msecs_to_jiffies(cmd.timeout_ms);
 
-	status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c,
+	status = __nvme_submit_sync_cmd(ns ? ns->queue : ctrl->admin_q, &c,
 			NULL, (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
 			&cmd.result, timeout);
 	if (status >= 0) {
@@ -1804,9 +1850,9 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
 		force_successful_syscall_return();
 		return ns->ns_id;
 	case NVME_IOCTL_ADMIN_CMD:
-		return nvme_user_cmd(ns->dev, NULL, (void __user *)arg);
+		return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg);
 	case NVME_IOCTL_IO_CMD:
-		return nvme_user_cmd(ns->dev, ns, (void __user *)arg);
+		return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
 	case NVME_IOCTL_SUBMIT_IO:
 		return nvme_submit_io(ns, (void __user *)arg);
 	case SG_GET_VERSION_NUM:
@@ -1836,6 +1882,7 @@ static void nvme_free_dev(struct kref *kref);
 static void nvme_free_ns(struct kref *kref)
 {
 	struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
+	struct nvme_dev *dev = to_nvme_dev(ns->ctrl);
 
 	if (ns->type == NVME_NS_LIGHTNVM)
 		nvme_nvm_unregister(ns->queue, ns->disk->disk_name);
@@ -1844,7 +1891,7 @@ static void nvme_free_ns(struct kref *kref)
 	ns->disk->private_data = NULL;
 	spin_unlock(&dev_list_lock);
 
-	kref_put(&ns->dev->kref, nvme_free_dev);
+	kref_put(&dev->kref, nvme_free_dev);
 	put_disk(ns->disk);
 	kfree(ns);
 }
@@ -1893,15 +1940,15 @@ static void nvme_config_discard(struct nvme_ns *ns)
 static int nvme_revalidate_disk(struct gendisk *disk)
 {
 	struct nvme_ns *ns = disk->private_data;
-	struct nvme_dev *dev = ns->dev;
+	struct nvme_dev *dev = to_nvme_dev(ns->ctrl);
 	struct nvme_id_ns *id;
 	u8 lbaf, pi_type;
 	u16 old_ms;
 	unsigned short bs;
 
-	if (nvme_identify_ns(dev, ns->ns_id, &id)) {
+	if (nvme_identify_ns(&dev->ctrl, ns->ns_id, &id)) {
 		dev_warn(dev->dev, "%s: Identify failure nvme%dn%d\n", __func__,
-						dev->instance, ns->ns_id);
+						dev->ctrl.instance, ns->ns_id);
 		return -ENODEV;
 	}
 	if (id->ncap == 0) {
@@ -1957,7 +2004,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 	else
 		set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
 
-	if (dev->oncs & NVME_CTRL_ONCS_DSM)
+	if (dev->ctrl.oncs & NVME_CTRL_ONCS_DSM)
 		nvme_config_discard(ns);
 	blk_mq_unfreeze_queue(disk->queue);
 
@@ -2095,10 +2142,10 @@ static int nvme_kthread(void *data)
 				spin_lock_irq(&nvmeq->q_lock);
 				nvme_process_cq(nvmeq);
 
-				while ((i == 0) && (dev->event_limit > 0)) {
+				while (i == 0 && dev->ctrl.event_limit > 0) {
 					if (nvme_submit_async_admin_req(dev))
 						break;
-					dev->event_limit--;
+					dev->ctrl.event_limit--;
 				}
 				spin_unlock_irq(&nvmeq->q_lock);
 			}
@@ -2124,7 +2171,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
 		goto out_free_ns;
 	queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue);
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
-	ns->dev = dev;
+	ns->ctrl = &dev->ctrl;
 	ns->queue->queuedata = ns;
 
 	disk = alloc_disk_node(0, node);
@@ -2145,7 +2192,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
 	}
 	if (dev->stripe_size)
 		blk_queue_chunk_sectors(ns->queue, dev->stripe_size >> 9);
-	if (dev->vwc & NVME_CTRL_VWC_PRESENT)
+	if (dev->ctrl.vwc & NVME_CTRL_VWC_PRESENT)
 		blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
 	blk_queue_virt_boundary(ns->queue, dev->page_size - 1);
 
@@ -2156,7 +2203,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
 	disk->queue = ns->queue;
 	disk->driverfs_dev = dev->device;
 	disk->flags = GENHD_FL_EXT_DEVT;
-	sprintf(disk->disk_name, "nvme%dn%d", dev->instance, nsid);
+	sprintf(disk->disk_name, "nvme%dn%d", dev->ctrl.instance, nsid);
 
 	/*
 	 * Initialize capacity to 0 until we establish the namespace format and
@@ -2221,7 +2268,7 @@ static int set_queue_count(struct nvme_dev *dev, int count)
 	u32 result;
 	u32 q_count = (count - 1) | ((count - 1) << 16);
 
-	status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0,
+	status = nvme_set_features(&dev->ctrl, NVME_FEAT_NUM_QUEUES, q_count, 0,
 								&result);
 	if (status < 0)
 		return status;
@@ -2405,7 +2452,8 @@ static inline bool nvme_io_incapable(struct nvme_dev *dev)
 
 static void nvme_ns_remove(struct nvme_ns *ns)
 {
-	bool kill = nvme_io_incapable(ns->dev) && !blk_queue_dying(ns->queue);
+	bool kill = nvme_io_incapable(to_nvme_dev(ns->ctrl)) &&
+			!blk_queue_dying(ns->queue);
 
 	if (kill)
 		blk_set_queue_dying(ns->queue);
@@ -2462,7 +2510,7 @@ static void nvme_dev_scan(struct work_struct *work)
 
 	if (!dev->tagset.tags)
 		return;
-	if (nvme_identify_ctrl(dev, &ctrl))
+	if (nvme_identify_ctrl(&dev->ctrl, &ctrl))
 		return;
 	nvme_scan_namespaces(dev, le32_to_cpup(&ctrl->nn));
 	kfree(ctrl);
@@ -2482,18 +2530,18 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	struct nvme_id_ctrl *ctrl;
 	int shift = NVME_CAP_MPSMIN(lo_hi_readq(dev->bar + NVME_REG_CAP)) + 12;
 
-	res = nvme_identify_ctrl(dev, &ctrl);
+	res = nvme_identify_ctrl(&dev->ctrl, &ctrl);
 	if (res) {
 		dev_err(dev->dev, "Identify Controller failed (%d)\n", res);
 		return -EIO;
 	}
 
-	dev->oncs = le16_to_cpup(&ctrl->oncs);
-	dev->abort_limit = ctrl->acl + 1;
-	dev->vwc = ctrl->vwc;
-	memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
-	memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
-	memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
+	dev->ctrl.oncs = le16_to_cpup(&ctrl->oncs);
+	dev->ctrl.abort_limit = ctrl->acl + 1;
+	dev->ctrl.vwc = ctrl->vwc;
+	memcpy(dev->ctrl.serial, ctrl->sn, sizeof(ctrl->sn));
+	memcpy(dev->ctrl.model, ctrl->mn, sizeof(ctrl->mn));
+	memcpy(dev->ctrl.firmware_rev, ctrl->fr, sizeof(ctrl->fr));
 	if (ctrl->mdts)
 		dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9);
 	else
@@ -2728,7 +2776,7 @@ static void nvme_disable_io_queues(struct nvme_dev *dev)
 	DEFINE_KTHREAD_WORKER_ONSTACK(worker);
 	struct nvme_delq_ctx dq;
 	struct task_struct *kworker_task = kthread_run(kthread_worker_fn,
-					&worker, "nvme%d", dev->instance);
+					&worker, "nvme%d", dev->ctrl.instance);
 
 	if (IS_ERR(kworker_task)) {
 		dev_err(dev->dev,
@@ -2879,14 +2927,14 @@ static int nvme_set_instance(struct nvme_dev *dev)
 	if (error)
 		return -ENODEV;
 
-	dev->instance = instance;
+	dev->ctrl.instance = instance;
 	return 0;
 }
 
 static void nvme_release_instance(struct nvme_dev *dev)
 {
 	spin_lock(&dev_list_lock);
-	ida_remove(&nvme_instance_ida, dev->instance);
+	ida_remove(&nvme_instance_ida, dev->ctrl.instance);
 	spin_unlock(&dev_list_lock);
 }
 
@@ -2899,8 +2947,8 @@ static void nvme_free_dev(struct kref *kref)
 	nvme_release_instance(dev);
 	if (dev->tagset.tags)
 		blk_mq_free_tag_set(&dev->tagset);
-	if (dev->admin_q)
-		blk_put_queue(dev->admin_q);
+	if (dev->ctrl.admin_q)
+		blk_put_queue(dev->ctrl.admin_q);
 	kfree(dev->queues);
 	kfree(dev->entry);
 	kfree(dev);
@@ -2914,8 +2962,8 @@ static int nvme_dev_open(struct inode *inode, struct file *f)
 
 	spin_lock(&dev_list_lock);
 	list_for_each_entry(dev, &dev_list, node) {
-		if (dev->instance == instance) {
-			if (!dev->admin_q) {
+		if (dev->ctrl.instance == instance) {
+			if (!dev->ctrl.admin_q) {
 				ret = -EWOULDBLOCK;
 				break;
 			}
@@ -2945,12 +2993,12 @@ static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 
 	switch (cmd) {
 	case NVME_IOCTL_ADMIN_CMD:
-		return nvme_user_cmd(dev, NULL, (void __user *)arg);
+		return nvme_user_cmd(&dev->ctrl, NULL, (void __user *)arg);
 	case NVME_IOCTL_IO_CMD:
 		if (list_empty(&dev->namespaces))
 			return -ENOTTY;
 		ns = list_first_entry(&dev->namespaces, struct nvme_ns, list);
-		return nvme_user_cmd(dev, ns, (void __user *)arg);
+		return nvme_user_cmd(&dev->ctrl, ns, (void __user *)arg);
 	case NVME_IOCTL_RESET:
 		dev_warn(dev->dev, "resetting controller\n");
 		return nvme_reset(dev);
@@ -3011,7 +3059,7 @@ static void nvme_probe_work(struct work_struct *work)
 	if (result)
 		goto free_tags;
 
-	dev->event_limit = 1;
+	dev->ctrl.event_limit = 1;
 
 	/*
 	 * Keep the controller around but remove all namespaces if we don't have
@@ -3029,8 +3077,8 @@ static void nvme_probe_work(struct work_struct *work)
 
  free_tags:
 	nvme_dev_remove_admin(dev);
-	blk_put_queue(dev->admin_q);
-	dev->admin_q = NULL;
+	blk_put_queue(dev->ctrl.admin_q);
+	dev->ctrl.admin_q = NULL;
 	dev->queues[0]->tags = NULL;
  disable:
 	nvme_disable_queue(dev, 0);
@@ -3058,7 +3106,7 @@ static void nvme_dead_ctrl(struct nvme_dev *dev)
 	dev_warn(dev->dev, "Device failed to resume\n");
 	kref_get(&dev->kref);
 	if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d",
-						dev->instance))) {
+						dev->ctrl.instance))) {
 		dev_err(dev->dev,
 			"Failed to start controller remove task\n");
 		kref_put(&dev->kref, nvme_free_dev);
@@ -3100,7 +3148,7 @@ static int nvme_reset(struct nvme_dev *dev)
 {
 	int ret;
 
-	if (!dev->admin_q || blk_queue_dying(dev->admin_q))
+	if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
 		return -ENODEV;
 
 	spin_lock(&dev_list_lock);
@@ -3131,6 +3179,16 @@ static ssize_t nvme_sysfs_reset(struct device *dev,
 }
 static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
 
+static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
+{
+	*val = readl(to_nvme_dev(ctrl)->bar + off);
+	return 0;
+}
+
+static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
+	.reg_read32		= nvme_pci_reg_read32,
+};
+
 static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	int node, result = -ENOMEM;
@@ -3156,6 +3214,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	INIT_WORK(&dev->reset_work, nvme_reset_work);
 	dev->dev = get_device(&pdev->dev);
 	pci_set_drvdata(pdev, dev);
+
+	dev->ctrl.ops = &nvme_pci_ctrl_ops;
+	dev->ctrl.dev = dev->dev;
+
 	result = nvme_set_instance(dev);
 	if (result)
 		goto put_pci;
@@ -3166,8 +3228,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	kref_init(&dev->kref);
 	dev->device = device_create(nvme_class, &pdev->dev,
-				MKDEV(nvme_char_major, dev->instance),
-				dev, "nvme%d", dev->instance);
+				MKDEV(nvme_char_major, dev->ctrl.instance),
+				dev, "nvme%d", dev->ctrl.instance);
 	if (IS_ERR(dev->device)) {
 		result = PTR_ERR(dev->device);
 		goto release_pools;
@@ -3186,7 +3248,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	return 0;
 
  put_dev:
-	device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
+	device_destroy(nvme_class, MKDEV(nvme_char_major, dev->ctrl.instance));
 	put_device(dev->device);
  release_pools:
 	nvme_release_prp_pools(dev);
@@ -3233,7 +3295,7 @@ static void nvme_remove(struct pci_dev *pdev)
 	nvme_dev_remove(dev);
 	nvme_dev_shutdown(dev);
 	nvme_dev_remove_admin(dev);
-	device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
+	device_destroy(nvme_class, MKDEV(nvme_char_major, dev->ctrl.instance));
 	nvme_free_queues(dev, 0);
 	nvme_release_cmb(dev);
 	nvme_release_prp_pools(dev);
diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c
index 0bf90b6..bba2955 100644
--- a/drivers/nvme/host/scsi.c
+++ b/drivers/nvme/host/scsi.c
@@ -524,7 +524,7 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
 					struct sg_io_hdr *hdr, u8 *inq_response,
 					int alloc_len)
 {
-	struct nvme_dev *dev = ns->dev;
+	struct nvme_ctrl *ctrl = ns->ctrl;
 	struct nvme_id_ns *id_ns;
 	int res;
 	int nvme_sc;
@@ -532,10 +532,10 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
 	u8 resp_data_format = 0x02;
 	u8 protect;
 	u8 cmdque = 0x01 << 1;
-	u8 fw_offset = sizeof(dev->firmware_rev);
+	u8 fw_offset = sizeof(ctrl->firmware_rev);
 
 	/* nvme ns identify - use DPS value for PROTECT field */
-	nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+	nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		return res;
@@ -553,12 +553,12 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
 	inq_response[5] = protect;	/* sccs=0 | acc=0 | tpgs=0 | pc3=0 */
 	inq_response[7] = cmdque;	/* wbus16=0 | sync=0 | vs=0 */
 	strncpy(&inq_response[8], "NVMe    ", 8);
-	strncpy(&inq_response[16], dev->model, 16);
+	strncpy(&inq_response[16], ctrl->model, 16);
 
-	while (dev->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4)
+	while (ctrl->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4)
 		fw_offset--;
 	fw_offset -= 4;
-	strncpy(&inq_response[32], dev->firmware_rev + fw_offset, 4);
+	strncpy(&inq_response[32], ctrl->firmware_rev + fw_offset, 4);
 
 	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
 	return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
@@ -588,27 +588,26 @@ static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
 					struct sg_io_hdr *hdr, u8 *inq_response,
 					int alloc_len)
 {
-	struct nvme_dev *dev = ns->dev;
 	int xfer_len;
 
 	memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
 	inq_response[1] = INQ_UNIT_SERIAL_NUMBER_PAGE; /* Page Code */
 	inq_response[3] = INQ_SERIAL_NUMBER_LENGTH;    /* Page Length */
-	strncpy(&inq_response[4], dev->serial, INQ_SERIAL_NUMBER_LENGTH);
+	strncpy(&inq_response[4], ns->ctrl->serial, INQ_SERIAL_NUMBER_LENGTH);
 
 	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
 	return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
 }
 
 static int nvme_fill_device_id_eui64(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-		u8 *inq_response, int alloc_len)
+		u8 *inq_response, int alloc_len, u32 vs)
 {
 	struct nvme_id_ns *id_ns;
 	int nvme_sc, res;
 	size_t len;
 	void *eui;
 
-	nvme_sc = nvme_identify_ns(ns->dev, ns->ns_id, &id_ns);
+	nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		return res;
@@ -616,7 +615,7 @@ static int nvme_fill_device_id_eui64(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	eui = id_ns->eui64;
 	len = sizeof(id_ns->eui64);
 
-	if (readl(ns->dev->bar + NVME_REG_VS) >= NVME_VS(1, 2)) {
+	if (vs >= NVME_VS(1, 2)) {
 		if (bitmap_empty(eui, len * 8)) {
 			eui = id_ns->nguid;
 			len = sizeof(id_ns->nguid);
@@ -648,7 +647,7 @@ out_free_id:
 static int nvme_fill_device_id_scsi_string(struct nvme_ns *ns,
 		struct sg_io_hdr *hdr, u8 *inq_response, int alloc_len)
 {
-	struct nvme_dev *dev = ns->dev;
+	struct nvme_ctrl *ctrl = ns->ctrl;
 	struct nvme_id_ctrl *id_ctrl;
 	int nvme_sc, res;
 
@@ -659,7 +658,7 @@ static int nvme_fill_device_id_scsi_string(struct nvme_ns *ns,
 				SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
 	}
 
-	nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
+	nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		return res;
@@ -675,9 +674,9 @@ static int nvme_fill_device_id_scsi_string(struct nvme_ns *ns,
 	inq_response[7] = 0x44;	/* Designator Length */
 
 	sprintf(&inq_response[8], "%04x", le16_to_cpu(id_ctrl->vid));
-	memcpy(&inq_response[12], dev->model, sizeof(dev->model));
+	memcpy(&inq_response[12], ctrl->model, sizeof(ctrl->model));
 	sprintf(&inq_response[52], "%04x", cpu_to_be32(ns->ns_id));
-	memcpy(&inq_response[56], dev->serial, sizeof(dev->serial));
+	memcpy(&inq_response[56], ctrl->serial, sizeof(ctrl->serial));
 
 	res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
 	kfree(id_ctrl);
@@ -688,9 +687,14 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					u8 *resp, int alloc_len)
 {
 	int res;
+	u32 vs;
 
-	if (readl(ns->dev->bar + NVME_REG_VS) >= NVME_VS(1, 1)) {
-		res = nvme_fill_device_id_eui64(ns, hdr, resp, alloc_len);
+	res = ns->ctrl->ops->reg_read32(ns->ctrl, NVME_REG_VS, &vs);
+	if (res)
+		return res;
+
+	if (vs >= NVME_VS(1, 1)) {
+		res = nvme_fill_device_id_eui64(ns, hdr, resp, alloc_len, vs);
 		if (res != -EOPNOTSUPP)
 			return res;
 	}
@@ -704,7 +708,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	u8 *inq_response;
 	int res;
 	int nvme_sc;
-	struct nvme_dev *dev = ns->dev;
+	struct nvme_ctrl *ctrl = ns->ctrl;
 	struct nvme_id_ctrl *id_ctrl;
 	struct nvme_id_ns *id_ns;
 	int xfer_len;
@@ -720,7 +724,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	if (inq_response == NULL)
 		return -ENOMEM;
 
-	nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+	nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		goto out_free_inq;
@@ -736,7 +740,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	app_chk = protect << 1;
 	ref_chk = protect;
 
-	nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
+	nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		goto out_free_inq;
@@ -847,7 +851,6 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
 	int res;
 	int xfer_len;
 	u8 *log_response;
-	struct nvme_dev *dev = ns->dev;
 	struct nvme_smart_log *smart_log;
 	u8 temp_c;
 	u16 temp_k;
@@ -856,7 +859,7 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
 	if (log_response == NULL)
 		return -ENOMEM;
 
-	res = nvme_get_log_page(dev, &smart_log);
+	res = nvme_get_log_page(ns->ctrl, &smart_log);
 	if (res < 0)
 		goto out_free_response;
 
@@ -894,7 +897,6 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	int res;
 	int xfer_len;
 	u8 *log_response;
-	struct nvme_dev *dev = ns->dev;
 	struct nvme_smart_log *smart_log;
 	u32 feature_resp;
 	u8 temp_c_cur, temp_c_thresh;
@@ -904,7 +906,7 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	if (log_response == NULL)
 		return -ENOMEM;
 
-	res = nvme_get_log_page(dev, &smart_log);
+	res = nvme_get_log_page(ns->ctrl, &smart_log);
 	if (res < 0)
 		goto out_free_response;
 
@@ -918,7 +920,7 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	kfree(smart_log);
 
 	/* Get Features for Temp Threshold */
-	res = nvme_get_features(dev, NVME_FEAT_TEMP_THRESH, 0, 0,
+	res = nvme_get_features(ns->ctrl, NVME_FEAT_TEMP_THRESH, 0, 0,
 								&feature_resp);
 	if (res != NVME_SC_SUCCESS)
 		temp_c_thresh = LOG_TEMP_UNKNOWN;
@@ -980,7 +982,6 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 {
 	int res;
 	int nvme_sc;
-	struct nvme_dev *dev = ns->dev;
 	struct nvme_id_ns *id_ns;
 	u8 flbas;
 	u32 lba_length;
@@ -990,7 +991,7 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN)
 		return -EINVAL;
 
-	nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+	nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		return res;
@@ -1046,14 +1047,13 @@ static int nvme_trans_fill_caching_page(struct nvme_ns *ns,
 {
 	int res = 0;
 	int nvme_sc;
-	struct nvme_dev *dev = ns->dev;
 	u32 feature_resp;
 	u8 vwc;
 
 	if (len < MODE_PAGE_CACHING_LEN)
 		return -EINVAL;
 
-	nvme_sc = nvme_get_features(dev, NVME_FEAT_VOLATILE_WC, 0, 0,
+	nvme_sc = nvme_get_features(ns->ctrl, NVME_FEAT_VOLATILE_WC, 0, 0,
 								&feature_resp);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
@@ -1239,12 +1239,11 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 {
 	int res;
 	int nvme_sc;
-	struct nvme_dev *dev = ns->dev;
 	struct nvme_id_ctrl *id_ctrl;
 	int lowest_pow_st;	/* max npss = lowest power consumption */
 	unsigned ps_desired = 0;
 
-	nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
+	nvme_sc = nvme_identify_ctrl(ns->ctrl, &id_ctrl);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		return res;
@@ -1288,7 +1287,7 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 				SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
 		break;
 	}
-	nvme_sc = nvme_set_features(dev, NVME_FEAT_POWER_MGMT, ps_desired, 0,
+	nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_POWER_MGMT, ps_desired, 0,
 				    NULL);
 	return nvme_trans_status_code(hdr, nvme_sc);
 }
@@ -1312,7 +1311,6 @@ static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr
 					u8 buffer_id)
 {
 	int nvme_sc;
-	struct nvme_dev *dev = ns->dev;
 	struct nvme_command c;
 
 	if (hdr->iovec_count > 0) {
@@ -1329,7 +1327,7 @@ static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr
 	c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
 	c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
 
-	nvme_sc = __nvme_submit_sync_cmd(dev->admin_q, &c, NULL,
+	nvme_sc = __nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, NULL,
 			hdr->dxferp, tot_len, NULL, 0);
 	return nvme_trans_status_code(hdr, nvme_sc);
 }
@@ -1396,14 +1394,13 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 {
 	int res = 0;
 	int nvme_sc;
-	struct nvme_dev *dev = ns->dev;
 	unsigned dword11;
 
 	switch (page_code) {
 	case MODE_PAGE_CACHING:
 		dword11 = ((mode_page[2] & CACHING_MODE_PAGE_WCE_MASK) ? 1 : 0);
-		nvme_sc = nvme_set_features(dev, NVME_FEAT_VOLATILE_WC, dword11,
-					    0, NULL);
+		nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_VOLATILE_WC,
+					    dword11, 0, NULL);
 		res = nvme_trans_status_code(hdr, nvme_sc);
 		break;
 	case MODE_PAGE_CONTROL:
@@ -1505,7 +1502,6 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
 {
 	int res = 0;
 	int nvme_sc;
-	struct nvme_dev *dev = ns->dev;
 	u8 flbas;
 
 	/*
@@ -1518,7 +1514,7 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
 	if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) {
 		struct nvme_id_ns *id_ns;
 
-		nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+		nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
 		res = nvme_trans_status_code(hdr, nvme_sc);
 		if (res)
 			return res;
@@ -1602,7 +1598,6 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 {
 	int res;
 	int nvme_sc;
-	struct nvme_dev *dev = ns->dev;
 	struct nvme_id_ns *id_ns;
 	u8 i;
 	u8 flbas, nlbaf;
@@ -1611,7 +1606,7 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	struct nvme_command c;
 
 	/* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */
-	nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+	nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		return res;
@@ -1643,7 +1638,7 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	c.format.nsid = cpu_to_le32(ns->ns_id);
 	c.format.cdw10 = cpu_to_le32(cdw10);
 
-	nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
+	nvme_sc = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, NULL, 0);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 
 	kfree(id_ns);
@@ -2072,7 +2067,6 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	u32 alloc_len;
 	u32 resp_size;
 	u32 xfer_len;
-	struct nvme_dev *dev = ns->dev;
 	struct nvme_id_ns *id_ns;
 	u8 *response;
 
@@ -2084,7 +2078,7 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		resp_size = READ_CAP_10_RESP_SIZE;
 	}
 
-	nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+	nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		return res;	
@@ -2112,7 +2106,6 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	int nvme_sc;
 	u32 alloc_len, xfer_len, resp_size;
 	u8 *response;
-	struct nvme_dev *dev = ns->dev;
 	struct nvme_id_ctrl *id_ctrl;
 	u32 ll_length, lun_id;
 	u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET;
@@ -2126,7 +2119,7 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	case ALL_LUNS_RETURNED:
 	case ALL_WELL_KNOWN_LUNS_RETURNED:
 	case RESTRICTED_LUNS_RETURNED:
-		nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
+		nvme_sc = nvme_identify_ctrl(ns->ctrl, &id_ctrl);
 		res = nvme_trans_status_code(hdr, nvme_sc);
 		if (res)
 			return res;
@@ -2327,9 +2320,7 @@ static int nvme_trans_test_unit_ready(struct nvme_ns *ns,
 					struct sg_io_hdr *hdr,
 					u8 *cmd)
 {
-	struct nvme_dev *dev = ns->dev;
-
-	if (!(readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_RDY))
+	if (nvme_ctrl_ready(ns->ctrl))
 		return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
 					    NOT_READY, SCSI_ASC_LUN_NOT_READY,
 					    SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 08/23] nvme: simplify nvme_setup_prps calling convention
  2015-11-30  8:36 NVMe driver split for Linux 4.5 Christoph Hellwig
                   ` (6 preceding siblings ...)
  2015-11-30  8:36 ` [PATCH 07/23] nvme: split a new struct nvme_ctrl out of struct nvme_dev Christoph Hellwig
@ 2015-11-30  8:36 ` Christoph Hellwig
  2015-11-30  8:36 ` [PATCH 09/23] nvme: refactor nvme_queue_rq Christoph Hellwig
                   ` (8 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2015-11-30  8:36 UTC (permalink / raw)


Pass back a true/false value instead of the length, which the caller had to
compare against the number of bytes in the request, and drop the pointless
gfp_t argument.

Signed-off-by: Christoph Hellwig <hch at lst.de>
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/pci.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 8a564f4..75970fd 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -709,9 +709,8 @@ release_iod:
 		blk_mq_complete_request(req, error);
 }
 
-/* length is in bytes.  gfp flags indicates whether we may sleep. */
-static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
-		int total_len, gfp_t gfp)
+static bool nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
+		int total_len)
 {
 	struct dma_pool *pool;
 	int length = total_len;
@@ -727,7 +726,7 @@ static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
 
 	length -= (page_size - offset);
 	if (length <= 0)
-		return total_len;
+		return true;
 
 	dma_len -= (page_size - offset);
 	if (dma_len) {
@@ -740,7 +739,7 @@ static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
 
 	if (length <= page_size) {
 		iod->first_dma = dma_addr;
-		return total_len;
+		return true;
 	}
 
 	nprps = DIV_ROUND_UP(length, page_size);
@@ -752,11 +751,11 @@ static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
 		iod->npages = 1;
 	}
 
-	prp_list = dma_pool_alloc(pool, gfp, &prp_dma);
+	prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
 	if (!prp_list) {
 		iod->first_dma = dma_addr;
 		iod->npages = -1;
-		return (total_len - length) + page_size;
+		return false;
 	}
 	list[0] = prp_list;
 	iod->first_dma = prp_dma;
@@ -764,9 +763,9 @@ static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
 	for (;;) {
 		if (i == page_size >> 3) {
 			__le64 *old_prp_list = prp_list;
-			prp_list = dma_pool_alloc(pool, gfp, &prp_dma);
+			prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
 			if (!prp_list)
-				return total_len - length;
+				return false;
 			list[iod->npages++] = prp_list;
 			prp_list[0] = old_prp_list[i - 1];
 			old_prp_list[i - 1] = cpu_to_le64(prp_dma);
@@ -786,7 +785,7 @@ static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
 		dma_len = sg_dma_len(sg);
 	}
 
-	return total_len;
+	return true;
 }
 
 static void nvme_submit_priv(struct nvme_queue *nvmeq, struct request *req,
@@ -952,8 +951,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 		if (!dma_map_sg(nvmeq->q_dmadev, iod->sg, iod->nents, dma_dir))
 			goto retry_cmd;
 
-		if (blk_rq_bytes(req) !=
-                    nvme_setup_prps(dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) {
+		if (!nvme_setup_prps(dev, iod, blk_rq_bytes(req))) {
 			dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
 			goto retry_cmd;
 		}
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 09/23] nvme: refactor nvme_queue_rq
  2015-11-30  8:36 NVMe driver split for Linux 4.5 Christoph Hellwig
                   ` (7 preceding siblings ...)
  2015-11-30  8:36 ` [PATCH 08/23] nvme: simplify nvme_setup_prps calling convention Christoph Hellwig
@ 2015-11-30  8:36 ` Christoph Hellwig
  2015-11-30  8:36 ` [PATCH 10/23] nvme: factor out a nvme_unmap_data helper Christoph Hellwig
                   ` (7 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2015-11-30  8:36 UTC (permalink / raw)


This "backports" the structure I've used for the fabrics driver.  It
mostly started out as a cleanup so that I could actually understand
the code, but I think it also qualifies as a micro-optimization due
to the reduced time we hold q_lock and disable interrupts.

Signed-off-by: Christoph Hellwig <hch at lst.de>
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/pci.c | 219 +++++++++++++++++++++---------------------------
 1 file changed, 97 insertions(+), 122 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 75970fd..e5f53f1 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -788,19 +788,53 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
 	return true;
 }
 
-static void nvme_submit_priv(struct nvme_queue *nvmeq, struct request *req,
-		struct nvme_iod *iod)
+static int nvme_map_data(struct nvme_dev *dev, struct nvme_iod *iod,
+		struct nvme_command *cmnd)
 {
-	struct nvme_command cmnd;
+	struct request *req = iod_get_private(iod);
+	struct request_queue *q = req->q;
+	enum dma_data_direction dma_dir = rq_data_dir(req) ?
+			DMA_TO_DEVICE : DMA_FROM_DEVICE;
+	int ret = BLK_MQ_RQ_QUEUE_ERROR;
+
+	sg_init_table(iod->sg, req->nr_phys_segments);
+	iod->nents = blk_rq_map_sg(q, req, iod->sg);
+	if (!iod->nents)
+		goto out;
+
+	ret = BLK_MQ_RQ_QUEUE_BUSY;
+	if (!dma_map_sg(dev->dev, iod->sg, iod->nents, dma_dir))
+		goto out;
+
+	if (!nvme_setup_prps(dev, iod, blk_rq_bytes(req)))
+		goto out_unmap;
+
+	ret = BLK_MQ_RQ_QUEUE_ERROR;
+	if (blk_integrity_rq(req)) {
+		if (blk_rq_count_integrity_sg(q, req->bio) != 1)
+			goto out_unmap;
+
+		sg_init_table(iod->meta_sg, 1);
+		if (blk_rq_map_integrity_sg(q, req->bio, iod->meta_sg) != 1)
+			goto out_unmap;
 
-	memcpy(&cmnd, req->cmd, sizeof(cmnd));
-	cmnd.rw.command_id = req->tag;
-	if (req->nr_phys_segments) {
-		cmnd.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
-		cmnd.rw.prp2 = cpu_to_le64(iod->first_dma);
+		if (rq_data_dir(req))
+			nvme_dif_remap(req, nvme_dif_prep);
+
+		if (!dma_map_sg(dev->dev, iod->meta_sg, 1, dma_dir))
+			goto out_unmap;
 	}
 
-	__nvme_submit_cmd(nvmeq, &cmnd);
+	cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+	cmnd->rw.prp2 = cpu_to_le64(iod->first_dma);
+	if (blk_integrity_rq(req))
+		cmnd->rw.metadata = cpu_to_le64(sg_dma_address(iod->meta_sg));
+	return BLK_MQ_RQ_QUEUE_OK;
+
+out_unmap:
+	dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
+out:
+	return ret;
 }
 
 /*
@@ -808,46 +842,42 @@ static void nvme_submit_priv(struct nvme_queue *nvmeq, struct request *req,
  * worth having a special pool for these or additional cases to handle freeing
  * the iod.
  */
-static void nvme_submit_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns,
-		struct request *req, struct nvme_iod *iod)
+static int nvme_setup_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns,
+		struct nvme_iod *iod, struct nvme_command *cmnd)
 {
-	struct nvme_dsm_range *range =
-				(struct nvme_dsm_range *)iod_list(iod)[0];
-	struct nvme_command cmnd;
+	struct request *req = iod_get_private(iod);
+	struct nvme_dsm_range *range;
+
+	range = dma_pool_alloc(nvmeq->dev->prp_small_pool, GFP_ATOMIC,
+						&iod->first_dma);
+	if (!range)
+		return BLK_MQ_RQ_QUEUE_BUSY;
+	iod_list(iod)[0] = (__le64 *)range;
+	iod->npages = 0;
 
 	range->cattr = cpu_to_le32(0);
 	range->nlb = cpu_to_le32(blk_rq_bytes(req) >> ns->lba_shift);
 	range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
 
-	memset(&cmnd, 0, sizeof(cmnd));
-	cmnd.dsm.opcode = nvme_cmd_dsm;
-	cmnd.dsm.command_id = req->tag;
-	cmnd.dsm.nsid = cpu_to_le32(ns->ns_id);
-	cmnd.dsm.prp1 = cpu_to_le64(iod->first_dma);
-	cmnd.dsm.nr = 0;
-	cmnd.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
-
-	__nvme_submit_cmd(nvmeq, &cmnd);
+	memset(cmnd, 0, sizeof(*cmnd));
+	cmnd->dsm.opcode = nvme_cmd_dsm;
+	cmnd->dsm.nsid = cpu_to_le32(ns->ns_id);
+	cmnd->dsm.prp1 = cpu_to_le64(iod->first_dma);
+	cmnd->dsm.nr = 0;
+	cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
+	return BLK_MQ_RQ_QUEUE_OK;
 }
 
-static void nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns,
-								int cmdid)
+static void nvme_setup_flush(struct nvme_ns *ns, struct nvme_command *cmnd)
 {
-	struct nvme_command cmnd;
-
-	memset(&cmnd, 0, sizeof(cmnd));
-	cmnd.common.opcode = nvme_cmd_flush;
-	cmnd.common.command_id = cmdid;
-	cmnd.common.nsid = cpu_to_le32(ns->ns_id);
-
-	__nvme_submit_cmd(nvmeq, &cmnd);
+	memset(cmnd, 0, sizeof(*cmnd));
+	cmnd->common.opcode = nvme_cmd_flush;
+	cmnd->common.nsid = cpu_to_le32(ns->ns_id);
 }
 
-static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
-							struct nvme_ns *ns)
+static void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
+		struct nvme_command *cmnd)
 {
-	struct request *req = iod_get_private(iod);
-	struct nvme_command cmnd;
 	u16 control = 0;
 	u32 dsmgmt = 0;
 
@@ -859,14 +889,12 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
 	if (req->cmd_flags & REQ_RAHEAD)
 		dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
 
-	memset(&cmnd, 0, sizeof(cmnd));
-	cmnd.rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
-	cmnd.rw.command_id = req->tag;
-	cmnd.rw.nsid = cpu_to_le32(ns->ns_id);
-	cmnd.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
-	cmnd.rw.prp2 = cpu_to_le64(iod->first_dma);
-	cmnd.rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
-	cmnd.rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+	memset(cmnd, 0, sizeof(*cmnd));
+	cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
+	cmnd->rw.command_id = req->tag;
+	cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
+	cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+	cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
 
 	if (ns->ms) {
 		switch (ns->pi_type) {
@@ -877,23 +905,16 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
 		case NVME_NS_DPS_PI_TYPE2:
 			control |= NVME_RW_PRINFO_PRCHK_GUARD |
 					NVME_RW_PRINFO_PRCHK_REF;
-			cmnd.rw.reftag = cpu_to_le32(
+			cmnd->rw.reftag = cpu_to_le32(
 					nvme_block_nr(ns, blk_rq_pos(req)));
 			break;
 		}
-		if (blk_integrity_rq(req))
-			cmnd.rw.metadata =
-				cpu_to_le64(sg_dma_address(iod->meta_sg));
-		else
+		if (!blk_integrity_rq(req))
 			control |= NVME_RW_PRINFO_PRACT;
 	}
 
-	cmnd.rw.control = cpu_to_le16(control);
-	cmnd.rw.dsmgmt = cpu_to_le32(dsmgmt);
-
-	__nvme_submit_cmd(nvmeq, &cmnd);
-
-	return 0;
+	cmnd->rw.control = cpu_to_le16(control);
+	cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
 }
 
 /*
@@ -908,7 +929,8 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct request *req = bd->rq;
 	struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
 	struct nvme_iod *iod;
-	enum dma_data_direction dma_dir;
+	struct nvme_command cmnd;
+	int ret = BLK_MQ_RQ_QUEUE_OK;
 
 	/*
 	 * If formated with metadata, require the block layer provide a buffer
@@ -928,80 +950,33 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 		return BLK_MQ_RQ_QUEUE_BUSY;
 
 	if (req->cmd_flags & REQ_DISCARD) {
-		void *range;
-		/*
-		 * We reuse the small pool to allocate the 16-byte range here
-		 * as it is not worth having a special pool for these or
-		 * additional cases to handle freeing the iod.
-		 */
-		range = dma_pool_alloc(dev->prp_small_pool, GFP_ATOMIC,
-						&iod->first_dma);
-		if (!range)
-			goto retry_cmd;
-		iod_list(iod)[0] = (__le64 *)range;
-		iod->npages = 0;
-	} else if (req->nr_phys_segments) {
-		dma_dir = rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
-
-		sg_init_table(iod->sg, req->nr_phys_segments);
-		iod->nents = blk_rq_map_sg(req->q, req, iod->sg);
-		if (!iod->nents)
-			goto error_cmd;
-
-		if (!dma_map_sg(nvmeq->q_dmadev, iod->sg, iod->nents, dma_dir))
-			goto retry_cmd;
-
-		if (!nvme_setup_prps(dev, iod, blk_rq_bytes(req))) {
-			dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
-			goto retry_cmd;
-		}
-		if (blk_integrity_rq(req)) {
-			if (blk_rq_count_integrity_sg(req->q, req->bio) != 1) {
-				dma_unmap_sg(dev->dev, iod->sg, iod->nents,
-						dma_dir);
-				goto error_cmd;
-			}
-
-			sg_init_table(iod->meta_sg, 1);
-			if (blk_rq_map_integrity_sg(
-					req->q, req->bio, iod->meta_sg) != 1) {
-				dma_unmap_sg(dev->dev, iod->sg, iod->nents,
-						dma_dir);
-				goto error_cmd;
-			}
-
-			if (rq_data_dir(req))
-				nvme_dif_remap(req, nvme_dif_prep);
+		ret = nvme_setup_discard(nvmeq, ns, iod, &cmnd);
+	} else {
+		if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+			memcpy(&cmnd, req->cmd, sizeof(cmnd));
+		else if (req->cmd_flags & REQ_FLUSH)
+			nvme_setup_flush(ns, &cmnd);
+		else
+			nvme_setup_rw(ns, req, &cmnd);
 
-			if (!dma_map_sg(nvmeq->q_dmadev, iod->meta_sg, 1, dma_dir)) {
-				dma_unmap_sg(dev->dev, iod->sg, iod->nents,
-						dma_dir);
-				goto error_cmd;
-			}
-		}
+		if (req->nr_phys_segments)
+			ret = nvme_map_data(dev, iod, &cmnd);
 	}
 
+	if (ret)
+		goto out;
+
+	cmnd.common.command_id = req->tag;
 	nvme_set_info(cmd, iod, req_completion);
-	spin_lock_irq(&nvmeq->q_lock);
-	if (req->cmd_type == REQ_TYPE_DRV_PRIV)
-		nvme_submit_priv(nvmeq, req, iod);
-	else if (req->cmd_flags & REQ_DISCARD)
-		nvme_submit_discard(nvmeq, ns, req, iod);
-	else if (req->cmd_flags & REQ_FLUSH)
-		nvme_submit_flush(nvmeq, ns, req->tag);
-	else
-		nvme_submit_iod(nvmeq, iod, ns);
 
+	spin_lock_irq(&nvmeq->q_lock);
+	__nvme_submit_cmd(nvmeq, &cmnd);
 	nvme_process_cq(nvmeq);
 	spin_unlock_irq(&nvmeq->q_lock);
 	return BLK_MQ_RQ_QUEUE_OK;
-
- error_cmd:
-	nvme_free_iod(dev, iod);
-	return BLK_MQ_RQ_QUEUE_ERROR;
- retry_cmd:
+out:
 	nvme_free_iod(dev, iod);
-	return BLK_MQ_RQ_QUEUE_BUSY;
+	return ret;
 }
 
 static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 10/23] nvme: factor out a nvme_unmap_data helper
  2015-11-30  8:36 NVMe driver split for Linux 4.5 Christoph Hellwig
                   ` (8 preceding siblings ...)
  2015-11-30  8:36 ` [PATCH 09/23] nvme: refactor nvme_queue_rq Christoph Hellwig
@ 2015-11-30  8:36 ` Christoph Hellwig
  2015-11-30  8:36 ` [PATCH 11/23] nvme: move nvme_error_status to common code Christoph Hellwig
                   ` (6 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2015-11-30  8:36 UTC (permalink / raw)


This is the counterpart to nvme_map_data.

Signed-off-by: Christoph Hellwig <hch at lst.de>
---
 drivers/nvme/host/pci.c | 43 +++++++++++++++++++++++++------------------
 1 file changed, 25 insertions(+), 18 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index e5f53f1..801d51d 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -89,10 +89,12 @@ static struct class *nvme_class;
 
 struct nvme_dev;
 struct nvme_queue;
+struct nvme_iod;
 
 static int __nvme_reset(struct nvme_dev *dev);
 static int nvme_reset(struct nvme_dev *dev);
 static void nvme_process_cq(struct nvme_queue *nvmeq);
+static void nvme_unmap_data(struct nvme_dev *dev, struct nvme_iod *iod);
 static void nvme_dead_ctrl(struct nvme_dev *dev);
 
 struct async_cmd_info {
@@ -655,7 +657,6 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 	struct request *req = iod_get_private(iod);
 	struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req);
 	u16 status = le16_to_cpup(&cqe->status) >> 1;
-	bool requeue = false;
 	int error = 0;
 
 	if (unlikely(status)) {
@@ -663,13 +664,14 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 		    && (jiffies - req->start_time) < req->timeout) {
 			unsigned long flags;
 
-			requeue = true;
+			nvme_unmap_data(nvmeq->dev, iod);
+
 			blk_mq_requeue_request(req);
 			spin_lock_irqsave(req->q->queue_lock, flags);
 			if (!blk_queue_stopped(req->q))
 				blk_mq_kick_requeue_list(req->q);
 			spin_unlock_irqrestore(req->q->queue_lock, flags);
-			goto release_iod;
+			return;
 		}
 
 		if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
@@ -692,21 +694,8 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 			"completing aborted command with status:%04x\n",
 			error);
 
-release_iod:
-	if (iod->nents) {
-		dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents,
-			rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-		if (blk_integrity_rq(req)) {
-			if (!rq_data_dir(req))
-				nvme_dif_remap(req, nvme_dif_complete);
-			dma_unmap_sg(nvmeq->dev->dev, iod->meta_sg, 1,
-				rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-		}
-	}
-	nvme_free_iod(nvmeq->dev, iod);
-
-	if (likely(!requeue))
-		blk_mq_complete_request(req, error);
+	nvme_unmap_data(nvmeq->dev, iod);
+	blk_mq_complete_request(req, error);
 }
 
 static bool nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
@@ -837,6 +826,24 @@ out:
 	return ret;
 }
 
+static void nvme_unmap_data(struct nvme_dev *dev, struct nvme_iod *iod)
+{
+	struct request *req = iod_get_private(iod);
+	enum dma_data_direction dma_dir = rq_data_dir(req) ?
+			DMA_TO_DEVICE : DMA_FROM_DEVICE;
+
+	if (iod->nents) {
+		dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
+		if (blk_integrity_rq(req)) {
+			if (!rq_data_dir(req))
+				nvme_dif_remap(req, nvme_dif_complete);
+			dma_unmap_sg(dev->dev, iod->meta_sg, 1, dma_dir);
+		}
+	}
+
+	nvme_free_iod(dev, iod);
+}
+
 /*
  * We reuse the small pool to allocate the 16-byte range here as it is not
  * worth having a special pool for these or additional cases to handle freeing
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 11/23] nvme: move nvme_error_status to common code
  2015-11-30  8:36 NVMe driver split for Linux 4.5 Christoph Hellwig
                   ` (9 preceding siblings ...)
  2015-11-30  8:36 ` [PATCH 10/23] nvme: factor out a nvme_unmap_data helper Christoph Hellwig
@ 2015-11-30  8:36 ` Christoph Hellwig
  2015-11-30  8:36 ` [PATCH 12/23] nvme: move nvme_setup_flush and nvme_setup_rw " Christoph Hellwig
                   ` (5 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2015-11-30  8:36 UTC (permalink / raw)


And mark it inline so that we don't slow down the completion path by
having to turn it into a forced out of line call.

Signed-off-by: Christoph Hellwig <hch at lst.de>
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/nvme.h | 12 ++++++++++++
 drivers/nvme/host/pci.c  | 12 ------------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 19583e1..9f77126 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -85,6 +85,18 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
 	return (sector >> (ns->lba_shift - 9));
 }
 
+static inline int nvme_error_status(u16 status)
+{
+	switch (status & 0x7ff) {
+	case NVME_SC_SUCCESS:
+		return 0;
+	case NVME_SC_CAP_EXCEEDED:
+		return -ENOSPC;
+	default:
+		return -EIO;
+	}
+}
+
 int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 		void *buf, unsigned bufflen);
 int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 801d51d..d29d36d 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -547,18 +547,6 @@ static void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
 		kfree(iod);
 }
 
-static int nvme_error_status(u16 status)
-{
-	switch (status & 0x7ff) {
-	case NVME_SC_SUCCESS:
-		return 0;
-	case NVME_SC_CAP_EXCEEDED:
-		return -ENOSPC;
-	default:
-		return -EIO;
-	}
-}
-
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi)
 {
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 12/23] nvme: move nvme_setup_flush and nvme_setup_rw to common code
  2015-11-30  8:36 NVMe driver split for Linux 4.5 Christoph Hellwig
                   ` (10 preceding siblings ...)
  2015-11-30  8:36 ` [PATCH 11/23] nvme: move nvme_error_status to common code Christoph Hellwig
@ 2015-11-30  8:36 ` Christoph Hellwig
  2015-11-30  8:36 ` [PATCH 13/23] nvme: split __nvme_submit_sync_cmd Christoph Hellwig
                   ` (4 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2015-11-30  8:36 UTC (permalink / raw)


And mark them inline so that we don't slow down the I/O submission path by
having to turn them into forced out-of-line calls.

Signed-off-by: Christoph Hellwig <hch at lst.de>
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/nvme.h | 51 ++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/nvme/host/pci.c  | 49 ----------------------------------------------
 2 files changed, 51 insertions(+), 49 deletions(-)

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9f77126..6417412 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -85,6 +85,57 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
 	return (sector >> (ns->lba_shift - 9));
 }
 
+static inline void nvme_setup_flush(struct nvme_ns *ns,
+		struct nvme_command *cmnd)
+{
+	memset(cmnd, 0, sizeof(*cmnd));
+	cmnd->common.opcode = nvme_cmd_flush;
+	cmnd->common.nsid = cpu_to_le32(ns->ns_id);
+}
+
+static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
+		struct nvme_command *cmnd)
+{
+	u16 control = 0;
+	u32 dsmgmt = 0;
+
+	if (req->cmd_flags & REQ_FUA)
+		control |= NVME_RW_FUA;
+	if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
+		control |= NVME_RW_LR;
+
+	if (req->cmd_flags & REQ_RAHEAD)
+		dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
+
+	memset(cmnd, 0, sizeof(*cmnd));
+	cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
+	cmnd->rw.command_id = req->tag;
+	cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
+	cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+	cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+
+	if (ns->ms) {
+		switch (ns->pi_type) {
+		case NVME_NS_DPS_PI_TYPE3:
+			control |= NVME_RW_PRINFO_PRCHK_GUARD;
+			break;
+		case NVME_NS_DPS_PI_TYPE1:
+		case NVME_NS_DPS_PI_TYPE2:
+			control |= NVME_RW_PRINFO_PRCHK_GUARD |
+					NVME_RW_PRINFO_PRCHK_REF;
+			cmnd->rw.reftag = cpu_to_le32(
+					nvme_block_nr(ns, blk_rq_pos(req)));
+			break;
+		}
+		if (!blk_integrity_rq(req))
+			control |= NVME_RW_PRINFO_PRACT;
+	}
+
+	cmnd->rw.control = cpu_to_le16(control);
+	cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
+}
+
+
 static inline int nvme_error_status(u16 status)
 {
 	switch (status & 0x7ff) {
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d29d36d..c2d2b8a 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -863,55 +863,6 @@ static int nvme_setup_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 	return BLK_MQ_RQ_QUEUE_OK;
 }
 
-static void nvme_setup_flush(struct nvme_ns *ns, struct nvme_command *cmnd)
-{
-	memset(cmnd, 0, sizeof(*cmnd));
-	cmnd->common.opcode = nvme_cmd_flush;
-	cmnd->common.nsid = cpu_to_le32(ns->ns_id);
-}
-
-static void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
-		struct nvme_command *cmnd)
-{
-	u16 control = 0;
-	u32 dsmgmt = 0;
-
-	if (req->cmd_flags & REQ_FUA)
-		control |= NVME_RW_FUA;
-	if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
-		control |= NVME_RW_LR;
-
-	if (req->cmd_flags & REQ_RAHEAD)
-		dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
-
-	memset(cmnd, 0, sizeof(*cmnd));
-	cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
-	cmnd->rw.command_id = req->tag;
-	cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
-	cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
-	cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
-
-	if (ns->ms) {
-		switch (ns->pi_type) {
-		case NVME_NS_DPS_PI_TYPE3:
-			control |= NVME_RW_PRINFO_PRCHK_GUARD;
-			break;
-		case NVME_NS_DPS_PI_TYPE1:
-		case NVME_NS_DPS_PI_TYPE2:
-			control |= NVME_RW_PRINFO_PRCHK_GUARD |
-					NVME_RW_PRINFO_PRCHK_REF;
-			cmnd->rw.reftag = cpu_to_le32(
-					nvme_block_nr(ns, blk_rq_pos(req)));
-			break;
-		}
-		if (!blk_integrity_rq(req))
-			control |= NVME_RW_PRINFO_PRACT;
-	}
-
-	cmnd->rw.control = cpu_to_le16(control);
-	cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
-}
-
 /*
  * NOTE: ns is NULL when called on the admin queue.
  */
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 13/23] nvme: split __nvme_submit_sync_cmd
  2015-11-30  8:36 NVMe driver split for Linux 4.5 Christoph Hellwig
                   ` (11 preceding siblings ...)
  2015-11-30  8:36 ` [PATCH 12/23] nvme: move nvme_setup_flush and nvme_setup_rw " Christoph Hellwig
@ 2015-11-30  8:36 ` Christoph Hellwig
  2015-11-30  8:36 ` [PATCH 14/23] nvme: use the block layer for userspace passthrough metadata Christoph Hellwig
                   ` (3 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2015-11-30  8:36 UTC (permalink / raw)


Add a separate nvme_submit_user_cmd for commands that directly DMA
to or from userspace.  We'll add metadata support to that soon and
the common version would become too messy.

Signed-off-by: Christoph Hellwig <hch at lst.de>
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/core.c | 81 ++++++++++++++++++++++++++++++++++--------------
 drivers/nvme/host/nvme.h |  8 +++--
 drivers/nvme/host/pci.c  |  6 ++--
 drivers/nvme/host/scsi.c |  4 +--
 4 files changed, 68 insertions(+), 31 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index ca54a34..c6b7b17 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -21,22 +21,15 @@
 
 #include "nvme.h"
 
-/*
- * Returns 0 on success.  If the result is negative, it's a Linux error code;
- * if the result is positive, it's an NVM Express status code
- */
-int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
-		void *buffer, void __user *ubuffer, unsigned bufflen,
-		u32 *result, unsigned timeout)
+struct request *nvme_alloc_request(struct request_queue *q,
+		struct nvme_command *cmd, unsigned int flags)
 {
 	bool write = cmd->common.opcode & 1;
-	struct bio *bio = NULL;
 	struct request *req;
-	int ret;
 
-	req = blk_mq_alloc_request(q, write, 0);
+	req = blk_mq_alloc_request(q, write, flags);
 	if (IS_ERR(req))
-		return PTR_ERR(req);
+		return req;
 
 	req->cmd_type = REQ_TYPE_DRV_PRIV;
 	req->cmd_flags |= REQ_FAILFAST_DRIVER;
@@ -44,17 +37,65 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 	req->__sector = (sector_t) -1;
 	req->bio = req->biotail = NULL;
 
-	req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
-
 	req->cmd = (unsigned char *)cmd;
 	req->cmd_len = sizeof(struct nvme_command);
 	req->special = (void *)0;
 
+	return req;
+}
+
+/*
+ * Returns 0 on success.  If the result is negative, it's a Linux error code;
+ * if the result is positive, it's an NVM Express status code
+ */
+int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+		void *buffer, unsigned bufflen, u32 *result, unsigned timeout)
+{
+	struct request *req;
+	int ret;
+
+	req = nvme_alloc_request(q, cmd, 0);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+
 	if (buffer && bufflen) {
 		ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
 		if (ret)
 			goto out;
-	} else if (ubuffer && bufflen) {
+	}
+
+	blk_execute_rq(req->q, NULL, req, 0);
+	if (result)
+		*result = (u32)(uintptr_t)req->special;
+	ret = req->errors;
+ out:
+	blk_mq_free_request(req);
+	return ret;
+}
+
+int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+		void *buffer, unsigned bufflen)
+{
+	return __nvme_submit_sync_cmd(q, cmd, buffer, bufflen, NULL, 0);
+}
+
+int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
+		void __user *ubuffer, unsigned bufflen, u32 *result,
+		unsigned timeout)
+{
+	struct bio *bio = NULL;
+	struct request *req;
+	int ret;
+
+	req = nvme_alloc_request(q, cmd, 0);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+
+	if (ubuffer && bufflen) {
 		ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
 				GFP_KERNEL);
 		if (ret)
@@ -73,12 +114,6 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 	return ret;
 }
 
-int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
-		void *buffer, unsigned bufflen)
-{
-	return __nvme_submit_sync_cmd(q, cmd, buffer, NULL, bufflen, NULL, 0);
-}
-
 int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
 {
 	struct nvme_command c = { };
@@ -131,8 +166,7 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
 	c.features.prp1 = cpu_to_le64(dma_addr);
 	c.features.fid = cpu_to_le32(fid);
 
-	return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0,
-			result, 0);
+	return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
 }
 
 int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
@@ -146,8 +180,7 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
 	c.features.fid = cpu_to_le32(fid);
 	c.features.dword11 = cpu_to_le32(dword11);
 
-	return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0,
-			result, 0);
+	return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
 }
 
 int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 6417412..0c1dc63 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -148,11 +148,15 @@ static inline int nvme_error_status(u16 status)
 	}
 }
 
+struct request *nvme_alloc_request(struct request_queue *q,
+		struct nvme_command *cmd, unsigned int flags);
 int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 		void *buf, unsigned bufflen);
 int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
-		void *buffer, void __user *ubuffer, unsigned bufflen,
-		u32 *result, unsigned timeout);
+		void *buffer, unsigned bufflen,  u32 *result, unsigned timeout);
+int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
+		void __user *ubuffer, unsigned bufflen, u32 *result,
+		unsigned timeout);
 int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id);
 int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
 		struct nvme_id_ns **id);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index c2d2b8a..91e013b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1697,7 +1697,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	c.rw.appmask = cpu_to_le16(io.appmask);
 	c.rw.metadata = cpu_to_le64(meta_dma);
 
-	status = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
+	status = nvme_submit_user_cmd(ns->queue, &c,
 			(void __user *)(uintptr_t)io.addr, length, NULL, 0);
  unmap:
 	if (meta) {
@@ -1739,8 +1739,8 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	if (cmd.timeout_ms)
 		timeout = msecs_to_jiffies(cmd.timeout_ms);
 
-	status = __nvme_submit_sync_cmd(ns ? ns->queue : ctrl->admin_q, &c,
-			NULL, (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
+	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
+			(void __user *)(uintptr_t)cmd.addr, cmd.data_len,
 			&cmd.result, timeout);
 	if (status >= 0) {
 		if (put_user(cmd.result, &ucmd->result))
diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c
index bba2955..eaf7256 100644
--- a/drivers/nvme/host/scsi.c
+++ b/drivers/nvme/host/scsi.c
@@ -1327,7 +1327,7 @@ static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr
 	c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
 	c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
 
-	nvme_sc = __nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, NULL,
+	nvme_sc = nvme_submit_user_cmd(ns->ctrl->admin_q, &c,
 			hdr->dxferp, tot_len, NULL, 0);
 	return nvme_trans_status_code(hdr, nvme_sc);
 }
@@ -1731,7 +1731,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 			nvme_sc = NVME_SC_LBA_RANGE;
 			break;
 		}
-		nvme_sc = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
+		nvme_sc = nvme_submit_user_cmd(ns->queue, &c,
 				next_mapping_addr, unit_len, NULL, 0);
 		if (nvme_sc)
 			break;
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 14/23] nvme: use the block layer for userspace passthrough metadata
  2015-11-30  8:36 NVMe driver split for Linux 4.5 Christoph Hellwig
                   ` (12 preceding siblings ...)
  2015-11-30  8:36 ` [PATCH 13/23] nvme: split __nvme_submit_sync_cmd Christoph Hellwig
@ 2015-11-30  8:36 ` Christoph Hellwig
  2015-11-30  8:36 ` [PATCH 15/23] nvme: move block_device_operations and ns/ctrl freeing to common code Christoph Hellwig
                   ` (2 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2015-11-30  8:36 UTC (permalink / raw)


From: Keith Busch <keith.busch@intel.com>

Use the integrity API to pass through metadata from userspace.  For
PI-enabled devices this means that we now validate the reftag, which seems
like an unintentional omission in the old code.

Thanks to Keith Busch for testing and fixes.

Signed-off-by: Christoph Hellwig <hch at lst.de>
[Skip metadata setup on admin commands]
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/core.c | 83 ++++++++++++++++++++++++++++++++++++++++++------
 drivers/nvme/host/nvme.h |  4 +++
 drivers/nvme/host/pci.c  | 39 +++--------------------
 3 files changed, 83 insertions(+), 43 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c6b7b17..cc28150 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -81,12 +81,17 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 	return __nvme_submit_sync_cmd(q, cmd, buffer, bufflen, NULL, 0);
 }
 
-int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
-		void __user *ubuffer, unsigned bufflen, u32 *result,
-		unsigned timeout)
+int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
+		void __user *ubuffer, unsigned bufflen,
+		void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
+		u32 *result, unsigned timeout)
 {
-	struct bio *bio = NULL;
+	bool write = cmd->common.opcode & 1;
+	struct nvme_ns *ns = q->queuedata;
+	struct gendisk *disk = ns ? ns->disk : NULL;
 	struct request *req;
+	struct bio *bio = NULL;
+	void *meta = NULL;
 	int ret;
 
 	req = nvme_alloc_request(q, cmd, 0);
@@ -101,19 +106,79 @@ int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
 		if (ret)
 			goto out;
 		bio = req->bio;
-	}
 
-	blk_execute_rq(req->q, NULL, req, 0);
-	if (bio)
-		blk_rq_unmap_user(bio);
+		if (!disk)
+			goto submit;
+		bio->bi_bdev = bdget_disk(disk, 0);
+		if (!bio->bi_bdev) {
+			ret = -ENODEV;
+			goto out_unmap;
+		}
+
+		if (meta_buffer) {
+			struct bio_integrity_payload *bip;
+
+			meta = kmalloc(meta_len, GFP_KERNEL);
+			if (!meta) {
+				ret = -ENOMEM;
+				goto out_unmap;
+			}
+
+			if (write) {
+				if (copy_from_user(meta, meta_buffer,
+						meta_len)) {
+					ret = -EFAULT;
+					goto out_free_meta;
+				}
+			}
+
+			bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
+			if (!bip) {
+				ret = -ENOMEM;
+				goto out_free_meta;
+			}
+
+			bip->bip_iter.bi_size = meta_len;
+			bip->bip_iter.bi_sector = meta_seed;
+
+			ret = bio_integrity_add_page(bio, virt_to_page(meta),
+					meta_len, offset_in_page(meta));
+			if (ret != meta_len) {
+				ret = -ENOMEM;
+				goto out_free_meta;
+			}
+		}
+	}
+ submit:
+	blk_execute_rq(req->q, disk, req, 0);
+	ret = req->errors;
 	if (result)
 		*result = (u32)(uintptr_t)req->special;
-	ret = req->errors;
+	if (meta && !ret && !write) {
+		if (copy_to_user(meta_buffer, meta, meta_len))
+			ret = -EFAULT;
+	}
+ out_free_meta:
+	kfree(meta);
+ out_unmap:
+	if (bio) {
+		if (disk && bio->bi_bdev)
+			bdput(bio->bi_bdev);
+		blk_rq_unmap_user(bio);
+	}
  out:
 	blk_mq_free_request(req);
 	return ret;
 }
 
+int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
+		void __user *ubuffer, unsigned bufflen, u32 *result,
+		unsigned timeout)
+{
+	return __nvme_submit_user_cmd(q, cmd, ubuffer, bufflen, NULL, 0, 0,
+			result, timeout);
+}
+
 int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
 {
 	struct nvme_command c = { };
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 0c1dc63..5ba9acb 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -157,6 +157,10 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
 		void __user *ubuffer, unsigned bufflen, u32 *result,
 		unsigned timeout);
+int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
+		void __user *ubuffer, unsigned bufflen,
+		void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
+		u32 *result, unsigned timeout);
 int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id);
 int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
 		struct nvme_id_ns **id);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 91e013b..aa033f0 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1635,13 +1635,9 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 
 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 {
-	struct nvme_dev *dev = to_nvme_dev(ns->ctrl);
 	struct nvme_user_io io;
 	struct nvme_command c;
 	unsigned length, meta_len;
-	int status, write;
-	dma_addr_t meta_dma = 0;
-	void *meta = NULL;
 	void __user *metadata;
 
 	if (copy_from_user(&io, uio, sizeof(io)))
@@ -1659,29 +1655,13 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	length = (io.nblocks + 1) << ns->lba_shift;
 	meta_len = (io.nblocks + 1) * ns->ms;
 	metadata = (void __user *)(uintptr_t)io.metadata;
-	write = io.opcode & 1;
 
 	if (ns->ext) {
 		length += meta_len;
 		meta_len = 0;
-	}
-	if (meta_len) {
-		if (((io.metadata & 3) || !io.metadata) && !ns->ext)
+	} else if (meta_len) {
+		if ((io.metadata & 3) || !io.metadata)
 			return -EINVAL;
-
-		meta = dma_alloc_coherent(dev->dev, meta_len,
-						&meta_dma, GFP_KERNEL);
-
-		if (!meta) {
-			status = -ENOMEM;
-			goto unmap;
-		}
-		if (write) {
-			if (copy_from_user(meta, metadata, meta_len)) {
-				status = -EFAULT;
-				goto unmap;
-			}
-		}
 	}
 
 	memset(&c, 0, sizeof(c));
@@ -1695,19 +1675,10 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	c.rw.reftag = cpu_to_le32(io.reftag);
 	c.rw.apptag = cpu_to_le16(io.apptag);
 	c.rw.appmask = cpu_to_le16(io.appmask);
-	c.rw.metadata = cpu_to_le64(meta_dma);
 
-	status = nvme_submit_user_cmd(ns->queue, &c,
-			(void __user *)(uintptr_t)io.addr, length, NULL, 0);
- unmap:
-	if (meta) {
-		if (status == NVME_SC_SUCCESS && !write) {
-			if (copy_to_user(metadata, meta, meta_len))
-				status = -EFAULT;
-		}
-		dma_free_coherent(dev->dev, meta_len, meta, meta_dma);
-	}
-	return status;
+	return __nvme_submit_user_cmd(ns->queue, &c,
+			(void __user *)(uintptr_t)io.addr, length,
+			metadata, meta_len, io.slba, NULL, 0);
 }
 
 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 15/23] nvme: move block_device_operations and ns/ctrl freeing to common code
  2015-11-30  8:36 NVMe driver split for Linux 4.5 Christoph Hellwig
                   ` (13 preceding siblings ...)
  2015-11-30  8:36 ` [PATCH 14/23] nvme: use the block layer for userspace passthrough metadata Christoph Hellwig
@ 2015-11-30  8:36 ` Christoph Hellwig
  2015-11-30  8:36 ` [PATCH 16/23] nvme: add explicit quirk handling Christoph Hellwig
  2015-12-01 18:01 ` NVMe driver split for Linux 4.5 Jens Axboe
  16 siblings, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2015-11-30  8:36 UTC (permalink / raw)


This moves the block_device_operations over to common code mostly
as-is.  The only change is that the ns and ctrl refcounting got some
small refactoring to add wrappers around the kref_put operations.

A new free_ctrl operation is added to allow the PCI driver to free
its resources on the final drop.

Signed-off-by: Christoph Hellwig <hch at lst.de>
[Moved the integrity and pr changes due to merge conflict]
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/core.c | 413 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/nvme/host/nvme.h |  14 ++
 drivers/nvme/host/pci.c  | 412 ++--------------------------------------------
 3 files changed, 439 insertions(+), 400 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index cc28150..63ec86a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -15,12 +15,55 @@
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
 #include <linux/errno.h>
+#include <linux/hdreg.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/types.h>
+#include <linux/pr.h>
+#include <linux/ptrace.h>
+#include <linux/nvme_ioctl.h>
+#include <linux/t10-pi.h>
+#include <scsi/sg.h>
+#include <asm/unaligned.h>
 
 #include "nvme.h"
 
+DEFINE_SPINLOCK(dev_list_lock);
+
+static void nvme_free_ns(struct kref *kref)
+{
+	struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
+
+	if (ns->type == NVME_NS_LIGHTNVM)
+		nvme_nvm_unregister(ns->queue, ns->disk->disk_name);
+
+	spin_lock(&dev_list_lock);
+	ns->disk->private_data = NULL;
+	spin_unlock(&dev_list_lock);
+
+	nvme_put_ctrl(ns->ctrl);
+	put_disk(ns->disk);
+	kfree(ns);
+}
+
+void nvme_put_ns(struct nvme_ns *ns)
+{
+	kref_put(&ns->kref, nvme_free_ns);
+}
+
+static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk)
+{
+	struct nvme_ns *ns;
+
+	spin_lock(&dev_list_lock);
+	ns = disk->private_data;
+	if (ns && !kref_get_unless_zero(&ns->kref))
+		ns = NULL;
+	spin_unlock(&dev_list_lock);
+
+	return ns;
+}
+
 struct request *nvme_alloc_request(struct request_queue *q,
 		struct nvme_command *cmd, unsigned int flags)
 {
@@ -269,3 +312,373 @@ int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
 		kfree(*log);
 	return error;
 }
+
+static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
+{
+	struct nvme_user_io io;
+	struct nvme_command c;
+	unsigned length, meta_len;
+	void __user *metadata;
+
+	if (copy_from_user(&io, uio, sizeof(io)))
+		return -EFAULT;
+
+	switch (io.opcode) {
+	case nvme_cmd_write:
+	case nvme_cmd_read:
+	case nvme_cmd_compare:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	length = (io.nblocks + 1) << ns->lba_shift;
+	meta_len = (io.nblocks + 1) * ns->ms;
+	metadata = (void __user *)(uintptr_t)io.metadata;
+
+	if (ns->ext) {
+		length += meta_len;
+		meta_len = 0;
+	} else if (meta_len) {
+		if ((io.metadata & 3) || !io.metadata)
+			return -EINVAL;
+	}
+
+	memset(&c, 0, sizeof(c));
+	c.rw.opcode = io.opcode;
+	c.rw.flags = io.flags;
+	c.rw.nsid = cpu_to_le32(ns->ns_id);
+	c.rw.slba = cpu_to_le64(io.slba);
+	c.rw.length = cpu_to_le16(io.nblocks);
+	c.rw.control = cpu_to_le16(io.control);
+	c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
+	c.rw.reftag = cpu_to_le32(io.reftag);
+	c.rw.apptag = cpu_to_le16(io.apptag);
+	c.rw.appmask = cpu_to_le16(io.appmask);
+
+	return __nvme_submit_user_cmd(ns->queue, &c,
+			(void __user *)(uintptr_t)io.addr, length,
+			metadata, meta_len, io.slba, NULL, 0);
+}
+
+int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+			struct nvme_passthru_cmd __user *ucmd)
+{
+	struct nvme_passthru_cmd cmd;
+	struct nvme_command c;
+	unsigned timeout = 0;
+	int status;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
+		return -EFAULT;
+
+	memset(&c, 0, sizeof(c));
+	c.common.opcode = cmd.opcode;
+	c.common.flags = cmd.flags;
+	c.common.nsid = cpu_to_le32(cmd.nsid);
+	c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
+	c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
+	c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
+	c.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
+	c.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
+	c.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
+	c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
+	c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);
+
+	if (cmd.timeout_ms)
+		timeout = msecs_to_jiffies(cmd.timeout_ms);
+
+	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
+			(void __user *)(uintptr_t)cmd.addr, cmd.data_len,
+			&cmd.result, timeout);
+	if (status >= 0) {
+		if (put_user(cmd.result, &ucmd->result))
+			return -EFAULT;
+	}
+
+	return status;
+}
+
+static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
+		unsigned int cmd, unsigned long arg)
+{
+	struct nvme_ns *ns = bdev->bd_disk->private_data;
+
+	switch (cmd) {
+	case NVME_IOCTL_ID:
+		force_successful_syscall_return();
+		return ns->ns_id;
+	case NVME_IOCTL_ADMIN_CMD:
+		return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg);
+	case NVME_IOCTL_IO_CMD:
+		return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
+	case NVME_IOCTL_SUBMIT_IO:
+		return nvme_submit_io(ns, (void __user *)arg);
+	case SG_GET_VERSION_NUM:
+		return nvme_sg_get_version_num((void __user *)arg);
+	case SG_IO:
+		return nvme_sg_io(ns, (void __user *)arg);
+	default:
+		return -ENOTTY;
+	}
+}
+
+#ifdef CONFIG_COMPAT
+static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
+			unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case SG_IO:
+		return -ENOIOCTLCMD;
+	}
+	return nvme_ioctl(bdev, mode, cmd, arg);
+}
+#else
+#define nvme_compat_ioctl	NULL
+#endif
+
+static int nvme_open(struct block_device *bdev, fmode_t mode)
+{
+	return nvme_get_ns_from_disk(bdev->bd_disk) ? 0 : -ENXIO;
+}
+
+static void nvme_release(struct gendisk *disk, fmode_t mode)
+{
+	nvme_put_ns(disk->private_data);
+}
+
+static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+	/* some standard values */
+	geo->heads = 1 << 6;
+	geo->sectors = 1 << 5;
+	geo->cylinders = get_capacity(bdev->bd_disk) >> 11;
+	return 0;
+}
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+static void nvme_init_integrity(struct nvme_ns *ns)
+{
+	struct blk_integrity integrity;
+
+	switch (ns->pi_type) {
+	case NVME_NS_DPS_PI_TYPE3:
+		integrity.profile = &t10_pi_type3_crc;
+		break;
+	case NVME_NS_DPS_PI_TYPE1:
+	case NVME_NS_DPS_PI_TYPE2:
+		integrity.profile = &t10_pi_type1_crc;
+		break;
+	default:
+		integrity.profile = NULL;
+		break;
+	}
+	integrity.tuple_size = ns->ms;
+	blk_integrity_register(ns->disk, &integrity);
+	blk_queue_max_integrity_segments(ns->queue, 1);
+}
+#else
+static void nvme_init_integrity(struct nvme_ns *ns)
+{
+}
+#endif /* CONFIG_BLK_DEV_INTEGRITY */
+
+static void nvme_config_discard(struct nvme_ns *ns)
+{
+	u32 logical_block_size = queue_logical_block_size(ns->queue);
+	ns->queue->limits.discard_zeroes_data = 0;
+	ns->queue->limits.discard_alignment = logical_block_size;
+	ns->queue->limits.discard_granularity = logical_block_size;
+	blk_queue_max_discard_sectors(ns->queue, 0xffffffff);
+	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
+}
+
+int nvme_revalidate_disk(struct gendisk *disk)
+{
+	struct nvme_ns *ns = disk->private_data;
+	struct nvme_id_ns *id;
+	u8 lbaf, pi_type;
+	u16 old_ms;
+	unsigned short bs;
+
+	if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) {
+		dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n",
+				__func__, ns->ctrl->instance, ns->ns_id);
+		return -ENODEV;
+	}
+	if (id->ncap == 0) {
+		kfree(id);
+		return -ENODEV;
+	}
+
+	if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) {
+		if (nvme_nvm_register(ns->queue, disk->disk_name)) {
+			dev_warn(ns->ctrl->dev,
+				"%s: LightNVM init failure\n", __func__);
+			kfree(id);
+			return -ENODEV;
+		}
+		ns->type = NVME_NS_LIGHTNVM;
+	}
+
+	old_ms = ns->ms;
+	lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
+	ns->lba_shift = id->lbaf[lbaf].ds;
+	ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
+	ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
+
+	/*
+	 * If identify namespace failed, use default 512 byte block size so
+	 * block layer can use before failing read/write for 0 capacity.
+	 */
+	if (ns->lba_shift == 0)
+		ns->lba_shift = 9;
+	bs = 1 << ns->lba_shift;
+
+	/* XXX: PI implementation requires metadata equal t10 pi tuple size */
+	pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
+					id->dps & NVME_NS_DPS_PI_MASK : 0;
+
+	blk_mq_freeze_queue(disk->queue);
+	if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
+				ns->ms != old_ms ||
+				bs != queue_logical_block_size(disk->queue) ||
+				(ns->ms && ns->ext)))
+		blk_integrity_unregister(disk);
+
+	ns->pi_type = pi_type;
+	blk_queue_logical_block_size(ns->queue, bs);
+
+	if (ns->ms && !ns->ext)
+		nvme_init_integrity(ns);
+
+	if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
+		set_capacity(disk, 0);
+	else
+		set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
+
+	if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
+		nvme_config_discard(ns);
+	blk_mq_unfreeze_queue(disk->queue);
+
+	kfree(id);
+	return 0;
+}
+
+static char nvme_pr_type(enum pr_type type)
+{
+	switch (type) {
+	case PR_WRITE_EXCLUSIVE:
+		return 1;
+	case PR_EXCLUSIVE_ACCESS:
+		return 2;
+	case PR_WRITE_EXCLUSIVE_REG_ONLY:
+		return 3;
+	case PR_EXCLUSIVE_ACCESS_REG_ONLY:
+		return 4;
+	case PR_WRITE_EXCLUSIVE_ALL_REGS:
+		return 5;
+	case PR_EXCLUSIVE_ACCESS_ALL_REGS:
+		return 6;
+	default:
+		return 0;
+	}
+};
+
+static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
+				u64 key, u64 sa_key, u8 op)
+{
+	struct nvme_ns *ns = bdev->bd_disk->private_data;
+	struct nvme_command c;
+	u8 data[16] = { 0, };
+
+	put_unaligned_le64(key, &data[0]);
+	put_unaligned_le64(sa_key, &data[8]);
+
+	memset(&c, 0, sizeof(c));
+	c.common.opcode = op;
+	c.common.nsid = cpu_to_le32(ns->ns_id);
+	c.common.cdw10[0] = cpu_to_le32(cdw10);
+
+	return nvme_submit_sync_cmd(ns->queue, &c, data, 16);
+}
+
+static int nvme_pr_register(struct block_device *bdev, u64 old,
+		u64 new, unsigned flags)
+{
+	u32 cdw10;
+
+	if (flags & ~PR_FL_IGNORE_KEY)
+		return -EOPNOTSUPP;
+
+	cdw10 = old ? 2 : 0;
+	cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
+	cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
+	return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
+}
+
+static int nvme_pr_reserve(struct block_device *bdev, u64 key,
+		enum pr_type type, unsigned flags)
+{
+	u32 cdw10;
+
+	if (flags & ~PR_FL_IGNORE_KEY)
+		return -EOPNOTSUPP;
+
+	cdw10 = nvme_pr_type(type) << 8;
+	cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0);
+	return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
+}
+
+static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
+		enum pr_type type, bool abort)
+{
+	u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1);
+	return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
+}
+
+static int nvme_pr_clear(struct block_device *bdev, u64 key)
+{
+	u32 cdw10 = 1 | (key ? 1 << 3 : 0);
+	return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register);
+}
+
+static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
+{
+	u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0);
+	return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
+}
+
+static const struct pr_ops nvme_pr_ops = {
+	.pr_register	= nvme_pr_register,
+	.pr_reserve	= nvme_pr_reserve,
+	.pr_release	= nvme_pr_release,
+	.pr_preempt	= nvme_pr_preempt,
+	.pr_clear	= nvme_pr_clear,
+};
+
+const struct block_device_operations nvme_fops = {
+	.owner		= THIS_MODULE,
+	.ioctl		= nvme_ioctl,
+	.compat_ioctl	= nvme_compat_ioctl,
+	.open		= nvme_open,
+	.release	= nvme_release,
+	.getgeo		= nvme_getgeo,
+	.revalidate_disk= nvme_revalidate_disk,
+	.pr_ops		= &nvme_pr_ops,
+};
+
+static void nvme_free_ctrl(struct kref *kref)
+{
+	struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref);
+
+	ctrl->ops->free_ctrl(ctrl);
+}
+
+void nvme_put_ctrl(struct nvme_ctrl *ctrl)
+{
+	kref_put(&ctrl->kref, nvme_free_ctrl);
+}
+
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 5ba9acb..3b3f855 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -19,6 +19,8 @@
 #include <linux/kref.h>
 #include <linux/blk-mq.h>
 
+struct nvme_passthru_cmd;
+
 extern unsigned char nvme_io_timeout;
 #define NVME_IO_TIMEOUT	(nvme_io_timeout * HZ)
 
@@ -34,6 +36,7 @@ struct nvme_ctrl {
 	const struct nvme_ctrl_ops *ops;
 	struct request_queue *admin_q;
 	struct device *dev;
+	struct kref kref;
 	int instance;
 
 	char name[12];
@@ -69,6 +72,7 @@ struct nvme_ns {
 
 struct nvme_ctrl_ops {
 	int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
+	void (*free_ctrl)(struct nvme_ctrl *ctrl);
 };
 
 static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl)
@@ -148,6 +152,9 @@ static inline int nvme_error_status(u16 status)
 	}
 }
 
+void nvme_put_ctrl(struct nvme_ctrl *ctrl);
+void nvme_put_ns(struct nvme_ns *ns);
+
 struct request *nvme_alloc_request(struct request_queue *q,
 		struct nvme_command *cmd, unsigned int flags);
 int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
@@ -170,6 +177,13 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
 int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
 			dma_addr_t dma_addr, u32 *result);
 
+extern const struct block_device_operations nvme_fops;
+extern spinlock_t dev_list_lock;
+
+int nvme_revalidate_disk(struct gendisk *disk);
+int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+			struct nvme_passthru_cmd __user *ucmd);
+
 struct sg_io_hdr;
 
 int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index aa033f0..e0f40af 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -79,7 +79,6 @@ static bool use_cmb_sqes = true;
 module_param(use_cmb_sqes, bool, 0644);
 MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
 
-static DEFINE_SPINLOCK(dev_list_lock);
 static LIST_HEAD(dev_list);
 static struct task_struct *nvme_thread;
 static struct workqueue_struct *nvme_workq;
@@ -127,7 +126,6 @@ struct nvme_dev {
 	struct msix_entry *entry;
 	void __iomem *bar;
 	struct list_head namespaces;
-	struct kref kref;
 	struct device *device;
 	struct work_struct reset_work;
 	struct work_struct probe_work;
@@ -601,27 +599,6 @@ static void nvme_dif_remap(struct request *req,
 	}
 	kunmap_atomic(pmap);
 }
-
-static void nvme_init_integrity(struct nvme_ns *ns)
-{
-	struct blk_integrity integrity;
-
-	switch (ns->pi_type) {
-	case NVME_NS_DPS_PI_TYPE3:
-		integrity.profile = &t10_pi_type3_crc;
-		break;
-	case NVME_NS_DPS_PI_TYPE1:
-	case NVME_NS_DPS_PI_TYPE2:
-		integrity.profile = &t10_pi_type1_crc;
-		break;
-	default:
-		integrity.profile = NULL;
-		break;
-	}
-	integrity.tuple_size = ns->ms;
-	blk_integrity_register(ns->disk, &integrity);
-	blk_queue_max_integrity_segments(ns->queue, 1);
-}
 #else /* CONFIG_BLK_DEV_INTEGRITY */
 static void nvme_dif_remap(struct request *req,
 			void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi))
@@ -633,9 +610,6 @@ static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi)
 static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
 {
 }
-static void nvme_init_integrity(struct nvme_ns *ns)
-{
-}
 #endif
 
 static void req_completion(struct nvme_queue *nvmeq, void *ctx,
@@ -1633,94 +1607,6 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	return result;
 }
 
-static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
-{
-	struct nvme_user_io io;
-	struct nvme_command c;
-	unsigned length, meta_len;
-	void __user *metadata;
-
-	if (copy_from_user(&io, uio, sizeof(io)))
-		return -EFAULT;
-
-	switch (io.opcode) {
-	case nvme_cmd_write:
-	case nvme_cmd_read:
-	case nvme_cmd_compare:
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	length = (io.nblocks + 1) << ns->lba_shift;
-	meta_len = (io.nblocks + 1) * ns->ms;
-	metadata = (void __user *)(uintptr_t)io.metadata;
-
-	if (ns->ext) {
-		length += meta_len;
-		meta_len = 0;
-	} else if (meta_len) {
-		if ((io.metadata & 3) || !io.metadata)
-			return -EINVAL;
-	}
-
-	memset(&c, 0, sizeof(c));
-	c.rw.opcode = io.opcode;
-	c.rw.flags = io.flags;
-	c.rw.nsid = cpu_to_le32(ns->ns_id);
-	c.rw.slba = cpu_to_le64(io.slba);
-	c.rw.length = cpu_to_le16(io.nblocks);
-	c.rw.control = cpu_to_le16(io.control);
-	c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
-	c.rw.reftag = cpu_to_le32(io.reftag);
-	c.rw.apptag = cpu_to_le16(io.apptag);
-	c.rw.appmask = cpu_to_le16(io.appmask);
-
-	return __nvme_submit_user_cmd(ns->queue, &c,
-			(void __user *)(uintptr_t)io.addr, length,
-			metadata, meta_len, io.slba, NULL, 0);
-}
-
-static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
-			struct nvme_passthru_cmd __user *ucmd)
-{
-	struct nvme_passthru_cmd cmd;
-	struct nvme_command c;
-	unsigned timeout = 0;
-	int status;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EACCES;
-	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
-		return -EFAULT;
-
-	memset(&c, 0, sizeof(c));
-	c.common.opcode = cmd.opcode;
-	c.common.flags = cmd.flags;
-	c.common.nsid = cpu_to_le32(cmd.nsid);
-	c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
-	c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
-	c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
-	c.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
-	c.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
-	c.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
-	c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
-	c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);
-
-	if (cmd.timeout_ms)
-		timeout = msecs_to_jiffies(cmd.timeout_ms);
-
-	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
-			(void __user *)(uintptr_t)cmd.addr, cmd.data_len,
-			&cmd.result, timeout);
-	if (status >= 0) {
-		if (put_user(cmd.result, &ucmd->result))
-			return -EFAULT;
-	}
-
-	return status;
-}
-
 static int nvme_subsys_reset(struct nvme_dev *dev)
 {
 	if (!dev->subsystem)
@@ -1730,281 +1616,6 @@ static int nvme_subsys_reset(struct nvme_dev *dev)
 	return 0;
 }
 
-static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
-							unsigned long arg)
-{
-	struct nvme_ns *ns = bdev->bd_disk->private_data;
-
-	switch (cmd) {
-	case NVME_IOCTL_ID:
-		force_successful_syscall_return();
-		return ns->ns_id;
-	case NVME_IOCTL_ADMIN_CMD:
-		return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg);
-	case NVME_IOCTL_IO_CMD:
-		return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
-	case NVME_IOCTL_SUBMIT_IO:
-		return nvme_submit_io(ns, (void __user *)arg);
-	case SG_GET_VERSION_NUM:
-		return nvme_sg_get_version_num((void __user *)arg);
-	case SG_IO:
-		return nvme_sg_io(ns, (void __user *)arg);
-	default:
-		return -ENOTTY;
-	}
-}
-
-#ifdef CONFIG_COMPAT
-static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
-					unsigned int cmd, unsigned long arg)
-{
-	switch (cmd) {
-	case SG_IO:
-		return -ENOIOCTLCMD;
-	}
-	return nvme_ioctl(bdev, mode, cmd, arg);
-}
-#else
-#define nvme_compat_ioctl	NULL
-#endif
-
-static void nvme_free_dev(struct kref *kref);
-static void nvme_free_ns(struct kref *kref)
-{
-	struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
-	struct nvme_dev *dev = to_nvme_dev(ns->ctrl);
-
-	if (ns->type == NVME_NS_LIGHTNVM)
-		nvme_nvm_unregister(ns->queue, ns->disk->disk_name);
-
-	spin_lock(&dev_list_lock);
-	ns->disk->private_data = NULL;
-	spin_unlock(&dev_list_lock);
-
-	kref_put(&dev->kref, nvme_free_dev);
-	put_disk(ns->disk);
-	kfree(ns);
-}
-
-static int nvme_open(struct block_device *bdev, fmode_t mode)
-{
-	int ret = 0;
-	struct nvme_ns *ns;
-
-	spin_lock(&dev_list_lock);
-	ns = bdev->bd_disk->private_data;
-	if (!ns)
-		ret = -ENXIO;
-	else if (!kref_get_unless_zero(&ns->kref))
-		ret = -ENXIO;
-	spin_unlock(&dev_list_lock);
-
-	return ret;
-}
-
-static void nvme_release(struct gendisk *disk, fmode_t mode)
-{
-	struct nvme_ns *ns = disk->private_data;
-	kref_put(&ns->kref, nvme_free_ns);
-}
-
-static int nvme_getgeo(struct block_device *bd, struct hd_geometry *geo)
-{
-	/* some standard values */
-	geo->heads = 1 << 6;
-	geo->sectors = 1 << 5;
-	geo->cylinders = get_capacity(bd->bd_disk) >> 11;
-	return 0;
-}
-
-static void nvme_config_discard(struct nvme_ns *ns)
-{
-	u32 logical_block_size = queue_logical_block_size(ns->queue);
-	ns->queue->limits.discard_zeroes_data = 0;
-	ns->queue->limits.discard_alignment = logical_block_size;
-	ns->queue->limits.discard_granularity = logical_block_size;
-	blk_queue_max_discard_sectors(ns->queue, 0xffffffff);
-	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
-}
-
-static int nvme_revalidate_disk(struct gendisk *disk)
-{
-	struct nvme_ns *ns = disk->private_data;
-	struct nvme_dev *dev = to_nvme_dev(ns->ctrl);
-	struct nvme_id_ns *id;
-	u8 lbaf, pi_type;
-	u16 old_ms;
-	unsigned short bs;
-
-	if (nvme_identify_ns(&dev->ctrl, ns->ns_id, &id)) {
-		dev_warn(dev->dev, "%s: Identify failure nvme%dn%d\n", __func__,
-						dev->ctrl.instance, ns->ns_id);
-		return -ENODEV;
-	}
-	if (id->ncap == 0) {
-		kfree(id);
-		return -ENODEV;
-	}
-
-	if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) {
-		if (nvme_nvm_register(ns->queue, disk->disk_name)) {
-			dev_warn(dev->dev,
-				"%s: LightNVM init failure\n", __func__);
-			kfree(id);
-			return -ENODEV;
-		}
-		ns->type = NVME_NS_LIGHTNVM;
-	}
-
-	old_ms = ns->ms;
-	lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
-	ns->lba_shift = id->lbaf[lbaf].ds;
-	ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
-	ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
-
-	/*
-	 * If identify namespace failed, use default 512 byte block size so
-	 * block layer can use before failing read/write for 0 capacity.
-	 */
-	if (ns->lba_shift == 0)
-		ns->lba_shift = 9;
-	bs = 1 << ns->lba_shift;
-
-	/* XXX: PI implementation requires metadata equal t10 pi tuple size */
-	pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
-					id->dps & NVME_NS_DPS_PI_MASK : 0;
-
-	blk_mq_freeze_queue(disk->queue);
-	if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
-				ns->ms != old_ms ||
-				bs != queue_logical_block_size(disk->queue) ||
-				(ns->ms && ns->ext)))
-		blk_integrity_unregister(disk);
-
-	ns->pi_type = pi_type;
-	blk_queue_logical_block_size(ns->queue, bs);
-
-	if (ns->ms && !ns->ext)
-		nvme_init_integrity(ns);
-
-	if ((ns->ms && !(ns->ms == 8 && ns->pi_type) &&
-						!blk_get_integrity(disk)) ||
-						ns->type == NVME_NS_LIGHTNVM)
-		set_capacity(disk, 0);
-	else
-		set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
-
-	if (dev->ctrl.oncs & NVME_CTRL_ONCS_DSM)
-		nvme_config_discard(ns);
-	blk_mq_unfreeze_queue(disk->queue);
-
-	kfree(id);
-	return 0;
-}
-
-static char nvme_pr_type(enum pr_type type)
-{
-	switch (type) {
-	case PR_WRITE_EXCLUSIVE:
-		return 1;
-	case PR_EXCLUSIVE_ACCESS:
-		return 2;
-	case PR_WRITE_EXCLUSIVE_REG_ONLY:
-		return 3;
-	case PR_EXCLUSIVE_ACCESS_REG_ONLY:
-		return 4;
-	case PR_WRITE_EXCLUSIVE_ALL_REGS:
-		return 5;
-	case PR_EXCLUSIVE_ACCESS_ALL_REGS:
-		return 6;
-	default:
-		return 0;
-	}
-};
-
-static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
-				u64 key, u64 sa_key, u8 op)
-{
-	struct nvme_ns *ns = bdev->bd_disk->private_data;
-	struct nvme_command c;
-	u8 data[16] = { 0, };
-
-	put_unaligned_le64(key, &data[0]);
-	put_unaligned_le64(sa_key, &data[8]);
-
-	memset(&c, 0, sizeof(c));
-	c.common.opcode = op;
-	c.common.nsid = cpu_to_le32(ns->ns_id);
-	c.common.cdw10[0] = cpu_to_le32(cdw10);
-
-	return nvme_submit_sync_cmd(ns->queue, &c, data, 16);
-}
-
-static int nvme_pr_register(struct block_device *bdev, u64 old,
-		u64 new, unsigned flags)
-{
-	u32 cdw10;
-
-	if (flags & ~PR_FL_IGNORE_KEY)
-		return -EOPNOTSUPP;
-
-	cdw10 = old ? 2 : 0;
-	cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
-	cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
-	return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
-}
-
-static int nvme_pr_reserve(struct block_device *bdev, u64 key,
-		enum pr_type type, unsigned flags)
-{
-	u32 cdw10;
-
-	if (flags & ~PR_FL_IGNORE_KEY)
-		return -EOPNOTSUPP;
-
-	cdw10 = nvme_pr_type(type) << 8;
-	cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0);
-	return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
-}
-
-static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
-		enum pr_type type, bool abort)
-{
-	u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1;
-	return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
-}
-
-static int nvme_pr_clear(struct block_device *bdev, u64 key)
-{
-	u32 cdw10 = 1 | (key ? 1 << 3 : 0);
-	return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register);
-}
-
-static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
-{
-	u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0;
-	return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
-}
-
-static const struct pr_ops nvme_pr_ops = {
-	.pr_register	= nvme_pr_register,
-	.pr_reserve	= nvme_pr_reserve,
-	.pr_release	= nvme_pr_release,
-	.pr_preempt	= nvme_pr_preempt,
-	.pr_clear	= nvme_pr_clear,
-};
-
-static const struct block_device_operations nvme_fops = {
-	.owner		= THIS_MODULE,
-	.ioctl		= nvme_ioctl,
-	.compat_ioctl	= nvme_compat_ioctl,
-	.open		= nvme_open,
-	.release	= nvme_release,
-	.getgeo		= nvme_getgeo,
-	.revalidate_disk= nvme_revalidate_disk,
-	.pr_ops		= &nvme_pr_ops,
-};
-
 static int nvme_kthread(void *data)
 {
 	struct nvme_dev *dev, *next;
@@ -2105,7 +1716,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
 	if (nvme_revalidate_disk(ns->disk))
 		goto out_free_disk;
 
-	kref_get(&dev->kref);
+	kref_get(&dev->ctrl.kref);
 	if (ns->type != NVME_NS_LIGHTNVM) {
 		add_disk(ns->disk);
 		if (ns->ms) {
@@ -2354,7 +1965,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 		blk_cleanup_queue(ns->queue);
 	}
 	list_del_init(&ns->list);
-	kref_put(&ns->kref, nvme_free_ns);
+	nvme_put_ns(ns);
 }
 
 static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn)
@@ -2828,9 +2439,9 @@ static void nvme_release_instance(struct nvme_dev *dev)
 	spin_unlock(&dev_list_lock);
 }
 
-static void nvme_free_dev(struct kref *kref)
+static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
 {
-	struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
+	struct nvme_dev *dev = to_nvme_dev(ctrl);
 
 	put_device(dev->dev);
 	put_device(dev->device);
@@ -2857,7 +2468,7 @@ static int nvme_dev_open(struct inode *inode, struct file *f)
 				ret = -EWOULDBLOCK;
 				break;
 			}
-			if (!kref_get_unless_zero(&dev->kref))
+			if (!kref_get_unless_zero(&dev->ctrl.kref))
 				break;
 			f->private_data = dev;
 			ret = 0;
@@ -2872,7 +2483,7 @@ static int nvme_dev_open(struct inode *inode, struct file *f)
 static int nvme_dev_release(struct inode *inode, struct file *f)
 {
 	struct nvme_dev *dev = f->private_data;
-	kref_put(&dev->kref, nvme_free_dev);
+	nvme_put_ctrl(&dev->ctrl);
 	return 0;
 }
 
@@ -2987,19 +2598,19 @@ static int nvme_remove_dead_ctrl(void *arg)
 
 	if (pci_get_drvdata(pdev))
 		pci_stop_and_remove_bus_device_locked(pdev);
-	kref_put(&dev->kref, nvme_free_dev);
+	nvme_put_ctrl(&dev->ctrl);
 	return 0;
 }
 
 static void nvme_dead_ctrl(struct nvme_dev *dev)
 {
 	dev_warn(dev->dev, "Device failed to resume\n");
-	kref_get(&dev->kref);
+	kref_get(&dev->ctrl.kref);
 	if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d",
 						dev->ctrl.instance))) {
 		dev_err(dev->dev,
 			"Failed to start controller remove task\n");
-		kref_put(&dev->kref, nvme_free_dev);
+		nvme_put_ctrl(&dev->ctrl);
 	}
 }
 
@@ -3077,6 +2688,7 @@ static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
 
 static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
 	.reg_read32		= nvme_pci_reg_read32,
+	.free_ctrl		= nvme_pci_free_ctrl,
 };
 
 static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
@@ -3116,7 +2728,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (result)
 		goto release;
 
-	kref_init(&dev->kref);
+	kref_init(&dev->ctrl.kref);
 	dev->device = device_create(nvme_class, &pdev->dev,
 				MKDEV(nvme_char_major, dev->ctrl.instance),
 				dev, "nvme%d", dev->ctrl.instance);
@@ -3189,7 +2801,7 @@ static void nvme_remove(struct pci_dev *pdev)
 	nvme_free_queues(dev, 0);
 	nvme_release_cmb(dev);
 	nvme_release_prp_pools(dev);
-	kref_put(&dev->kref, nvme_free_dev);
+	nvme_put_ctrl(&dev->ctrl);
 }
 
 /* These functions are yet to be implemented */
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 16/23] nvme: add explicit quirk handling
  2015-11-30  8:36 NVMe driver split for Linux 4.5 Christoph Hellwig
                   ` (14 preceding siblings ...)
  2015-11-30  8:36 ` [PATCH 15/23] nvme: move block_device_operations and ns/ctrl freeing to common code Christoph Hellwig
@ 2015-11-30  8:36 ` Christoph Hellwig
  2015-12-01 18:01 ` NVMe driver split for Linux 4.5 Jens Axboe
  16 siblings, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2015-11-30  8:36 UTC (permalink / raw)


Add an enum for all workarounds not in the spec and identify the affected
controllers at probe time.

Signed-off-by: Christoph Hellwig <hch at lst.de>
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/nvme.h | 13 +++++++++++++
 drivers/nvme/host/pci.c  |  8 +++++---
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 3b3f855..f7f16e3 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -32,6 +32,18 @@ enum {
 	NVME_NS_LIGHTNVM	= 1,
 };
 
+/*
+ * List of workarounds for devices that required behavior not specified in
+ * the standard.
+ */
+enum nvme_quirks {
+	/*
+	 * Prefers I/O aligned to a stripe size specified in a vendor
+	 * specific Identify field.
+	 */
+	NVME_QUIRK_STRIPE_SIZE			= (1 << 0),
+};
+
 struct nvme_ctrl {
 	const struct nvme_ctrl_ops *ops;
 	struct request_queue *admin_q;
@@ -47,6 +59,7 @@ struct nvme_ctrl {
 	u16 abort_limit;
 	u8 event_limit;
 	u8 vwc;
+	unsigned long quirks;
 };
 
 /*
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index e0f40af..27d7449 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2026,7 +2026,6 @@ static void nvme_dev_scan(struct work_struct *work)
  */
 static int nvme_dev_add(struct nvme_dev *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev->dev);
 	int res;
 	struct nvme_id_ctrl *ctrl;
 	int shift = NVME_CAP_MPSMIN(lo_hi_readq(dev->bar + NVME_REG_CAP)) + 12;
@@ -2047,8 +2046,8 @@ static int nvme_dev_add(struct nvme_dev *dev)
 		dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9);
 	else
 		dev->max_hw_sectors = UINT_MAX;
-	if ((pdev->vendor == PCI_VENDOR_ID_INTEL) &&
-			(pdev->device == 0x0953) && ctrl->vs[3]) {
+
+	if ((dev->ctrl.quirks & NVME_QUIRK_STRIPE_SIZE) && ctrl->vs[3]) {
 		unsigned int max_hw_sectors;
 
 		dev->stripe_size = 1 << (ctrl->vs[3] + shift);
@@ -2719,6 +2718,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	dev->ctrl.ops = &nvme_pci_ctrl_ops;
 	dev->ctrl.dev = dev->dev;
+	dev->ctrl.quirks = id->driver_data;
 
 	result = nvme_set_instance(dev);
 	if (result)
@@ -2846,6 +2846,8 @@ static const struct pci_error_handlers nvme_err_handler = {
 #define PCI_CLASS_STORAGE_EXPRESS	0x010802
 
 static const struct pci_device_id nvme_id_table[] = {
+	{ PCI_VDEVICE(INTEL, 0x0953),
+		.driver_data = NVME_QUIRK_STRIPE_SIZE, },
 	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
 	{ 0, }
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* NVMe driver split for Linux 4.5
  2015-11-30  8:36 NVMe driver split for Linux 4.5 Christoph Hellwig
                   ` (15 preceding siblings ...)
  2015-11-30  8:36 ` [PATCH 16/23] nvme: add explicit quirk handling Christoph Hellwig
@ 2015-12-01 18:01 ` Jens Axboe
  16 siblings, 0 replies; 18+ messages in thread
From: Jens Axboe @ 2015-12-01 18:01 UTC (permalink / raw)


On 11/30/2015 01:36 AM, Christoph Hellwig wrote:
> This series contains the split of the NVMe driver into a common core part
> and a PCIe-specific frontend.
> nvme driver split to show how I want to get started.
>
> To make testing easier I've pushed out a git tree with this and all
> dependencies:
>
>      git://git.infradead.org/users/hch/block.git nvme-split.6
>
> or in gitweb:
>
>      https://urldefense.proofpoint.com/v2/url?u=http-3A__git.infradead.org_users_hch_block.git_shortlog_refs_heads_nvme-2Dsplit.6&d=CwIBAg&c=5VD0RTtNlTh3ycd41b3MUw&r=cK1a7KivzZRh1fKQMjSm2A&m=CHSSJfe8EfWpVJ_bCvWDDBlIAgwYVoXEPiF3tz_LeTY&s=x1qUJfAkEfzsYRtJchSLNoGBLTUdxeve1OOrHlVQuWM&e=
>
> Changes since the previous post from the patchbomb:
>    - use vendor ID from Identify data in the SCSI translation layer
>    - add common enable/shutdown code from the NVMe loop series

I've added this series for 4.5, it's in the for-4.5/nvme branch.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2015-12-01 18:01 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2015-11-30  8:36 NVMe driver split for Linux 4.5 Christoph Hellwig
2015-11-30  8:36 ` [PATCH 01/23] blk-mq: add a flags parameter to blk_mq_alloc_request Christoph Hellwig
2015-11-30  8:36 ` [PATCH 02/23] nvme: move struct nvme_iod to pci.c Christoph Hellwig
2015-11-30  8:36 ` [PATCH 03/23] nvme: split command submission helpers out of pci.c Christoph Hellwig
2015-11-30  8:36 ` [PATCH 04/23] nvme: use offset instead of a struct for registers Christoph Hellwig
2015-11-30  8:36 ` [PATCH 05/23] nvme: split nvme_trans_device_id_page Christoph Hellwig
2015-11-30  8:36 ` [PATCH 06/23] nvme: use vendor it from identify Christoph Hellwig
2015-11-30  8:36 ` [PATCH 07/23] nvme: split a new struct nvme_ctrl out of struct nvme_dev Christoph Hellwig
2015-11-30  8:36 ` [PATCH 08/23] nvme: simplify nvme_setup_prps calling convention Christoph Hellwig
2015-11-30  8:36 ` [PATCH 09/23] nvme: refactor nvme_queue_rq Christoph Hellwig
2015-11-30  8:36 ` [PATCH 10/23] nvme: factor out a nvme_unmap_data helper Christoph Hellwig
2015-11-30  8:36 ` [PATCH 11/23] nvme: move nvme_error_status to common code Christoph Hellwig
2015-11-30  8:36 ` [PATCH 12/23] nvme: move nvme_setup_flush and nvme_setup_rw " Christoph Hellwig
2015-11-30  8:36 ` [PATCH 13/23] nvme: split __nvme_submit_sync_cmd Christoph Hellwig
2015-11-30  8:36 ` [PATCH 14/23] nvme: use the block layer for userspace passthrough metadata Christoph Hellwig
2015-11-30  8:36 ` [PATCH 15/23] nvme: move block_device_operations and ns/ctrl freeing to common code Christoph Hellwig
2015-11-30  8:36 ` [PATCH 16/23] nvme: add explicit quirk handling Christoph Hellwig
2015-12-01 18:01 ` NVMe driver split for Linux 4.5 Jens Axboe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.