From: mlin@kernel.org (Ming Lin)
Subject: [RFC PATCH 1/2] virtio_nvme(kernel): virtual NVMe driver using virtio
Date: Wed, 9 Sep 2015 22:48:31 -0700 [thread overview]
Message-ID: <1441864112-12765-2-git-send-email-mlin@kernel.org> (raw)
In-Reply-To: <1441864112-12765-1-git-send-email-mlin@kernel.org>
Signed-off-by: Ming Lin <ming.l at ssi.samsung.com>
---
drivers/block/Kconfig | 7 +
drivers/block/Makefile | 1 +
drivers/block/nvme-core.c | 1 +
drivers/block/virtio_nvme.c | 853 +++++++++++++++++++++++++++++++++++++++
include/linux/virtio_nvme.h | 53 +++
include/uapi/linux/virtio_ids.h | 1 +
include/uapi/linux/virtio_nvme.h | 30 ++
7 files changed, 946 insertions(+)
create mode 100644 drivers/block/virtio_nvme.c
create mode 100644 include/linux/virtio_nvme.h
create mode 100644 include/uapi/linux/virtio_nvme.h
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 1b8094d..7149885 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -519,6 +519,13 @@ config VIRTIO_BLK
This is the virtual block driver for virtio. It can be used with
lguest or QEMU based VMMs (like KVM or Xen). Say Y or M.
+# virtio_nvme.c calls nvme_submit_sync_cmd(), which is exported by
+# nvme-core.c, so the NVMe core driver must be enabled as well.
+config VIRTIO_NVME
+	tristate "Virtio NVMe driver"
+	depends on VIRTIO && BLK_DEV_NVME
+	---help---
+	  This is the virtual NVMe driver for virtio. It can be used with
+	  lguest or QEMU based VMMs (like KVM or Xen). Say Y or M.
+
config BLK_DEV_HD
bool "Very old hard disk (MFM/RLL/IDE) driver"
depends on HAVE_IDE
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 02b688d..3b73f59 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_BLK_DEV_UMEM) += umem.o
obj-$(CONFIG_BLK_DEV_NBD) += nbd.o
obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o
obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o
+obj-$(CONFIG_VIRTIO_NVME) += virtio_nvme.o
obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
obj-$(CONFIG_BLK_DEV_HD) += hd.o
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 7920c27..7895606 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -1059,6 +1059,7 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
{
return __nvme_submit_sync_cmd(q, cmd, buffer, NULL, bufflen, NULL, 0);
}
+EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);
static int nvme_submit_async_admin_req(struct nvme_dev *dev)
{
diff --git a/drivers/block/virtio_nvme.c b/drivers/block/virtio_nvme.c
new file mode 100644
index 0000000..57f81fc
--- /dev/null
+++ b/drivers/block/virtio_nvme.c
@@ -0,0 +1,853 @@
+/* Modified from virtio_blk.c and nvme-core.c */
+
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/hdreg.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/virtio.h>
+#include <linux/virtio_nvme.h>
+#include <linux/scatterlist.h>
+#include <linux/string_helpers.h>
+#include <linux/idr.h>
+#include <linux/blk-mq.h>
+#include <linux/numa.h>
+#include <linux/virtio_nvme.h>
+#include <linux/nvme.h>
+#include <linux/blk-mq.h>
+
+/* Timeout assigned to the admin tag set in virtnvme_alloc_admin_tags(). */
+#define ADMIN_TIMEOUT (2 * HZ)
+/* Queue depth assigned to the admin tag set. */
+#define NVME_AQ_DEPTH 256
+
+/*
+ * Block major number.  virtnvme_init() overwrites it with the value
+ * returned by register_blkdev(0, ...).
+ */
+static int virtnvme_major;
+module_param(virtnvme_major, int, 0);
+
+/*
+ * Depth of the I/O tag set.  When left at 0, virtnvme_dev_add() replaces it
+ * with the number of free entries of the first I/O virtqueue.
+ */
+static unsigned int virtnvme_queue_depth;
+module_param_named(queue_depth, virtnvme_queue_depth, uint, 0444);
+
+/* Taken around dev_list updates and around the instance IDA operations. */
+static DEFINE_SPINLOCK(dev_list_lock);
+static LIST_HEAD(dev_list);
+
+/* Defined further down; needed earlier by virtnvme_free_dev(). */
+static void virtnvme_free_namespaces(struct virtio_nvme_dev *dev);
+
+/* Virtio device IDs this driver binds to; the zero entry ends the table. */
+static const struct virtio_device_id id_table[] = {
+ { VIRTIO_ID_NVME, VIRTIO_DEV_ANY_ID },
+ { 0 },
+};
+
+/*
+ * Per-request driver data.  It lives in the blk-mq PDU of every request
+ * (see blk_mq_rq_to_pdu() users) and is sized by virtnvme_cmd_size().
+ */
+struct virtnvme_req
+{
+ struct request *req; /* block-layer request this PDU belongs to */
+ struct nvme_command cmd; /* command built by virtnvme_setup_io() */
+ struct virtio_nvme_resp resp; /* response buffer added to the virtqueue */
+ struct scatterlist sg[]; /* data scatterlist, dev->sg_elems entries */
+};
+
+/*
+ * Send an Identify command with CNS = 1 on the admin queue.
+ *
+ * On success returns 0 and stores a kmalloc()ed struct nvme_id_ctrl in *id;
+ * the caller owns that buffer and must kfree() it.  On failure returns
+ * -ENOMEM or the value returned by nvme_submit_sync_cmd() and sets *id to
+ * NULL, so the caller is never handed an already-freed pointer.
+ */
+static int virtnvme_identify_ctrl(struct virtio_nvme_dev *dev, struct nvme_id_ctrl **id)
+{
+	struct nvme_command c = { };
+	struct nvme_id_ctrl *buf;
+	int error;
+
+	*id = NULL;
+
+	/* gcc-4.4.4 (at least) has issues with initializers and anon unions */
+	c.identify.opcode = nvme_admin_identify;
+	c.identify.cns = cpu_to_le32(1);
+
+	buf = kmalloc(sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	error = nvme_submit_sync_cmd(dev->admin_q, &c, buf, sizeof(*buf));
+	if (error) {
+		kfree(buf);
+		return error;
+	}
+
+	*id = buf;
+	return 0;
+}
+
+/*
+ * Send an Identify command for namespace @nsid on the admin queue.
+ *
+ * On success returns 0 and stores a kmalloc()ed struct nvme_id_ns in *id;
+ * the caller owns that buffer and must kfree() it.  On failure returns
+ * -ENOMEM or the value returned by nvme_submit_sync_cmd() and sets *id to
+ * NULL.
+ */
+static int virtnvme_identify_ns(struct virtio_nvme_dev *dev, unsigned nsid,
+		struct nvme_id_ns **id)
+{
+	struct nvme_command c = { };
+	struct nvme_id_ns *buf;
+	int error;
+
+	*id = NULL;
+
+	/*
+	 * gcc-4.4.4 (at least) has issues with initializers and anon unions.
+	 * These are separate statements; the original terminated them with
+	 * commas, which chained them into one accidental comma expression.
+	 */
+	c.identify.opcode = nvme_admin_identify;
+	c.identify.nsid = cpu_to_le32(nsid);
+
+	buf = kmalloc(sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	error = nvme_submit_sync_cmd(dev->admin_q, &c, buf, sizeof(*buf));
+	if (error) {
+		kfree(buf);
+		return error;
+	}
+
+	*id = buf;
+	return 0;
+}
+
+/*
+ * Poll the csts field of the virtio config space every 100 ms until
+ * NVME_CSTS_RDY is set.
+ *
+ * The deadline is (NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2 jiffies after entry.
+ * Returns 0 once ready, -EINTR if a fatal signal is pending, or -ENODEV
+ * when the deadline has passed.
+ */
+static int virtnvme_wait_ready(struct virtio_nvme_dev *dev, u64 cap)
+{
+	unsigned long deadline;
+	u32 status;
+
+	deadline = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
+
+	for (;;) {
+		virtio_cread(dev->vdev, struct virtio_nvme_config, csts, &status);
+		if ((status & NVME_CSTS_RDY) == NVME_CSTS_RDY)
+			return 0;
+
+		msleep(100);
+		if (fatal_signal_pending(current))
+			return -EINTR;
+		if (time_after(jiffies, deadline)) {
+			printk("Device not ready; aborting initialisation\n");
+			return -ENODEV;
+		}
+	}
+}
+
+/*
+ * Virtqueue callback for the admin queue: hand every buffer the device has
+ * returned to blk_mq_complete_request(), under the per-queue lock.
+ */
+static void virtnvme_admin_done(struct virtqueue *vq)
+{
+	struct virtio_nvme_dev *dev = vq->vdev->priv;
+	int idx = vq->index;
+	struct virtio_nvme_queue *nvmeq = &dev->vqs[idx];
+	struct virtnvme_req *done;
+	unsigned long irq_flags;
+	unsigned int used_len;
+
+	spin_lock_irqsave(&nvmeq->lock, irq_flags);
+	for (;;) {
+		/* Drain with callbacks off, then re-check after re-enabling. */
+		virtqueue_disable_cb(vq);
+		while ((done = virtqueue_get_buf(nvmeq->vq, &used_len)) != NULL)
+			blk_mq_complete_request(done->req);
+		if (unlikely(virtqueue_is_broken(vq)))
+			break;
+		if (virtqueue_enable_cb(vq))
+			break;
+	}
+	spin_unlock_irqrestore(&nvmeq->lock, irq_flags);
+}
+
+/*
+ * Virtqueue callback for the I/O queues: complete every returned request
+ * under the per-queue lock, then wake dev->queue_wait if anything finished.
+ *
+ * NOTE(review): virtnvme_queue_rq() stops the hardware queue when the ring
+ * is full, and no code visible in this file restarts a stopped queue --
+ * confirm whether this callback should do so.
+ */
+static void virtnvme_io_done(struct virtqueue *vq)
+{
+	struct virtio_nvme_dev *dev = vq->vdev->priv;
+	int idx = vq->index;
+	struct virtio_nvme_queue *nvmeq = &dev->vqs[idx];
+	struct virtnvme_req *done;
+	unsigned long irq_flags;
+	unsigned int used_len;
+	bool completed_any = false;
+
+	spin_lock_irqsave(&nvmeq->lock, irq_flags);
+	for (;;) {
+		virtqueue_disable_cb(vq);
+		for (;;) {
+			done = virtqueue_get_buf(nvmeq->vq, &used_len);
+			if (done == NULL)
+				break;
+			blk_mq_complete_request(done->req);
+			completed_any = true;
+		}
+
+		if (unlikely(virtqueue_is_broken(vq)))
+			break;
+		if (virtqueue_enable_cb(vq))
+			break;
+	}
+	spin_unlock_irqrestore(&nvmeq->lock, irq_flags);
+
+	if (completed_any)
+		wake_up(&dev->queue_wait);
+}
+
+/*
+ * Discover the device's virtqueues and fill in dev->vqs / dev->num_vqs.
+ *
+ * Virtqueue 0 is the admin queue; queues 1..n are I/O queues named
+ * "req.<index>".  The number of I/O queues comes from the num_queues config
+ * field when VIRTIO_NVME_F_MQ is offered, otherwise it is 1.
+ *
+ * Returns 0 on success or a negative errno.  On failure dev->vqs is freed
+ * and reset to NULL.
+ */
+static int virtnvme_init_vq(struct virtio_nvme_dev *dev)
+{
+	struct virtio_device *vdev = dev->vdev;
+	vq_callback_t **callbacks;
+	const char **names;
+	struct virtqueue **vqs;
+	unsigned num_vqs;
+	int err;
+	int i;
+
+	err = virtio_cread_feature(vdev, VIRTIO_NVME_F_MQ,
+				   struct virtio_nvme_config, num_queues,
+				   &num_vqs);
+	/* We need at least one I/O queue, whatever the device says. */
+	if (err || !num_vqs)
+		num_vqs = 1;
+
+	/* One more for the admin queue. */
+	num_vqs++;
+
+	/*
+	 * Every allocation failure below must report -ENOMEM.  Previously err
+	 * still held the virtio_cread_feature() result here, so a failed
+	 * helper allocation could return 0 and leak dev->vqs.
+	 */
+	err = -ENOMEM;
+
+	dev->vqs = kmalloc_array(num_vqs, sizeof(*dev->vqs), GFP_KERNEL);
+	if (!dev->vqs)
+		goto out;
+
+	names = kmalloc_array(num_vqs, sizeof(*names), GFP_KERNEL);
+	if (!names)
+		goto err_names;
+
+	callbacks = kmalloc_array(num_vqs, sizeof(*callbacks), GFP_KERNEL);
+	if (!callbacks)
+		goto err_callbacks;
+
+	vqs = kmalloc_array(num_vqs, sizeof(*vqs), GFP_KERNEL);
+	if (!vqs)
+		goto err_vqs;
+
+	callbacks[0] = virtnvme_admin_done;
+	names[0] = "admin";
+	dev->vqs[0].dev = dev;
+
+	for (i = 1; i < num_vqs; i++) {
+		callbacks[i] = virtnvme_io_done;
+		snprintf(dev->vqs[i].name, VQ_NAME_LEN, "req.%d", i);
+		names[i] = dev->vqs[i].name;
+		dev->vqs[i].dev = dev;
+	}
+
+	/* Discover virtqueues and write information to configuration. */
+	err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names);
+	if (err)
+		goto err_find_vqs;
+
+	for (i = 0; i < num_vqs; i++) {
+		spin_lock_init(&dev->vqs[i].lock);
+		dev->vqs[i].vq = vqs[i];
+	}
+	dev->num_vqs = num_vqs;
+
+	/* The temporary arrays are released on success and failure alike. */
+err_find_vqs:
+	kfree(vqs);
+err_vqs:
+	kfree(callbacks);
+err_callbacks:
+	kfree(names);
+err_names:
+	if (err) {
+		kfree(dev->vqs);
+		dev->vqs = NULL;
+	}
+out:
+	return err;
+}
+
+/*
+ * Allocate a standalone virtnvme_req with room for dev->sg_elems
+ * scatterlist entries and initialise its scatterlist table.
+ * Returns NULL when the allocation fails.
+ */
+static inline struct virtnvme_req *virtnvme_alloc_req(struct virtio_nvme_dev *dev,
+		gfp_t gfp_mask)
+{
+	struct virtnvme_req *vnr = kmalloc(sizeof(*vnr) +
+			dev->sg_elems*sizeof(struct scatterlist), gfp_mask);
+
+	if (vnr)
+		sg_init_table(vnr->sg, dev->sg_elems);
+
+	return vnr;
+}
+
+/* Convert a 512-byte sector number into a logical block number of @ns. */
+static inline u64 virtnvme_block_nr(struct virtio_nvme_ns *ns, sector_t sector)
+{
+	int shift = ns->lba_shift - 9;
+
+	return sector >> shift;
+}
+
+/*
+ * Queue one request on @vq.
+ *
+ * The scatterlist chain is: command (device-readable), optional data
+ * (device-readable for writes, device-writable for reads), response
+ * (device-writable).  @vnr is the token later returned by
+ * virtqueue_get_buf().  Returns the virtqueue_add_sgs() result.
+ */
+static int virtnvme_add_req(struct virtio_nvme_ns *ns, struct virtqueue *vq,
+		struct virtnvme_req *vnr,
+		struct scatterlist *data_sg,
+		bool have_data)
+{
+	struct scatterlist cmd_sg, resp_sg, *chain[5];
+	unsigned int out_cnt = 0, in_cnt = 0;
+	unsigned int slot = 0;
+
+	sg_init_one(&cmd_sg, vnr->req->cmd, sizeof(struct nvme_command));
+	chain[slot++] = &cmd_sg;
+	out_cnt++;
+
+	if (have_data) {
+		chain[slot++] = data_sg;
+		if (rq_data_dir(vnr->req))
+			out_cnt++;
+		else
+			in_cnt++;
+	}
+
+	sg_init_one(&resp_sg, &vnr->resp, sizeof(struct virtio_nvme_resp));
+	chain[slot] = &resp_sg;
+	in_cnt++;
+
+	return virtqueue_add_sgs(vq, chain, out_cnt, in_cnt, vnr, GFP_ATOMIC);
+}
+
+/*
+ * Build the NVMe read/write command for vnr->req inside vnr->cmd and point
+ * req->cmd at it.  Always returns 0.
+ */
+static int virtnvme_setup_io(struct virtnvme_req *vnr, struct virtio_nvme_ns *ns)
+{
+	struct request *req = vnr->req;
+	struct nvme_command *cmnd = &vnr->cmd;
+	u16 control = 0;
+	u32 dsmgmt = 0;
+
+#if 0 /* TODO */
+	if (req->cmd_flags & REQ_FUA)
+		control |= NVME_RW_FUA;
+	if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
+		control |= NVME_RW_LR;
+
+	if (req->cmd_flags & REQ_RAHEAD)
+		dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
+#endif
+
+	memset(cmnd, 0, sizeof(*cmnd));
+	req->cmd = (unsigned char *)cmnd;
+	req->cmd_len = sizeof(struct nvme_command);
+
+	if (rq_data_dir(req))
+		cmnd->rw.opcode = nvme_cmd_write;
+	else
+		cmnd->rw.opcode = nvme_cmd_read;
+	cmnd->rw.command_id = req->tag;
+	cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
+	cmnd->rw.slba = cpu_to_le64(virtnvme_block_nr(ns, blk_rq_pos(req)));
+	/* The length field holds the block count minus one. */
+	cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+	cmnd->rw.control = cpu_to_le16(control);
+	cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
+
+	return 0;
+}
+
+/*
+ * blk-mq ->queue_rq handler shared by the admin and I/O tag sets.
+ *
+ * Builds the command for plain read/write requests, maps the request data
+ * into vnr->sg and adds the request to the virtqueue stored in
+ * hctx->driver_data.
+ *
+ * Returns BLK_MQ_RQ_QUEUE_OK when queued, BLK_MQ_RQ_QUEUE_BUSY when the
+ * virtqueue add failed with -ENOMEM/-ENOSPC, BLK_MQ_RQ_QUEUE_ERROR for any
+ * other add failure.
+ */
+static int virtnvme_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
+{
+ struct virtio_nvme_ns *ns = hctx->queue->queuedata;
+ struct virtio_nvme_queue *nvmeq = hctx->driver_data;
+ struct request *req = bd->rq;
+ struct virtnvme_req *vnr = blk_mq_rq_to_pdu(req);
+ unsigned long flags;
+ unsigned int num;
+ int err;
+ bool notify = false;
+
+ vnr->req = req;
+
+ /*
+ * Only plain read/write requests get a command built here.
+ * NOTE(review): the three TODO branches submit whatever req->cmd already
+ * points at; presumably DRV_PRIV requests arrive with req->cmd prepared
+ * by the submitter -- confirm. Discard and flush are not implemented.
+ */
+ if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+ ; /* TODO: nvme_submit_priv(nvmeq, req, iod) */
+ else if (req->cmd_flags & REQ_DISCARD)
+ ; /* TODO: nvme_submit_discard(nvmeq, ns, req, iod) */
+ else if (req->cmd_flags & REQ_FLUSH)
+ ; /* TODO: nvme_submit_flush(nvmeq, ns, req->tag) */
+ else
+ virtnvme_setup_io(vnr, ns);
+
+ blk_mq_start_request(req);
+
+ num = blk_rq_map_sg(hctx->queue, vnr->req, vnr->sg);
+
+ spin_lock_irqsave(&nvmeq->lock, flags);
+ /* The segment count doubles as the boolean have_data argument. */
+ err = virtnvme_add_req(ns, nvmeq->vq, vnr, vnr->sg, num);
+ if (err) {
+ /* Kick so the device can drain the ring, then stop this hw queue. */
+ virtqueue_kick(nvmeq->vq);
+ blk_mq_stop_hw_queue(hctx);
+ spin_unlock_irqrestore(&nvmeq->lock, flags);
+ if (err == -ENOMEM || err == -ENOSPC)
+ return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_MQ_RQ_QUEUE_ERROR;
+ }
+
+ /* Prepare the kick under the lock; notify the device after dropping it. */
+ if (bd->last && virtqueue_kick_prepare(nvmeq->vq))
+ notify = true;
+ spin_unlock_irqrestore(&nvmeq->lock, flags);
+
+ if (notify)
+ virtqueue_notify(nvmeq->vq);
+ return BLK_MQ_RQ_QUEUE_OK;
+}
+
+/*
+ * blk-mq ->complete handler: end @req according to the status the device
+ * wrote into the request's response buffer.
+ *
+ * blk_mq_end_request() expects 0 or a negative errno, so a non-zero device
+ * status is reported as -EIO rather than passed through as a positive
+ * number.
+ *
+ * NOTE(review): resp.status is read without an endian conversion and is not
+ * copied into req->errors -- confirm what synchronous submitters expect.
+ */
+static inline void virtnvme_request_done(struct request *req)
+{
+	struct virtnvme_req *vnr = blk_mq_rq_to_pdu(req);
+	int error = vnr->resp.status ? -EIO : 0;
+
+#if 0 /* TODO */
+	if (req->cmd_type == REQ_TYPE_BLOCK_PC) {
+		req->resid_len = virtio32_to_cpu(dev->vdev, vbr->in_hdr.residual);
+		req->sense_len = virtio32_to_cpu(dev->vdev, vbr->in_hdr.sense_len);
+		req->errors = virtio32_to_cpu(dev->vdev, vbr->in_hdr.errors);
+	} else if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
+		req->errors = (error != 0);
+	}
+#endif
+
+	blk_mq_end_request(req, error);
+}
+
+/*
+ * blk-mq ->init_request handler: initialise the scatterlist table embedded
+ * in the request's PDU.  @data is the tag set's driver_data, i.e. the
+ * device.  Always returns 0.
+ */
+static int virtnvme_init_request(void *data, struct request *rq,
+		unsigned int hctx_idx, unsigned int request_idx,
+		unsigned int numa_node)
+{
+	struct virtnvme_req *vnr = blk_mq_rq_to_pdu(rq);
+	struct virtio_nvme_dev *dev = data;
+	unsigned int nents = dev->sg_elems;
+
+	sg_init_table(vnr->sg, nents);
+	return 0;
+}
+
+/* ->init_hctx for the admin tag set: bind the context to virtqueue 0. */
+static int virtnvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
+		unsigned int hctx_idx)
+{
+	struct virtio_nvme_dev *dev = data;
+
+	hctx->driver_data = &dev->vqs[0];
+	return 0;
+}
+
+/*
+ * ->init_hctx for the I/O tag set: hardware context N uses virtqueue N + 1,
+ * because virtqueue 0 is the admin queue.
+ */
+static int virtnvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
+		unsigned int hctx_idx)
+{
+	struct virtio_nvme_dev *dev = data;
+
+	hctx->driver_data = &dev->vqs[hctx_idx+1];
+	return 0;
+}
+
+/*
+ * blk-mq operations for the admin tag set.  Identical to the I/O ops except
+ * for ->init_hctx, which always selects virtqueue 0.
+ */
+static struct blk_mq_ops virtio_nvme_mq_admin_ops = {
+ .queue_rq = virtnvme_queue_rq,
+ .map_queue = blk_mq_map_queue,
+ .init_hctx = virtnvme_admin_init_hctx,
+ .complete = virtnvme_request_done,
+ .init_request = virtnvme_init_request,
+};
+
+/* blk-mq operations for the I/O tag set shared by all namespaces. */
+static struct blk_mq_ops virtio_nvme_mq_ops = {
+ .queue_rq = virtnvme_queue_rq,
+ .map_queue = blk_mq_map_queue,
+ .init_hctx = virtnvme_init_hctx,
+ .complete = virtnvme_request_done,
+ .init_request = virtnvme_init_request,
+};
+
+/* Block device ->open: take a reference on the owning device. */
+static int virtnvme_open(struct block_device *bdev, fmode_t mode)
+{
+	struct virtio_nvme_ns *ns = bdev->bd_disk->private_data;
+
+	kref_get(&ns->dev->kref);
+	return 0;
+}
+
+static DEFINE_IDA(nvme_instance_ida);
+
+/*
+ * Allocate an instance number from nvme_instance_ida and store it in
+ * dev->instance.  Returns 0 on success, -ENODEV on any failure.
+ */
+static int nvme_set_instance(struct virtio_nvme_dev *dev)
+{
+	int id, rc;
+
+	for (;;) {
+		if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
+			return -ENODEV;
+
+		spin_lock(&dev_list_lock);
+		rc = ida_get_new(&nvme_instance_ida, &id);
+		spin_unlock(&dev_list_lock);
+
+		/* -EAGAIN: preload again and retry. */
+		if (rc != -EAGAIN)
+			break;
+	}
+
+	if (rc)
+		return -ENODEV;
+
+	dev->instance = id;
+	return 0;
+}
+
+/* Return dev->instance to nvme_instance_ida, under dev_list_lock. */
+static void virtnvme_release_instance(struct virtio_nvme_dev *dev)
+{
+	int id = dev->instance;
+
+	spin_lock(&dev_list_lock);
+	ida_remove(&nvme_instance_ida, id);
+	spin_unlock(&dev_list_lock);
+}
+
+/*
+ * kref release function: runs when the last reference to the device is
+ * dropped.  Frees the namespaces, the instance number, the I/O tag set (if
+ * still allocated), the admin queue reference and the device itself.
+ */
+static void virtnvme_free_dev(struct kref *kref)
+{
+	struct virtio_nvme_dev *dev;
+
+	dev = container_of(kref, struct virtio_nvme_dev, kref);
+
+	virtnvme_free_namespaces(dev);
+	virtnvme_release_instance(dev);
+
+	if (dev->tagset.tags)
+		blk_mq_free_tag_set(&dev->tagset);
+
+	if (dev->admin_q)
+		blk_put_queue(dev->admin_q);
+
+	kfree(dev);
+}
+
+/* Block device ->release: drop the reference taken in virtnvme_open(). */
+static void virtnvme_release(struct gendisk *disk, fmode_t mode)
+{
+	struct virtio_nvme_ns *ns = disk->private_data;
+
+	kref_put(&ns->dev->kref, virtnvme_free_dev);
+}
+
+/* Block device operations; open/release only manage the device kref. */
+static const struct block_device_operations virtnvme_fops = {
+ .owner = THIS_MODULE,
+ .open = virtnvme_open,
+ .release = virtnvme_release,
+};
+
+/*
+ * Allocate and set up the request queue and gendisk for namespace @nsid.
+ *
+ * @id is the Identify Namespace data; it is only read here and remains
+ * owned by the caller.  The disk is not registered here (the caller calls
+ * add_disk()).  Returns the new namespace, or NULL on failure.
+ */
+static struct virtio_nvme_ns *virtnvme_alloc_ns(struct virtio_nvme_dev *dev, unsigned nsid,
+		struct nvme_id_ns *id)
+{
+	struct virtio_nvme_ns *ns;
+	struct gendisk *disk;
+	int lbaf;
+
+	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
+	if (!ns)
+		return NULL;
+
+	/* blk_mq_init_queue() reports failure with ERR_PTR(), never NULL. */
+	ns->queue = blk_mq_init_queue(&dev->tagset);
+	if (IS_ERR(ns->queue))
+		goto out_free_ns;
+	ns->queue->queue_flags = QUEUE_FLAG_DEFAULT;
+	queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue);
+	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
+	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, ns->queue);
+	ns->dev = dev;
+	ns->queue->queuedata = ns;
+
+	disk = alloc_disk(0);
+	if (!disk)
+		goto out_free_queue;
+	ns->ns_id = nsid;
+	ns->disk = disk;
+	lbaf = id->flbas & 0xf;
+	ns->lba_shift = id->lbaf[lbaf].ds;
+	/*
+	 * A data size of 0 would make every "lba_shift - 9" shift negative;
+	 * fall back to 512-byte blocks in that case.
+	 */
+	if (ns->lba_shift == 0)
+		ns->lba_shift = 9;
+	ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
+	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
+	if (dev->max_hw_sectors)
+		blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
+	disk->major = virtnvme_major;
+	disk->first_minor = 0;
+	disk->fops = &virtnvme_fops;
+	disk->private_data = ns;
+	disk->queue = ns->queue;
+	disk->flags = GENHD_FL_EXT_DEVT;
+	/* Bounded write: disk_name is a fixed-size array. */
+	snprintf(disk->disk_name, sizeof(disk->disk_name), "vnvme%dn%d",
+		 dev->instance, nsid);
+	set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
+
+	return ns;
+
+out_free_queue:
+	blk_cleanup_queue(ns->queue);
+out_free_ns:
+	kfree(ns);
+	return NULL;
+}
+
+/*
+ * Size of the per-request PDU: one struct virtnvme_req followed by
+ * dev->sg_elems scatterlist entries.
+ */
+static unsigned int virtnvme_cmd_size(struct virtio_nvme_dev *dev)
+{
+	return sizeof(struct virtnvme_req) +
+		sizeof(struct scatterlist) * dev->sg_elems;
+}
+
+/*
+ * Identify the controller, allocate the I/O tag set and create one disk per
+ * namespace that reports a non-zero capacity.
+ *
+ * A namespace whose Identify command or allocation fails is skipped; it
+ * does not fail the whole device.  Returns 0 on success, -EINVAL when the
+ * device has no I/O virtqueue, -EIO when Identify Controller fails, or the
+ * blk_mq_alloc_tag_set() error.
+ */
+static int virtnvme_dev_add(struct virtio_nvme_dev *dev)
+{
+	struct virtio_nvme_ns *ns;
+	struct nvme_id_ctrl *ctrl;
+	struct nvme_id_ns *id_ns;
+	unsigned nn, i;
+	int res;
+
+	/* dev->vqs[1] (the first I/O queue) is dereferenced below. */
+	if (dev->num_vqs < 2)
+		return -EINVAL;
+
+	res = virtnvme_identify_ctrl(dev, &ctrl);
+	if (res) {
+		printk("Identify Controller failed (%d)\n", res);
+		return -EIO;
+	}
+
+	/* Only the namespace count is needed; do not leak the buffer. */
+	nn = le32_to_cpup(&ctrl->nn);
+	kfree(ctrl);
+
+	memset(&dev->tagset, 0, sizeof(dev->tagset));
+	dev->tagset.ops = &virtio_nvme_mq_ops;
+	/* Default queue sizing is to fill the ring. */
+	if (!virtnvme_queue_depth)
+		virtnvme_queue_depth = dev->vqs[1].vq->num_free;
+	dev->tagset.queue_depth = virtnvme_queue_depth;
+	dev->tagset.numa_node = NUMA_NO_NODE;
+	dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
+	dev->tagset.cmd_size = virtnvme_cmd_size(dev);
+	dev->tagset.driver_data = dev;
+	dev->tagset.nr_hw_queues = dev->num_vqs - 1;
+
+	/* Propagate the real error; the old code returned 0 here. */
+	res = blk_mq_alloc_tag_set(&dev->tagset);
+	if (res)
+		return res;
+
+	for (i = 1; i <= nn; i++) {
+		/* On failure the helper has already freed its buffer. */
+		if (virtnvme_identify_ns(dev, i, &id_ns))
+			continue;
+
+		if (id_ns->ncap != 0) {
+			ns = virtnvme_alloc_ns(dev, i, id_ns);
+			if (ns)
+				list_add_tail(&ns->list, &dev->namespaces);
+		}
+
+		/* The identify data is not referenced after this point. */
+		kfree(id_ns);
+	}
+	list_for_each_entry(ns, &dev->namespaces, list)
+		add_disk(ns->disk);
+
+	return 0;
+}
+
+/*
+ * Tear down the admin request queue and free its tag set.  Does nothing
+ * when there is no admin queue or it is already marked dying.
+ */
+static void virtnvme_dev_remove_admin(struct virtio_nvme_dev *dev)
+{
+	if (!dev->admin_q || blk_queue_dying(dev->admin_q))
+		return;
+
+	blk_cleanup_queue(dev->admin_q);
+	blk_mq_free_tag_set(&dev->admin_tagset);
+}
+
+/*
+ * Create the admin tag set and request queue, taking an extra reference on
+ * the queue.  If the admin queue already exists it is only unfrozen.
+ *
+ * Returns 0 on success, -ENOMEM when the tag set or queue cannot be
+ * allocated, -ENODEV when the queue reference cannot be taken.  On every
+ * failure dev->admin_q is left NULL, so later "if (dev->admin_q)" checks
+ * never see an ERR_PTR value.
+ */
+static int virtnvme_alloc_admin_tags(struct virtio_nvme_dev *dev)
+{
+	if (dev->admin_q) {
+		blk_mq_unfreeze_queue(dev->admin_q);
+		return 0;
+	}
+
+	dev->admin_tagset.ops = &virtio_nvme_mq_admin_ops;
+	dev->admin_tagset.nr_hw_queues = 1;
+	dev->admin_tagset.queue_depth = NVME_AQ_DEPTH;
+	dev->admin_tagset.reserved_tags = 1;
+	dev->admin_tagset.timeout = ADMIN_TIMEOUT;
+	dev->admin_tagset.numa_node = NUMA_NO_NODE;
+	dev->admin_tagset.cmd_size = virtnvme_cmd_size(dev);
+	dev->admin_tagset.driver_data = dev;
+
+	if (blk_mq_alloc_tag_set(&dev->admin_tagset))
+		return -ENOMEM;
+
+	dev->admin_q = blk_mq_init_queue(&dev->admin_tagset);
+	if (IS_ERR(dev->admin_q)) {
+		/* Do not leave the error pointer behind in the device. */
+		dev->admin_q = NULL;
+		blk_mq_free_tag_set(&dev->admin_tagset);
+		return -ENOMEM;
+	}
+	if (!blk_get_queue(dev->admin_q)) {
+		virtnvme_dev_remove_admin(dev);
+		dev->admin_q = NULL;
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/*
+ * Virtio ->probe: allocate the device, enable the controller through the
+ * config space, set up the virtqueues and the admin queue, then discover
+ * the namespaces.
+ *
+ * Returns 0 on success or a negative errno.  Each error label releases
+ * exactly what had been set up before the failing step: the dev_list entry
+ * and admin queue, the virtqueues and dev->vqs, the instance number, and
+ * finally the device structure.
+ *
+ * NOTE(review): admin commands are issued from here, before the virtio core
+ * sets DRIVER_OK after probe returns -- confirm whether
+ * virtio_device_ready() should be called before virtnvme_dev_add().
+ */
+static int virtnvme_probe(struct virtio_device *vdev)
+{
+	struct virtio_nvme_dev *dev;
+	u64 cap;
+	u32 ctrl_config;
+	u32 sg_elems;
+	int err;
+
+	if (!vdev->config->get) {
+		printk("%s failure: config access disabled\n", __func__);
+		return -EINVAL;
+	}
+
+	vdev->priv = dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&dev->namespaces);
+	INIT_LIST_HEAD(&dev->node);
+	kref_init(&dev->kref);
+
+	init_waitqueue_head(&dev->queue_wait);
+	dev->vdev = vdev;
+
+	err = nvme_set_instance(dev);
+	if (err)
+		goto out_free_dev;
+
+	/* We need to know how many segments before we allocate. */
+	err = virtio_cread_feature(vdev, VIRTIO_NVME_F_SEG_MAX,
+				   struct virtio_nvme_config, seg_max,
+				   &sg_elems);
+	/* We need at least one SG element, whatever they say. */
+	if (err || !sg_elems)
+		sg_elems = 1;
+
+	/* We need two extra sg elements at head for command and response */
+	sg_elems += 2;
+	dev->sg_elems = sg_elems;
+
+	/*
+	 * 1. The host determines the controller capabilities
+	 */
+	virtio_cread(vdev, struct virtio_nvme_config, cap, &cap);
+
+	/*
+	 * 2. The host configures controller settings. Specific settings include:
+	 *	a. The arbitration mechanism should be selected in CC.AMS.
+	 *	b. The memory page size should be initialized in CC.MPS.
+	 *	c. The I/O Command Set that is to be used should be selected in CC.CSS.
+	 * 3. The controller should be enabled by setting CC.EN to 1
+	 */
+	ctrl_config = NVME_CC_ENABLE | NVME_CC_CSS_NVM;
+	ctrl_config |= (PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
+	ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
+	ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
+	virtio_cwrite(vdev, struct virtio_nvme_config, ctrl_config, &ctrl_config);
+
+	/*
+	 * 4. The host should wait for the controller to indicate it is ready to
+	 *    process commands. The controller is ready to process commands when
+	 *    CSTS.RDY is set to 1.
+	 */
+	err = virtnvme_wait_ready(dev, cap);
+	if (err)
+		goto release;
+
+	/* Qemu starts controller and creates VQs */
+	err = virtnvme_init_vq(dev);
+	if (err)
+		goto release;
+
+	/* From here on the virtqueues and dev->vqs must be released too. */
+	err = virtnvme_alloc_admin_tags(dev);
+	if (err)
+		goto out_free_vq;
+
+	spin_lock(&dev_list_lock);
+	list_add(&dev->node, &dev_list);
+	spin_unlock(&dev_list_lock);
+
+	/*
+	 * 6. The host should determine the configuration of the controller by
+	 *    issuing the Identify command, specifying the Controller data
+	 *    structure. The host should then determine the configuration of
+	 *    each namespace by issuing the Identify command for each namespace,
+	 *    specifying the Namespace data structure
+	 */
+	err = virtnvme_dev_add(dev);
+	if (err)
+		goto out_remove_admin;
+
+	return 0;
+
+out_remove_admin:
+	/* Unlink first so dev_list never points at freed memory. */
+	spin_lock(&dev_list_lock);
+	list_del_init(&dev->node);
+	spin_unlock(&dev_list_lock);
+
+	virtnvme_dev_remove_admin(dev);
+	/* Drop the extra reference taken by virtnvme_alloc_admin_tags(). */
+	blk_put_queue(dev->admin_q);
+
+out_free_vq:
+	/* Stop all the virtqueues before deleting them. */
+	vdev->config->reset(vdev);
+	vdev->config->del_vqs(vdev);
+	kfree(dev->vqs);
+
+release:
+	virtnvme_release_instance(dev);
+
+out_free_dev:
+	vdev->priv = NULL;
+	kfree(dev);
+	return err;
+}
+
+/*
+ * Unregister the disk of @ns and tear down its request queue.
+ *
+ * The queue is marked dying first (unless it already was), the gendisk is
+ * removed if it had been registered (GENHD_FL_UP), and finally the queue
+ * is cleaned up.
+ */
+static void virtnvme_ns_remove(struct virtio_nvme_ns *ns)
+{
+ /* true when this call is the one that marks the queue dying */
+ bool kill = !blk_queue_dying(ns->queue);
+
+ if (kill)
+ blk_set_queue_dying(ns->queue);
+ if (ns->disk->flags & GENHD_FL_UP) {
+ if (blk_get_integrity(ns->disk))
+ blk_integrity_unregister(ns->disk);
+ del_gendisk(ns->disk);
+ }
+ /*
+ * NOTE(review): when kill is false the queue was already dying on entry,
+ * so the second operand looks like it can only be true if the flag was
+ * cleared in between -- confirm the intent.
+ */
+ if (kill || !blk_queue_dying(ns->queue)) {
+ blk_mq_abort_requeue_list(ns->queue);
+ blk_cleanup_queue(ns->queue);
+ }
+}
+
+/* Remove the disk and request queue of every namespace of @dev. */
+static void virtnvme_dev_remove(struct virtio_nvme_dev *dev)
+{
+	struct list_head *pos;
+
+	list_for_each(pos, &dev->namespaces)
+		virtnvme_ns_remove(list_entry(pos, struct virtio_nvme_ns, list));
+}
+
+/*
+ * Unlink @ns from its device, clear the disk's back pointer under
+ * dev_list_lock, drop the disk reference and free the namespace.
+ */
+static void virtnvme_free_namespace(struct virtio_nvme_ns *ns)
+{
+	struct gendisk *disk = ns->disk;
+
+	list_del(&ns->list);
+
+	spin_lock(&dev_list_lock);
+	disk->private_data = NULL;
+	spin_unlock(&dev_list_lock);
+
+	put_disk(disk);
+	kfree(ns);
+}
+
+/* Free every namespace still linked on dev->namespaces. */
+static void virtnvme_free_namespaces(struct virtio_nvme_dev *dev)
+{
+	struct virtio_nvme_ns *ns;
+
+	/* virtnvme_free_namespace() unlinks the entry, so pop from the head. */
+	while (!list_empty(&dev->namespaces)) {
+		ns = list_first_entry(&dev->namespaces,
+				      struct virtio_nvme_ns, list);
+		virtnvme_free_namespace(ns);
+	}
+}
+
+/*
+ * Virtio ->remove: unlink the device, tear down all disks and queues, then
+ * stop and delete the virtqueues, and finally drop the probe reference.
+ *
+ * The block-layer teardown runs before the virtqueues are deleted because
+ * virtnvme_queue_rq() submits to those virtqueues for as long as a request
+ * queue is alive.
+ */
+static void virtnvme_remove(struct virtio_device *vdev)
+{
+	struct virtio_nvme_dev *dev = vdev->priv;
+
+	spin_lock(&dev_list_lock);
+	list_del_init(&dev->node);
+	spin_unlock(&dev_list_lock);
+
+	/* Disks and request queues first: they still use the virtqueues. */
+	virtnvme_dev_remove(dev);
+	virtnvme_dev_remove_admin(dev);
+
+	/* Stop all the virtqueues. */
+	vdev->config->reset(vdev);
+
+	vdev->config->del_vqs(vdev);
+
+	/* Same guard as virtnvme_free_dev(): the set may not be allocated. */
+	if (dev->tagset.tags)
+		blk_mq_free_tag_set(&dev->tagset);
+	kfree(dev->vqs);
+	dev->vqs = NULL;
+
+	kref_put(&dev->kref, virtnvme_free_dev);
+}
+
+/* Feature bits listed in the driver's feature table. */
+static unsigned int features[] = {
+ VIRTIO_NVME_F_SEG_MAX, VIRTIO_NVME_F_MQ,
+};
+
+/* Virtio driver description registered by virtnvme_init(). */
+static struct virtio_driver virtio_nvme_driver = {
+ .feature_table = features,
+ .feature_table_size = ARRAY_SIZE(features),
+ .driver.name = KBUILD_MODNAME,
+ .driver.owner = THIS_MODULE,
+ .id_table = id_table,
+ .probe = virtnvme_probe,
+ .remove = virtnvme_remove,
+};
+
+/*
+ * Module init: obtain a dynamic block major, then register the virtio
+ * driver.  The major is released again if driver registration fails.
+ */
+static int __init virtnvme_init(void)
+{
+	int ret;
+
+	virtnvme_major = register_blkdev(0, "virtnvme");
+	if (virtnvme_major < 0)
+		return virtnvme_major;
+
+	ret = register_virtio_driver(&virtio_nvme_driver);
+	if (!ret)
+		return 0;
+
+	unregister_blkdev(virtnvme_major, "virtnvme");
+	return ret;
+}
+
+/* Module exit: undo virtnvme_init() in reverse order. */
+static void __exit virtnvme_exit(void)
+{
+	/* Driver first, then the block major it was using. */
+	unregister_virtio_driver(&virtio_nvme_driver);
+
+	unregister_blkdev(virtnvme_major, "virtnvme");
+}
+module_init(virtnvme_init);
+module_exit(virtnvme_exit);
+
+MODULE_DEVICE_TABLE(virtio, id_table);
+MODULE_DESCRIPTION("Virtio NVMe driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ming Lin <ming.l at ssi.samsung.com>");
diff --git a/include/linux/virtio_nvme.h b/include/linux/virtio_nvme.h
new file mode 100644
index 0000000..c8db9a2
--- /dev/null
+++ b/include/linux/virtio_nvme.h
@@ -0,0 +1,53 @@
+#ifndef _LINUX_VIRTIO_NVME_H
+#define _LINUX_VIRTIO_NVME_H
+
+#include <uapi/linux/virtio_nvme.h>
+#include <linux/blk-mq.h>
+
+#define VQ_NAME_LEN 16
+
+struct virtio_nvme_dev;
+/* Per-virtqueue state; dev->vqs[0] is the admin queue, the rest are I/O. */
+struct virtio_nvme_queue {
+ struct virtio_nvme_dev *dev; /* owning device */
+ struct virtqueue *vq; /* virtqueue returned by find_vqs() */
+ spinlock_t lock; /* held around virtqueue add/get operations */
+ char name[VQ_NAME_LEN]; /* "req.<index>" for I/O queues */
+} ____cacheline_aligned_in_smp;
+
+/* Per-device state, allocated in probe and freed by the kref release. */
+struct virtio_nvme_dev {
+ struct virtio_device *vdev; /* underlying virtio device */
+ wait_queue_head_t queue_wait; /* woken after I/O completions */
+ struct request_queue *admin_q; /* queue used for admin commands */
+ struct blk_mq_tag_set admin_tagset; /* tag set backing admin_q */
+ struct blk_mq_tag_set tagset; /* tag set shared by namespace queues */
+
+ /* num of vqs */
+ /* (counts the admin virtqueue as well as the I/O virtqueues) */
+ int num_vqs;
+ struct virtio_nvme_queue *vqs; /* array of num_vqs entries */
+ struct list_head node; /* link in the driver's dev_list */
+ int instance; /* IDA number, used in disk names */
+ /* NOTE(review): not referenced by virtio_nvme.c in this patch */
+ u32 ctrl_config;
+ struct list_head namespaces; /* list of struct virtio_nvme_ns */
+ struct kref kref; /* probe holds one ref, each open another */
+ /* NOTE(review): the next four fields are not referenced by virtio_nvme.c */
+ char name[12];
+ char serial[20];
+ char model[40];
+ char firmware_rev[8];
+ /* applied to namespace queues when non-zero; never assigned in this patch */
+ u32 max_hw_sectors;
+
+ /* scatterlist entries per request: device seg_max (min 1) plus 2 */
+ unsigned int sg_elems;
+};
+
+/* Per-namespace state: one request queue and one gendisk. */
+struct virtio_nvme_ns {
+ struct list_head list; /* link in dev->namespaces */
+
+ struct virtio_nvme_dev *dev; /* owning device */
+ struct request_queue *queue; /* blk-mq queue; queuedata points back here */
+ struct gendisk *disk; /* disk named "vnvme<instance>n<nsid>" */
+
+ unsigned ns_id; /* namespace id put into each command */
+ int lba_shift; /* log2 of the logical block size */
+ int ms; /* "ms" value of the active LBA format */
+};
+
+#endif
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index 77925f5..d59d323 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -41,5 +41,6 @@
#define VIRTIO_ID_CAIF 12 /* Virtio caif */
#define VIRTIO_ID_GPU 16 /* virtio GPU */
#define VIRTIO_ID_INPUT 18 /* virtio input */
+#define VIRTIO_ID_NVME 19 /* TBD: virtio NVMe, need Redhat's help to get this id */
#endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/include/uapi/linux/virtio_nvme.h b/include/uapi/linux/virtio_nvme.h
new file mode 100644
index 0000000..33f6077
--- /dev/null
+++ b/include/uapi/linux/virtio_nvme.h
@@ -0,0 +1,30 @@
+#ifndef _UAPI_LINUX_VIRTIO_NVME_H
+#define _UAPI_LINUX_VIRTIO_NVME_H
+
+#include <linux/types.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_types.h>
+
+/* Feature bits */
+#define VIRTIO_NVME_F_SEG_MAX 1 /* Indicates maximum # of segments */
+#define VIRTIO_NVME_F_MQ 2 /* support more than one vq */
+
+/* Device configuration space layout. */
+struct virtio_nvme_config {
+ /* read by the driver; NVME_CAP_TIMEOUT() is taken from it */
+ __u64 cap;
+ /* written by the driver with the NVME_CC_* settings, including enable */
+ __u32 ctrl_config;
+ /* polled by the driver until NVME_CSTS_RDY is set */
+ __u32 csts;
+
+ /* The maximum number of segments (if VIRTIO_NVME_F_SEG_MAX) */
+ __u32 seg_max;
+ /* number of vqs, only available when VIRTIO_NVME_F_MQ is set */
+ /* (the driver requests one more virtqueue than this, for admin) */
+ __u32 num_queues;
+} __attribute__((packed));
+
+/* Response buffer the driver appends to every request as device-writable. */
+struct virtio_nvme_resp {
+ __u32 result; /* not read by the driver in this patch */
+ __u16 cid; /* presumably the command id -- TODO confirm; not read by the driver */
+ __u16 status; /* non-zero is treated as an error at completion */
+};
+
+#endif
--
1.9.1
WARNING: multiple messages have this Message-ID (diff)
From: Ming Lin <mlin@kernel.org>
To: linux-nvme@lists.infradead.org,
virtualization@lists.linux-foundation.org
Cc: Ming Lin <mlin@kernel.org>, Ming Lin <ming.l@ssi.samsung.com>,
Christoph Hellwig <hch@lst.de>
Subject: [RFC PATCH 1/2] virtio_nvme(kernel): virtual NVMe driver using virtio
Date: Wed, 9 Sep 2015 22:48:31 -0700 [thread overview]
Message-ID: <1441864112-12765-2-git-send-email-mlin@kernel.org> (raw)
In-Reply-To: <1441864112-12765-1-git-send-email-mlin@kernel.org>
Signed-off-by: Ming Lin <ming.l@ssi.samsung.com>
---
drivers/block/Kconfig | 7 +
drivers/block/Makefile | 1 +
drivers/block/nvme-core.c | 1 +
drivers/block/virtio_nvme.c | 853 +++++++++++++++++++++++++++++++++++++++
include/linux/virtio_nvme.h | 53 +++
include/uapi/linux/virtio_ids.h | 1 +
include/uapi/linux/virtio_nvme.h | 30 ++
7 files changed, 946 insertions(+)
create mode 100644 drivers/block/virtio_nvme.c
create mode 100644 include/linux/virtio_nvme.h
create mode 100644 include/uapi/linux/virtio_nvme.h
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 1b8094d..7149885 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -519,6 +519,13 @@ config VIRTIO_BLK
This is the virtual block driver for virtio. It can be used with
lguest or QEMU based VMMs (like KVM or Xen). Say Y or M.
+# virtio_nvme.c calls nvme_submit_sync_cmd(), which is exported by
+# nvme-core.c, so the NVMe core driver must be enabled as well.
+config VIRTIO_NVME
+	tristate "Virtio NVMe driver"
+	depends on VIRTIO && BLK_DEV_NVME
+	---help---
+	  This is the virtual NVMe driver for virtio. It can be used with
+	  lguest or QEMU based VMMs (like KVM or Xen). Say Y or M.
+
config BLK_DEV_HD
bool "Very old hard disk (MFM/RLL/IDE) driver"
depends on HAVE_IDE
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 02b688d..3b73f59 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_BLK_DEV_UMEM) += umem.o
obj-$(CONFIG_BLK_DEV_NBD) += nbd.o
obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o
obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o
+obj-$(CONFIG_VIRTIO_NVME) += virtio_nvme.o
obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
obj-$(CONFIG_BLK_DEV_HD) += hd.o
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 7920c27..7895606 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -1059,6 +1059,7 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
{
return __nvme_submit_sync_cmd(q, cmd, buffer, NULL, bufflen, NULL, 0);
}
+EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);
static int nvme_submit_async_admin_req(struct nvme_dev *dev)
{
diff --git a/drivers/block/virtio_nvme.c b/drivers/block/virtio_nvme.c
new file mode 100644
index 0000000..57f81fc
--- /dev/null
+++ b/drivers/block/virtio_nvme.c
@@ -0,0 +1,853 @@
+/* Modified from virtio_blk.c and nvme-core.c */
+
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/hdreg.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/virtio.h>
+#include <linux/virtio_nvme.h>
+#include <linux/scatterlist.h>
+#include <linux/string_helpers.h>
+#include <linux/idr.h>
+#include <linux/blk-mq.h>
+#include <linux/numa.h>
+#include <linux/virtio_nvme.h>
+#include <linux/nvme.h>
+#include <linux/blk-mq.h>
+
+/* Timeout installed on the admin tag set. */
+#define ADMIN_TIMEOUT (2 * HZ)
+/* Queue depth requested for the admin tag set. */
+#define NVME_AQ_DEPTH 256
+
+/* Block major; overwritten with the value returned by register_blkdev() at module init. */
+static int virtnvme_major;
+module_param(virtnvme_major, int, 0);
+
+/* I/O tag-set depth; when 0 it is taken from the free slot count of the first I/O virtqueue. */
+static unsigned int virtnvme_queue_depth;
+module_param_named(queue_depth, virtnvme_queue_depth, uint, 0444);
+
+/* dev_list_lock guards dev_list, the instance IDA and the clearing of gendisk private_data. */
+static DEFINE_SPINLOCK(dev_list_lock);
+static LIST_HEAD(dev_list);
+
+/* Defined near the end of the file; used earlier by virtnvme_free_dev(). */
+static void virtnvme_free_namespaces(struct virtio_nvme_dev *dev);
+
+/* Match any virtio device that reports the NVMe device ID. */
+static const struct virtio_device_id id_table[] = {
+ { VIRTIO_ID_NVME, VIRTIO_DEV_ANY_ID },
+ { 0 },
+};
+
+/*
+ * Per-request driver data, reached through blk_mq_rq_to_pdu(). The tag sets
+ * reserve sizeof(struct virtnvme_req) plus sg_elems scatterlist entries for
+ * each request (see virtnvme_cmd_size()).
+ */
+struct virtnvme_req
+{
+ struct request *req; /* owning block request, set in virtnvme_queue_rq() */
+ struct nvme_command cmd; /* command built by virtnvme_setup_io() */
+ struct virtio_nvme_resp resp; /* device-writable response buffer */
+ struct scatterlist sg[]; /* data segments filled by blk_rq_map_sg() */
+};
+
+/*
+ * Send an Identify command with CNS = 1 on the admin queue.
+ *
+ * On success *id points to a kmalloc()ed struct nvme_id_ctrl that the caller
+ * must kfree(). On failure the buffer is already freed and the value left in
+ * *id must not be used. Returns 0, -ENOMEM, or whatever
+ * nvme_submit_sync_cmd() reports.
+ */
+static int virtnvme_identify_ctrl(struct virtio_nvme_dev *dev, struct nvme_id_ctrl **id)
+{
+	struct nvme_command cmd = { };
+	struct nvme_id_ctrl *buf;
+	int ret;
+
+	/* gcc-4.4.4 (at least) has issues with initializers and anon unions */
+	cmd.identify.opcode = nvme_admin_identify;
+	cmd.identify.cns = cpu_to_le32(1);
+
+	buf = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
+	*id = buf;
+	if (!buf)
+		return -ENOMEM;
+
+	ret = nvme_submit_sync_cmd(dev->admin_q, &cmd, buf,
+				   sizeof(struct nvme_id_ctrl));
+	if (ret)
+		kfree(buf);
+	return ret;
+}
+
+/*
+ * Send an Identify command for namespace @nsid on the admin queue.
+ *
+ * On success *id points to a kmalloc()ed struct nvme_id_ns that the caller
+ * must kfree(). On failure the buffer is freed here and *id is set to NULL.
+ * Returns 0, -ENOMEM, or whatever nvme_submit_sync_cmd() reports.
+ */
+static int virtnvme_identify_ns(struct virtio_nvme_dev *dev, unsigned nsid,
+		struct nvme_id_ns **id)
+{
+	struct nvme_command c = { };
+	int error;
+
+	/* gcc-4.4.4 (at least) has issues with initializers and anon unions */
+	c.identify.opcode = nvme_admin_identify;
+	c.identify.nsid = cpu_to_le32(nsid);
+
+	*id = kmalloc(sizeof(**id), GFP_KERNEL);
+	if (!*id)
+		return -ENOMEM;
+
+	error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, sizeof(**id));
+	if (error) {
+		kfree(*id);
+		/* Do not hand a dangling pointer back to the caller. */
+		*id = NULL;
+	}
+	return error;
+}
+
+/*
+ * Poll the csts config field every 100 ms until NVME_CSTS_RDY is set.
+ *
+ * The deadline is derived from the timeout field of @cap. Returns 0 once the
+ * controller is ready, -EINTR if a fatal signal is pending, or -ENODEV when
+ * the deadline passes.
+ */
+static int virtnvme_wait_ready(struct virtio_nvme_dev *dev, u64 cap)
+{
+	struct virtio_device *vdev = dev->vdev;
+	unsigned long deadline;
+	u32 status;
+
+	deadline = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
+
+	for (;;) {
+		virtio_cread(vdev, struct virtio_nvme_config, csts, &status);
+		if ((status & NVME_CSTS_RDY) == NVME_CSTS_RDY)
+			return 0;
+
+		msleep(100);
+
+		if (fatal_signal_pending(current))
+			return -EINTR;
+
+		if (time_after(jiffies, deadline)) {
+			printk("Device not ready; aborting initialisation\n");
+			return -ENODEV;
+		}
+	}
+}
+
+/*
+ * Virtqueue callback for the admin queue.
+ *
+ * Reaps every completed buffer under the per-queue lock and hands the
+ * matching request to blk_mq_complete_request(). Callbacks are disabled
+ * while draining and the loop repeats if more buffers arrived before they
+ * could be re-enabled.
+ *
+ * virtnvme_queue_rq() stops the hardware queue when the ring is full, so
+ * once at least one buffer has been reclaimed the admin queue is restarted
+ * here; otherwise it would stay stopped.
+ */
+static void virtnvme_admin_done(struct virtqueue *vq)
+{
+	struct virtio_nvme_dev *dev = vq->vdev->priv;
+	struct virtnvme_req *vnr;
+	int qid = vq->index;
+	unsigned long flags;
+	unsigned int len;
+	bool req_done = false;
+
+	spin_lock_irqsave(&dev->vqs[qid].lock, flags);
+	do {
+		virtqueue_disable_cb(vq);
+		while ((vnr = virtqueue_get_buf(dev->vqs[qid].vq, &len)) != NULL) {
+			blk_mq_complete_request(vnr->req);
+			req_done = true;
+		}
+		if (unlikely(virtqueue_is_broken(vq)))
+			break;
+	} while (!virtqueue_enable_cb(vq));
+
+	/* In case the queue is stopped waiting for more buffers. */
+	if (req_done)
+		blk_mq_start_stopped_hw_queues(dev->admin_q, true);
+
+	spin_unlock_irqrestore(&dev->vqs[qid].lock, flags);
+}
+
+/*
+ * Virtqueue callback for the I/O queues.
+ *
+ * Reaps every completed buffer under the per-queue lock and hands the
+ * matching request to blk_mq_complete_request(). Callbacks are disabled
+ * while draining and the loop repeats if more buffers arrived before they
+ * could be re-enabled.
+ *
+ * virtnvme_queue_rq() stops the hardware queue when the ring is full, so
+ * once ring space has been reclaimed the stopped hardware queues of every
+ * namespace are restarted; otherwise they would stay stopped.
+ * NOTE(review): the namespace list is walked without a lock; this assumes
+ * the list is not modified while I/O completions can still arrive - confirm.
+ */
+static void virtnvme_io_done(struct virtqueue *vq)
+{
+	struct virtio_nvme_dev *dev = vq->vdev->priv;
+	int qid = vq->index;
+	struct virtio_nvme_ns *ns;
+	struct virtnvme_req *vnr;
+	unsigned long flags;
+	unsigned int len;
+	bool bio_done = false;
+
+	spin_lock_irqsave(&dev->vqs[qid].lock, flags);
+	do {
+		virtqueue_disable_cb(vq);
+		while ((vnr = virtqueue_get_buf(dev->vqs[qid].vq, &len)) != NULL) {
+			blk_mq_complete_request(vnr->req);
+			bio_done = true;
+		}
+
+		if (unlikely(virtqueue_is_broken(vq)))
+			break;
+	} while (!virtqueue_enable_cb(vq));
+
+	/* In case a queue is stopped waiting for more buffers. */
+	if (bio_done) {
+		list_for_each_entry(ns, &dev->namespaces, list)
+			blk_mq_start_stopped_hw_queues(ns->queue, true);
+	}
+
+	spin_unlock_irqrestore(&dev->vqs[qid].lock, flags);
+
+	if (bio_done)
+		wake_up(&dev->queue_wait);
+}
+
+/*
+ * Discover the virtqueues and fill in dev->vqs / dev->num_vqs.
+ *
+ * Queue 0 is the admin queue; queues 1..n are I/O queues named "req.<i>".
+ * The I/O queue count comes from the num_queues config field when
+ * VIRTIO_NVME_F_MQ is offered and defaults to 1 otherwise, or when the
+ * device reports 0.
+ *
+ * Returns 0 on success. On failure returns -ENOMEM or the find_vqs() error,
+ * with dev->vqs freed and reset to NULL.
+ */
+static int virtnvme_init_vq(struct virtio_nvme_dev *dev)
+{
+	int err;
+	int i;
+	vq_callback_t **callbacks;
+	const char **names;
+	struct virtqueue **vqs;
+	unsigned num_vqs;
+	struct virtio_device *vdev = dev->vdev;
+
+	err = virtio_cread_feature(vdev, VIRTIO_NVME_F_MQ,
+				   struct virtio_nvme_config, num_queues,
+				   &num_vqs);
+	/* We need at least one I/O queue, whatever the device says. */
+	if (err || !num_vqs)
+		num_vqs = 1;
+
+	/* One extra virtqueue for the admin queue. */
+	num_vqs++;
+
+	/*
+	 * Every allocation failure below must report -ENOMEM; do not let the
+	 * result of the feature read above leak out as the return value.
+	 */
+	err = -ENOMEM;
+
+	dev->vqs = kmalloc_array(num_vqs, sizeof(*dev->vqs), GFP_KERNEL);
+	if (!dev->vqs)
+		goto out;
+
+	names = kmalloc_array(num_vqs, sizeof(*names), GFP_KERNEL);
+	if (!names)
+		goto err_names;
+
+	callbacks = kmalloc_array(num_vqs, sizeof(*callbacks), GFP_KERNEL);
+	if (!callbacks)
+		goto err_callbacks;
+
+	vqs = kmalloc_array(num_vqs, sizeof(*vqs), GFP_KERNEL);
+	if (!vqs)
+		goto err_vqs;
+
+	callbacks[0] = virtnvme_admin_done;
+	names[0] = "admin";
+	dev->vqs[0].dev = dev;
+
+	for (i = 1; i < num_vqs; i++) {
+		callbacks[i] = virtnvme_io_done;
+		snprintf(dev->vqs[i].name, VQ_NAME_LEN, "req.%d", i);
+		names[i] = dev->vqs[i].name;
+		dev->vqs[i].dev = dev;
+	}
+
+	/* Discover virtqueues and write information to configuration. */
+	err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names);
+	if (err)
+		goto err_find_vqs;
+
+	for (i = 0; i < num_vqs; i++) {
+		spin_lock_init(&dev->vqs[i].lock);
+		dev->vqs[i].vq = vqs[i];
+	}
+	dev->num_vqs = num_vqs;
+
+	/* The temporary arrays are released on success as well. */
+err_find_vqs:
+	kfree(vqs);
+err_vqs:
+	kfree(callbacks);
+err_callbacks:
+	kfree(names);
+err_names:
+	if (err) {
+		kfree(dev->vqs);
+		dev->vqs = NULL;
+	}
+out:
+	return err;
+}
+
+/*
+ * Allocate a standalone virtnvme_req with room for dev->sg_elems
+ * scatterlist entries and initialise its sg table. Returns NULL when the
+ * allocation fails.
+ */
+static inline struct virtnvme_req *virtnvme_alloc_req(struct virtio_nvme_dev *dev,
+		gfp_t gfp_mask)
+{
+	struct virtnvme_req *vnr = kmalloc(sizeof(*vnr) +
+			dev->sg_elems*sizeof(struct scatterlist), gfp_mask);
+
+	if (vnr)
+		sg_init_table(vnr->sg, dev->sg_elems);
+
+	return vnr;
+}
+
+/* Convert a 512-byte sector number into a block number of size 1 << ns->lba_shift. */
+static inline u64 virtnvme_block_nr(struct virtio_nvme_ns *ns, sector_t sector)
+{
+	const int sectors_shift = ns->lba_shift - 9;
+
+	return sector >> sectors_shift;
+}
+
+/*
+ * Queue one request on @vq.
+ *
+ * The descriptor chain is: the command taken from req->cmd (device-readable),
+ * optionally @data_sg (device-readable when rq_data_dir() is non-zero,
+ * device-writable otherwise), then the response buffer (device-writable).
+ * @vnr is the token handed back by virtqueue_get_buf().
+ *
+ * Returns the result of virtqueue_add_sgs().
+ */
+static int virtnvme_add_req(struct virtio_nvme_ns *ns, struct virtqueue *vq,
+		struct virtnvme_req *vnr,
+		struct scatterlist *data_sg,
+		bool have_data)
+{
+	struct scatterlist cmd_sg, resp_sg;
+	struct scatterlist *sgs[5];
+	unsigned int n_out = 0;
+	unsigned int n_in = 0;
+
+	sg_init_one(&cmd_sg, vnr->req->cmd, sizeof(struct nvme_command));
+	sgs[n_out] = &cmd_sg;
+	n_out++;
+
+	if (have_data) {
+		if (!rq_data_dir(vnr->req)) {
+			sgs[n_out + n_in] = data_sg;
+			n_in++;
+		} else {
+			sgs[n_out] = data_sg;
+			n_out++;
+		}
+	}
+
+	sg_init_one(&resp_sg, &vnr->resp, sizeof(struct virtio_nvme_resp));
+	sgs[n_out + n_in] = &resp_sg;
+	n_in++;
+
+	return virtqueue_add_sgs(vq, sgs, n_out, n_in, vnr, GFP_ATOMIC);
+}
+
+/*
+ * Build the read/write command for @vnr->req inside @vnr->cmd and point
+ * req->cmd / req->cmd_len at it. The opcode follows rq_data_dir(), the
+ * command id is the blk-mq tag, and the start block and length are
+ * expressed in units of 1 << ns->lba_shift bytes (length is zero-based).
+ * Always returns 0.
+ */
+static int virtnvme_setup_io(struct virtnvme_req *vnr, struct virtio_nvme_ns *ns)
+{
+	struct request *req = vnr->req;
+	struct nvme_command *cmnd = &vnr->cmd;
+	u16 control = 0;
+	u32 dsmgmt = 0;
+
+#if 0 /* TODO */
+	if (req->cmd_flags & REQ_FUA)
+		control |= NVME_RW_FUA;
+	if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
+		control |= NVME_RW_LR;
+
+	if (req->cmd_flags & REQ_RAHEAD)
+		dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
+#endif
+
+	memset(cmnd, 0, sizeof(*cmnd));
+	req->cmd = (unsigned char *)cmnd;
+	req->cmd_len = sizeof(struct nvme_command);
+
+	if (rq_data_dir(req))
+		cmnd->rw.opcode = nvme_cmd_write;
+	else
+		cmnd->rw.opcode = nvme_cmd_read;
+
+	cmnd->rw.command_id = req->tag;
+	cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
+	cmnd->rw.slba = cpu_to_le64(virtnvme_block_nr(ns, blk_rq_pos(req)));
+	cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+	cmnd->rw.control = cpu_to_le16(control);
+	cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
+
+	return 0;
+}
+
+/*
+ * blk-mq .queue_rq handler, shared by the admin and the I/O tag sets.
+ *
+ * Builds a read/write command for ordinary requests, maps the data into the
+ * per-request sg table and adds the request to the virtqueue stored in
+ * hctx->driver_data.
+ *
+ * Returns BLK_MQ_RQ_QUEUE_OK when queued, BLK_MQ_RQ_QUEUE_BUSY when the ring
+ * reported -ENOMEM or -ENOSPC, and BLK_MQ_RQ_QUEUE_ERROR for any other
+ * failure.
+ */
+static int virtnvme_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
+{
+ struct virtio_nvme_ns *ns = hctx->queue->queuedata;
+ struct virtio_nvme_queue *nvmeq = hctx->driver_data;
+ struct request *req = bd->rq;
+ struct virtnvme_req *vnr = blk_mq_rq_to_pdu(req);
+ unsigned long flags;
+ unsigned int num;
+ int err;
+ bool notify = false;
+
+ vnr->req = req;
+
+ /*
+  * Only plain read/write builds a command here. NOTE(review): the other
+  * branches leave req->cmd untouched, yet virtnvme_add_req() sends
+  * req->cmd, so they presumably rely on the submitter having set it -
+  * confirm.
+  */
+ if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+ ; /* TODO: nvme_submit_priv(nvmeq, req, iod) */
+ else if (req->cmd_flags & REQ_DISCARD)
+ ; /* TODO: nvme_submit_discard(nvmeq, ns, req, iod) */
+ else if (req->cmd_flags & REQ_FLUSH)
+ ; /* TODO: nvme_submit_flush(nvmeq, ns, req->tag) */
+ else
+ virtnvme_setup_io(vnr, ns);
+
+ blk_mq_start_request(req);
+
+ /* num is passed below as the have_data flag: non-zero means data segments were mapped. */
+ num = blk_rq_map_sg(hctx->queue, vnr->req, vnr->sg);
+
+ spin_lock_irqsave(&nvmeq->lock, flags);
+ err = virtnvme_add_req(ns, nvmeq->vq, vnr, vnr->sg, num);
+ if (err) {
+ /* Kick what is already queued, then stop this hardware queue. */
+ virtqueue_kick(nvmeq->vq);
+ blk_mq_stop_hw_queue(hctx);
+ spin_unlock_irqrestore(&nvmeq->lock, flags);
+ if (err == -ENOMEM || err == -ENOSPC)
+ return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_MQ_RQ_QUEUE_ERROR;
+ }
+
+ /* Notify only for the last request of a batch, and only if kick_prepare asks for it. */
+ if (bd->last && virtqueue_kick_prepare(nvmeq->vq))
+ notify = true;
+ spin_unlock_irqrestore(&nvmeq->lock, flags);
+
+ /* The notification itself is issued after the queue lock is dropped. */
+ if (notify)
+ virtqueue_notify(nvmeq->vq);
+ return BLK_MQ_RQ_QUEUE_OK;
+}
+
+/*
+ * blk-mq .complete handler: finish @req according to the device response.
+ *
+ * blk_mq_end_request() expects 0 or a negative errno, so a non-zero status
+ * in the response is reported as -EIO instead of being passed through as a
+ * positive number.
+ * NOTE(review): assumes a status of 0 means success - confirm against the
+ * device side.
+ */
+static inline void virtnvme_request_done(struct request *req)
+{
+	struct virtnvme_req *vnr = blk_mq_rq_to_pdu(req);
+	int error = vnr->resp.status ? -EIO : 0;
+
+#if 0 /* TODO */
+	if (req->cmd_type == REQ_TYPE_BLOCK_PC) {
+		req->resid_len = virtio32_to_cpu(dev->vdev, vbr->in_hdr.residual);
+		req->sense_len = virtio32_to_cpu(dev->vdev, vbr->in_hdr.sense_len);
+		req->errors = virtio32_to_cpu(dev->vdev, vbr->in_hdr.errors);
+	} else if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
+		req->errors = (error != 0);
+	}
+#endif
+
+	blk_mq_end_request(req, error);
+}
+
+/*
+ * blk-mq .init_request handler: initialise the scatterlist table embedded
+ * in the request's driver data. @data is the virtio_nvme_dev. Returns 0.
+ */
+static int virtnvme_init_request(void *data, struct request *rq,
+		unsigned int hctx_idx, unsigned int request_idx,
+		unsigned int numa_node)
+{
+	struct virtnvme_req *vnr = blk_mq_rq_to_pdu(rq);
+	struct virtio_nvme_dev *dev = data;
+
+	sg_init_table(vnr->sg, dev->sg_elems);
+
+	return 0;
+}
+
+/*
+ * blk-mq .init_hctx handler for the admin tag set: bind the hardware
+ * context to queue 0 of the device passed in @data. Returns 0.
+ */
+static int virtnvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
+		unsigned int hctx_idx)
+{
+	struct virtio_nvme_dev *dev = data;
+
+	hctx->driver_data = &dev->vqs[0];
+
+	return 0;
+}
+
+/*
+ * blk-mq .init_hctx handler for the I/O tag set: hardware context
+ * @hctx_idx is bound to queue @hctx_idx + 1, because queue 0 is the admin
+ * queue. Returns 0.
+ */
+static int virtnvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
+		unsigned int hctx_idx)
+{
+	struct virtio_nvme_dev *dev = data;
+
+	hctx->driver_data = &dev->vqs[hctx_idx+1];
+
+	return 0;
+}
+
+/* blk-mq operations for the admin queue; differs from the I/O table only in .init_hctx. */
+static struct blk_mq_ops virtio_nvme_mq_admin_ops = {
+ .queue_rq = virtnvme_queue_rq,
+ .map_queue = blk_mq_map_queue,
+ .init_hctx = virtnvme_admin_init_hctx,
+ .complete = virtnvme_request_done,
+ .init_request = virtnvme_init_request,
+};
+
+/* blk-mq operations for the namespace (I/O) queues. */
+static struct blk_mq_ops virtio_nvme_mq_ops = {
+ .queue_rq = virtnvme_queue_rq,
+ .map_queue = blk_mq_map_queue,
+ .init_hctx = virtnvme_init_hctx,
+ .complete = virtnvme_request_done,
+ .init_request = virtnvme_init_request,
+};
+
+/*
+ * Block device open: take a reference on the owning device.
+ *
+ * virtnvme_free_namespace() clears disk->private_data under dev_list_lock
+ * once the device's last reference is gone, so the pointer is read under
+ * the same lock and the reference is only taken while the count is still
+ * non-zero.
+ *
+ * Returns 0 on success or -ENXIO when the namespace is being torn down.
+ */
+static int virtnvme_open(struct block_device *bdev, fmode_t mode)
+{
+	struct virtio_nvme_ns *ns;
+	int ret = 0;
+
+	spin_lock(&dev_list_lock);
+	ns = bdev->bd_disk->private_data;
+	if (!ns || !kref_get_unless_zero(&ns->dev->kref))
+		ret = -ENXIO;
+	spin_unlock(&dev_list_lock);
+
+	return ret;
+}
+
+/* Allocator for the per-device instance numbers used in disk names. */
+static DEFINE_IDA(nvme_instance_ida);
+
+/*
+ * Allocate an instance number and store it in dev->instance.
+ *
+ * The pre-allocate / allocate sequence is retried for as long as
+ * ida_get_new() asks for it with -EAGAIN. Returns 0 on success and -ENODEV
+ * on any failure.
+ */
+static int nvme_set_instance(struct virtio_nvme_dev *dev)
+{
+	int id;
+	int ret;
+
+	for (;;) {
+		if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
+			return -ENODEV;
+
+		spin_lock(&dev_list_lock);
+		ret = ida_get_new(&nvme_instance_ida, &id);
+		spin_unlock(&dev_list_lock);
+
+		if (ret != -EAGAIN)
+			break;
+	}
+
+	if (ret)
+		return -ENODEV;
+
+	dev->instance = id;
+	return 0;
+}
+
+/* Give dev->instance back to the instance allocator, under dev_list_lock. */
+static void virtnvme_release_instance(struct virtio_nvme_dev *dev)
+{
+	const int id = dev->instance;
+
+	spin_lock(&dev_list_lock);
+	ida_remove(&nvme_instance_ida, id);
+	spin_unlock(&dev_list_lock);
+}
+
+/*
+ * kref release callback: runs when the last reference to the device is
+ * dropped. Frees the namespaces, returns the instance number, frees the
+ * I/O tag set if it still has tags, drops the admin queue reference if
+ * there is one, and finally frees the device structure.
+ */
+static void virtnvme_free_dev(struct kref *kref)
+{
+	struct virtio_nvme_dev *dev;
+
+	dev = container_of(kref, struct virtio_nvme_dev, kref);
+
+	virtnvme_free_namespaces(dev);
+	virtnvme_release_instance(dev);
+
+	if (dev->tagset.tags != NULL)
+		blk_mq_free_tag_set(&dev->tagset);
+
+	if (dev->admin_q != NULL)
+		blk_put_queue(dev->admin_q);
+
+	kfree(dev);
+}
+
+/* Block device release: drop the device reference taken in virtnvme_open(). */
+static void virtnvme_release(struct gendisk *disk, fmode_t mode)
+{
+	struct virtio_nvme_ns *ns = disk->private_data;
+
+	kref_put(&ns->dev->kref, virtnvme_free_dev);
+}
+
+/* Block device operations; open/release only manage the device reference count. */
+static const struct block_device_operations virtnvme_fops = {
+ .owner = THIS_MODULE,
+ .open = virtnvme_open,
+ .release = virtnvme_release,
+};
+
+/*
+ * Allocate and set up namespace @nsid from its Identify data @id.
+ *
+ * Creates the request queue on the device's I/O tag set and a gendisk named
+ * "vnvme<instance>n<nsid>". The logical block size and capacity come from
+ * the LBA format that @id->flbas selects. @id is only read, so the caller
+ * keeps ownership of it. The disk is not added here.
+ *
+ * Returns the new namespace, or NULL on any allocation failure.
+ */
+static struct virtio_nvme_ns *virtnvme_alloc_ns(struct virtio_nvme_dev *dev, unsigned nsid,
+		struct nvme_id_ns *id)
+{
+	struct virtio_nvme_ns *ns;
+	struct gendisk *disk;
+	int lbaf;
+
+	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
+	if (!ns)
+		return NULL;
+
+	/* blk_mq_init_queue() reports failure with ERR_PTR(), never NULL. */
+	ns->queue = blk_mq_init_queue(&dev->tagset);
+	if (IS_ERR(ns->queue))
+		goto out_free_ns;
+	ns->queue->queue_flags = QUEUE_FLAG_DEFAULT;
+	queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue);
+	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
+	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, ns->queue);
+	ns->dev = dev;
+	ns->queue->queuedata = ns;
+
+	disk = alloc_disk(0);
+	if (!disk)
+		goto out_free_queue;
+
+	ns->ns_id = nsid;
+	ns->disk = disk;
+	lbaf = id->flbas & 0xf;
+	/*
+	 * NOTE(review): a data size below 9 would make the shifts by
+	 * (lba_shift - 9) negative - confirm the device never reports one.
+	 */
+	ns->lba_shift = id->lbaf[lbaf].ds;
+	ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
+	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
+	if (dev->max_hw_sectors)
+		blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
+
+	disk->major = virtnvme_major;
+	disk->first_minor = 0;
+	disk->fops = &virtnvme_fops;
+	disk->private_data = ns;
+	disk->queue = ns->queue;
+	disk->flags = GENHD_FL_EXT_DEVT;
+	/* Bounded write: never overrun disk_name. */
+	snprintf(disk->disk_name, sizeof(disk->disk_name), "vnvme%dn%d",
+		 dev->instance, nsid);
+	set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
+
+	return ns;
+
+out_free_queue:
+	blk_cleanup_queue(ns->queue);
+out_free_ns:
+	kfree(ns);
+	return NULL;
+}
+
+/*
+ * Size of the per-request driver data: struct virtnvme_req followed by
+ * dev->sg_elems scatterlist entries.
+ */
+static unsigned int virtnvme_cmd_size(struct virtio_nvme_dev *dev)
+{
+	return sizeof(struct virtnvme_req) +
+		sizeof(struct scatterlist) * dev->sg_elems;
+}
+
+/*
+ * Identify the controller, allocate the I/O tag set and create one disk
+ * for every namespace that reports a non-zero capacity.
+ *
+ * A namespace whose Identify command or allocation fails is skipped and
+ * does not fail the whole device. The Identify buffers are freed once the
+ * values needed from them have been read.
+ *
+ * Returns 0 on success, -EIO when Identify Controller fails, or the error
+ * from blk_mq_alloc_tag_set(). Nothing is left allocated on failure.
+ */
+static int virtnvme_dev_add(struct virtio_nvme_dev *dev)
+{
+	int res;
+	unsigned nn, i;
+	struct virtio_nvme_ns *ns;
+	struct nvme_id_ctrl *ctrl;
+	struct nvme_id_ns *id_ns;
+
+	res = virtnvme_identify_ctrl(dev, &ctrl);
+	if (res) {
+		printk("Identify Controller failed (%d)\n", res);
+		return -EIO;
+	}
+
+	/* Only the namespace count is needed from the controller data. */
+	nn = le32_to_cpup(&ctrl->nn);
+	kfree(ctrl);
+
+	memset(&dev->tagset, 0, sizeof(dev->tagset));
+	dev->tagset.ops = &virtio_nvme_mq_ops;
+	/* Default queue sizing is to fill the ring. */
+	if (!virtnvme_queue_depth)
+		virtnvme_queue_depth = dev->vqs[1].vq->num_free;
+	dev->tagset.queue_depth = virtnvme_queue_depth;
+	dev->tagset.numa_node = NUMA_NO_NODE;
+	dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
+	dev->tagset.cmd_size = virtnvme_cmd_size(dev);
+	dev->tagset.driver_data = dev;
+	dev->tagset.nr_hw_queues = dev->num_vqs - 1;
+
+	res = blk_mq_alloc_tag_set(&dev->tagset);
+	if (res)
+		return res;
+
+	for (i = 1; i <= nn; i++) {
+		/* On failure the helper has already freed its buffer. */
+		if (virtnvme_identify_ns(dev, i, &id_ns))
+			continue;
+
+		if (id_ns->ncap != 0) {
+			ns = virtnvme_alloc_ns(dev, i, id_ns);
+			if (ns)
+				list_add_tail(&ns->list, &dev->namespaces);
+		}
+
+		/* virtnvme_alloc_ns() only reads the Identify data. */
+		kfree(id_ns);
+	}
+
+	list_for_each_entry(ns, &dev->namespaces, list)
+		add_disk(ns->disk);
+
+	return 0;
+}
+
+/*
+ * Tear down the admin queue and its tag set, unless there is no admin
+ * queue or it is already marked dying.
+ */
+static void virtnvme_dev_remove_admin(struct virtio_nvme_dev *dev)
+{
+	if (!dev->admin_q)
+		return;
+	if (blk_queue_dying(dev->admin_q))
+		return;
+
+	blk_cleanup_queue(dev->admin_q);
+	blk_mq_free_tag_set(&dev->admin_tagset);
+}
+
+/*
+ * Create the admin tag set and request queue, or unfreeze the admin queue
+ * if it already exists.
+ *
+ * An extra queue reference is taken with blk_get_queue(); it is dropped in
+ * virtnvme_free_dev(). dev->admin_q is only published once the queue was
+ * created, so it is NULL (never an ERR_PTR) after a failure.
+ *
+ * Returns 0 on success, -ENOMEM if the tag set or queue cannot be
+ * allocated, or -ENODEV if the queue reference cannot be taken.
+ */
+static int virtnvme_alloc_admin_tags(struct virtio_nvme_dev *dev)
+{
+	struct request_queue *q;
+
+	if (dev->admin_q) {
+		blk_mq_unfreeze_queue(dev->admin_q);
+		return 0;
+	}
+
+	dev->admin_tagset.ops = &virtio_nvme_mq_admin_ops;
+	dev->admin_tagset.nr_hw_queues = 1;
+	dev->admin_tagset.queue_depth = NVME_AQ_DEPTH;
+	dev->admin_tagset.reserved_tags = 1;
+	dev->admin_tagset.timeout = ADMIN_TIMEOUT;
+	dev->admin_tagset.numa_node = NUMA_NO_NODE;
+	dev->admin_tagset.cmd_size = virtnvme_cmd_size(dev);
+	dev->admin_tagset.driver_data = dev;
+
+	if (blk_mq_alloc_tag_set(&dev->admin_tagset))
+		return -ENOMEM;
+
+	q = blk_mq_init_queue(&dev->admin_tagset);
+	if (IS_ERR(q)) {
+		/* Do not leave an error pointer behind in dev->admin_q. */
+		blk_mq_free_tag_set(&dev->admin_tagset);
+		return -ENOMEM;
+	}
+	dev->admin_q = q;
+
+	if (!blk_get_queue(dev->admin_q)) {
+		virtnvme_dev_remove_admin(dev);
+		dev->admin_q = NULL;
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/* Defined later in this file; needed by the probe error path. */
+static void virtnvme_dev_remove(struct virtio_nvme_dev *dev);
+
+/*
+ * Probe a virtio NVMe device.
+ *
+ * Allocates the device structure, reads the segment limit, enables the
+ * controller through the config space, waits for it to become ready, sets
+ * up the virtqueues and the admin queue, and finally identifies the
+ * controller and its namespaces.
+ *
+ * Returns 0 on success or a negative errno. Every failure path releases
+ * exactly what was set up before it.
+ */
+static int virtnvme_probe(struct virtio_device *vdev)
+{
+	struct virtio_nvme_dev *dev;
+	u64 cap;
+	u32 ctrl_config;
+	u32 sg_elems;
+	int err;
+
+	if (!vdev->config->get) {
+		printk("%s failure: config access disabled\n", __func__);
+		return -EINVAL;
+	}
+
+	vdev->priv = dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&dev->namespaces);
+	INIT_LIST_HEAD(&dev->node);
+	kref_init(&dev->kref);
+
+	init_waitqueue_head(&dev->queue_wait);
+	dev->vdev = vdev;
+
+	err = nvme_set_instance(dev);
+	if (err)
+		goto out_free_dev;
+
+	/* We need to know how many segments before we allocate. */
+	err = virtio_cread_feature(vdev, VIRTIO_NVME_F_SEG_MAX,
+				   struct virtio_nvme_config, seg_max,
+				   &sg_elems);
+	/* We need at least one SG element, whatever they say. */
+	if (err || !sg_elems)
+		sg_elems = 1;
+
+	/* We need two extra sg elements at head for command and response */
+	sg_elems += 2;
+	dev->sg_elems = sg_elems;
+
+	/*
+	 * 1. The host determines the controller capabilities
+	 */
+	virtio_cread(vdev, struct virtio_nvme_config, cap, &cap);
+
+	/*
+	 * 2. The host configures controller settings. Specific settings include:
+	 *	a. The arbitration mechanism should be selected in CC.AMS.
+	 *	b. The memory page size should be initialized in CC.MPS.
+	 *	c. The I/O Command Set that is to be used should be selected in CC.CSS.
+	 * 3. The controller should be enabled by setting CC.EN to 1
+	 */
+	ctrl_config = NVME_CC_ENABLE | NVME_CC_CSS_NVM;
+	ctrl_config |= (PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
+	ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
+	ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
+	virtio_cwrite(vdev, struct virtio_nvme_config, ctrl_config, &ctrl_config);
+
+	/*
+	 * 4. The host should wait for the controller to indicate it is ready to
+	 *    process commands. The controller is ready to process commands when
+	 *    CSTS.RDY is set to 1.
+	 */
+	err = virtnvme_wait_ready(dev, cap);
+	if (err)
+		goto out_release_instance;
+
+	/* Qemu starts controller and creates VQs */
+	err = virtnvme_init_vq(dev);
+	if (err)
+		goto out_release_instance;
+
+	err = virtnvme_alloc_admin_tags(dev);
+	if (err)
+		goto out_free_vq;
+
+	spin_lock(&dev_list_lock);
+	list_add(&dev->node, &dev_list);
+	spin_unlock(&dev_list_lock);
+
+	/*
+	 * The Identify commands below go through the virtqueues during
+	 * probe, so DRIVER_OK has to be set before they are submitted.
+	 */
+	virtio_device_ready(vdev);
+
+	/*
+	 * 6. The host should determine the configuration of the controller by
+	 *    issuing the Identify command, specifying the Controller data
+	 *    structure. The host should then determine the configuration of
+	 *    each namespace by issuing the Identify command for each namespace,
+	 *    specifying the Namespace data structure
+	 */
+	err = virtnvme_dev_add(dev);
+	if (err)
+		goto out_remove_dev;
+
+	return 0;
+
+out_remove_dev:
+	/* Undo everything, mirroring virtnvme_remove(). */
+	spin_lock(&dev_list_lock);
+	list_del_init(&dev->node);
+	spin_unlock(&dev_list_lock);
+
+	vdev->config->reset(vdev);
+	vdev->config->del_vqs(vdev);
+
+	virtnvme_dev_remove(dev);
+	virtnvme_dev_remove_admin(dev);
+	kfree(dev->vqs);
+	dev->vqs = NULL;
+	vdev->priv = NULL;
+
+	/*
+	 * Dropping the initial reference runs virtnvme_free_dev(), which
+	 * releases the instance number, the I/O tag set (if any), the admin
+	 * queue reference and the device structure itself.
+	 */
+	kref_put(&dev->kref, virtnvme_free_dev);
+	return err;
+
+out_free_vq:
+	vdev->config->del_vqs(vdev);
+	kfree(dev->vqs);
+
+out_release_instance:
+	virtnvme_release_instance(dev);
+
+out_free_dev:
+	vdev->priv = NULL;
+	kfree(dev);
+	return err;
+}
+
+/*
+ * Take one namespace out of service: mark its queue dying, unregister and
+ * delete the disk if it was added, then clean up the request queue. The
+ * namespace structure itself is freed later by virtnvme_free_namespace().
+ */
+static void virtnvme_ns_remove(struct virtio_nvme_ns *ns)
+{
+ /* Remember whether this call is the one that marks the queue dying. */
+ bool kill = !blk_queue_dying(ns->queue);
+
+ if (kill)
+ blk_set_queue_dying(ns->queue);
+ /* GENHD_FL_UP is only expected once add_disk() has run - TODO confirm. */
+ if (ns->disk->flags & GENHD_FL_UP) {
+ if (blk_get_integrity(ns->disk))
+ blk_integrity_unregister(ns->disk);
+ del_gendisk(ns->disk);
+ }
+ /* Queue cleanup is skipped only if the queue was already dying on entry and still is. */
+ if (kill || !blk_queue_dying(ns->queue)) {
+ blk_mq_abort_requeue_list(ns->queue);
+ blk_cleanup_queue(ns->queue);
+ }
+}
+
+/* Take every namespace of @dev out of service; the list itself is left intact. */
+static void virtnvme_dev_remove(struct virtio_nvme_dev *dev)
+{
+	struct virtio_nvme_ns *cur;
+
+	list_for_each_entry(cur, &dev->namespaces, list) {
+		virtnvme_ns_remove(cur);
+	}
+}
+
+/*
+ * Unlink @ns from its device, clear the disk's back-pointer under
+ * dev_list_lock, drop the disk reference and free the namespace.
+ */
+static void virtnvme_free_namespace(struct virtio_nvme_ns *ns)
+{
+	struct gendisk *disk = ns->disk;
+
+	list_del(&ns->list);
+
+	spin_lock(&dev_list_lock);
+	disk->private_data = NULL;
+	spin_unlock(&dev_list_lock);
+
+	put_disk(disk);
+	kfree(ns);
+}
+
+/* Free every namespace of @dev; the safe iterator is needed because each entry is unlinked. */
+static void virtnvme_free_namespaces(struct virtio_nvme_dev *dev)
+{
+	struct virtio_nvme_ns *cur, *tmp;
+
+	list_for_each_entry_safe(cur, tmp, &dev->namespaces, list) {
+		virtnvme_free_namespace(cur);
+	}
+}
+
+/*
+ * virtio .remove handler: unlink the device, reset it, delete the
+ * virtqueues, tear down the namespaces and the admin queue, and drop the
+ * initial device reference.
+ *
+ * NOTE(review): the device is reset before the request queues are cleaned
+ * up, so requests still in flight at that point can no longer complete -
+ * confirm that queue cleanup cannot block on them.
+ */
+static void virtnvme_remove(struct virtio_device *vdev)
+{
+	struct virtio_nvme_dev *dev = vdev->priv;
+
+	spin_lock(&dev_list_lock);
+	list_del_init(&dev->node);
+	spin_unlock(&dev_list_lock);
+
+	/* Stop all the virtqueues. */
+	vdev->config->reset(vdev);
+
+	vdev->config->del_vqs(vdev);
+
+	virtnvme_dev_remove(dev);
+	virtnvme_dev_remove_admin(dev);
+
+	/* Only free the I/O tag set if it was actually allocated. */
+	if (dev->tagset.tags)
+		blk_mq_free_tag_set(&dev->tagset);
+	kfree(dev->vqs);
+	/* dev may outlive this call while a disk is still open. */
+	dev->vqs = NULL;
+
+	kref_put(&dev->kref, virtnvme_free_dev);
+}
+
+/* Feature bits this driver understands; both are optional (see probe and init_vq). */
+static unsigned int features[] = {
+ VIRTIO_NVME_F_SEG_MAX, VIRTIO_NVME_F_MQ,
+};
+
+/* virtio driver registration record. */
+static struct virtio_driver virtio_nvme_driver = {
+ .feature_table = features,
+ .feature_table_size = ARRAY_SIZE(features),
+ .driver.name = KBUILD_MODNAME,
+ .driver.owner = THIS_MODULE,
+ .id_table = id_table,
+ .probe = virtnvme_probe,
+ .remove = virtnvme_remove,
+};
+
+/*
+ * Module init: obtain a dynamic block major, then register the virtio
+ * driver. The major is given back if driver registration fails.
+ * Returns 0 or a negative errno.
+ */
+static int __init virtnvme_init(void)
+{
+	int ret;
+
+	virtnvme_major = register_blkdev(0, "virtnvme");
+	if (virtnvme_major < 0)
+		return virtnvme_major;
+
+	ret = register_virtio_driver(&virtio_nvme_driver);
+	if (!ret)
+		return 0;
+
+	unregister_blkdev(virtnvme_major, "virtnvme");
+	return ret;
+}
+
+/* Module exit: undo virtnvme_init() in reverse order. */
+static void __exit virtnvme_exit(void)
+{
+	/* Driver first, so no device is bound when the major goes away. */
+	unregister_virtio_driver(&virtio_nvme_driver);
+
+	unregister_blkdev(virtnvme_major, "virtnvme");
+}
+module_init(virtnvme_init);
+module_exit(virtnvme_exit);
+
+MODULE_DEVICE_TABLE(virtio, id_table);
+MODULE_DESCRIPTION("Virtio NVMe driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ming Lin <ming.l@ssi.samsung.com>");
diff --git a/include/linux/virtio_nvme.h b/include/linux/virtio_nvme.h
new file mode 100644
index 0000000..c8db9a2
--- /dev/null
+++ b/include/linux/virtio_nvme.h
@@ -0,0 +1,53 @@
+#ifndef _LINUX_VIRTIO_NVME_H
+#define _LINUX_VIRTIO_NVME_H
+
+#include <uapi/linux/virtio_nvme.h>
+#include <linux/blk-mq.h>
+
+#define VQ_NAME_LEN 16
+
+struct virtio_nvme_dev;
+/* One virtqueue of the device: index 0 is the admin queue, the rest carry I/O. */
+struct virtio_nvme_queue {
+ struct virtio_nvme_dev *dev; /* owning device */
+ struct virtqueue *vq; /* underlying virtqueue, set after find_vqs() */
+ spinlock_t lock; /* taken around virtqueue add/get operations */
+ char name[VQ_NAME_LEN]; /* "req.<index>" for I/O queues */
+} ____cacheline_aligned_in_smp;
+
+/* Per-device state, allocated in probe and freed when kref drops to zero. */
+struct virtio_nvme_dev {
+ struct virtio_device *vdev;
+ wait_queue_head_t queue_wait; /* woken after I/O completions */
+ struct request_queue *admin_q; /* queue used for admin commands such as Identify */
+ struct blk_mq_tag_set admin_tagset;
+ struct blk_mq_tag_set tagset; /* shared by all namespace queues */
+
+ /* num of vqs */
+ int num_vqs; /* includes the admin queue */
+ struct virtio_nvme_queue *vqs; /* array of num_vqs entries */
+ struct list_head node; /* link in the driver-global dev_list */
+ int instance; /* number used in the disk name */
+ u32 ctrl_config; /* not written by the driver code in this patch */
+ struct list_head namespaces; /* list of struct virtio_nvme_ns */
+ struct kref kref; /* one reference from probe plus one per open disk */
+ char name[12]; /* name/serial/model/firmware_rev: not written in this patch */
+ char serial[20];
+ char model[40];
+ char firmware_rev[8];
+ u32 max_hw_sectors; /* applied to namespace queues when non-zero */
+
+ unsigned int sg_elems; /* scatterlist entries reserved per request */
+};
+
+/* One NVMe namespace, exposed as a gendisk. */
+struct virtio_nvme_ns {
+ struct list_head list; /* link in virtio_nvme_dev.namespaces */
+
+ struct virtio_nvme_dev *dev; /* owning device */
+ struct request_queue *queue;
+ struct gendisk *disk;
+
+ unsigned ns_id; /* namespace ID placed in each command */
+ int lba_shift; /* log2 of the logical block size in bytes */
+ int ms; /* taken from the selected LBA format's ms field */
+};
+
+#endif
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index 77925f5..d59d323 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -41,5 +41,6 @@
#define VIRTIO_ID_CAIF 12 /* Virtio caif */
#define VIRTIO_ID_GPU 16 /* virtio GPU */
#define VIRTIO_ID_INPUT 18 /* virtio input */
+#define VIRTIO_ID_NVME 19 /* TBD: virtio NVMe, need Redhat's help to get this id */
#endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/include/uapi/linux/virtio_nvme.h b/include/uapi/linux/virtio_nvme.h
new file mode 100644
index 0000000..33f6077
--- /dev/null
+++ b/include/uapi/linux/virtio_nvme.h
@@ -0,0 +1,30 @@
+#ifndef _UAPI_LINUX_VIRTIO_NVME_H
+#define _UAPI_LINUX_VIRTIO_NVME_H
+
+#include <linux/types.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_types.h>
+
+/* Feature bits */
+#define VIRTIO_NVME_F_SEG_MAX 1 /* Indicates maximum # of segments */
+#define VIRTIO_NVME_F_MQ 2 /* support more than one vq */
+
+/* Device configuration space layout. */
+struct virtio_nvme_config {
+ __u64 cap; /* read by the driver to derive the ready timeout */
+ __u32 ctrl_config; /* written by the driver to enable the controller */
+ __u32 csts; /* polled by the driver for the ready bit */
+
+ /* The maximum number of segments (if VIRTIO_NVME_F_SEG_MAX) */
+ __u32 seg_max;
+ /* number of vqs, only available when VIRTIO_NVME_F_MQ is set */
+ __u32 num_queues;
+} __attribute__((packed));
+
+/* Per-request response buffer; the driver adds it to each chain as device-writable. */
+struct virtio_nvme_resp {
+ __u32 result; /* not read by the driver code in this patch */
+ __u16 cid; /* not read by the driver code in this patch */
+ __u16 status; /* read on completion to decide the request result */
+};
+
+#endif
--
1.9.1
next prev parent reply other threads:[~2015-09-10 5:48 UTC|newest]
Thread overview: 50+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-09-10 5:48 [RFC PATCH 0/2] virtio nvme Ming Lin
2015-09-10 5:48 ` Ming Lin
2015-09-10 5:48 ` Ming Lin [this message]
2015-09-10 5:48 ` [RFC PATCH 1/2] virtio_nvme(kernel): virtual NVMe driver using virtio Ming Lin
2015-09-10 5:48 ` [RFC PATCH 2/2] virtio-nvme(qemu): NVMe device " Ming Lin
2015-09-10 5:48 ` Ming Lin
2015-09-10 14:02 ` [RFC PATCH 0/2] virtio nvme Keith Busch
2015-09-10 14:02 ` Keith Busch
2015-09-10 17:02 ` Ming Lin
2015-09-10 17:02 ` Ming Lin
2015-09-11 4:55 ` Ming Lin
2015-09-11 4:55 ` Ming Lin
2015-09-11 17:46 ` J Freyensee
2015-09-11 17:46 ` J Freyensee
2015-09-10 14:38 ` Stefan Hajnoczi
2015-09-10 14:38 ` Stefan Hajnoczi
2015-09-10 17:28 ` Ming Lin
2015-09-10 17:28 ` Ming Lin
2015-09-11 7:48 ` Stefan Hajnoczi
2015-09-11 7:48 ` Stefan Hajnoczi
2015-09-11 17:21 ` Ming Lin
2015-09-11 17:21 ` Ming Lin
2015-09-11 17:53 ` Stefan Hajnoczi
2015-09-11 17:53 ` Stefan Hajnoczi
2015-09-11 18:54 ` Ming Lin
2015-09-11 18:54 ` Ming Lin
2015-09-17 6:10 ` Nicholas A. Bellinger
2015-09-17 6:10 ` Nicholas A. Bellinger
2015-09-17 18:18 ` Ming Lin
2015-09-17 18:18 ` Ming Lin
2015-09-17 21:43 ` Nicholas A. Bellinger
2015-09-17 21:43 ` Nicholas A. Bellinger
2015-09-17 23:31 ` Ming Lin
2015-09-18 0:55 ` Nicholas A. Bellinger
2015-09-18 18:12 ` Ming Lin
2015-09-18 18:12 ` Ming Lin
2015-09-18 21:09 ` Nicholas A. Bellinger
2015-09-18 21:09 ` Nicholas A. Bellinger
2015-09-18 23:05 ` Ming Lin
2015-09-18 23:05 ` Ming Lin
2015-09-23 22:58 ` Ming Lin
2015-09-23 22:58 ` Ming Lin
2015-09-27 5:01 ` Nicholas A. Bellinger
2015-09-27 5:01 ` Nicholas A. Bellinger
2015-09-27 6:49 ` Ming Lin
2015-09-27 6:49 ` Ming Lin
2015-09-28 5:58 ` Hannes Reinecke
2015-09-28 5:58 ` Hannes Reinecke
2015-09-18 0:55 ` Nicholas A. Bellinger
2015-09-17 23:31 ` Ming Lin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1441864112-12765-2-git-send-email-mlin@kernel.org \
--to=mlin@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.