All of lore.kernel.org
 help / color / mirror / Atom feed
From: mlin@kernel.org (Ming Lin)
Subject: [RFC PATCH 9/9] nvme-vhost: add nvme queue handlers
Date: Thu, 19 Nov 2015 16:21:08 -0800	[thread overview]
Message-ID: <1447978868-17138-10-git-send-email-mlin@kernel.org> (raw)
In-Reply-To: <1447978868-17138-1-git-send-email-mlin@kernel.org>

From: Ming Lin <ming.l@ssi.samsung.com>

This adds the NVMe submission/completion queue handlers, which are
ported from qemu-nvme.

They hook into nvme-target, which does the real work.

Cc: Keith Busch <keith.busch at intel.com>
Signed-off-by: Ming Lin <ming.l at ssi.samsung.com>
---
 drivers/nvme/target/vhost.c | 420 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 416 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/target/vhost.c b/drivers/nvme/target/vhost.c
index 6847c86..3ce1348 100644
--- a/drivers/nvme/target/vhost.c
+++ b/drivers/nvme/target/vhost.c
@@ -6,10 +6,12 @@
 #include <linux/mutex.h>
 #include <linux/file.h>
 #include <linux/highmem.h>
+#include <linux/kthread.h>
 #include "../../vhost/vhost.h"
 #include "nvmet.h"
 
 #define NVMET_VHOST_AQ_DEPTH		256
+#define NVMET_VHOST_MAX_SEGMENTS	32
 
 enum NvmeCcShift {
 	CC_MPS_SHIFT	= 7,
@@ -52,6 +54,15 @@ struct nvmet_vhost_ctrl_eventfd {
 	int __user *vector;
 };
 
+struct nvmet_vhost_iod {	/* one command slot; an array of these is allocated per SQ */
+	struct nvmet_vhost_sq	*sq;	/* submission queue this slot belongs to */
+	struct scatterlist	sg[NVMET_VHOST_MAX_SEGMENTS];	/* data segments filled by nvmet_vhost_map_prp() */
+	struct nvme_command	cmd;	/* command copied in from guest memory */
+	struct nvme_completion	rsp;	/* completion entry copied out to guest memory */
+	struct nvmet_req	req;	/* nvmet request; req.cmd/req.rsp point at cmd/rsp above */
+	struct list_head	entry;	/* links the slot on sq->req_list or cq->req_list */
+};
+
 struct nvmet_vhost_cq {
 	struct nvmet_cq		cq;
 	struct nvmet_vhost_ctrl	*ctrl;
@@ -61,6 +72,12 @@ struct nvmet_vhost_cq {
 	u8			phase;
 	u64			dma_addr;
 	struct eventfd_ctx	*eventfd;
+
+	struct list_head	sq_list;
+	struct list_head	req_list;
+	spinlock_t		lock;
+	struct task_struct	*thread;
+	int			scheduled;
 };
 
 struct nvmet_vhost_sq {
@@ -71,6 +88,13 @@ struct nvmet_vhost_sq {
 	u32			tail;
 	u64			dma_addr;
 	u16			cqid;
+
+	struct nvmet_vhost_iod	*io_req;
+	struct list_head	req_list;
+	struct list_head	entry;
+	struct mutex            lock;
+	struct task_struct	*thread;
+	int			scheduled;
 };
 
 struct nvmet_vhost_ctrl {
@@ -191,13 +215,13 @@ static int nvmet_vhost_rw(struct vhost_dev *dev, u64 guest_pa,
 	return 0;
 }
 
-int nvmet_vhost_read(struct vhost_dev *dev, u64 guest_pa,
+static int nvmet_vhost_read(struct vhost_dev *dev, u64 guest_pa,
 		void *buf, uint32_t size)
 {
 	return nvmet_vhost_rw(dev, guest_pa, buf, size, 0);
 }
 
-int nvmet_vhost_write(struct vhost_dev *dev, u64 guest_pa,
+static int nvmet_vhost_write(struct vhost_dev *dev, u64 guest_pa,
 		void *buf, uint32_t size)
 {
 	return nvmet_vhost_rw(dev, guest_pa, buf, size, 1);
@@ -216,6 +240,289 @@ static int nvmet_vhost_check_cqid(struct nvmet_ctrl *n, u16 cqid)
 	return cqid <= n->subsys->max_qid && n->cqs[cqid] != NULL ? 0 : -1;
 }
 
+/* Advance the CQ tail by one slot; on wrap-around invert the phase value. */
+static void nvmet_vhost_inc_cq_tail(struct nvmet_vhost_cq *cq)
+{
+	if (++cq->tail < cq->cq.size)
+		return;
+
+	cq->tail = 0;
+	cq->phase = !cq->phase;
+}
+
+/* Step the SQ head forward by one slot, wrapping at the queue size. */
+static void nvmet_vhost_inc_sq_head(struct nvmet_vhost_sq *sq)
+{
+	unsigned int next = sq->head + 1;
+
+	sq->head = next % sq->sq.size;
+}
+
+/*
+ * Returns non-zero when advancing the tail by one slot would make it equal
+ * to the head, i.e. no free completion slot is left.
+ */
+static uint8_t nvmet_vhost_cq_full(struct nvmet_vhost_cq *cq)
+{
+	unsigned int next_tail = (cq->tail + 1) % cq->cq.size;
+
+	return next_tail == cq->head;
+}
+
+/* Returns 1 when head has caught up with tail, 0 otherwise. */
+static uint8_t nvmet_vhost_sq_empty(struct nvmet_vhost_sq *sq)
+{
+	if (sq->head != sq->tail)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Drain cq->req_list into the guest's completion queue.
+ *
+ * For every completed iod: stamp the completion entry with the current
+ * phase, the SQ id and the SQ head, copy it to the guest at the tail slot
+ * and give the iod back to its submission queue. Posting stops as soon as
+ * the CQ is full. cq->lock is dropped around the guest write and the
+ * sq->lock mutex, since a mutex must not be taken under a spinlock.
+ *
+ * The eventfd is signalled once if at least one entry was posted, and
+ * cq->scheduled is cleared before returning.
+ */
+static void nvmet_vhost_post_cqes(struct nvmet_vhost_cq *cq)
+{
+	struct nvmet_vhost_ctrl *n = cq->ctrl;
+	struct nvmet_vhost_iod *req;
+	int signal = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cq->lock, flags);
+	/*
+	 * Look at the list head afresh on every pass. The lock is released
+	 * inside the loop, so a "next" pointer cached by
+	 * list_for_each_safe() could end the walk before completions that
+	 * were queued in the meantime.
+	 */
+	while (!list_empty(&cq->req_list)) {
+		struct nvmet_vhost_sq *sq;
+		u64 addr;
+
+		if (nvmet_vhost_cq_full(cq))
+			break;
+
+		req = list_first_entry(&cq->req_list,
+					struct nvmet_vhost_iod, entry);
+		list_del(&req->entry);
+
+		sq = req->sq;
+		/* The completion entry is little-endian, like sq_id/sq_head. */
+		req->rsp.status |= cpu_to_le16(cq->phase);
+		req->rsp.sq_id = cpu_to_le16(sq->sq.qid);
+		req->rsp.sq_head = cpu_to_le16(sq->head);
+		addr = cq->dma_addr + cq->tail * n->cqe_size;
+		nvmet_vhost_inc_cq_tail(cq);
+		spin_unlock_irqrestore(&cq->lock, flags);
+
+		if (nvmet_vhost_write(&n->dev, addr, (void *)&req->rsp,
+				sizeof(req->rsp)))
+			pr_warn("nvmet_vhost_write fail, cqid %d\n", cq->cq.qid);
+
+		mutex_lock(&sq->lock);
+		list_add_tail(&req->entry, &sq->req_list);
+		/* An SQ that stalled for lack of free iods can run again. */
+		if (!sq->scheduled && !nvmet_vhost_sq_empty(sq)) {
+			sq->scheduled = 1;
+			wake_up_process(sq->thread);
+		}
+		mutex_unlock(&sq->lock);
+
+		signal = 1;
+
+		spin_lock_irqsave(&cq->lock, flags);
+	}
+
+	/* Notify the guest even if we stopped early because the CQ is full. */
+	if (signal)
+		eventfd_signal(cq->eventfd, 1);
+
+	cq->scheduled = 0;
+	spin_unlock_irqrestore(&cq->lock, flags);
+}
+
+/*
+ * Completion queue worker thread.
+ *
+ * Posts pending completions and then sleeps until wake_up_process() is
+ * called on it. The task state is switched to TASK_INTERRUPTIBLE before
+ * the "anything to do?" test so a wake-up arriving in between is not lost.
+ * Calling schedule() while still TASK_RUNNING only yields the CPU, which
+ * turned the previous loop into a busy poll.
+ *
+ * Returns 0 once kthread_stop() has been called.
+ */
+static int nvmet_vhost_cq_thread(void *arg)
+{
+	struct nvmet_vhost_cq *cq = arg;
+	unsigned long flags;
+	int pending;
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	while (!kthread_should_stop()) {
+		__set_current_state(TASK_RUNNING);
+		nvmet_vhost_post_cqes(cq);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		/*
+		 * Wakers update the list and cq->scheduled under cq->lock.
+		 * Entries left behind because the CQ is full do not count as
+		 * work; they are picked up once the guest moves the head.
+		 */
+		spin_lock_irqsave(&cq->lock, flags);
+		pending = cq->scheduled ||
+			(!list_empty(&cq->req_list) && !nvmet_vhost_cq_full(cq));
+		spin_unlock_irqrestore(&cq->lock, flags);
+
+		if (!pending && !kthread_should_stop())
+			schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+
+	return 0;
+}
+
+/*
+ * Append a finished iod to @cq's pending list and kick the CQ thread unless
+ * it is already marked as scheduled.
+ */
+static void nvmet_vhost_enqueue_req_completion(
+		struct nvmet_vhost_cq *cq, struct nvmet_vhost_iod *iod)
+{
+	unsigned long flags;
+
+	/* The iod must complete on the CQ its submission queue is bound to. */
+	BUG_ON(cq->cq.qid != iod->sq->cqid);
+	spin_lock_irqsave(&cq->lock, flags);
+	list_add_tail(&iod->entry, &cq->req_list);
+	if (!cq->scheduled) {
+		/* Set the flag before the wake-up so the thread cannot miss it. */
+		cq->scheduled = 1;
+		wake_up_process(cq->thread);
+	}
+	spin_unlock_irqrestore(&cq->lock, flags);
+}
+
+/*
+ * Response callback handed to nvmet_req_init(): recovers the iod that embeds
+ * @req and queues it on the completion queue selected by sq->cqid. The SQ's
+ * own qid is not used for the lookup because a submission queue is created
+ * with an explicit cqid that may differ from its qid.
+ */
+static void nvmet_vhost_queue_response(struct nvmet_req *req)
+{
+	struct nvmet_vhost_iod *iod =
+		container_of(req, struct nvmet_vhost_iod, req);
+	struct nvmet_vhost_sq *sq = iod->sq;
+	struct nvmet_vhost_ctrl *n = sq->ctrl;
+	struct nvmet_vhost_cq *cq = n->cqs[sq->cqid];
+
+	nvmet_vhost_enqueue_req_completion(cq, iod);
+}
+
+/*
+ * Pin the host page behind one guest buffer chunk and describe it in @sg.
+ *
+ * @n:          controller whose vhost device provides the guest mapping
+ * @sg:         scatterlist entry to fill in
+ * @guest_addr: guest address of the chunk
+ * @len:        chunk length; clamped to what is left of the host page
+ * @is_write:   passed to get_user_pages() as its write flag
+ *
+ * Returns 0 on success or a negative errno. On success the page holds a
+ * reference taken by get_user_pages().
+ */
+static int nvmet_vhost_sglist_add(struct nvmet_vhost_ctrl *n, struct scatterlist *sg,
+		u64 guest_addr, int len, int is_write)
+{
+	void __user *host_addr;
+	struct page *page;
+	unsigned int offset, nbytes;
+	int ret;
+
+	host_addr = map_guest_to_host(&n->dev, guest_addr, len);
+	if (unlikely(!host_addr)) {
+		/* PTR_ERR(NULL) is 0, which the caller would take for success. */
+		pr_warn("cannot map guest addr 0x%llx\n",
+			(unsigned long long)guest_addr);
+		return -EFAULT;
+	}
+
+	/* get_user_pages() must be called with mmap_sem held. */
+	down_read(&n->dev.mm->mmap_sem);
+	ret = get_user_pages(current, n->dev.mm, (unsigned long)host_addr, 1,
+			is_write, 0, &page, NULL);
+	up_read(&n->dev.mm->mmap_sem);
+	if (ret < 0) {
+		pr_warn("get_user_pages faild: host_addr %p, %d\n",
+			host_addr, ret);
+		return ret;
+	}
+	/* We should either get our page or fail; do not crash the host on 0. */
+	if (WARN_ON(ret == 0))
+		return -EFAULT;
+
+	offset = (uintptr_t)host_addr & ~PAGE_MASK;
+	nbytes = min_t(unsigned int, PAGE_SIZE - offset, len);
+	sg_set_page(sg, page, nbytes, offset);
+
+	return 0;
+}
+
+/*
+ * Translate a command's PRP1/PRP2 pair into scatterlist entries.
+ *
+ * @n:    controller; supplies page_size, page_bits and max_prp_ents
+ * @sgl:  caller's table, assumed to hold NVMET_VHOST_MAX_SEGMENTS entries
+ * @prp1: first data pointer (may carry an offset into its page)
+ * @prp2: second data pointer, or the address of a PRP list when more than
+ *        one further page is needed
+ * @len:  transfer length in bytes
+ *
+ * Returns the number of entries filled in (> 0) or a negative errno. On
+ * failure every page pinned so far is released again.
+ *
+ * NOTE(review): nothing visible here drops the page references after a
+ * successful mapping once the command completes - confirm where that is
+ * meant to happen.
+ */
+static int nvmet_vhost_map_prp(struct nvmet_vhost_ctrl *n, struct scatterlist *sgl,
+	u64 prp1, u64 prp2, unsigned int len)
+{
+	unsigned int trans_len = n->page_size - (prp1 % n->page_size);
+	unsigned int nr_segs;
+	__le64 *prp_list = NULL;
+	struct page *page;
+	u16 nents, prp_trans;
+	int mapped = 0;
+	int i = 0;
+	int ret;
+	/* FIXME: pages are always pinned writable, whatever the data direction */
+	int is_write = 1;
+
+	if (!prp1 || !len)
+		return -EINVAL;
+
+	/*
+	 * One entry per controller page touched, counting the offset of the
+	 * first page. The table must not be overrun by a large transfer.
+	 */
+	nr_segs = ((u64)(n->page_size - trans_len) + len + n->page_size - 1)
+			>> n->page_bits;
+	if (nr_segs > NVMET_VHOST_MAX_SEGMENTS)
+		return -EINVAL;
+
+	sg_init_table(sgl, nr_segs);
+
+	trans_len = min(len, trans_len);
+	ret = nvmet_vhost_sglist_add(n, &sgl[mapped], prp1, trans_len, is_write);
+	if (ret)
+		goto error;
+	mapped++;
+
+	len -= trans_len;
+	if (!len)
+		return mapped;
+
+	ret = -EINVAL;
+	if (!prp2)
+		goto error;
+
+	if (len <= n->page_size) {
+		/* PRP2 is a plain data pointer covering the whole remainder. */
+		if (prp2 & (n->page_size - 1))
+			goto error;
+		ret = nvmet_vhost_sglist_add(n, &sgl[mapped], prp2, len, is_write);
+		if (ret)
+			goto error;
+		return mapped + 1;
+	}
+
+	/* PRP2 points at a PRP list; keep the list off the kernel stack. */
+	prp_list = kmalloc_array(n->max_prp_ents, sizeof(*prp_list), GFP_KERNEL);
+	if (!prp_list) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	nents = (len + n->page_size - 1) >> n->page_bits;
+	prp_trans = min(n->max_prp_ents, nents) * sizeof(u64);
+	ret = -EFAULT;
+	if (nvmet_vhost_read(&n->dev, prp2, (void *)prp_list, prp_trans))
+		goto error;
+
+	while (len != 0) {
+		u64 prp_ent = le64_to_cpu(prp_list[i]);
+
+		if (i == n->max_prp_ents - 1 && len > n->page_size) {
+			/* The last slot of a full list chains to the next list. */
+			ret = -EINVAL;
+			if (!prp_ent || prp_ent & (n->page_size - 1))
+				goto error;
+			i = 0;
+			nents = (len + n->page_size - 1) >> n->page_bits;
+			prp_trans = min(n->max_prp_ents, nents) * sizeof(u64);
+			ret = -EFAULT;
+			if (nvmet_vhost_read(&n->dev, prp_ent, (void *)prp_list, prp_trans))
+				goto error;
+			prp_ent = le64_to_cpu(prp_list[i]);
+		}
+
+		ret = -EINVAL;
+		if (!prp_ent || prp_ent & (n->page_size - 1))
+			goto error;
+
+		trans_len = min(len, n->page_size);
+		ret = nvmet_vhost_sglist_add(n, &sgl[mapped], prp_ent, trans_len, is_write);
+		if (ret)
+			goto error;
+		mapped++;
+		len -= trans_len;
+		i++;
+	}
+
+	kfree(prp_list);
+	return mapped;
+
+error:
+	/* Drop the references taken for the entries mapped so far. */
+	while (mapped--) {
+		page = sg_page(&sgl[mapped]);
+		if (page)
+			put_page(page);
+	}
+	kfree(prp_list);
+	return ret < 0 ? ret : -EINVAL;
+}
+
+/*
+ * Copy one command from guest address @addr into @iod, initialise the nvmet
+ * request, map its data buffer if it has one and call ->execute().
+ *
+ * Returns 0 once the request has been handed to ->execute(), non-zero if it
+ * could not be started.
+ *
+ * NOTE(review): a command that fails here is consumed from the SQ without a
+ * completion being posted by this file - confirm whether nvmet_req_init()
+ * completes the request itself on failure.
+ */
+static int nvmet_vhost_start_iod(struct nvmet_vhost_sq *sq,
+		struct nvmet_vhost_cq *cq, struct nvmet_vhost_iod *iod, u64 addr)
+{
+	struct nvmet_vhost_ctrl *n = sq->ctrl;
+	struct nvme_command *cmd = &iod->cmd;
+	int ret;
+
+	ret = nvmet_vhost_read(&n->dev, addr, (void *)cmd, sizeof(*cmd));
+	if (ret) {
+		pr_warn("nvmet_vhost_read fail\n");
+		return ret;
+	}
+
+	ret = nvmet_req_init(&iod->req, &cq->cq, &sq->sq,
+				nvmet_vhost_queue_response);
+	if (ret) {
+		pr_warn("nvmet_req_init error: ret 0x%x, qid %d\n", ret, sq->sq.qid);
+		return ret;
+	}
+
+	if (iod->req.data_len) {
+		/* The PRP pointers are little-endian in the command. */
+		ret = nvmet_vhost_map_prp(n, iod->sg,
+				le64_to_cpu(cmd->common.prp1),
+				le64_to_cpu(cmd->common.prp2),
+				iod->req.data_len);
+		if (ret <= 0) {
+			pr_warn("map prp error\n");
+			return ret ? ret : -EINVAL;
+		}
+		iod->req.sg = iod->sg;
+		iod->req.sg_cnt = ret;
+	}
+
+	iod->req.execute(&iod->req);
+	return 0;
+}
+
+/*
+ * Fetch and start commands from @sq until the queue is empty or no free iod
+ * is left, then clear sq->scheduled.
+ *
+ * sq->lock protects head/tail, the free list and the scheduled flag; it is
+ * dropped while a command is being read and executed. A command that
+ * cannot be started returns its iod to the free list and the loop carries
+ * on with the next entry instead of giving up on the whole queue.
+ */
+static void nvmet_vhost_process_sq(struct nvmet_vhost_sq *sq)
+{
+	struct nvmet_vhost_ctrl *n = sq->ctrl;
+	/* Completions go to the CQ this SQ was created with, not to cqs[qid]. */
+	struct nvmet_vhost_cq *cq = n->cqs[sq->cqid];
+	struct nvmet_vhost_iod *iod;
+	int ret;
+
+	mutex_lock(&sq->lock);
+
+	while (!nvmet_vhost_sq_empty(sq) && !list_empty(&sq->req_list)) {
+		u64 addr = sq->dma_addr + sq->head * n->sqe_size;
+
+		nvmet_vhost_inc_sq_head(sq);
+		iod = list_first_entry(&sq->req_list,
+					struct nvmet_vhost_iod, entry);
+		list_del(&iod->entry);
+		mutex_unlock(&sq->lock);
+
+		ret = nvmet_vhost_start_iod(sq, cq, iod, addr);
+
+		mutex_lock(&sq->lock);
+		if (ret)
+			list_add_tail(&iod->entry, &sq->req_list);
+	}
+
+	sq->scheduled = 0;
+	mutex_unlock(&sq->lock);
+}
+
+/*
+ * Submission queue worker thread.
+ *
+ * Processes the queue and then sleeps until wake_up_process() is called on
+ * it. Wakers set sq->scheduled before waking, and the task state is
+ * switched to TASK_INTERRUPTIBLE before the flag is tested, so a wake-up
+ * cannot be lost. Calling schedule() while still TASK_RUNNING only yields
+ * the CPU, which turned the previous loop into a busy poll.
+ *
+ * Returns 0 once kthread_stop() has been called.
+ */
+static int nvmet_vhost_sq_thread(void *opaque)
+{
+	struct nvmet_vhost_sq *sq = opaque;
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	while (!kthread_should_stop()) {
+		__set_current_state(TASK_RUNNING);
+		nvmet_vhost_process_sq(sq);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!READ_ONCE(sq->scheduled) && !kthread_should_stop())
+			schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+
+	return 0;
+}
+
 static int nvmet_vhost_init_cq(struct nvmet_vhost_cq *cq,
 		struct nvmet_vhost_ctrl *n, u64 dma_addr,
 		u16 cqid, u16 size, struct eventfd_ctx *eventfd,
@@ -228,6 +535,12 @@ static int nvmet_vhost_init_cq(struct nvmet_vhost_cq *cq,
 	cq->eventfd = eventfd;
 	n->cqs[cqid] = cq;
 
+	spin_lock_init(&cq->lock);
+	INIT_LIST_HEAD(&cq->req_list);
+	INIT_LIST_HEAD(&cq->sq_list);
+	cq->scheduled = 0;
+	cq->thread = kthread_create(nvmet_vhost_cq_thread, cq, "nvmet_vhost_cq");
+
 	nvmet_cq_init(n->ctrl, &cq->cq, cqid, size);
 
 	return 0;
@@ -237,12 +550,36 @@ static int nvmet_vhost_init_sq(struct nvmet_vhost_sq *sq,
 		struct nvmet_vhost_ctrl *n, u64 dma_addr,
 		u16 sqid, u16 cqid, u16 size)
 {
+	struct nvmet_vhost_cq *cq;
+	struct nvmet_vhost_iod *iod;
+	int i;
+
 	sq->ctrl = n;
 	sq->dma_addr = dma_addr;
 	sq->cqid = cqid;
 	sq->head = sq->tail = 0;
 	n->sqs[sqid] = sq;
 
+	mutex_init(&sq->lock);
+	INIT_LIST_HEAD(&sq->req_list);
+	sq->io_req = kmalloc(sizeof(struct nvmet_vhost_iod) * size, GFP_KERNEL);
+	if (!sq->io_req)
+		return -ENOMEM;
+	for (i = 0; i < size; i++) {
+		iod = &sq->io_req[i];
+
+		iod->req.cmd = &iod->cmd;
+		iod->req.rsp = &iod->rsp;
+		iod->sq = sq;
+		list_add_tail(&iod->entry, &sq->req_list);
+	}
+	sq->scheduled = 0;
+	sq->thread = kthread_create(nvmet_vhost_sq_thread, sq, "nvmet_vhost_sq");
+
+	cq = n->cqs[cqid];
+	list_add_tail(&sq->entry, &cq->sq_list);
+	n->sqs[sqid] = sq;
+
 	nvmet_sq_init(n->ctrl, &sq->sq, sqid, size);
 
 	return 0;
@@ -564,12 +901,84 @@ static int nvmet_bar_write(struct nvmet_vhost_ctrl *n, int offset, u64 val)
 	return status;
 }
 
+/*
+ * Handle a guest write to a queue doorbell (BAR offsets from 0x1000 up).
+ *
+ * Doorbells are 4 bytes wide and come in pairs per queue id: the first of
+ * each pair carries a new SQ tail, the second a new CQ head. Only the low
+ * 16 bits of @val are used.
+ *
+ * CQ head: stores the new head and, if the CQ was full beforehand, wakes
+ * the CQ thread and every SQ thread attached to that CQ. The eventfd is
+ * signalled while entries remain between head and tail.
+ * SQ tail: stores the new tail and wakes the SQ thread.
+ *
+ * The scheduled flags and head/tail are only touched under the lock under
+ * which the worker threads clear them (cq->lock / sq->lock).
+ *
+ * Returns 0 on success, -EINVAL for a misaligned offset, an unknown queue
+ * or an out-of-range head, and -ENOSPC for an out-of-range tail.
+ */
+static int nvmet_vhost_process_db(struct nvmet_ctrl *ctrl, int offset, u64 val)
+{
+	u16 qid;
+
+	if (offset & ((1 << 2) - 1))
+		return -EINVAL;
+
+	if (((offset - 0x1000) >> 2) & 1) {
+		u16 new_head = val & 0xffff;
+		int start_sqs, notify;
+		struct nvmet_vhost_cq *vcq;
+		struct nvmet_vhost_sq *vsq;
+		struct nvmet_cq *cq;
+		unsigned long flags;
+
+		qid = (offset - (0x1000 + (1 << 2))) >> 3;
+		if (nvmet_vhost_check_cqid(ctrl, qid))
+			return -EINVAL;
+
+		cq = ctrl->cqs[qid];
+		if (new_head >= cq->size)
+			return -EINVAL;
+
+		vcq = cq_to_vcq(cq);
+		spin_lock_irqsave(&vcq->lock, flags);
+		start_sqs = nvmet_vhost_cq_full(vcq) ? 1 : 0;
+		vcq->head = new_head;
+		/* Sample tail/head while they cannot change under us. */
+		notify = vcq->tail != vcq->head;
+		if (start_sqs && !vcq->scheduled) {
+			vcq->scheduled = 1;
+			wake_up_process(vcq->thread);
+		}
+		spin_unlock_irqrestore(&vcq->lock, flags);
+
+		if (start_sqs) {
+			list_for_each_entry(vsq, &vcq->sq_list, entry) {
+				mutex_lock(&vsq->lock);
+				if (!vsq->scheduled) {
+					vsq->scheduled = 1;
+					wake_up_process(vsq->thread);
+				}
+				mutex_unlock(&vsq->lock);
+			}
+		}
+
+		if (notify)
+			eventfd_signal(vcq->eventfd, 1);
+	} else {
+		struct nvmet_vhost_sq *vsq;
+		struct nvmet_sq *sq;
+		u16 new_tail = val & 0xffff;
+
+		qid = (offset - 0x1000) >> 3;
+		if (nvmet_vhost_check_sqid(ctrl, qid))
+			return -EINVAL;
+
+		sq = ctrl->sqs[qid];
+		if (new_tail >= sq->size)
+			return -ENOSPC;
+
+		vsq = sq_to_vsq(sq);
+		mutex_lock(&vsq->lock);
+		vsq->tail = new_tail;
+		if (!vsq->scheduled) {
+			vsq->scheduled = 1;
+			wake_up_process(vsq->thread);
+		}
+		mutex_unlock(&vsq->lock);
+	}
+
+	return 0;
+}
+
 static int nvmet_vhost_bar_write(struct nvmet_vhost_ctrl *n, int offset, u64 val)
 {
 	if (offset < 0x1000)
 		return nvmet_bar_write(n, offset, val);
 
-	return -1;
+	/* Everything from 0x1000 upwards goes to nvmet_vhost_process_db(). */
+	return nvmet_vhost_process_db(n->ctrl, offset, val);
 }
 
 static int nvmet_vhost_ioc_bar(struct nvmet_vhost_ctrl *n, void __user *argp)
@@ -612,6 +1021,8 @@ static void nvme_free_sq(struct nvmet_vhost_sq *sq,
 		struct nvmet_vhost_ctrl *n)
 {
 	n->sqs[sq->sq.qid] = NULL;
+	kthread_stop(sq->thread);
+	kfree(sq->io_req);
 	if (sq->sq.qid)
 		kfree(sq);
 }
@@ -620,6 +1031,7 @@ static void nvme_free_cq(struct nvmet_vhost_cq *cq,
 		struct nvmet_vhost_ctrl *n)
 {
 	n->cqs[cq->cq.qid] = NULL;
+	kthread_stop(cq->thread);
 	if (cq->cq.qid)
 		kfree(cq);
 }
-- 
1.9.1

WARNING: multiple messages have this Message-ID (diff)
From: Ming Lin <mlin@kernel.org>
To: linux-nvme@lists.infradead.org
Cc: Ming Lin <ming.l@ssi.samsung.com>,
	qemu-devel@nongnu.org, virtualization@lists.linux-foundation.org,
	Keith Busch <keith.busch@intel.com>,
	Christoph Hellwig <hch@lst.de>
Subject: [RFC PATCH 9/9] nvme-vhost: add nvme queue handlers
Date: Thu, 19 Nov 2015 16:21:08 -0800	[thread overview]
Message-ID: <1447978868-17138-10-git-send-email-mlin@kernel.org> (raw)
In-Reply-To: <1447978868-17138-1-git-send-email-mlin@kernel.org>

From: Ming Lin <ming.l@ssi.samsung.com>

This adds the NVMe submission/completion queue handlers, which are
ported from qemu-nvme.

They hook into nvme-target, which does the real work.

Cc: Keith Busch <keith.busch@intel.com>
Signed-off-by: Ming Lin <ming.l@ssi.samsung.com>
---
 drivers/nvme/target/vhost.c | 420 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 416 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/target/vhost.c b/drivers/nvme/target/vhost.c
index 6847c86..3ce1348 100644
--- a/drivers/nvme/target/vhost.c
+++ b/drivers/nvme/target/vhost.c
@@ -6,10 +6,12 @@
 #include <linux/mutex.h>
 #include <linux/file.h>
 #include <linux/highmem.h>
+#include <linux/kthread.h>
 #include "../../vhost/vhost.h"
 #include "nvmet.h"
 
 #define NVMET_VHOST_AQ_DEPTH		256
+#define NVMET_VHOST_MAX_SEGMENTS	32
 
 enum NvmeCcShift {
 	CC_MPS_SHIFT	= 7,
@@ -52,6 +54,15 @@ struct nvmet_vhost_ctrl_eventfd {
 	int __user *vector;
 };
 
+struct nvmet_vhost_iod {	/* one command slot; an array of these is allocated per SQ */
+	struct nvmet_vhost_sq	*sq;	/* submission queue this slot belongs to */
+	struct scatterlist	sg[NVMET_VHOST_MAX_SEGMENTS];	/* data segments filled by nvmet_vhost_map_prp() */
+	struct nvme_command	cmd;	/* command copied in from guest memory */
+	struct nvme_completion	rsp;	/* completion entry copied out to guest memory */
+	struct nvmet_req	req;	/* nvmet request; req.cmd/req.rsp point at cmd/rsp above */
+	struct list_head	entry;	/* links the slot on sq->req_list or cq->req_list */
+};
+
 struct nvmet_vhost_cq {
 	struct nvmet_cq		cq;
 	struct nvmet_vhost_ctrl	*ctrl;
@@ -61,6 +72,12 @@ struct nvmet_vhost_cq {
 	u8			phase;
 	u64			dma_addr;
 	struct eventfd_ctx	*eventfd;
+
+	struct list_head	sq_list;
+	struct list_head	req_list;
+	spinlock_t		lock;
+	struct task_struct	*thread;
+	int			scheduled;
 };
 
 struct nvmet_vhost_sq {
@@ -71,6 +88,13 @@ struct nvmet_vhost_sq {
 	u32			tail;
 	u64			dma_addr;
 	u16			cqid;
+
+	struct nvmet_vhost_iod	*io_req;
+	struct list_head	req_list;
+	struct list_head	entry;
+	struct mutex            lock;
+	struct task_struct	*thread;
+	int			scheduled;
 };
 
 struct nvmet_vhost_ctrl {
@@ -191,13 +215,13 @@ static int nvmet_vhost_rw(struct vhost_dev *dev, u64 guest_pa,
 	return 0;
 }
 
-int nvmet_vhost_read(struct vhost_dev *dev, u64 guest_pa,
+static int nvmet_vhost_read(struct vhost_dev *dev, u64 guest_pa,
 		void *buf, uint32_t size)
 {
 	return nvmet_vhost_rw(dev, guest_pa, buf, size, 0);
 }
 
-int nvmet_vhost_write(struct vhost_dev *dev, u64 guest_pa,
+static int nvmet_vhost_write(struct vhost_dev *dev, u64 guest_pa,
 		void *buf, uint32_t size)
 {
 	return nvmet_vhost_rw(dev, guest_pa, buf, size, 1);
@@ -216,6 +240,289 @@ static int nvmet_vhost_check_cqid(struct nvmet_ctrl *n, u16 cqid)
 	return cqid <= n->subsys->max_qid && n->cqs[cqid] != NULL ? 0 : -1;
 }
 
+/* Advance the CQ tail by one slot; on wrap-around invert the phase value. */
+static void nvmet_vhost_inc_cq_tail(struct nvmet_vhost_cq *cq)
+{
+	if (++cq->tail < cq->cq.size)
+		return;
+
+	cq->tail = 0;
+	cq->phase = !cq->phase;
+}
+
+/* Step the SQ head forward by one slot, wrapping at the queue size. */
+static void nvmet_vhost_inc_sq_head(struct nvmet_vhost_sq *sq)
+{
+	unsigned int next = sq->head + 1;
+
+	sq->head = next % sq->sq.size;
+}
+
+/*
+ * Returns non-zero when advancing the tail by one slot would make it equal
+ * to the head, i.e. no free completion slot is left.
+ */
+static uint8_t nvmet_vhost_cq_full(struct nvmet_vhost_cq *cq)
+{
+	unsigned int next_tail = (cq->tail + 1) % cq->cq.size;
+
+	return next_tail == cq->head;
+}
+
+/* Returns 1 when head has caught up with tail, 0 otherwise. */
+static uint8_t nvmet_vhost_sq_empty(struct nvmet_vhost_sq *sq)
+{
+	if (sq->head != sq->tail)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Drain cq->req_list into the guest's completion queue.
+ *
+ * For every completed iod: stamp the completion entry with the current
+ * phase, the SQ id and the SQ head, copy it to the guest at the tail slot
+ * and give the iod back to its submission queue. Posting stops as soon as
+ * the CQ is full. cq->lock is dropped around the guest write and the
+ * sq->lock mutex, since a mutex must not be taken under a spinlock.
+ *
+ * The eventfd is signalled once if at least one entry was posted, and
+ * cq->scheduled is cleared before returning.
+ */
+static void nvmet_vhost_post_cqes(struct nvmet_vhost_cq *cq)
+{
+	struct nvmet_vhost_ctrl *n = cq->ctrl;
+	struct nvmet_vhost_iod *req;
+	int signal = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cq->lock, flags);
+	/*
+	 * Look at the list head afresh on every pass. The lock is released
+	 * inside the loop, so a "next" pointer cached by
+	 * list_for_each_safe() could end the walk before completions that
+	 * were queued in the meantime.
+	 */
+	while (!list_empty(&cq->req_list)) {
+		struct nvmet_vhost_sq *sq;
+		u64 addr;
+
+		if (nvmet_vhost_cq_full(cq))
+			break;
+
+		req = list_first_entry(&cq->req_list,
+					struct nvmet_vhost_iod, entry);
+		list_del(&req->entry);
+
+		sq = req->sq;
+		/* The completion entry is little-endian, like sq_id/sq_head. */
+		req->rsp.status |= cpu_to_le16(cq->phase);
+		req->rsp.sq_id = cpu_to_le16(sq->sq.qid);
+		req->rsp.sq_head = cpu_to_le16(sq->head);
+		addr = cq->dma_addr + cq->tail * n->cqe_size;
+		nvmet_vhost_inc_cq_tail(cq);
+		spin_unlock_irqrestore(&cq->lock, flags);
+
+		if (nvmet_vhost_write(&n->dev, addr, (void *)&req->rsp,
+				sizeof(req->rsp)))
+			pr_warn("nvmet_vhost_write fail, cqid %d\n", cq->cq.qid);
+
+		mutex_lock(&sq->lock);
+		list_add_tail(&req->entry, &sq->req_list);
+		/* An SQ that stalled for lack of free iods can run again. */
+		if (!sq->scheduled && !nvmet_vhost_sq_empty(sq)) {
+			sq->scheduled = 1;
+			wake_up_process(sq->thread);
+		}
+		mutex_unlock(&sq->lock);
+
+		signal = 1;
+
+		spin_lock_irqsave(&cq->lock, flags);
+	}
+
+	/* Notify the guest even if we stopped early because the CQ is full. */
+	if (signal)
+		eventfd_signal(cq->eventfd, 1);
+
+	cq->scheduled = 0;
+	spin_unlock_irqrestore(&cq->lock, flags);
+}
+
+/*
+ * Completion queue worker thread.
+ *
+ * Posts pending completions and then sleeps until wake_up_process() is
+ * called on it. The task state is switched to TASK_INTERRUPTIBLE before
+ * the "anything to do?" test so a wake-up arriving in between is not lost.
+ * Calling schedule() while still TASK_RUNNING only yields the CPU, which
+ * turned the previous loop into a busy poll.
+ *
+ * Returns 0 once kthread_stop() has been called.
+ */
+static int nvmet_vhost_cq_thread(void *arg)
+{
+	struct nvmet_vhost_cq *cq = arg;
+	unsigned long flags;
+	int pending;
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	while (!kthread_should_stop()) {
+		__set_current_state(TASK_RUNNING);
+		nvmet_vhost_post_cqes(cq);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		/*
+		 * Wakers update the list and cq->scheduled under cq->lock.
+		 * Entries left behind because the CQ is full do not count as
+		 * work; they are picked up once the guest moves the head.
+		 */
+		spin_lock_irqsave(&cq->lock, flags);
+		pending = cq->scheduled ||
+			(!list_empty(&cq->req_list) && !nvmet_vhost_cq_full(cq));
+		spin_unlock_irqrestore(&cq->lock, flags);
+
+		if (!pending && !kthread_should_stop())
+			schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+
+	return 0;
+}
+
+/*
+ * Append a finished iod to @cq's pending list and kick the CQ thread unless
+ * it is already marked as scheduled.
+ */
+static void nvmet_vhost_enqueue_req_completion(
+		struct nvmet_vhost_cq *cq, struct nvmet_vhost_iod *iod)
+{
+	unsigned long flags;
+
+	/* The iod must complete on the CQ its submission queue is bound to. */
+	BUG_ON(cq->cq.qid != iod->sq->cqid);
+	spin_lock_irqsave(&cq->lock, flags);
+	list_add_tail(&iod->entry, &cq->req_list);
+	if (!cq->scheduled) {
+		/* Set the flag before the wake-up so the thread cannot miss it. */
+		cq->scheduled = 1;
+		wake_up_process(cq->thread);
+	}
+	spin_unlock_irqrestore(&cq->lock, flags);
+}
+
+/*
+ * Response callback handed to nvmet_req_init(): recovers the iod that embeds
+ * @req and queues it on the completion queue selected by sq->cqid. The SQ's
+ * own qid is not used for the lookup because a submission queue is created
+ * with an explicit cqid that may differ from its qid.
+ */
+static void nvmet_vhost_queue_response(struct nvmet_req *req)
+{
+	struct nvmet_vhost_iod *iod =
+		container_of(req, struct nvmet_vhost_iod, req);
+	struct nvmet_vhost_sq *sq = iod->sq;
+	struct nvmet_vhost_ctrl *n = sq->ctrl;
+	struct nvmet_vhost_cq *cq = n->cqs[sq->cqid];
+
+	nvmet_vhost_enqueue_req_completion(cq, iod);
+}
+
+/*
+ * Pin the host page behind one guest buffer chunk and describe it in @sg.
+ *
+ * @n:          controller whose vhost device provides the guest mapping
+ * @sg:         scatterlist entry to fill in
+ * @guest_addr: guest address of the chunk
+ * @len:        chunk length; clamped to what is left of the host page
+ * @is_write:   passed to get_user_pages() as its write flag
+ *
+ * Returns 0 on success or a negative errno. On success the page holds a
+ * reference taken by get_user_pages().
+ */
+static int nvmet_vhost_sglist_add(struct nvmet_vhost_ctrl *n, struct scatterlist *sg,
+		u64 guest_addr, int len, int is_write)
+{
+	void __user *host_addr;
+	struct page *page;
+	unsigned int offset, nbytes;
+	int ret;
+
+	host_addr = map_guest_to_host(&n->dev, guest_addr, len);
+	if (unlikely(!host_addr)) {
+		/* PTR_ERR(NULL) is 0, which the caller would take for success. */
+		pr_warn("cannot map guest addr 0x%llx\n",
+			(unsigned long long)guest_addr);
+		return -EFAULT;
+	}
+
+	/* get_user_pages() must be called with mmap_sem held. */
+	down_read(&n->dev.mm->mmap_sem);
+	ret = get_user_pages(current, n->dev.mm, (unsigned long)host_addr, 1,
+			is_write, 0, &page, NULL);
+	up_read(&n->dev.mm->mmap_sem);
+	if (ret < 0) {
+		pr_warn("get_user_pages faild: host_addr %p, %d\n",
+			host_addr, ret);
+		return ret;
+	}
+	/* We should either get our page or fail; do not crash the host on 0. */
+	if (WARN_ON(ret == 0))
+		return -EFAULT;
+
+	offset = (uintptr_t)host_addr & ~PAGE_MASK;
+	nbytes = min_t(unsigned int, PAGE_SIZE - offset, len);
+	sg_set_page(sg, page, nbytes, offset);
+
+	return 0;
+}
+
+/*
+ * Translate a command's PRP1/PRP2 pair into scatterlist entries.
+ *
+ * @n:    controller; supplies page_size, page_bits and max_prp_ents
+ * @sgl:  caller's table, assumed to hold NVMET_VHOST_MAX_SEGMENTS entries
+ * @prp1: first data pointer (may carry an offset into its page)
+ * @prp2: second data pointer, or the address of a PRP list when more than
+ *        one further page is needed
+ * @len:  transfer length in bytes
+ *
+ * Returns the number of entries filled in (> 0) or a negative errno. On
+ * failure every page pinned so far is released again.
+ *
+ * NOTE(review): nothing visible here drops the page references after a
+ * successful mapping once the command completes - confirm where that is
+ * meant to happen.
+ */
+static int nvmet_vhost_map_prp(struct nvmet_vhost_ctrl *n, struct scatterlist *sgl,
+	u64 prp1, u64 prp2, unsigned int len)
+{
+	unsigned int trans_len = n->page_size - (prp1 % n->page_size);
+	unsigned int nr_segs;
+	__le64 *prp_list = NULL;
+	struct page *page;
+	u16 nents, prp_trans;
+	int mapped = 0;
+	int i = 0;
+	int ret;
+	/* FIXME: pages are always pinned writable, whatever the data direction */
+	int is_write = 1;
+
+	if (!prp1 || !len)
+		return -EINVAL;
+
+	/*
+	 * One entry per controller page touched, counting the offset of the
+	 * first page. The table must not be overrun by a large transfer.
+	 */
+	nr_segs = ((u64)(n->page_size - trans_len) + len + n->page_size - 1)
+			>> n->page_bits;
+	if (nr_segs > NVMET_VHOST_MAX_SEGMENTS)
+		return -EINVAL;
+
+	sg_init_table(sgl, nr_segs);
+
+	trans_len = min(len, trans_len);
+	ret = nvmet_vhost_sglist_add(n, &sgl[mapped], prp1, trans_len, is_write);
+	if (ret)
+		goto error;
+	mapped++;
+
+	len -= trans_len;
+	if (!len)
+		return mapped;
+
+	ret = -EINVAL;
+	if (!prp2)
+		goto error;
+
+	if (len <= n->page_size) {
+		/* PRP2 is a plain data pointer covering the whole remainder. */
+		if (prp2 & (n->page_size - 1))
+			goto error;
+		ret = nvmet_vhost_sglist_add(n, &sgl[mapped], prp2, len, is_write);
+		if (ret)
+			goto error;
+		return mapped + 1;
+	}
+
+	/* PRP2 points at a PRP list; keep the list off the kernel stack. */
+	prp_list = kmalloc_array(n->max_prp_ents, sizeof(*prp_list), GFP_KERNEL);
+	if (!prp_list) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	nents = (len + n->page_size - 1) >> n->page_bits;
+	prp_trans = min(n->max_prp_ents, nents) * sizeof(u64);
+	ret = -EFAULT;
+	if (nvmet_vhost_read(&n->dev, prp2, (void *)prp_list, prp_trans))
+		goto error;
+
+	while (len != 0) {
+		u64 prp_ent = le64_to_cpu(prp_list[i]);
+
+		if (i == n->max_prp_ents - 1 && len > n->page_size) {
+			/* The last slot of a full list chains to the next list. */
+			ret = -EINVAL;
+			if (!prp_ent || prp_ent & (n->page_size - 1))
+				goto error;
+			i = 0;
+			nents = (len + n->page_size - 1) >> n->page_bits;
+			prp_trans = min(n->max_prp_ents, nents) * sizeof(u64);
+			ret = -EFAULT;
+			if (nvmet_vhost_read(&n->dev, prp_ent, (void *)prp_list, prp_trans))
+				goto error;
+			prp_ent = le64_to_cpu(prp_list[i]);
+		}
+
+		ret = -EINVAL;
+		if (!prp_ent || prp_ent & (n->page_size - 1))
+			goto error;
+
+		trans_len = min(len, n->page_size);
+		ret = nvmet_vhost_sglist_add(n, &sgl[mapped], prp_ent, trans_len, is_write);
+		if (ret)
+			goto error;
+		mapped++;
+		len -= trans_len;
+		i++;
+	}
+
+	kfree(prp_list);
+	return mapped;
+
+error:
+	/* Drop the references taken for the entries mapped so far. */
+	while (mapped--) {
+		page = sg_page(&sgl[mapped]);
+		if (page)
+			put_page(page);
+	}
+	kfree(prp_list);
+	return ret < 0 ? ret : -EINVAL;
+}
+
+/*
+ * Copy one command from guest address @addr into @iod, initialise the nvmet
+ * request, map its data buffer if it has one and call ->execute().
+ *
+ * Returns 0 once the request has been handed to ->execute(), non-zero if it
+ * could not be started.
+ *
+ * NOTE(review): a command that fails here is consumed from the SQ without a
+ * completion being posted by this file - confirm whether nvmet_req_init()
+ * completes the request itself on failure.
+ */
+static int nvmet_vhost_start_iod(struct nvmet_vhost_sq *sq,
+		struct nvmet_vhost_cq *cq, struct nvmet_vhost_iod *iod, u64 addr)
+{
+	struct nvmet_vhost_ctrl *n = sq->ctrl;
+	struct nvme_command *cmd = &iod->cmd;
+	int ret;
+
+	ret = nvmet_vhost_read(&n->dev, addr, (void *)cmd, sizeof(*cmd));
+	if (ret) {
+		pr_warn("nvmet_vhost_read fail\n");
+		return ret;
+	}
+
+	ret = nvmet_req_init(&iod->req, &cq->cq, &sq->sq,
+				nvmet_vhost_queue_response);
+	if (ret) {
+		pr_warn("nvmet_req_init error: ret 0x%x, qid %d\n", ret, sq->sq.qid);
+		return ret;
+	}
+
+	if (iod->req.data_len) {
+		/* The PRP pointers are little-endian in the command. */
+		ret = nvmet_vhost_map_prp(n, iod->sg,
+				le64_to_cpu(cmd->common.prp1),
+				le64_to_cpu(cmd->common.prp2),
+				iod->req.data_len);
+		if (ret <= 0) {
+			pr_warn("map prp error\n");
+			return ret ? ret : -EINVAL;
+		}
+		iod->req.sg = iod->sg;
+		iod->req.sg_cnt = ret;
+	}
+
+	iod->req.execute(&iod->req);
+	return 0;
+}
+
+/*
+ * Fetch and start commands from @sq until the queue is empty or no free iod
+ * is left, then clear sq->scheduled.
+ *
+ * sq->lock protects head/tail, the free list and the scheduled flag; it is
+ * dropped while a command is being read and executed. A command that
+ * cannot be started returns its iod to the free list and the loop carries
+ * on with the next entry instead of giving up on the whole queue.
+ */
+static void nvmet_vhost_process_sq(struct nvmet_vhost_sq *sq)
+{
+	struct nvmet_vhost_ctrl *n = sq->ctrl;
+	/* Completions go to the CQ this SQ was created with, not to cqs[qid]. */
+	struct nvmet_vhost_cq *cq = n->cqs[sq->cqid];
+	struct nvmet_vhost_iod *iod;
+	int ret;
+
+	mutex_lock(&sq->lock);
+
+	while (!nvmet_vhost_sq_empty(sq) && !list_empty(&sq->req_list)) {
+		u64 addr = sq->dma_addr + sq->head * n->sqe_size;
+
+		nvmet_vhost_inc_sq_head(sq);
+		iod = list_first_entry(&sq->req_list,
+					struct nvmet_vhost_iod, entry);
+		list_del(&iod->entry);
+		mutex_unlock(&sq->lock);
+
+		ret = nvmet_vhost_start_iod(sq, cq, iod, addr);
+
+		mutex_lock(&sq->lock);
+		if (ret)
+			list_add_tail(&iod->entry, &sq->req_list);
+	}
+
+	sq->scheduled = 0;
+	mutex_unlock(&sq->lock);
+}
+
+/*
+ * Submission queue worker thread.
+ *
+ * Processes the queue and then sleeps until wake_up_process() is called on
+ * it. Wakers set sq->scheduled before waking, and the task state is
+ * switched to TASK_INTERRUPTIBLE before the flag is tested, so a wake-up
+ * cannot be lost. Calling schedule() while still TASK_RUNNING only yields
+ * the CPU, which turned the previous loop into a busy poll.
+ *
+ * Returns 0 once kthread_stop() has been called.
+ */
+static int nvmet_vhost_sq_thread(void *opaque)
+{
+	struct nvmet_vhost_sq *sq = opaque;
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	while (!kthread_should_stop()) {
+		__set_current_state(TASK_RUNNING);
+		nvmet_vhost_process_sq(sq);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!READ_ONCE(sq->scheduled) && !kthread_should_stop())
+			schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+
+	return 0;
+}
+
 static int nvmet_vhost_init_cq(struct nvmet_vhost_cq *cq,
 		struct nvmet_vhost_ctrl *n, u64 dma_addr,
 		u16 cqid, u16 size, struct eventfd_ctx *eventfd,
@@ -228,6 +535,12 @@ static int nvmet_vhost_init_cq(struct nvmet_vhost_cq *cq,
 	cq->eventfd = eventfd;
 	n->cqs[cqid] = cq;
 
+	spin_lock_init(&cq->lock);
+	INIT_LIST_HEAD(&cq->req_list);
+	INIT_LIST_HEAD(&cq->sq_list);
+	cq->scheduled = 0;
+	cq->thread = kthread_create(nvmet_vhost_cq_thread, cq, "nvmet_vhost_cq");
+
 	nvmet_cq_init(n->ctrl, &cq->cq, cqid, size);
 
 	return 0;
@@ -237,12 +550,36 @@ static int nvmet_vhost_init_sq(struct nvmet_vhost_sq *sq,
 		struct nvmet_vhost_ctrl *n, u64 dma_addr,
 		u16 sqid, u16 cqid, u16 size)
 {
+	struct nvmet_vhost_cq *cq;
+	struct nvmet_vhost_iod *iod;
+	int i;
+
 	sq->ctrl = n;
 	sq->dma_addr = dma_addr;
 	sq->cqid = cqid;
 	sq->head = sq->tail = 0;
 	n->sqs[sqid] = sq;
 
+	mutex_init(&sq->lock);
+	INIT_LIST_HEAD(&sq->req_list);
+	sq->io_req = kmalloc(sizeof(struct nvmet_vhost_iod) * size, GFP_KERNEL);
+	if (!sq->io_req)
+		return -ENOMEM;
+	for (i = 0; i < size; i++) {
+		iod = &sq->io_req[i];
+
+		iod->req.cmd = &iod->cmd;
+		iod->req.rsp = &iod->rsp;
+		iod->sq = sq;
+		list_add_tail(&iod->entry, &sq->req_list);
+	}
+	sq->scheduled = 0;
+	sq->thread = kthread_create(nvmet_vhost_sq_thread, sq, "nvmet_vhost_sq");
+
+	cq = n->cqs[cqid];
+	list_add_tail(&sq->entry, &cq->sq_list);
+	n->sqs[sqid] = sq;
+
 	nvmet_sq_init(n->ctrl, &sq->sq, sqid, size);
 
 	return 0;
@@ -564,12 +901,84 @@ static int nvmet_bar_write(struct nvmet_vhost_ctrl *n, int offset, u64 val)
 	return status;
 }
 
+/*
+ * Handle a guest write to a queue doorbell (BAR offsets from 0x1000 up).
+ *
+ * Doorbells are 4 bytes wide and come in pairs per queue id: the first of
+ * each pair carries a new SQ tail, the second a new CQ head. Only the low
+ * 16 bits of @val are used.
+ *
+ * CQ head: stores the new head and, if the CQ was full beforehand, wakes
+ * the CQ thread and every SQ thread attached to that CQ. The eventfd is
+ * signalled while entries remain between head and tail.
+ * SQ tail: stores the new tail and wakes the SQ thread.
+ *
+ * The scheduled flags and head/tail are only touched under the lock under
+ * which the worker threads clear them (cq->lock / sq->lock).
+ *
+ * Returns 0 on success, -EINVAL for a misaligned offset, an unknown queue
+ * or an out-of-range head, and -ENOSPC for an out-of-range tail.
+ */
+static int nvmet_vhost_process_db(struct nvmet_ctrl *ctrl, int offset, u64 val)
+{
+	u16 qid;
+
+	if (offset & ((1 << 2) - 1))
+		return -EINVAL;
+
+	if (((offset - 0x1000) >> 2) & 1) {
+		u16 new_head = val & 0xffff;
+		int start_sqs, notify;
+		struct nvmet_vhost_cq *vcq;
+		struct nvmet_vhost_sq *vsq;
+		struct nvmet_cq *cq;
+		unsigned long flags;
+
+		qid = (offset - (0x1000 + (1 << 2))) >> 3;
+		if (nvmet_vhost_check_cqid(ctrl, qid))
+			return -EINVAL;
+
+		cq = ctrl->cqs[qid];
+		if (new_head >= cq->size)
+			return -EINVAL;
+
+		vcq = cq_to_vcq(cq);
+		spin_lock_irqsave(&vcq->lock, flags);
+		start_sqs = nvmet_vhost_cq_full(vcq) ? 1 : 0;
+		vcq->head = new_head;
+		/* Sample tail/head while they cannot change under us. */
+		notify = vcq->tail != vcq->head;
+		if (start_sqs && !vcq->scheduled) {
+			vcq->scheduled = 1;
+			wake_up_process(vcq->thread);
+		}
+		spin_unlock_irqrestore(&vcq->lock, flags);
+
+		if (start_sqs) {
+			list_for_each_entry(vsq, &vcq->sq_list, entry) {
+				mutex_lock(&vsq->lock);
+				if (!vsq->scheduled) {
+					vsq->scheduled = 1;
+					wake_up_process(vsq->thread);
+				}
+				mutex_unlock(&vsq->lock);
+			}
+		}
+
+		if (notify)
+			eventfd_signal(vcq->eventfd, 1);
+	} else {
+		struct nvmet_vhost_sq *vsq;
+		struct nvmet_sq *sq;
+		u16 new_tail = val & 0xffff;
+
+		qid = (offset - 0x1000) >> 3;
+		if (nvmet_vhost_check_sqid(ctrl, qid))
+			return -EINVAL;
+
+		sq = ctrl->sqs[qid];
+		if (new_tail >= sq->size)
+			return -ENOSPC;
+
+		vsq = sq_to_vsq(sq);
+		mutex_lock(&vsq->lock);
+		vsq->tail = new_tail;
+		if (!vsq->scheduled) {
+			vsq->scheduled = 1;
+			wake_up_process(vsq->thread);
+		}
+		mutex_unlock(&vsq->lock);
+	}
+
+	return 0;
+}
+
 static int nvmet_vhost_bar_write(struct nvmet_vhost_ctrl *n, int offset, u64 val)
 {
 	if (offset < 0x1000)
 		return nvmet_bar_write(n, offset, val);
 
-	return -1;
+	/* Everything from 0x1000 upwards goes to nvmet_vhost_process_db(). */
+	return nvmet_vhost_process_db(n->ctrl, offset, val);
 }
 
 static int nvmet_vhost_ioc_bar(struct nvmet_vhost_ctrl *n, void __user *argp)
@@ -612,6 +1021,8 @@ static void nvme_free_sq(struct nvmet_vhost_sq *sq,
 		struct nvmet_vhost_ctrl *n)
 {
 	n->sqs[sq->sq.qid] = NULL;
+	kthread_stop(sq->thread);
+	kfree(sq->io_req);
 	if (sq->sq.qid)
 		kfree(sq);
 }
@@ -620,6 +1031,7 @@ static void nvme_free_cq(struct nvmet_vhost_cq *cq,
 		struct nvmet_vhost_ctrl *n)
 {
 	n->cqs[cq->cq.qid] = NULL;
+	kthread_stop(cq->thread);
 	if (cq->cq.qid)
 		kfree(cq);
 }
-- 
1.9.1

WARNING: multiple messages have this Message-ID (diff)
From: Ming Lin <mlin@kernel.org>
To: linux-nvme@lists.infradead.org
Cc: Ming Lin <ming.l@ssi.samsung.com>,
	qemu-devel@nongnu.org,
	"Nicholas A. Bellinger" <nab@linux-iscsi.org>,
	virtualization@lists.linux-foundation.org,
	Keith Busch <keith.busch@intel.com>,
	Christoph Hellwig <hch@lst.de>
Subject: [Qemu-devel] [RFC PATCH 9/9] nvme-vhost: add nvme queue handlers
Date: Thu, 19 Nov 2015 16:21:08 -0800	[thread overview]
Message-ID: <1447978868-17138-10-git-send-email-mlin@kernel.org> (raw)
In-Reply-To: <1447978868-17138-1-git-send-email-mlin@kernel.org>

From: Ming Lin <ming.l@ssi.samsung.com>

This adds the NVMe submission/completion queue handlers, which are
ported from qemu-nvme.

They hook into nvme-target, which does the real work.

Cc: Keith Busch <keith.busch@intel.com>
Signed-off-by: Ming Lin <ming.l@ssi.samsung.com>
---
 drivers/nvme/target/vhost.c | 420 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 416 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/target/vhost.c b/drivers/nvme/target/vhost.c
index 6847c86..3ce1348 100644
--- a/drivers/nvme/target/vhost.c
+++ b/drivers/nvme/target/vhost.c
@@ -6,10 +6,12 @@
 #include <linux/mutex.h>
 #include <linux/file.h>
 #include <linux/highmem.h>
+#include <linux/kthread.h>
 #include "../../vhost/vhost.h"
 #include "nvmet.h"
 
 #define NVMET_VHOST_AQ_DEPTH		256
+#define NVMET_VHOST_MAX_SEGMENTS	32
 
 enum NvmeCcShift {
 	CC_MPS_SHIFT	= 7,
@@ -52,6 +54,15 @@ struct nvmet_vhost_ctrl_eventfd {
 	int __user *vector;
 };
 
+struct nvmet_vhost_iod {	/* one command slot; an array of these is allocated per SQ */
+	struct nvmet_vhost_sq	*sq;	/* submission queue this slot belongs to */
+	struct scatterlist	sg[NVMET_VHOST_MAX_SEGMENTS];	/* data segments filled by nvmet_vhost_map_prp() */
+	struct nvme_command	cmd;	/* command copied in from guest memory */
+	struct nvme_completion	rsp;	/* completion entry copied out to guest memory */
+	struct nvmet_req	req;	/* nvmet request; req.cmd/req.rsp point at cmd/rsp above */
+	struct list_head	entry;	/* links the slot on sq->req_list or cq->req_list */
+};
+
 struct nvmet_vhost_cq {
 	struct nvmet_cq		cq;
 	struct nvmet_vhost_ctrl	*ctrl;
@@ -61,6 +72,12 @@ struct nvmet_vhost_cq {
 	u8			phase;
 	u64			dma_addr;
 	struct eventfd_ctx	*eventfd;
+
+	struct list_head	sq_list;
+	struct list_head	req_list;
+	spinlock_t		lock;
+	struct task_struct	*thread;
+	int			scheduled;
 };
 
 struct nvmet_vhost_sq {
@@ -71,6 +88,13 @@ struct nvmet_vhost_sq {
 	u32			tail;
 	u64			dma_addr;
 	u16			cqid;
+
+	struct nvmet_vhost_iod	*io_req;
+	struct list_head	req_list;
+	struct list_head	entry;
+	struct mutex            lock;
+	struct task_struct	*thread;
+	int			scheduled;
 };
 
 struct nvmet_vhost_ctrl {
@@ -191,13 +215,13 @@ static int nvmet_vhost_rw(struct vhost_dev *dev, u64 guest_pa,
 	return 0;
 }
 
-int nvmet_vhost_read(struct vhost_dev *dev, u64 guest_pa,
+static int nvmet_vhost_read(struct vhost_dev *dev, u64 guest_pa,
 		void *buf, uint32_t size)
 {
 	return nvmet_vhost_rw(dev, guest_pa, buf, size, 0);
 }
 
-int nvmet_vhost_write(struct vhost_dev *dev, u64 guest_pa,
+static int nvmet_vhost_write(struct vhost_dev *dev, u64 guest_pa,
 		void *buf, uint32_t size)
 {
 	return nvmet_vhost_rw(dev, guest_pa, buf, size, 1);
@@ -216,6 +240,289 @@ static int nvmet_vhost_check_cqid(struct nvmet_ctrl *n, u16 cqid)
 	return cqid <= n->subsys->max_qid && n->cqs[cqid] != NULL ? 0 : -1;
 }
 
+static void nvmet_vhost_inc_cq_tail(struct nvmet_vhost_cq *cq)
+{
+	/* Step the tail; when it wraps back to slot 0 the phase value flips. */
+	if (++cq->tail < cq->cq.size)
+		return;
+	cq->tail = 0;
+	cq->phase = !cq->phase;
+}
+
+static void nvmet_vhost_inc_sq_head(struct nvmet_vhost_sq *sq)
+{
+	sq->head = (sq->head + 1) % sq->sq.size; /* step the head, wrapping at the queue size */
+}
+
+static uint8_t nvmet_vhost_cq_full(struct nvmet_vhost_cq *cq)
+{
+	return (cq->tail + 1) % cq->cq.size == cq->head; /* full when one more post would reach head */
+}
+
+static uint8_t nvmet_vhost_sq_empty(struct nvmet_vhost_sq *sq)
+{
+	return sq->head == sq->tail; /* nothing to fetch once head has caught up with tail */
+}
+
+/*
+ * Copy queued completions into the guest CQ until the list is drained or the
+ * CQ is full, return each iod to its SQ, then signal the CQ eventfd once.
+ */
+static void nvmet_vhost_post_cqes(struct nvmet_vhost_cq *cq)
+{
+	struct nvmet_vhost_ctrl *n = cq->ctrl;
+	struct nvmet_vhost_iod *req;
+	int signal = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cq->lock, flags);
+	/* Re-test the list every pass: entries can be queued while unlocked. */
+	while (!list_empty(&cq->req_list) && !nvmet_vhost_cq_full(cq)) {
+		struct nvmet_vhost_sq *sq;
+		u64 addr;
+
+		req = list_first_entry(&cq->req_list,
+					struct nvmet_vhost_iod, entry);
+		list_del(&req->entry);
+
+		sq = req->sq;
+		req->rsp.status |= cq->phase;
+		req->rsp.sq_id = cpu_to_le16(sq->sq.qid);
+		req->rsp.sq_head = cpu_to_le16(sq->head);
+		addr = cq->dma_addr + cq->tail * n->cqe_size;
+		nvmet_vhost_inc_cq_tail(cq);
+		spin_unlock_irqrestore(&cq->lock, flags);
+
+		nvmet_vhost_write(&n->dev, addr, (void *)&req->rsp,
+			sizeof(req->rsp));
+
+		mutex_lock(&sq->lock);
+		list_add_tail(&req->entry, &sq->req_list);
+		mutex_unlock(&sq->lock);
+
+		signal = 1;
+		spin_lock_irqsave(&cq->lock, flags);
+	}
+
+	/* Notify the guest even when the loop stopped early on a full CQ. */
+	if (signal)
+		eventfd_signal(cq->eventfd, 1);
+	cq->scheduled = 0;
+	spin_unlock_irqrestore(&cq->lock, flags);
+}
+
+/*
+ * Thread body for one CQ: post completions over and over until stopped.
+ */
+static int nvmet_vhost_cq_thread(void *arg)
+{
+	struct nvmet_vhost_cq *cq = arg;
+
+	while (!kthread_should_stop()) {
+		nvmet_vhost_post_cqes(cq);
+		/* NOTE(review): no set_current_state() precedes this; confirm it sleeps */
+		schedule();
+	}
+
+	return 0;
+}
+
+/* Queue a finished iod on the CQ bound to its SQ and wake that CQ's thread. */
+static void nvmet_vhost_enqueue_req_completion(
+		struct nvmet_vhost_cq *cq, struct nvmet_vhost_iod *iod)
+{
+	unsigned long flags;
+
+	BUG_ON(cq->cq.qid != iod->sq->cqid);
+	spin_lock_irqsave(&cq->lock, flags);
+	list_add_tail(&iod->entry, &cq->req_list);
+	if (!cq->scheduled) {
+		wake_up_process(cq->thread);
+		cq->scheduled = 1;
+	}
+	spin_unlock_irqrestore(&cq->lock, flags);
+}
+
+/* Callback given to nvmet_req_init(): completes into the CQ named by sq->cqid. */
+static void nvmet_vhost_queue_response(struct nvmet_req *req)
+{
+	struct nvmet_vhost_iod *iod =
+		container_of(req, struct nvmet_vhost_iod, req);
+	struct nvmet_vhost_sq *sq = iod->sq;
+
+	nvmet_vhost_enqueue_req_completion(sq->ctrl->cqs[sq->cqid], iod);
+}
+
+/* Pin the host page behind guest_addr and describe it in *sg; 0 or -errno. */
+static int nvmet_vhost_sglist_add(struct nvmet_vhost_ctrl *n, struct scatterlist *sg,
+		u64 guest_addr, int len, int is_write)
+{
+	void __user *host_addr;
+	struct page *page;
+	unsigned int offset, nbytes;
+	int ret;
+
+	host_addr = map_guest_to_host(&n->dev, guest_addr, len);
+	if (unlikely(IS_ERR_OR_NULL(host_addr))) {
+		/* PTR_ERR(NULL) is 0, which the caller would take for success. */
+		ret = host_addr ? PTR_ERR(host_addr) : -EFAULT;
+		pr_warn("cannot map guest addr 0x%llx, error %d\n", guest_addr, ret);
+		return ret;
+	}
+
+	ret = get_user_pages(current, n->dev.mm, (unsigned long)host_addr, 1,
+			is_write, 0, &page, NULL);
+	BUG_ON(ret == 0); /* we should either get our page or fail */
+	if (ret < 0) {
+		pr_warn("get_user_pages failed: host_addr %p, %d\n", host_addr, ret);
+		return ret;
+	}
+
+	offset = (uintptr_t)host_addr & ~PAGE_MASK;
+	nbytes = min_t(unsigned int, PAGE_SIZE - offset, len);
+	sg_set_page(sg, page, nbytes, offset);
+	return 0;
+}
+
+/* Build sgl from PRP1/PRP2; returns the number of entries, or -1 on error. */
+static int nvmet_vhost_map_prp(struct nvmet_vhost_ctrl *n, struct scatterlist *sgl,
+	u64 prp1, u64 prp2, unsigned int len)
+{
+	unsigned int trans_len = n->page_size - (prp1 % n->page_size);
+	int is_write = 1;	/* FIXME */
+	int num_prps;
+
+	trans_len = min(len, trans_len);
+	if (!prp1)
+		return -1;
+
+	/* PRP1 fills one entry; every further (partial) page needs one more. */
+	len -= trans_len;
+	num_prps = 1 + (len >> n->page_bits) + !!(len & (n->page_size - 1));
+	if (num_prps > NVMET_VHOST_MAX_SEGMENTS)
+		return -1;
+	sg_init_table(sgl, num_prps);
+
+	if (nvmet_vhost_sglist_add(n, sgl++, prp1, trans_len, is_write))
+		return -1;
+	if (!len)
+		return num_prps;
+	if (!prp2)
+		return -1;
+	if (len > n->page_size) {
+		u64 prp_list[n->max_prp_ents];
+		u16 nents;
+		int i = 0;
+
+		nents = min_t(u16, n->max_prp_ents, (len + n->page_size - 1) >> n->page_bits);
+		if (nvmet_vhost_read(&n->dev, prp2, (void *)prp_list, nents * sizeof(u64)))
+			return -1;
+		while (len != 0) {
+			u64 prp_ent = le64_to_cpu(prp_list[i]);
+
+			if (i == n->max_prp_ents - 1 && len > n->page_size) {
+				/* The last slot of a full list chains to the next list. */
+				if (!prp_ent || prp_ent & (n->page_size - 1))
+					return -1;
+				i = 0;
+				nents = min_t(u16, n->max_prp_ents, (len + n->page_size - 1) >> n->page_bits);
+				if (nvmet_vhost_read(&n->dev, prp_ent, (void *)prp_list, nents * sizeof(u64)))
+					return -1;
+				prp_ent = le64_to_cpu(prp_list[i]);
+			}
+
+			if (!prp_ent || prp_ent & (n->page_size - 1))
+				return -1;
+			trans_len = min(len, n->page_size);
+			if (nvmet_vhost_sglist_add(n, sgl++, prp_ent, trans_len, is_write))
+				return -1;
+			len -= trans_len;
+			i++;
+		}
+	} else {
+		/* PRP2 is the final data page and carries all remaining bytes. */
+		if (prp2 & (n->page_size - 1) ||
+		    nvmet_vhost_sglist_add(n, sgl, prp2, len, is_write))
+			return -1;
+	}
+	return num_prps;
+}
+
+/* Drain one SQ: fetch each new command, prepare its iod and execute it. */
+static void nvmet_vhost_process_sq(struct nvmet_vhost_sq *sq)
+{
+	struct nvmet_vhost_ctrl *n = sq->ctrl;
+	struct nvmet_vhost_cq *cq = n->cqs[sq->cqid]; /* CQ bound to this SQ */
+	struct nvmet_vhost_iod *iod;
+	struct nvme_command *cmd;
+	int ret;
+
+	mutex_lock(&sq->lock);
+	while (!(nvmet_vhost_sq_empty(sq) || list_empty(&sq->req_list))) {
+		u64 addr = sq->dma_addr + sq->head * n->sqe_size;
+
+		nvmet_vhost_inc_sq_head(sq);
+		iod = list_first_entry(&sq->req_list,
+					struct nvmet_vhost_iod, entry);
+		list_del(&iod->entry);
+		mutex_unlock(&sq->lock);
+
+		cmd = &iod->cmd;
+		ret = nvmet_vhost_read(&n->dev, addr, (void *)cmd, sizeof(*cmd));
+		if (ret) {
+			pr_warn("nvmet_vhost_read fail\n");
+			goto out;
+		}
+
+		ret = nvmet_req_init(&iod->req, &cq->cq, &sq->sq,
+					nvmet_vhost_queue_response);
+		if (ret) {
+			pr_warn("nvmet_req_init error: ret 0x%x, qid %d\n", ret, sq->sq.qid);
+			goto out;
+		}
+		if (iod->req.data_len) {
+			ret = nvmet_vhost_map_prp(n, iod->sg, cmd->common.prp1,
+					cmd->common.prp2, iod->req.data_len);
+			if (ret > 0) {
+				iod->req.sg = iod->sg;
+				iod->req.sg_cnt = ret;
+			} else {
+				pr_warn("map prp error\n");
+				goto out;
+			}
+		}
+		iod->req.execute(&iod->req);
+		mutex_lock(&sq->lock);
+	}
+
+unlock:
+	sq->scheduled = 0;
+	mutex_unlock(&sq->lock);
+	return;
+
+out:
+	/* NOTE(review): nothing here posts a completion for the fetched command - confirm. */
+	mutex_lock(&sq->lock);
+	list_add_tail(&iod->entry, &sq->req_list);
+	goto unlock;
+}
+
+/*
+ * Thread body for one SQ: process the queue over and over until stopped.
+ */
+static int nvmet_vhost_sq_thread(void *opaque)
+{
+	struct nvmet_vhost_sq *vsq = opaque;
+
+	while (!kthread_should_stop()) {
+		nvmet_vhost_process_sq(vsq);
+		/* NOTE(review): no set_current_state() precedes this; confirm it sleeps */
+		schedule();
+	}
+
+	return 0;
+}
+
 static int nvmet_vhost_init_cq(struct nvmet_vhost_cq *cq,
 		struct nvmet_vhost_ctrl *n, u64 dma_addr,
 		u16 cqid, u16 size, struct eventfd_ctx *eventfd,
@@ -228,6 +535,12 @@ static int nvmet_vhost_init_cq(struct nvmet_vhost_cq *cq,
 	cq->eventfd = eventfd;
 	n->cqs[cqid] = cq;
 
+	spin_lock_init(&cq->lock);
+	INIT_LIST_HEAD(&cq->req_list);
+	INIT_LIST_HEAD(&cq->sq_list);
+	cq->scheduled = 0;
+	cq->thread = kthread_create(nvmet_vhost_cq_thread, cq, "nvmet_vhost_cq");
+
 	nvmet_cq_init(n->ctrl, &cq->cq, cqid, size);
 
 	return 0;
@@ -237,12 +550,36 @@ static int nvmet_vhost_init_sq(struct nvmet_vhost_sq *sq,
 		struct nvmet_vhost_ctrl *n, u64 dma_addr,
 		u16 sqid, u16 cqid, u16 size)
 {
+	struct nvmet_vhost_cq *cq;
+	struct nvmet_vhost_iod *iod;
+	int i;
+
 	sq->ctrl = n;
 	sq->dma_addr = dma_addr;
 	sq->cqid = cqid;
 	sq->head = sq->tail = 0;
 	n->sqs[sqid] = sq;
 
+	mutex_init(&sq->lock);
+	INIT_LIST_HEAD(&sq->req_list);
+	sq->io_req = kmalloc(sizeof(struct nvmet_vhost_iod) * size, GFP_KERNEL);
+	if (!sq->io_req)
+		return -ENOMEM;
+	for (i = 0; i < size; i++) {
+		iod = &sq->io_req[i];
+
+		iod->req.cmd = &iod->cmd;
+		iod->req.rsp = &iod->rsp;
+		iod->sq = sq;
+		list_add_tail(&iod->entry, &sq->req_list);
+	}
+	sq->scheduled = 0;
+	sq->thread = kthread_create(nvmet_vhost_sq_thread, sq, "nvmet_vhost_sq");
+
+	cq = n->cqs[cqid];
+	list_add_tail(&sq->entry, &cq->sq_list);
+	n->sqs[sqid] = sq;
+
 	nvmet_sq_init(n->ctrl, &sq->sq, sqid, size);
 
 	return 0;
@@ -564,12 +901,84 @@ static int nvmet_bar_write(struct nvmet_vhost_ctrl *n, int offset, u64 val)
 	return status;
 }
 
+static int nvmet_vhost_process_db(struct nvmet_ctrl *ctrl, int offset, u64 val)
+{
+	u16 qid;
+	/* Doorbell write: odd 4-byte slots carry a CQ head, even slots an SQ tail. */
+	if (offset & ((1 << 2) - 1))	/* offset must be 4-byte aligned */
+		return -EINVAL;
+
+	if (((offset - 0x1000) >> 2) & 1) {
+		u16 new_head = val & 0xffff;
+		int start_sqs;
+		struct nvmet_vhost_cq *vcq;
+		struct nvmet_cq *cq;
+		unsigned long flags;
+		/* Each queue id owns 8 bytes: the SQ tail slot, then the CQ head slot. */
+		qid = (offset - (0x1000 + (1 << 2))) >> 3;
+		if (nvmet_vhost_check_cqid(ctrl, qid))
+			return -EINVAL;
+
+		cq = ctrl->cqs[qid];
+		if (new_head >= cq->size)
+			return -EINVAL;
+		/* Remember whether the CQ was full before the head is moved. */
+		vcq = cq_to_vcq(cq);
+		spin_lock_irqsave(&vcq->lock, flags);
+		start_sqs = nvmet_vhost_cq_full(vcq) ? 1 : 0;
+		vcq->head = new_head;
+		spin_unlock_irqrestore(&vcq->lock, flags);
+		if (start_sqs) {
+			struct nvmet_vhost_sq *sq;
+			struct list_head *p;
+			/* The CQ was full: wake every SQ thread on sq_list, then the CQ thread. */
+			list_for_each(p, &vcq->sq_list) {
+				sq = list_entry(p, struct nvmet_vhost_sq, entry);
+				if (!sq->scheduled) { /* NOTE(review): accessed without sq->lock, unlike the SQ path below */
+					sq->scheduled = 1;
+					wake_up_process(sq->thread);
+				}
+			}
+			if (!vcq->scheduled) {
+				vcq->scheduled = 1;
+				wake_up_process(vcq->thread);
+			}
+		}
+		/* The CQ is not empty after the update: signal the eventfd again. */
+		if (vcq->tail != vcq->head)
+			eventfd_signal(vcq->eventfd, 1);
+	} else {
+		struct nvmet_vhost_sq *vsq;
+		struct nvmet_sq *sq;
+		u16 new_tail = val & 0xffff;
+
+		qid = (offset - 0x1000) >> 3;
+		if (nvmet_vhost_check_sqid(ctrl, qid))
+			return -EINVAL;
+
+		sq = ctrl->sqs[qid];
+		if (new_tail >= sq->size)
+			return -ENOSPC;
+		/* Record the new tail and wake the SQ thread unless already scheduled. */
+		vsq = sq_to_vsq(sq);
+		mutex_lock(&vsq->lock);
+		vsq->tail = new_tail;
+		if (!vsq->scheduled) {
+			vsq->scheduled = 1;
+			wake_up_process(vsq->thread);
+		}
+		mutex_unlock(&vsq->lock);
+	}
+
+	return 0;
+}
+
 static int nvmet_vhost_bar_write(struct nvmet_vhost_ctrl *n, int offset, u64 val)
 {
 	if (offset < 0x1000)
 		return nvmet_bar_write(n, offset, val);
-
-	return -1;
+	else
+		return nvmet_vhost_process_db(n->ctrl, offset, val);
 }
 
 static int nvmet_vhost_ioc_bar(struct nvmet_vhost_ctrl *n, void __user *argp)
@@ -612,6 +1021,8 @@ static void nvme_free_sq(struct nvmet_vhost_sq *sq,
 		struct nvmet_vhost_ctrl *n)
 {
 	n->sqs[sq->sq.qid] = NULL;
+	kthread_stop(sq->thread);
+	kfree(sq->io_req);
 	if (sq->sq.qid)
 		kfree(sq);
 }
@@ -620,6 +1031,7 @@ static void nvme_free_cq(struct nvmet_vhost_cq *cq,
 		struct nvmet_vhost_ctrl *n)
 {
 	n->cqs[cq->cq.qid] = NULL;
+	kthread_stop(cq->thread);
 	if (cq->cq.qid)
 		kfree(cq);
 }
-- 
1.9.1

  parent reply	other threads:[~2015-11-20  0:21 UTC|newest]

Thread overview: 90+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-11-20  0:20 [RFC PATCH 0/9] vhost-nvme: new qemu nvme backend using nvme target Ming Lin
2015-11-20  0:20 ` [Qemu-devel] " Ming Lin
2015-11-20  0:20 ` Ming Lin
2015-11-20  0:21 ` [RFC PATCH 1/9] nvme-vhost: add initial commit Ming Lin
2015-11-20  0:21   ` [Qemu-devel] " Ming Lin
2015-11-20  0:21   ` Ming Lin
2015-11-20  0:21 ` [RFC PATCH 2/9] nvme-vhost: add basic ioctl handlers Ming Lin
2015-11-20  0:21   ` [Qemu-devel] " Ming Lin
2015-11-20  0:21   ` Ming Lin
2015-11-20  0:21 ` [RFC PATCH 3/9] nvme-vhost: add basic nvme bar read/write Ming Lin
2015-11-20  0:21   ` [Qemu-devel] " Ming Lin
2015-11-20  0:21   ` Ming Lin
2015-11-20  0:21 ` [RFC PATCH 4/9] nvmet: add a controller "start" hook Ming Lin
2015-11-20  0:21   ` [Qemu-devel] " Ming Lin
2015-11-20  0:21   ` Ming Lin
2015-11-20  5:13   ` Christoph Hellwig
2015-11-20  5:13   ` Christoph Hellwig
2015-11-20  5:13     ` [Qemu-devel] " Christoph Hellwig
2015-11-20  5:31     ` Ming Lin
2015-11-20  5:31       ` [Qemu-devel] " Ming Lin
2015-11-20  5:31       ` Ming Lin
2015-11-20  0:21 ` [RFC PATCH 5/9] nvme-vhost: add controller "start" callback Ming Lin
2015-11-20  0:21   ` [Qemu-devel] " Ming Lin
2015-11-20  0:21   ` Ming Lin
2015-11-20  0:21 ` [RFC PATCH 6/9] nvmet: add a "parse_extra_admin_cmd" hook Ming Lin
2015-11-20  0:21   ` [Qemu-devel] " Ming Lin
2015-11-20  0:21   ` Ming Lin
2015-11-20  0:21 ` [RFC PATCH 7/9] nvme-vhost: add "parse_extra_admin_cmd" callback Ming Lin
2015-11-20  0:21   ` [Qemu-devel] " Ming Lin
2015-11-20  0:21   ` Ming Lin
2015-11-20  0:21 ` [RFC PATCH 8/9] nvme-vhost: add vhost memory helpers Ming Lin
2015-11-20  0:21   ` [Qemu-devel] " Ming Lin
2015-11-20  0:21   ` Ming Lin
2015-11-20  0:21 ` Ming Lin [this message]
2015-11-20  0:21   ` [Qemu-devel] [RFC PATCH 9/9] nvme-vhost: add nvme queue handlers Ming Lin
2015-11-20  0:21   ` Ming Lin
2015-11-20  5:16 ` [RFC PATCH 0/9] vhost-nvme: new qemu nvme backend using nvme target Christoph Hellwig
2015-11-20  5:16 ` Christoph Hellwig
2015-11-20  5:16   ` [Qemu-devel] " Christoph Hellwig
2015-11-20  5:33   ` Ming Lin
2015-11-20  5:33     ` [Qemu-devel] " Ming Lin
2015-11-20  5:33     ` Ming Lin
2015-11-21 13:11 ` Paolo Bonzini
2015-11-21 13:11   ` [Qemu-devel] " Paolo Bonzini
2015-11-23  8:17   ` Ming Lin
2015-11-23  8:17     ` [Qemu-devel] " Ming Lin
2015-11-23  8:17     ` Ming Lin
2015-11-23 14:14     ` Paolo Bonzini
2015-11-23 14:14       ` [Qemu-devel] " Paolo Bonzini
2015-11-23 14:14       ` Paolo Bonzini
2015-11-24  7:27       ` Ming Lin
2015-11-24  7:27         ` [Qemu-devel] " Ming Lin
2015-11-24  7:27         ` Ming Lin
2015-11-24  8:23         ` Ming Lin
2015-11-24  8:23           ` [Qemu-devel] " Ming Lin
2015-11-24  8:23           ` Ming Lin
2015-11-24 10:51         ` Paolo Bonzini
2015-11-24 10:51           ` [Qemu-devel] " Paolo Bonzini
2015-11-24 10:51           ` Paolo Bonzini
2015-11-24 19:25           ` Ming Lin
2015-11-24 19:25             ` [Qemu-devel] " Ming Lin
2015-11-24 19:25             ` Ming Lin
2015-11-25 11:27             ` Paolo Bonzini
2015-11-25 11:27               ` [Qemu-devel] " Paolo Bonzini
2015-11-25 11:27               ` Paolo Bonzini
2015-11-25 18:51               ` Ming Lin
2015-11-25 18:51                 ` [Qemu-devel] " Ming Lin
2015-11-25 18:51                 ` Ming Lin
2015-11-25 19:32                 ` Paolo Bonzini
2015-11-25 19:32                   ` [Qemu-devel] " Paolo Bonzini
2015-11-25 19:32                   ` Paolo Bonzini
2015-11-30 23:20       ` Ming Lin
2015-11-30 23:20         ` [Qemu-devel] " Ming Lin
2015-11-30 23:20         ` Ming Lin
2015-12-01 16:02         ` Paolo Bonzini
2015-12-01 16:02           ` [Qemu-devel] " Paolo Bonzini
2015-12-01 16:02           ` Paolo Bonzini
2015-12-01 16:26           ` Ming Lin
2015-12-01 16:26             ` [Qemu-devel] " Ming Lin
2015-12-01 16:26             ` Ming Lin
2015-12-01 16:59             ` Paolo Bonzini
2015-12-01 16:59               ` [Qemu-devel] " Paolo Bonzini
2015-12-01 16:59               ` Paolo Bonzini
2015-12-02  5:13               ` Ming Lin
2015-12-02  5:13                 ` [Qemu-devel] " Ming Lin
2015-12-02  5:13                 ` Ming Lin
2015-12-02 10:07                 ` Paolo Bonzini
2015-12-02 10:07                   ` [Qemu-devel] " Paolo Bonzini
2015-12-02 10:07                   ` Paolo Bonzini
2015-11-21 13:11 ` Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1447978868-17138-10-git-send-email-mlin@kernel.org \
    --to=mlin@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.