* [PATCH] NVMe: Async event request
@ 2013-06-04 23:55 Keith Busch
  2013-06-05 20:01 ` Matthew Wilcox
  0 siblings, 1 reply; 4+ messages in thread
From: Keith Busch @ 2013-06-04 23:55 UTC


Just taking a stab at the async event request... This follows "option 2"
of the original proposal, so if multiple openers are listening for events,
they may step on each other's toes: one reader may see an event that
another reader misses.
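
For reference, a minimal user-space consumer of the new read() interface
could look like the following (device path and error handling are
illustrative only, not part of this patch):

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		uint32_t result;
		int fd = open("/dev/nvme0", O_RDONLY);	/* management char device */

		if (fd < 0)
			return 1;
		/* read() blocks until the driver queues an async event result */
		while (read(fd, &result, sizeof(result)) == sizeof(result))
			printf("async event result: 0x%08x\n", (unsigned int)result);
		close(fd);
		return 0;
	}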

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 drivers/block/nvme-core.c |   62 ++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/nvme.h      |    4 +++
 2 files changed, 65 insertions(+), 1 deletions(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 42abf72..04ca408 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -164,6 +164,7 @@ static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx,
 #define CMD_CTX_COMPLETED	(0x310 + CMD_CTX_BASE)
 #define CMD_CTX_INVALID		(0x314 + CMD_CTX_BASE)
 #define CMD_CTX_FLUSH		(0x318 + CMD_CTX_BASE)
+#define CMD_CTX_ASYNC		(0x31C + CMD_CTX_BASE)
 
 static void special_completion(struct nvme_dev *dev, void *ctx,
 						struct nvme_completion *cqe)
@@ -234,6 +235,19 @@ void put_nvmeq(struct nvme_queue *nvmeq)
 	put_cpu();
 }
 
+static void nvme_async_completion(struct nvme_dev *dev, void *ctx,
+						struct nvme_completion *cqe)
+{
+	u32 result = le32_to_cpup(&cqe->result);
+	u16 status = le16_to_cpup(&cqe->status) >> 1;
+
+	if (status == NVME_SC_SUCCESS) {
+		kfifo_in(&dev->aer_kfifo, &result, sizeof(result));
+		wake_up(&dev->aer_empty);
+		++dev->aerl;
+	}
+}
+
 /**
  * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
  * @nvmeq: The queue to use
@@ -976,7 +990,8 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout)
 			.status = cpu_to_le16(NVME_SC_ABORT_REQ << 1),
 		};
 
-		if (timeout && !time_after(now, info[cmdid].timeout))
+		if (timeout && (!time_after(now, info[cmdid].timeout) ||
+				info[cmdid].ctx == CMD_CTX_ASYNC))
 			continue;
 		if (info[cmdid].ctx == CMD_CTX_CANCELLED)
 			continue;
@@ -1473,6 +1488,22 @@ static const struct block_device_operations nvme_fops = {
 	.compat_ioctl	= nvme_ioctl,
 };
 
+static void nvme_submit_async_req(struct nvme_dev *dev)
+{
+	int cmdid;
+	struct nvme_command c;
+	struct nvme_queue *nvmeq = dev->queues[0];
+
+	memset(&c, 0, sizeof(c));
+	c.common.opcode = nvme_admin_async_event;
+	cmdid = alloc_cmdid(nvmeq, CMD_CTX_ASYNC, nvme_async_completion, 0);
+	if (cmdid < 0)
+		return;
+
+	c.common.command_id = cmdid;
+	nvme_submit_cmd(dev->queues[0], &c);
+}
+
 static void nvme_resubmit_bios(struct nvme_queue *nvmeq)
 {
 	while (bio_list_peek(&nvmeq->sq_cong)) {
@@ -1512,6 +1543,8 @@ static int nvme_kthread(void *data)
 				nvme_resubmit_bios(nvmeq);
 				spin_unlock_irq(&nvmeq->q_lock);
 			}
+			for (; dev->aerl > 0; dev->aerl--)
+				nvme_submit_async_req(dev);
 		}
 		spin_unlock(&dev_list_lock);
 		schedule_timeout(round_jiffies_relative(HZ));
@@ -1740,8 +1773,15 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	}
 
 	ctrl = mem;
+	init_waitqueue_head(&dev->aer_empty);
+	res = kfifo_alloc(&dev->aer_kfifo, (ctrl->aerl + 1) * sizeof(u32),
+								GFP_KERNEL);
+	if (res)
+		goto out;
+
 	nn = le32_to_cpup(&ctrl->nn);
 	dev->oncs = le16_to_cpup(&ctrl->oncs);
+	dev->aerl = ctrl->aerl + 1;
 	memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
 	memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
 	memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
@@ -1853,6 +1893,7 @@ static void nvme_release_instance(struct nvme_dev *dev)
 static void nvme_free_dev(struct kref *kref)
 {
 	struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
+	kfifo_free(&dev->aer_kfifo);
 	nvme_dev_remove(dev);
 	pci_disable_msix(dev->pci_dev);
 	iounmap(dev->bar);
@@ -1892,10 +1933,29 @@ static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 	}
 }
 
+ssize_t nvme_dev_read(struct file *f, char __user *buf, size_t count,
+								loff_t *off)
+{
+	int ret;
+	unsigned int copied;
+	struct nvme_dev *dev = f->private_data;
+
+	if (count < sizeof(u32))
+		return -EINVAL;
+	if (f->f_flags & O_NONBLOCK)
+		return -EINVAL;
+	if (wait_event_killable(dev->aer_empty,
+					!kfifo_is_empty(&dev->aer_kfifo)))
+		return -EINTR;
+	ret = kfifo_to_user(&dev->aer_kfifo, buf, sizeof(u32), &copied);
+	return ret ? ret : copied;
+}
+
 static const struct file_operations nvme_dev_fops = {
 	.owner		= THIS_MODULE,
 	.open		= nvme_dev_open,
 	.release	= nvme_dev_release,
+	.read		= nvme_dev_read,
 	.unlocked_ioctl	= nvme_dev_ioctl,
 	.compat_ioctl	= nvme_dev_ioctl,
 };
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index f451c8d..a61e594 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -512,6 +512,7 @@ struct nvme_admin_cmd {
 #include <linux/pci.h>
 #include <linux/miscdevice.h>
 #include <linux/kref.h>
+#include <linux/kfifo.h>
 
 #define NVME_IO_TIMEOUT	(5 * HZ)
 
@@ -534,6 +535,8 @@ struct nvme_dev {
 	struct list_head namespaces;
 	struct kref kref;
 	struct miscdevice miscdev;
+	struct kfifo aer_kfifo;
+	wait_queue_head_t aer_empty;
 	char name[12];
 	char serial[20];
 	char model[40];
@@ -541,6 +544,7 @@ struct nvme_dev {
 	u32 max_hw_sectors;
 	u32 stripe_size;
 	u16 oncs;
+	u8 aerl;
 };
 
 /*
-- 
1.7.0.4


* [PATCH] NVMe: Async event request
  2013-06-04 23:55 [PATCH] NVMe: Async event request Keith Busch
@ 2013-06-05 20:01 ` Matthew Wilcox
  0 siblings, 0 replies; 4+ messages in thread
From: Matthew Wilcox @ 2013-06-05 20:01 UTC


On Tue, Jun 04, 2013 at 05:55:51PM -0600, Keith Busch wrote:
> Just taking a stab at the async event request... This follows "option 2"
> of the original proposal, so if multiple openers are listening for events,
> they may step on each other's toes where some reader may see an event and
> another reader will miss it.

Nice start ...

> @@ -234,6 +235,19 @@ void put_nvmeq(struct nvme_queue *nvmeq)
>  	put_cpu();
>  }
>  
> +static void nvme_async_completion(struct nvme_dev *dev, void *ctx,
> +						struct nvme_completion *cqe)
> +{
> +	u32 result = le32_to_cpup(&cqe->result);
> +	u16 status = le16_to_cpup(&cqe->status) >> 1;
> +
> +	if (status == NVME_SC_SUCCESS) {
> +		kfifo_in(&dev->aer_kfifo, &result, sizeof(result));
> +		wake_up(&dev->aer_empty);
> +		++dev->aerl;
> +	}
> +}

We don't check here whether the kfifo is full.  So if nobody's reading,
we'll drop later entries on the floor.  My sense is that this is a bad
idea; we should probably drop earlier entries rather than newer entries.
No idea if kfifo lets you do this or not.
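
Untested, but with the plain byte kfifo something along these lines ought
to do it (note the completion path would then also consume entries, so it
would need locking against the reader):

	if (status == NVME_SC_SUCCESS) {
		if (kfifo_avail(&dev->aer_kfifo) < sizeof(result)) {
			u32 discard;

			/* fifo full: drop the oldest record to make room */
			kfifo_out(&dev->aer_kfifo, &discard, sizeof(discard));
		}
		kfifo_in(&dev->aer_kfifo, &result, sizeof(result));
		wake_up(&dev->aer_empty);
		++dev->aerl;
	}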

>  /**
>   * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
>   * @nvmeq: The queue to use
> @@ -976,7 +990,8 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout)
>  			.status = cpu_to_le16(NVME_SC_ABORT_REQ << 1),
>  		};
>  
> -		if (timeout && !time_after(now, info[cmdid].timeout))
> +		if (timeout && (!time_after(now, info[cmdid].timeout) ||
> +				info[cmdid].ctx == CMD_CTX_ASYNC))
>  			continue;
>  		if (info[cmdid].ctx == CMD_CTX_CANCELLED)
>  			continue;

My only concern here is that on shutting down a controller in an orderly
fashion, we'll print 'Cancelling I/O ' to the syslog N times.

> @@ -1473,6 +1488,22 @@ static const struct block_device_operations nvme_fops = {
>  	.compat_ioctl	= nvme_ioctl,
>  };
>  
> +static void nvme_submit_async_req(struct nvme_dev *dev)
> +{
> +	int cmdid;
> +	struct nvme_command c;
> +	struct nvme_queue *nvmeq = dev->queues[0];
> +
> +	memset(&c, 0, sizeof(c));
> +	c.common.opcode = nvme_admin_async_event;
> +	cmdid = alloc_cmdid(nvmeq, CMD_CTX_ASYNC, nvme_async_completion, 0);
> +	if (cmdid < 0)
> +		return;
> +
> +	c.common.command_id = cmdid;
> +	nvme_submit_cmd(dev->queues[0], &c);
> +}

Personally, I'd rearrange like so:

> +	cmdid = alloc_cmdid(nvmeq, CMD_CTX_ASYNC, nvme_async_completion, 0);
> +	if (cmdid < 0)
> +		return;
> +
> +	memset(&c, 0, sizeof(c));
> +	c.common.opcode = nvme_admin_async_event;
> +	c.common.command_id = cmdid;
> +	nvme_submit_cmd(dev->queues[0], &c);

It's a little nicer to see everything in the command initialised together.

>  	dev->oncs = le16_to_cpup(&ctrl->oncs);
> +	dev->aerl = ctrl->aerl + 1;

If the device specifies '255' in the data structure, then we're going
to submit 0 requests instead of 256.  We should probably just make
'aerl' 16-bit.
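
i.e. in struct nvme_dev:

	u16 oncs;
	u16 aerl;		/* was u8: ctrl->aerl == 255 now gives 256, not 0 */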

> @@ -1892,10 +1933,29 @@ static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
>  	}
>  }
>  
> +ssize_t nvme_dev_read(struct file *f, char __user *buf, size_t count,
> +								loff_t *off)
> +{
> +	int ret;
> +	unsigned int copied;
> +	struct nvme_dev *dev = f->private_data;
> +
> +	if (count < sizeof(u32))
> +		return -EINVAL;
> +	if (f->f_flags & O_NONBLOCK)
> +		return -EINVAL;
> +	if (wait_event_killable(dev->aer_empty,
> +					!kfifo_is_empty(&dev->aer_kfifo)))
> +		return -EINTR;
> +	ret = kfifo_to_user(&dev->aer_kfifo, buf, sizeof(u32), &copied);
> +	return ret ? ret : copied;
> +}

Now, we're only returning an array of 'result' here.  I think it'd be
better to return a slightly larger record, in case we end up specifying
Dword 1 for something else, or we want to report back information through
'read' that didn't actually come from the device.  I suggested 16 bytes
per entry in my original proposal, formatted in a way that closely
mirrors the CQE.
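
For example (purely illustrative field names, sized and laid out to mirror
the CQE):

	struct nvme_event {
		__le32	result;		/* CQE Dword 0 */
		__u32	driver_info;	/* room for Dword 1 or driver-generated data */
		__le16	sq_head;
		__le16	sq_id;
		__u16	command_id;
		__le16	status;
	};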

>  static const struct file_operations nvme_dev_fops = {
>  	.owner		= THIS_MODULE,
>  	.open		= nvme_dev_open,
>  	.release	= nvme_dev_release,
> +	.read		= nvme_dev_read,
>  	.unlocked_ioctl	= nvme_dev_ioctl,
>  	.compat_ioctl	= nvme_dev_ioctl,
>  };

Please also add a 'poll' entry so an app can use poll()/select() to wait
for new records to become available.
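
Something like this sketch, reusing the waitqueue and fifo added in this
patch (needs <linux/poll.h>):

	static unsigned int nvme_dev_poll(struct file *f, poll_table *wait)
	{
		struct nvme_dev *dev = f->private_data;
		unsigned int mask = 0;

		poll_wait(f, &dev->aer_empty, wait);
		if (!kfifo_is_empty(&dev->aer_kfifo))
			mask |= POLLIN | POLLRDNORM;
		return mask;
	}

plus a '.poll = nvme_dev_poll,' entry in nvme_dev_fops.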

> @@ -534,6 +535,8 @@ struct nvme_dev {
>  	struct list_head namespaces;
>  	struct kref kref;
>  	struct miscdevice miscdev;
> +	struct kfifo aer_kfifo;
> +	wait_queue_head_t aer_empty;
>  	char name[12];
>  	char serial[20];
>  	char model[40];

We need to be careful calling things "aer".  That already means Advanced
Error Reporting in PCIe.  Better to call them event_fifo and event_empty,
I think.

> @@ -541,6 +544,7 @@ struct nvme_dev {
>  	u32 max_hw_sectors;
>  	u32 stripe_size;
>  	u16 oncs;
> +	u8 aerl;
>  };
>  
>  /*
> -- 
> 1.7.0.4
> 


* [PATCH] NVMe: Async event request
@ 2014-06-17 22:53 Keith Busch
  2014-06-18  3:45 ` Matthew Wilcox
  0 siblings, 1 reply; 4+ messages in thread
From: Keith Busch @ 2014-06-17 22:53 UTC


Submits NVMe asynchronous event requests, up to the controller's maximum or
the number of possible distinct event types (8), whichever is smaller.
Events successfully returned by the controller are logged.

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
Taking a simpler approach with this feature than previous attempts. This
just sets up the driver to send asynchronous event requests and logs the
result as the controller completes them. If a better way of presenting
these is decided on, that can easily be changed in a follow-on patch.

 drivers/block/nvme-core.c |   42 +++++++++++++++++++++++++++++++++++++++++-
 include/linux/nvme.h      |    1 +
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 02351e2..515b37f 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -207,6 +207,7 @@ static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx,
 #define CMD_CTX_COMPLETED	(0x310 + CMD_CTX_BASE)
 #define CMD_CTX_INVALID		(0x314 + CMD_CTX_BASE)
 #define CMD_CTX_ABORT		(0x318 + CMD_CTX_BASE)
+#define CMD_CTX_ASYNC		(0x31C + CMD_CTX_BASE)
 
 static void special_completion(struct nvme_queue *nvmeq, void *ctx,
 						struct nvme_completion *cqe)
@@ -229,6 +230,18 @@ static void special_completion(struct nvme_queue *nvmeq, void *ctx,
 				cqe->command_id, le16_to_cpup(&cqe->sq_id));
 		return;
 	}
+	if (ctx == CMD_CTX_ASYNC) {
+		u32 result = le32_to_cpup(&cqe->result);
+		u16 status = le16_to_cpup(&cqe->status) >> 1;
+
+		if (status != NVME_SC_SUCCESS)
+			return;
+
+		dev_warn(nvmeq->q_dmadev,
+				"async event result %08x\n", result);
+		++nvmeq->dev->event_limit;
+		return;
+	}
 
 	dev_warn(nvmeq->q_dmadev, "Unknown special completion %p\n", ctx);
 }
@@ -1159,7 +1172,8 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout)
 
 		if (timeout && !time_after(now, info[cmdid].timeout))
 			continue;
-		if (info[cmdid].ctx == CMD_CTX_CANCELLED)
+		if (info[cmdid].ctx == CMD_CTX_CANCELLED ||
+					info[cmdid].ctx == CMD_CTX_ASYNC)
 			continue;
 		if (timeout && nvmeq->dev->initialized) {
 			nvme_abort_cmd(cmdid, nvmeq);
@@ -1823,6 +1837,25 @@ static void nvme_resubmit_iods(struct nvme_queue *nvmeq)
 	}
 }
 
+static void nvme_submit_async_req(struct nvme_queue *nvmeq)
+{
+	struct nvme_command *c;
+	int cmdid;
+
+	cmdid = alloc_cmdid(nvmeq, CMD_CTX_ASYNC, special_completion, 0);
+	if (cmdid < 0)
+		return;
+
+	c = &nvmeq->sq_cmds[nvmeq->sq_tail];
+	memset(c, 0, sizeof(*c));
+	c->common.opcode = nvme_admin_async_event;
+	c->common.command_id = cmdid;
+
+	if (++nvmeq->sq_tail == nvmeq->q_depth)
+		nvmeq->sq_tail = 0;
+	writel(nvmeq->sq_tail, nvmeq->q_db);
+}
+
 static void nvme_resubmit_bios(struct nvme_queue *nvmeq)
 {
 	while (bio_list_peek(&nvmeq->sq_cong)) {
@@ -1876,6 +1909,12 @@ static int nvme_kthread(void *data)
 				nvme_cancel_ios(nvmeq, true);
 				nvme_resubmit_bios(nvmeq);
 				nvme_resubmit_iods(nvmeq);
+
+				if (!i) {
+					for (; dev->event_limit > 0;
+							dev->event_limit--)
+						nvme_submit_async_req(nvmeq);
+				}
  unlock:
 				spin_unlock_irq(&nvmeq->q_lock);
 			}
@@ -2244,6 +2283,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	dev->oncs = le16_to_cpup(&ctrl->oncs);
 	dev->abort_limit = ctrl->acl + 1;
 	dev->vwc = ctrl->vwc;
+	dev->event_limit = min(ctrl->aerl + 1, 8);
 	memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
 	memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
 	memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 2bf4031..974efd0 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -99,6 +99,7 @@ struct nvme_dev {
 	u32 stripe_size;
 	u16 oncs;
 	u16 abort_limit;
+	u8 event_limit;
 	u8 vwc;
 	u8 initialized;
 };
-- 
1.7.10.4


* [PATCH] NVMe: Async event request
  2014-06-17 22:53 Keith Busch
@ 2014-06-18  3:45 ` Matthew Wilcox
  0 siblings, 0 replies; 4+ messages in thread
From: Matthew Wilcox @ 2014-06-18  3:45 UTC


On Tue, Jun 17, 2014 at 04:53:58PM -0600, Keith Busch wrote:
> +static void nvme_submit_async_req(struct nvme_queue *nvmeq)
> +{
> +	struct nvme_command *c;
> +	int cmdid;
> +
> +	cmdid = alloc_cmdid(nvmeq, CMD_CTX_ASYNC, special_completion, 0);
> +	if (cmdid < 0)
> +		return;

Return -EBUSY here ...

> +	c = &nvmeq->sq_cmds[nvmeq->sq_tail];
> +	memset(c, 0, sizeof(*c));
> +	c->common.opcode = nvme_admin_async_event;
> +	c->common.command_id = cmdid;
> +
> +	if (++nvmeq->sq_tail == nvmeq->q_depth)
> +		nvmeq->sq_tail = 0;
> +	writel(nvmeq->sq_tail, nvmeq->q_db);
> +}

... and 0 here ...

>  static void nvme_resubmit_bios(struct nvme_queue *nvmeq)
>  {
>  	while (bio_list_peek(&nvmeq->sq_cong)) {
> @@ -1876,6 +1909,12 @@ static int nvme_kthread(void *data)
>  				nvme_cancel_ios(nvmeq, true);
>  				nvme_resubmit_bios(nvmeq);
>  				nvme_resubmit_iods(nvmeq);
> +
> +				if (!i) {
> +					for (; dev->event_limit > 0;
> +							dev->event_limit--)
> +						nvme_submit_async_req(nvmeq);
> +				}

... and make this:

				while ((i == 0) && (dev->event_limit > 0)) {
					if (nvme_submit_async_req(nvmeq))
						break;
					dev->event_limit--;
				}
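
Putting those together, the submit path would end up looking something
like this (untested):

	static int nvme_submit_async_req(struct nvme_queue *nvmeq)
	{
		struct nvme_command *c;
		int cmdid;

		cmdid = alloc_cmdid(nvmeq, CMD_CTX_ASYNC, special_completion, 0);
		if (cmdid < 0)
			return -EBUSY;

		c = &nvmeq->sq_cmds[nvmeq->sq_tail];
		memset(c, 0, sizeof(*c));
		c->common.opcode = nvme_admin_async_event;
		c->common.command_id = cmdid;

		if (++nvmeq->sq_tail == nvmeq->q_depth)
			nvmeq->sq_tail = 0;
		writel(nvmeq->sq_tail, nvmeq->q_db);
		return 0;
	}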

What do you think?
