[PATCH 5/7] NVMe: Per-cpu IO queues

All of lore.kernel.org
 help / color / mirror / Atom feed

From: willy@linux.intel.com (Matthew Wilcox)
Subject: [PATCH 5/7] NVMe: Per-cpu IO queues
Date: Fri, 31 Jan 2014 12:47:57 -0500	[thread overview]
Message-ID: <20140131174757.GB2285@linux.intel.com> (raw)
In-Reply-To: <1390607454-23273-6-git-send-email-keith.busch@intel.com>

On Fri, Jan 24, 2014 at 04:50:52PM -0700, Keith Busch wrote:
> NVMe IO queues are associated with CPUs, and linux provides a handy
> per-cpu implementation. This gives us a convenient way to optimally
> assign queues to multiple cpus when the device supports fewer queues
> than the host has cpus. The previous implementation did not share these
> optimally and may have shared very poorly in some situations. This new
> way will share queues among cpus that are "close" together and should
> have the lowest penalty for lock contention.

I got to thinking about this one after sparse flagged a couple of
problems.  Why not do it this way?

The advantage is that it only requires one percpu allocation.

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index e302f55..0afa8ee 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -268,16 +268,15 @@ static void *cancel_cmdid(struct nvme_queue *nvmeq, int cmdid,
 
 struct nvme_queue *get_nvmeq(struct nvme_dev *dev) __acquires(RCU)
 {
-	struct nvme_queue __rcu **nvmeqp;
+	unsigned short i = get_cpu_var(*dev->io_queue);
 	rcu_read_lock();
-	nvmeqp = get_cpu_ptr(dev->io_queues);
-	return rcu_dereference(*nvmeqp);
+	return rcu_dereference(dev->queues[i]);
 }
 
 void put_nvmeq(struct nvme_queue *nvmeq) __releases(RCU)
 {
-	put_cpu_ptr(nvmeq->dev->io_queues);
 	rcu_read_unlock();
+	put_cpu_var(nvmeq->dev->io_queue);
 }
 
 /**
@@ -1171,9 +1170,6 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest)
 		struct nvme_queue *nvmeq = dev->queues[i];
 
 		for_each_cpu(cpu, &nvmeq->cpu_mask) {
-			rcu_assign_pointer(
-				*per_cpu_ptr(dev->io_queues, cpu),
-				NULL);
 			cpumask_clear_cpu(cpu, &nvmeq->cpu_mask);
 		}
 		rcu_assign_pointer(dev->queues[i], NULL);
@@ -1923,9 +1919,7 @@ static void nvme_set_queue_cpus(cpumask_t *qmask, struct nvme_queue *nvmeq,
 		if (cpus_weight(nvmeq->cpu_mask) >= count)
 			break;
 		if (!cpumask_test_and_set_cpu(cpu, &nvmeq->cpu_mask))
-			rcu_assign_pointer(
-				*per_cpu_ptr(nvmeq->dev->io_queues, cpu),
-				nvmeq);
+			*per_cpu_ptr(nvmeq->dev->io_queue, cpu) = nvmeq->qid;
 	}
 }
 
@@ -2040,8 +2034,7 @@ static void nvme_assign_io_queues(struct nvme_dev *dev)
 	cpumask_andnot(&unassigned_cpus, cpu_possible_mask, cpu_online_mask);
 	i = 0;
 	for_each_cpu(cpu, &unassigned_cpus)
-		rcu_assign_pointer(*per_cpu_ptr(dev->io_queues, cpu),
-					dev->queues[(i++ % queues) + 1]);
+		*per_cpu_ptr(dev->io_queue, cpu) = (i++ % queues) + 1;
 }
 
 static int set_queue_count(struct nvme_dev *dev, int count)
@@ -2532,7 +2525,7 @@ static void nvme_free_dev(struct kref *kref)
 	struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
 
 	nvme_free_namespaces(dev);
-	free_percpu(dev->io_queues);
+	free_percpu(dev->io_queue);
 	kfree(dev->affinity_masks);
 	kfree(dev->queues);
 	kfree(dev->entry);
@@ -2683,8 +2676,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 				sizeof(*dev->affinity_masks), GFP_KERNEL);
 	if (!dev->queues)
 		goto free;
-	dev->io_queues = alloc_percpu(struct nvme_queue *);
-	if (!dev->io_queues)
+	dev->io_queue = alloc_percpu(unsigned short);
+	if (!dev->io_queue)
 		goto free;
 
 	INIT_LIST_HEAD(&dev->namespaces);
@@ -2734,7 +2727,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
  release:
 	nvme_release_instance(dev);
  free:
-	free_percpu(dev->io_queues);
+	free_percpu(dev->io_queue);
 	kfree(dev->affinity_masks);
 	kfree(dev->queues);
 	kfree(dev->entry);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 89966c0..0c051e5 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -74,7 +74,7 @@ enum {
 struct nvme_dev {
 	struct list_head node;
 	struct nvme_queue __rcu **queues;
-	struct nvme_queue __rcu * __percpu *io_queues;
+	unsigned short __percpu *io_queue;
 	u32 __iomem *dbs;
 	struct pci_dev *pci_dev;
 	struct dma_pool *prp_page_pool;

next prev parent reply	other threads:[~2014-01-31 17:47 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-01-24 23:50 [PATCH 0/7] Fixes and improvements Keith Busch
2014-01-24 23:50 ` [PATCH 1/7] NVMe: Namespace use after free on surprise removal Keith Busch
2014-01-24 23:50 ` [PATCH 2/7] NVMe: RCU access to nvme_queue Keith Busch
2014-01-24 23:50 ` [PATCH 3/7] NVMe: Initialization clean-up Keith Busch
2014-01-24 23:50 ` [PATCH 4/7] NVMe: Clean-up character device bring-up Keith Busch
2014-01-24 23:50 ` [PATCH 5/7] NVMe: Per-cpu IO queues Keith Busch
2014-01-31 17:47   ` Matthew Wilcox [this message]
2014-01-24 23:50 ` [PATCH 6/7] NVMe: CPU hot plug notification Keith Busch
2014-01-24 23:50 ` [PATCH 7/7] NVMe: Share interrupt vectors among IO queues Keith Busch

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:e302f55 dfblob:0afa8ee dfblob:89966c0 dfblob:0c051e5 )
 OR (
bs:"[PATCH 5/7] NVMe: Per-cpu IO queues" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140131174757.GB2285@linux.intel.com \
    --to=willy@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.