linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: akpm@linux-foundation.org, avi@redhat.com, nate@cpanel.net,
	cl@linux-foundation.org, oleg@redhat.com, axboe@kernel.dk,
	vgoyal@redhat.com
Cc: linux-kernel@vger.kernel.org, Tejun Heo <tj@kernel.org>
Subject: [PATCH 6/7] mempool, percpu: implement percpu mempool
Date: Thu, 22 Dec 2011 13:45:25 -0800	[thread overview]
Message-ID: <1324590326-10135-7-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1324590326-10135-1-git-send-email-tj@kernel.org>

This patch implements mempool for percpu memory areas.  Percpu mempool
is mostly identical to regular mempool and shares most of the code but
has some peculiarities.

The percpu memory allocator requires %GFP_KERNEL during allocation,
which comes from its on-demand nature and vmalloc area usage.  In most
cases, it's not a good idea to allocate percpu memory from a more
constricted context and this doesn't cause a problem; however, there
are rare cases where opportunistic allocation from a NOIO path makes
sense.

To ease such use cases, percpu mempool comes with a refill mechanism
which can behave both synchronously and asynchronously depending on
the specified gfp mask.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Vivek Goyal <vgoyal@redhat.com>
---
 include/linux/mempool.h |   80 ++++++++++++++++++++++++++++++++++
 mm/mempool.c            |  111 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 191 insertions(+), 0 deletions(-)

diff --git a/include/linux/mempool.h b/include/linux/mempool.h
index 7c08052..129acbe 100644
--- a/include/linux/mempool.h
+++ b/include/linux/mempool.h
@@ -5,6 +5,7 @@
 #define _LINUX_MEMPOOL_H
 
 #include <linux/wait.h>
+#include <linux/workqueue.h>
 
 struct kmem_cache;
 
@@ -70,4 +71,83 @@ static inline mempool_t *mempool_create_page_pool(int min_nr, int order)
 			      (void *)(long)order);
 }
 
+/*
+ * Percpu mempool - mempool backed by the percpu memory allocator.
+ *
+ * Besides the usual mempool role, because the percpu allocator doesn't
+ * support NOIO allocations, a percpu mempool is useful as an allocation
+ * buffer which is filled from IO context and consumed from atomic or
+ * non-IO context.  To help this usage, percpu_mempool has a built-in
+ * refill mechanism which supports both sync and async operation.  Refer
+ * to percpu_mempool_refill() for details.
+ */
+struct percpu_mempool {
+	mempool_t		pool;		/* must be the first member */
+	size_t			size;		/* size of elements */
+	size_t			align;		/* align of elements */
+	struct work_struct	refill_work;	/* work item for async refill */
+};
+
+struct percpu_mempool *percpu_mempool_create(int min_nr, size_t size,
+					     size_t align);
+int percpu_mempool_refill(struct percpu_mempool *pcpu_pool, gfp_t gfp_mask);
+void percpu_mempool_destroy(struct percpu_mempool *pcpu_pool);
+
+/**
+ * percpu_mempool_resize - resize an existing percpu mempool
+ * @pcpu_pool:	percpu mempool to resize
+ * @new_min_nr:	new minimum number of elements guaranteed to be allocated
+ * @gfp_mask:	allocation mask to use
+ *
+ * Counterpart of mempool_resize(); returns its result.  If @gfp_mask
+ * doesn't contain all of %GFP_KERNEL, resizing itself may succeed but the
+ * implied filling (if necessary) will fail.
+ */
+static inline int percpu_mempool_resize(struct percpu_mempool *pcpu_pool,
+					int new_min_nr, gfp_t gfp_mask)
+{
+	return mempool_resize(&pcpu_pool->pool, new_min_nr, gfp_mask);
+}
+
+/**
+ * percpu_mempool_alloc - allocate an element from a percpu mempool
+ * @pcpu_pool:	percpu mempool to allocate from
+ * @gfp_mask:	allocation mask to use
+ *
+ * Counterpart of mempool_alloc().  If @gfp_mask doesn't contain all of
+ * %GFP_KERNEL, allocation is always from the reserved pool.
+ */
+static inline void __percpu *
+percpu_mempool_alloc(struct percpu_mempool *pcpu_pool, gfp_t gfp_mask)
+{
+	void *p = mempool_alloc(&pcpu_pool->pool, gfp_mask);
+
+	return (void __percpu __force *)p;	/* re-tag as __percpu */
+}
+
+/**
+ * percpu_mempool_free - free an element to a percpu mempool
+ * @elem:	percpu element being returned to the pool
+ * @pcpu_pool:	percpu mempool to free to
+ */
+static inline void percpu_mempool_free(void __percpu *elem,
+				       struct percpu_mempool *pcpu_pool)
+{
+	void *p = (void __kernel __force *)elem;	/* drop __percpu tag */
+
+	mempool_free(p, &pcpu_pool->pool);
+}
+
+/**
+ * percpu_mempool_nr_elems - return nr of reserved elems in a percpu mempool
+ * @pcpu_pool:	percpu mempool of interest
+ *
+ * Returns the number of elements currently in reserve in @pcpu_pool, read
+ * here without locking.  Mostly useful for deciding when to refill.
+ */
+static inline int percpu_mempool_nr_elems(struct percpu_mempool *pcpu_pool)
+{
+	return pcpu_pool->pool.curr_nr;
+}
+
 #endif /* _LINUX_MEMPOOL_H */
diff --git a/mm/mempool.c b/mm/mempool.c
index 85e2c28..f25f731 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -14,6 +14,7 @@
 #include <linux/mempool.h>
 #include <linux/blkdev.h>
 #include <linux/writeback.h>
+#include <linux/percpu.h>
 
 static void add_element(mempool_t *pool, void *element)
 {
@@ -398,3 +399,113 @@ void mempool_free_pages(void *element, void *pool_data)
 	__free_pages(element, order);
 }
 EXPORT_SYMBOL(mempool_free_pages);
+
+/*
+ * Mempool for percpu memory.
+ */
+static void *percpu_mempool_alloc_fn(gfp_t gfp_mask, void *data)
+{
+	struct percpu_mempool *pcpu_pool = data;
+	void __percpu *p;
+
+	/*
+	 * mempool alloc_fn.  The percpu allocator doesn't do NOIO, so fail
+	 * unless all of GFP_KERNEL is set.  This makes percpu mempool always
+	 * try reserved elements first, which isn't such a bad idea given that
+	 * the percpu allocator is pretty heavy and percpu areas are expensive.
+	 */
+	if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
+		return NULL;
+
+	p = __alloc_percpu(pcpu_pool->size, pcpu_pool->align);
+	return (void __kernel __force *)p;
+}
+
+static void percpu_mempool_free_fn(void *elem, void *data)
+{
+	void __percpu *p = (void __percpu __force *)elem;
+
+	free_percpu(p);		/* mempool free_fn; @data is not needed */
+}
+
+static void percpu_mempool_refill_workfn(struct work_struct *work)
+{
+	struct percpu_mempool *pcpu_pool =
+		container_of(work, struct percpu_mempool, refill_work);
+
+	percpu_mempool_refill(pcpu_pool, GFP_KERNEL);	/* takes sync path */
+}
+
+/**
+ * percpu_mempool_create - create mempool for percpu memory
+ * @min_nr:	the minimum number of elements guaranteed to be
+ *		allocated for this pool.
+ * @size:	size of percpu memory areas in this pool
+ * @align:	alignment of percpu memory areas in this pool
+ *
+ * Counterpart of mempool_create() for percpu memory areas.  Allocations
+ * from the pool return @size-byte percpu areas aligned to @align bytes.
+ * Returns the new pool, pre-filled with GFP_KERNEL, or %NULL on failure.
+ */
+struct percpu_mempool *percpu_mempool_create(int min_nr, size_t size,
+					     size_t align)
+{
+	struct percpu_mempool *pcpu_pool;
+	mempool_t *pool;
+
+	BUILD_BUG_ON(offsetof(struct percpu_mempool, pool));
+
+	pool = __mempool_create(min_nr, percpu_mempool_alloc_fn,
+				percpu_mempool_free_fn, NULL, NUMA_NO_NODE,
+				sizeof(*pcpu_pool));
+	if (!pool)
+		return NULL;
+
+	/* fill in pcpu_pool part and set pool_data to self */
+	pcpu_pool = container_of(pool, struct percpu_mempool, pool);
+	pcpu_pool->size = size;
+	pcpu_pool->align = align;
+	INIT_WORK(&pcpu_pool->refill_work, percpu_mempool_refill_workfn);
+	pcpu_pool->pool.pool_data = pcpu_pool;
+
+	/* Pre-allocate the guaranteed number of buffers */
+	if (mempool_fill(&pcpu_pool->pool, GFP_KERNEL)) {
+		mempool_destroy(&pcpu_pool->pool);
+		return NULL;
+	}
+
+	return pcpu_pool;
+}
+EXPORT_SYMBOL_GPL(percpu_mempool_create);
+
+/**
+ * percpu_mempool_refill - refill a percpu mempool
+ * @pcpu_pool:	percpu mempool to refill
+ * @gfp_mask:	allocation mask to use
+ *
+ * Refill @pcpu_pool up to the configured min_nr using @gfp_mask.
+ *
+ * Percpu memory allocation depends on %GFP_KERNEL.  If @gfp_mask contains
+ * it, the pool is filled synchronously via mempool_fill().  Otherwise, a
+ * refill work item is scheduled and -%EAGAIN (refill in progress) returned.
+ */
+int percpu_mempool_refill(struct percpu_mempool *pcpu_pool, gfp_t gfp_mask)
+{
+	if ((gfp_mask & GFP_KERNEL) == GFP_KERNEL)
+		return mempool_fill(&pcpu_pool->pool, gfp_mask);
+
+	schedule_work(&pcpu_pool->refill_work);
+	return -EAGAIN;
+}
+EXPORT_SYMBOL_GPL(percpu_mempool_refill);
+
+/**
+ * percpu_mempool_destroy - cancel async refill and destroy a percpu mempool
+ * @pcpu_pool:	percpu mempool to destroy
+ */
+void percpu_mempool_destroy(struct percpu_mempool *pcpu_pool)
+{
+	cancel_work_sync(&pcpu_pool->refill_work);
+	mempool_destroy(&pcpu_pool->pool);
+}
+EXPORT_SYMBOL_GPL(percpu_mempool_destroy);
-- 
1.7.3.1


  parent reply	other threads:[~2011-12-22 21:45 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-12-22 21:45 [PATCHSET] block, mempool, percpu: implement percpu mempool and fix blkcg percpu alloc deadlock Tejun Heo
2011-12-22 21:45 ` [PATCH 1/7] mempool: fix and document synchronization and memory barrier usage Tejun Heo
2011-12-22 21:45 ` [PATCH 2/7] mempool: drop unnecessary and incorrect BUG_ON() from mempool_destroy() Tejun Heo
2011-12-22 21:45 ` [PATCH 3/7] mempool: fix first round failure behavior Tejun Heo
2011-12-22 21:45 ` [PATCH 4/7] mempool: factor out mempool_fill() Tejun Heo
2011-12-22 21:45 ` [PATCH 5/7] mempool: separate out __mempool_create() Tejun Heo
2011-12-22 21:45 ` Tejun Heo [this message]
2011-12-22 21:45 ` [PATCH 7/7] block: fix deadlock through percpu allocation in blk-cgroup Tejun Heo
2011-12-23  1:00   ` Vivek Goyal
2011-12-23 22:54     ` Tejun Heo
2011-12-22 21:59 ` [PATCHSET] block, mempool, percpu: implement percpu mempool and fix blkcg percpu alloc deadlock Andrew Morton
2011-12-22 22:09   ` Tejun Heo
2011-12-22 22:20     ` Andrew Morton
2011-12-22 22:41       ` Tejun Heo
2011-12-22 22:54         ` Andrew Morton
2011-12-22 23:00           ` Tejun Heo
2011-12-22 23:16             ` Andrew Morton
2011-12-22 23:24               ` Tejun Heo
2011-12-22 23:41                 ` Andrew Morton
2011-12-22 23:54                   ` Tejun Heo
2011-12-23  1:14                     ` Andrew Morton
2011-12-23 15:17                       ` Vivek Goyal
2011-12-27 18:34                       ` Tejun Heo
2011-12-27 21:20                         ` Andrew Morton
2011-12-27 21:44                           ` Tejun Heo
2011-12-27 21:58                             ` Andrew Morton
2011-12-27 22:22                               ` Tejun Heo
2011-12-23  1:21                   ` Vivek Goyal
2011-12-23  1:38                     ` Andrew Morton
2011-12-23  2:54                       ` Vivek Goyal
2011-12-23  3:11                         ` Andrew Morton
2011-12-23 14:58                           ` Vivek Goyal
2011-12-27 21:25                             ` Andrew Morton
2011-12-27 22:07                               ` Tejun Heo
2011-12-27 22:21                                 ` Andrew Morton
2011-12-27 22:30                                   ` Tejun Heo
2012-01-16 15:26                                     ` Vivek Goyal
2011-12-23  1:40       ` Vivek Goyal
2011-12-23  1:58         ` Andrew Morton
2011-12-23  2:56           ` Vivek Goyal
2011-12-26  6:05             ` KAMEZAWA Hiroyuki
2011-12-27 17:52               ` Tejun Heo
2011-12-28  0:14                 ` KAMEZAWA Hiroyuki
2011-12-28  0:41                   ` Tejun Heo
2012-01-05  1:28                     ` Tejun Heo
2012-01-16 15:28                       ` Vivek Goyal
2012-02-09 23:58                       ` Tejun Heo
2012-02-10 16:26                         ` Vivek Goyal
2012-02-13 22:31                           ` Tejun Heo
2012-02-15 15:43                             ` Vivek Goyal
2011-12-23 14:46           ` Vivek Goyal

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1324590326-10135-7-git-send-email-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=avi@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=cl@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nate@cpanel.net \
    --cc=oleg@redhat.com \
    --cc=vgoyal@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).