From: Christoph Lameter <cl@linux-foundation.org>
To: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
Nick Piggin <npiggin@suse.de>,
David Rientjes <rientjes@google.com>
Subject: [S+Q3 19/23] slub: Object based NUMA policies
Date: Tue, 03 Aug 2010 21:45:33 -0500
Message-ID: <20100804024534.772679940@linux.com>
In-Reply-To: <20100804024514.139976032@linux.com>
SLUB currently applies memory policies and cpuset restrictions only at the page
level. This patch changes that so policies are applied to individual allocations
(as SLAB does), at the cost of increased complexity in the allocator.

The allocation path does not build alien queues (those come in a later patch) and
is somewhat inefficient, since a slab has to be taken from the partial lists (via
lock and unlock) and possibly moved back after a single object has been taken
from it.

Memory policy and cpuset redirection are applied only to slabs marked with
SLAB_MEM_SPREAD (also like SLAB).
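
For reference, the redirection condenses to the following check (a sketch of the
find_numa_node() helper added by the diff below; see the actual hunk for the
exact nesting):

        /*
         * Sketch: redirect only SLAB_MEM_SPREAD caches, only when the
         * caller did not ask for a specific node, and never from
         * interrupt context.
         */
        if (unlikely(s->flags & SLAB_MEM_SPREAD) &&
            node == NUMA_NO_NODE && !in_interrupt()) {
                if (cpuset_do_slab_mem_spread())
                        node = cpuset_mem_spread_node();
                else if (current->mempolicy)
                        node = slab_node(current->mempolicy);
        }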

Use Lee Schermerhorn's new *_mem functionality to always find the nearest node
with memory when we are running on a memoryless node.
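
numa_node_id() may name a node that has no memory, whereas numa_mem_id() resolves
to the nearest node that does. A minimal sketch of the effect (slab_search_node()
is a hypothetical name used only for illustration; the real change is the
one-line get_partial() hunk in the diff below):

        /*
         * Sketch only: choose the node whose partial lists are searched.
         * numa_node_id() may return a memoryless node; numa_mem_id()
         * returns the nearest node that actually has memory.
         */
        static inline int slab_search_node(int node)
        {
                return (node == NUMA_NO_NODE) ? numa_mem_id() : node;
        }
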
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
---
include/linux/slub_def.h | 3 +
mm/slub.c | 94 +++++++++++++++++++++++++++++++++++------------
2 files changed, 73 insertions(+), 24 deletions(-)
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c 2010-07-31 18:27:10.913898557 -0500
+++ linux-2.6/mm/slub.c 2010-07-31 18:27:15.733994218 -0500
@@ -1451,7 +1451,7 @@
static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
{
struct page *page;
- int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;
+ int searchnode = (node == NUMA_NO_NODE) ? numa_mem_id() : node;
page = get_partial_node(get_node(s, searchnode));
if (page || (flags & __GFP_THISNODE))
@@ -1622,6 +1622,7 @@
struct kmem_cache_cpu *c = per_cpu_ptr(k, cpu);
c->q.max = max;
+ c->node = cpu_to_mem(cpu);
}
s->cpu_queue = max;
@@ -1680,19 +1681,6 @@
free_percpu(f.c);
}
-/*
- * Check if the objects in a per cpu structure fit numa
- * locality expectations.
- */
-static inline int node_match(struct kmem_cache_cpu *c, int node)
-{
-#ifdef CONFIG_NUMA
- if (node != NUMA_NO_NODE && c->node != node)
- return 0;
-#endif
- return 1;
-}
-
static unsigned long count_partial(struct kmem_cache_node *n,
int (*get_count)(struct page *))
{
@@ -1752,6 +1740,26 @@
}
/*
+ * Determine the final numa node from which the allocation will
+ * be occurring. Allocations can be redirected for slabs marked
+ * with SLAB_MEM_SPREAD by memory policies and cpusets options.
+ */
+static inline int find_numa_node(struct kmem_cache *s, int node)
+{
+#ifdef CONFIG_NUMA
+ if (unlikely(s->flags & SLAB_MEM_SPREAD)) {
+ if (node == NUMA_NO_NODE && !in_interrupt()) {
+ if (cpuset_do_slab_mem_spread())
+ node = cpuset_mem_spread_node();
+ else if (current->mempolicy)
+ node = slab_node(current->mempolicy);
+ }
+ }
+#endif
+ return node;
+}
+
+/*
* Retrieve pointers to nr objects from a slab into the object array.
* Slab must be locked.
*/
@@ -1802,6 +1810,42 @@
/* Handling of objects from other nodes */
+static void *slab_alloc_node(struct kmem_cache *s, struct kmem_cache_cpu *c,
+ gfp_t gfpflags, int node)
+{
+#ifdef CONFIG_NUMA
+ struct kmem_cache_node *n = get_node(s, node);
+ struct page *page;
+ void *object;
+
+ page = get_partial_node(n);
+ if (!page) {
+ gfpflags &= gfp_allowed_mask;
+
+ if (gfpflags & __GFP_WAIT)
+ local_irq_enable();
+
+ page = new_slab(s, gfpflags | GFP_THISNODE, node);
+
+ if (gfpflags & __GFP_WAIT)
+ local_irq_disable();
+
+ if (!page)
+ return NULL;
+
+ slab_lock(page);
+ }
+
+ retrieve_objects(s, page, &object, 1);
+
+ to_lists(s, page, 0);
+ slab_unlock(page);
+ return object;
+#else
+ return NULL;
+#endif
+}
+
static void slab_free_alien(struct kmem_cache *s,
struct kmem_cache_cpu *c, struct page *page, void *object, int node)
{
@@ -1827,13 +1871,20 @@
redo:
local_irq_save(flags);
c = __this_cpu_ptr(s->cpu);
- q = &c->q;
- if (unlikely(queue_empty(q) || !node_match(c, node))) {
- if (unlikely(!node_match(c, node))) {
- flush_cpu_objects(s, c);
- c->node = node;
+ node = find_numa_node(s, node);
+
+ if (NUMA_BUILD && node != NUMA_NO_NODE) {
+ if (unlikely(node != c->node)) {
+ object = slab_alloc_node(s, c, gfpflags, node);
+ if (!object)
+ goto oom;
+ stat(s, ALLOC_REMOTE);
+ goto got_it;
}
+ }
+ q = &c->q;
+ if (unlikely(queue_empty(q))) {
while (q->objects < s->batch) {
struct page *new;
@@ -1877,6 +1928,7 @@
object = queue_get(q);
+got_it:
if (kmem_cache_debug(s)) {
if (!alloc_debug_processing(s, object, addr))
goto redo;
Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h 2010-07-31 18:26:09.000000000 -0500
+++ linux-2.6/include/linux/slub_def.h 2010-07-31 18:27:15.733994218 -0500
@@ -23,6 +23,7 @@
FREE_REMOVE_PARTIAL, /* Freeing removed from partial list */
ALLOC_FROM_PARTIAL, /* slab with objects acquired from partial */
ALLOC_SLAB, /* New slab acquired from page allocator */
+ ALLOC_REMOTE, /* Allocation from remote slab */
FREE_ALIEN, /* Free to alien node */
FREE_SLAB, /* Slab freed to the page allocator */
QUEUE_FLUSH, /* Flushing of the per cpu queue */
@@ -40,7 +41,7 @@
#ifdef CONFIG_SLUB_STATS
unsigned stat[NR_SLUB_STAT_ITEMS];
#endif
- int node; /* objects only from this numa node */
+ int node; /* The memory node local to the cpu */
struct kmem_cache_queue q;
};
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: email@kvack.org
Thread overview: 47+ messages
2010-08-04 2:45 [S+Q3 00/23] SLUB: The Unified slab allocator (V3) Christoph Lameter
2010-08-04 2:45 ` [S+Q3 01/23] percpu: make @dyn_size always mean min dyn_size in first chunk init functions Christoph Lameter
2010-08-04 2:45 ` [S+Q3 02/23] percpu: allow limited allocation before slab is online Christoph Lameter
2010-08-04 2:45 ` [S+Q3 03/23] slub: Use a constant for a unspecified node Christoph Lameter
2010-08-04 3:34 ` David Rientjes
2010-08-04 16:15 ` Christoph Lameter
2010-08-05 7:40 ` David Rientjes
2010-08-04 2:45 ` [S+Q3 04/23] SLUB: Constants need UL Christoph Lameter
2010-08-04 2:45 ` [S+Q3 05/23] Subjec Slub: Force no inlining of debug functions Christoph Lameter
2010-08-04 2:45 ` [S+Q3 06/23] slub: Check kasprintf results in kmem_cache_init() Christoph Lameter
2010-08-04 2:45 ` [S+Q3 07/23] slub: Use kmem_cache flags to detect if slab is in debugging mode Christoph Lameter
2010-08-04 2:45 ` [S+Q3 08/23] slub: remove dynamic dma slab allocation Christoph Lameter
2010-08-04 2:45 ` [S+Q3 09/23] slub: Remove static kmem_cache_cpu array for boot Christoph Lameter
2010-08-04 2:45 ` [S+Q3 10/23] slub: Allow removal of slab caches during boot V2 Christoph Lameter
2010-08-04 2:45 ` [S+Q3 11/23] slub: Dynamically size kmalloc cache allocations Christoph Lameter
2010-08-04 2:45 ` [S+Q3 12/23] slub: Extract hooks for memory checkers from hotpaths Christoph Lameter
2010-08-04 2:45 ` [S+Q3 13/23] slub: Move gfpflag masking out of the hotpath Christoph Lameter
2010-08-04 2:45 ` [S+Q3 14/23] slub: Add SLAB style per cpu queueing Christoph Lameter
2010-08-04 2:45 ` [S+Q3 15/23] slub: Allow resizing of per cpu queues Christoph Lameter
2010-08-04 2:45 ` [S+Q3 16/23] slub: Get rid of useless function count_free() Christoph Lameter
2010-08-04 2:45 ` [S+Q3 17/23] slub: Remove MAX_OBJS limitation Christoph Lameter
2010-08-04 2:45 ` [S+Q3 18/23] slub: Drop allocator announcement Christoph Lameter
2010-08-04 2:45 ` Christoph Lameter [this message]
2010-08-04 2:45 ` [S+Q3 20/23] slub: Shared cache to exploit cross cpu caching abilities Christoph Lameter
2010-08-17 5:52 ` David Rientjes
2010-08-17 17:51 ` Christoph Lameter
2010-08-17 18:42 ` David Rientjes
2010-08-17 18:50 ` Christoph Lameter
2010-08-17 19:02 ` David Rientjes
2010-08-17 19:32 ` Christoph Lameter
2010-08-18 19:32 ` Christoph Lameter
2010-08-04 2:45 ` [S+Q3 21/23] slub: Support Alien Caches Christoph Lameter
2010-08-04 2:45 ` [S+Q3 22/23] slub: Cached object expiration Christoph Lameter
2010-08-04 2:45 ` [S+Q3 23/23] vmscan: Tie slub object expiration into page reclaim Christoph Lameter
2010-08-04 4:39 ` [S+Q3 00/23] SLUB: The Unified slab allocator (V3) David Rientjes
2010-08-04 16:17 ` Christoph Lameter
2010-08-05 8:38 ` David Rientjes
2010-08-05 17:33 ` Christoph Lameter
2010-08-17 4:56 ` David Rientjes
2010-08-17 7:55 ` Tejun Heo
2010-08-17 13:56 ` Christoph Lameter
2010-08-17 17:23 ` Christoph Lameter
2010-08-17 17:29 ` Christoph Lameter
2010-08-17 18:02 ` David Rientjes
2010-08-17 18:47 ` Christoph Lameter
2010-08-17 18:54 ` David Rientjes
2010-08-17 19:34 ` Christoph Lameter