From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Christoph Lameter <clameter@sgi.com>
Cc: Matt Mackall <mpm@selenic.com>,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
Thomas Graf <tgraf@suug.ch>, David Miller <davem@davemloft.net>,
Andrew Morton <akpm@linux-foundation.org>,
Daniel Phillips <phillips@google.com>,
Pekka Enberg <penberg@cs.helsinki.fi>
Subject: Re: [PATCH 0/5] make slab gfp fair
Date: Tue, 15 May 2007 19:27:16 +0200 [thread overview]
Message-ID: <1179250036.7173.7.camel@twins> (raw)
In-Reply-To: <1179170912.2942.37.camel@lappy>
On Mon, 2007-05-14 at 21:28 +0200, Peter Zijlstra wrote:
> One allocator is all I need; it would just be grand if all could be
> supported.
>
> So what you suggest is not placing the 'emergency' slab into the regular
> place so that normal allocations will not be able to find it. Then if an
> emergency allocation cannot be satisfied by the regular path, we fall
> back to the slow path and find the emergency slab.
How about something like this; it seems to sustain a little stress.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/slub_def.h | 3 +
mm/slub.c | 73 +++++++++++++++++++++++++++++++++++++++++------
2 files changed, 68 insertions(+), 8 deletions(-)
Index: linux-2.6-git/include/linux/slub_def.h
===================================================================
--- linux-2.6-git.orig/include/linux/slub_def.h
+++ linux-2.6-git/include/linux/slub_def.h
@@ -47,6 +47,9 @@ struct kmem_cache {
struct list_head list; /* List of slab caches */
struct kobject kobj; /* For sysfs */
+ spinlock_t reserve_lock;
+ struct page *reserve_slab;
+
#ifdef CONFIG_NUMA
int defrag_ratio;
struct kmem_cache_node *node[MAX_NUMNODES];
Index: linux-2.6-git/mm/slub.c
===================================================================
--- linux-2.6-git.orig/mm/slub.c
+++ linux-2.6-git/mm/slub.c
@@ -20,11 +20,13 @@
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/kallsyms.h>
+#include "internal.h"
/*
* Lock order:
- * 1. slab_lock(page)
- * 2. slab->list_lock
+ * 1. slab->reserve_lock
+ * 2. slab_lock(page)
+ * 3. node->list_lock
*
* The slab_lock protects operations on the object of a particular
* slab and its metadata in the page struct. If the slab lock
@@ -981,7 +983,7 @@ static void setup_object(struct kmem_cac
s->ctor(object, s, SLAB_CTOR_CONSTRUCTOR);
}
-static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
+static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node, int *rank)
{
struct page *page;
struct kmem_cache_node *n;
@@ -999,6 +1001,7 @@ static struct page *new_slab(struct kmem
if (!page)
goto out;
+ *rank = page->rank;
n = get_node(s, page_to_nid(page));
if (n)
atomic_long_inc(&n->nr_slabs);
@@ -1286,7 +1289,7 @@ static void putback_slab(struct kmem_cac
/*
* Remove the cpu slab
*/
-static void deactivate_slab(struct kmem_cache *s, struct page *page, int cpu)
+static void __deactivate_slab(struct kmem_cache *s, struct page *page)
{
/*
* Merge cpu freelist into freelist. Typically we get here
@@ -1305,8 +1308,13 @@ static void deactivate_slab(struct kmem_
page->freelist = object;
page->inuse--;
}
- s->cpu_slab[cpu] = NULL;
ClearPageActive(page);
+}
+
+static void deactivate_slab(struct kmem_cache *s, struct page *page, int cpu)
+{
+ __deactivate_slab(s, page);
+ s->cpu_slab[cpu] = NULL;
putback_slab(s, page);
}
@@ -1372,6 +1380,7 @@ static void *__slab_alloc(struct kmem_ca
{
void **object;
int cpu = smp_processor_id();
+ int rank = 0;
if (!page)
goto new_slab;
@@ -1403,10 +1412,42 @@ have_slab:
s->cpu_slab[cpu] = page;
SetPageActive(page);
goto load_freelist;
+ } else if (gfp_to_alloc_flags(gfpflags) & ALLOC_NO_WATERMARKS) {
+ spin_lock(&s->reserve_lock);
+ page = s->reserve_slab;
+ if (page) {
+ if (page->freelist) {
+ slab_lock(page);
+ spin_unlock(&s->reserve_lock);
+ goto load_freelist;
+ } else
+ s->reserve_slab = NULL;
+ }
+ spin_unlock(&s->reserve_lock);
+
+ if (page) {
+ slab_lock(page);
+ __deactivate_slab(s, page);
+ putback_slab(s, page);
+ }
}
- page = new_slab(s, gfpflags, node);
- if (page) {
+ page = new_slab(s, gfpflags, node, &rank);
+ if (page && rank) {
+ if (unlikely(s->reserve_slab)) {
+ struct page *reserve;
+
+ spin_lock(&s->reserve_lock);
+ reserve = s->reserve_slab;
+ s->reserve_slab = NULL;
+ spin_unlock(&s->reserve_lock);
+
+ if (reserve) {
+ slab_lock(reserve);
+ __deactivate_slab(s, reserve);
+ putback_slab(s, reserve);
+ }
+ }
cpu = smp_processor_id();
if (s->cpu_slab[cpu]) {
/*
@@ -1432,6 +1473,18 @@ have_slab:
}
slab_lock(page);
goto have_slab;
+ } else if (page) {
+ spin_lock(&s->reserve_lock);
+ if (s->reserve_slab) {
+ discard_slab(s, page);
+ page = s->reserve_slab;
+ }
+ slab_lock(page);
+ SetPageActive(page);
+ s->reserve_slab = page;
+ spin_unlock(&s->reserve_lock);
+
+ goto load_freelist;
}
return NULL;
debug:
@@ -1788,10 +1841,11 @@ static struct kmem_cache_node * __init e
{
struct page *page;
struct kmem_cache_node *n;
+ int rank;
BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));
- page = new_slab(kmalloc_caches, gfpflags | GFP_THISNODE, node);
+ page = new_slab(kmalloc_caches, gfpflags | GFP_THISNODE, node, &rank);
/* new_slab() disables interupts */
local_irq_enable();
@@ -2002,6 +2056,9 @@ static int kmem_cache_open(struct kmem_c
s->defrag_ratio = 100;
#endif
+ spin_lock_init(&s->reserve_lock);
+ s->reserve_slab = NULL;
+
if (init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
return 1;
error:
WARNING: multiple messages have this Message-ID (diff)
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Christoph Lameter <clameter@sgi.com>
Cc: Matt Mackall <mpm@selenic.com>,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
Thomas Graf <tgraf@suug.ch>, David Miller <davem@davemloft.net>,
Andrew Morton <akpm@linux-foundation.org>,
Daniel Phillips <phillips@google.com>,
Pekka Enberg <penberg@cs.helsinki.fi>
Subject: Re: [PATCH 0/5] make slab gfp fair
Date: Tue, 15 May 2007 19:27:16 +0200 [thread overview]
Message-ID: <1179250036.7173.7.camel@twins> (raw)
In-Reply-To: <1179170912.2942.37.camel@lappy>
On Mon, 2007-05-14 at 21:28 +0200, Peter Zijlstra wrote:
> One allocator is all I need; it would just be grand if all could be
> supported.
>
> So what you suggest is not placing the 'emergency' slab into the regular
> place so that normal allocations will not be able to find it. Then if an
> emergency allocation cannot be satisfied by the regular path, we fall
> back to the slow path and find the emergency slab.
How about something like this; it seems to sustain a little stress.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/slub_def.h | 3 +
mm/slub.c | 73 +++++++++++++++++++++++++++++++++++++++++------
2 files changed, 68 insertions(+), 8 deletions(-)
Index: linux-2.6-git/include/linux/slub_def.h
===================================================================
--- linux-2.6-git.orig/include/linux/slub_def.h
+++ linux-2.6-git/include/linux/slub_def.h
@@ -47,6 +47,9 @@ struct kmem_cache {
struct list_head list; /* List of slab caches */
struct kobject kobj; /* For sysfs */
+ spinlock_t reserve_lock;
+ struct page *reserve_slab;
+
#ifdef CONFIG_NUMA
int defrag_ratio;
struct kmem_cache_node *node[MAX_NUMNODES];
Index: linux-2.6-git/mm/slub.c
===================================================================
--- linux-2.6-git.orig/mm/slub.c
+++ linux-2.6-git/mm/slub.c
@@ -20,11 +20,13 @@
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/kallsyms.h>
+#include "internal.h"
/*
* Lock order:
- * 1. slab_lock(page)
- * 2. slab->list_lock
+ * 1. slab->reserve_lock
+ * 2. slab_lock(page)
+ * 3. node->list_lock
*
* The slab_lock protects operations on the object of a particular
* slab and its metadata in the page struct. If the slab lock
@@ -981,7 +983,7 @@ static void setup_object(struct kmem_cac
s->ctor(object, s, SLAB_CTOR_CONSTRUCTOR);
}
-static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
+static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node, int *rank)
{
struct page *page;
struct kmem_cache_node *n;
@@ -999,6 +1001,7 @@ static struct page *new_slab(struct kmem
if (!page)
goto out;
+ *rank = page->rank;
n = get_node(s, page_to_nid(page));
if (n)
atomic_long_inc(&n->nr_slabs);
@@ -1286,7 +1289,7 @@ static void putback_slab(struct kmem_cac
/*
* Remove the cpu slab
*/
-static void deactivate_slab(struct kmem_cache *s, struct page *page, int cpu)
+static void __deactivate_slab(struct kmem_cache *s, struct page *page)
{
/*
* Merge cpu freelist into freelist. Typically we get here
@@ -1305,8 +1308,13 @@ static void deactivate_slab(struct kmem_
page->freelist = object;
page->inuse--;
}
- s->cpu_slab[cpu] = NULL;
ClearPageActive(page);
+}
+
+static void deactivate_slab(struct kmem_cache *s, struct page *page, int cpu)
+{
+ __deactivate_slab(s, page);
+ s->cpu_slab[cpu] = NULL;
putback_slab(s, page);
}
@@ -1372,6 +1380,7 @@ static void *__slab_alloc(struct kmem_ca
{
void **object;
int cpu = smp_processor_id();
+ int rank = 0;
if (!page)
goto new_slab;
@@ -1403,10 +1412,42 @@ have_slab:
s->cpu_slab[cpu] = page;
SetPageActive(page);
goto load_freelist;
+ } else if (gfp_to_alloc_flags(gfpflags) & ALLOC_NO_WATERMARKS) {
+ spin_lock(&s->reserve_lock);
+ page = s->reserve_slab;
+ if (page) {
+ if (page->freelist) {
+ slab_lock(page);
+ spin_unlock(&s->reserve_lock);
+ goto load_freelist;
+ } else
+ s->reserve_slab = NULL;
+ }
+ spin_unlock(&s->reserve_lock);
+
+ if (page) {
+ slab_lock(page);
+ __deactivate_slab(s, page);
+ putback_slab(s, page);
+ }
}
- page = new_slab(s, gfpflags, node);
- if (page) {
+ page = new_slab(s, gfpflags, node, &rank);
+ if (page && rank) {
+ if (unlikely(s->reserve_slab)) {
+ struct page *reserve;
+
+ spin_lock(&s->reserve_lock);
+ reserve = s->reserve_slab;
+ s->reserve_slab = NULL;
+ spin_unlock(&s->reserve_lock);
+
+ if (reserve) {
+ slab_lock(reserve);
+ __deactivate_slab(s, reserve);
+ putback_slab(s, reserve);
+ }
+ }
cpu = smp_processor_id();
if (s->cpu_slab[cpu]) {
/*
@@ -1432,6 +1473,18 @@ have_slab:
}
slab_lock(page);
goto have_slab;
+ } else if (page) {
+ spin_lock(&s->reserve_lock);
+ if (s->reserve_slab) {
+ discard_slab(s, page);
+ page = s->reserve_slab;
+ }
+ slab_lock(page);
+ SetPageActive(page);
+ s->reserve_slab = page;
+ spin_unlock(&s->reserve_lock);
+
+ goto load_freelist;
}
return NULL;
debug:
@@ -1788,10 +1841,11 @@ static struct kmem_cache_node * __init e
{
struct page *page;
struct kmem_cache_node *n;
+ int rank;
BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));
- page = new_slab(kmalloc_caches, gfpflags | GFP_THISNODE, node);
+ page = new_slab(kmalloc_caches, gfpflags | GFP_THISNODE, node, &rank);
/* new_slab() disables interupts */
local_irq_enable();
@@ -2002,6 +2056,9 @@ static int kmem_cache_open(struct kmem_c
s->defrag_ratio = 100;
#endif
+ spin_lock_init(&s->reserve_lock);
+ s->reserve_slab = NULL;
+
if (init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
return 1;
error:
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2007-05-15 17:27 UTC|newest]
Thread overview: 138+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-05-14 13:19 [PATCH 0/5] make slab gfp fair Peter Zijlstra
2007-05-14 13:19 ` Peter Zijlstra
2007-05-14 13:19 ` [PATCH 1/5] mm: page allocation rank Peter Zijlstra
2007-05-14 13:19 ` Peter Zijlstra
2007-05-14 13:19 ` [PATCH 2/5] mm: slab allocation fairness Peter Zijlstra
2007-05-14 13:19 ` Peter Zijlstra
2007-05-14 15:51 ` Christoph Lameter
2007-05-14 15:51 ` Christoph Lameter
2007-05-14 13:19 ` [PATCH 3/5] mm: slub " Peter Zijlstra
2007-05-14 13:19 ` Peter Zijlstra
2007-05-14 15:49 ` Christoph Lameter
2007-05-14 15:49 ` Christoph Lameter
2007-05-14 16:14 ` Peter Zijlstra
2007-05-14 16:14 ` Peter Zijlstra
2007-05-14 16:35 ` Christoph Lameter
2007-05-14 16:35 ` Christoph Lameter
2007-05-14 13:19 ` [PATCH 4/5] mm: slob " Peter Zijlstra
2007-05-14 13:19 ` Peter Zijlstra
2007-05-14 13:19 ` [PATCH 5/5] mm: allow mempool to fall back to memalloc reserves Peter Zijlstra
2007-05-14 13:19 ` Peter Zijlstra
2007-05-14 15:53 ` [PATCH 0/5] make slab gfp fair Christoph Lameter
2007-05-14 15:53 ` Christoph Lameter
2007-05-14 16:10 ` Peter Zijlstra
2007-05-14 16:10 ` Peter Zijlstra
2007-05-14 16:37 ` Christoph Lameter
2007-05-14 16:37 ` Christoph Lameter
2007-05-14 16:12 ` Matt Mackall
2007-05-14 16:12 ` Matt Mackall
2007-05-14 16:29 ` Christoph Lameter
2007-05-14 16:29 ` Christoph Lameter
2007-05-14 17:40 ` Peter Zijlstra
2007-05-14 17:40 ` Peter Zijlstra
2007-05-14 17:57 ` Christoph Lameter
2007-05-14 17:57 ` Christoph Lameter
2007-05-14 19:28 ` Peter Zijlstra
2007-05-14 19:28 ` Peter Zijlstra
2007-05-14 19:56 ` Christoph Lameter
2007-05-14 19:56 ` Christoph Lameter
2007-05-14 20:03 ` Peter Zijlstra
2007-05-14 20:03 ` Peter Zijlstra
2007-05-14 20:06 ` Christoph Lameter
2007-05-14 20:06 ` Christoph Lameter
2007-05-14 20:12 ` Peter Zijlstra
2007-05-14 20:12 ` Peter Zijlstra
2007-05-14 20:25 ` Christoph Lameter
2007-05-14 20:25 ` Christoph Lameter
2007-05-15 17:27 ` Peter Zijlstra [this message]
2007-05-15 17:27 ` Peter Zijlstra
2007-05-15 22:02 ` Christoph Lameter
2007-05-15 22:02 ` Christoph Lameter
2007-05-16 6:59 ` Peter Zijlstra
2007-05-16 6:59 ` Peter Zijlstra
2007-05-16 18:43 ` Christoph Lameter
2007-05-16 18:43 ` Christoph Lameter
2007-05-16 19:25 ` Peter Zijlstra
2007-05-16 19:25 ` Peter Zijlstra
2007-05-16 19:53 ` Christoph Lameter
2007-05-16 19:53 ` Christoph Lameter
2007-05-16 20:18 ` Peter Zijlstra
2007-05-16 20:18 ` Peter Zijlstra
2007-05-16 20:27 ` Christoph Lameter
2007-05-16 20:27 ` Christoph Lameter
2007-05-16 20:40 ` Peter Zijlstra
2007-05-16 20:40 ` Peter Zijlstra
2007-05-16 20:44 ` Christoph Lameter
2007-05-16 20:44 ` Christoph Lameter
2007-05-16 20:54 ` Peter Zijlstra
2007-05-16 20:54 ` Peter Zijlstra
2007-05-16 20:59 ` Christoph Lameter
2007-05-16 20:59 ` Christoph Lameter
2007-05-16 21:04 ` Peter Zijlstra
2007-05-16 21:04 ` Peter Zijlstra
2007-05-16 21:13 ` Christoph Lameter
2007-05-16 21:13 ` Christoph Lameter
2007-05-16 21:20 ` Peter Zijlstra
2007-05-16 21:20 ` Peter Zijlstra
2007-05-16 21:42 ` Christoph Lameter
2007-05-16 21:42 ` Christoph Lameter
2007-05-17 7:28 ` Peter Zijlstra
2007-05-17 7:28 ` Peter Zijlstra
2007-05-17 17:30 ` Christoph Lameter
2007-05-17 17:30 ` Christoph Lameter
2007-05-17 17:53 ` Peter Zijlstra
2007-05-17 17:53 ` Peter Zijlstra
2007-05-17 18:01 ` Christoph Lameter
2007-05-17 18:01 ` Christoph Lameter
2007-05-14 19:44 ` Andrew Morton
2007-05-14 19:44 ` Andrew Morton
2007-05-14 20:01 ` Matt Mackall
2007-05-14 20:01 ` Matt Mackall
2007-05-14 20:05 ` Peter Zijlstra
2007-05-14 20:05 ` Peter Zijlstra
2007-05-17 3:02 ` Christoph Lameter
2007-05-17 3:02 ` Christoph Lameter
2007-05-17 7:08 ` Peter Zijlstra
2007-05-17 7:08 ` Peter Zijlstra
2007-05-17 17:29 ` Christoph Lameter
2007-05-17 17:29 ` Christoph Lameter
2007-05-17 17:52 ` Peter Zijlstra
2007-05-17 17:52 ` Peter Zijlstra
2007-05-17 17:59 ` Christoph Lameter
2007-05-17 17:59 ` Christoph Lameter
2007-05-17 17:53 ` Matt Mackall
2007-05-17 17:53 ` Matt Mackall
2007-05-17 18:02 ` Christoph Lameter
2007-05-17 18:02 ` Christoph Lameter
2007-05-17 19:18 ` Peter Zijlstra
2007-05-17 19:18 ` Peter Zijlstra
2007-05-17 19:24 ` Christoph Lameter
2007-05-17 19:24 ` Christoph Lameter
2007-05-17 21:26 ` Peter Zijlstra
2007-05-17 21:26 ` Peter Zijlstra
2007-05-17 21:44 ` Paul Jackson
2007-05-17 21:44 ` Paul Jackson
2007-05-17 22:27 ` Christoph Lameter
2007-05-17 22:27 ` Christoph Lameter
2007-05-18 9:54 ` Peter Zijlstra
2007-05-18 9:54 ` Peter Zijlstra
2007-05-18 17:11 ` Paul Jackson
2007-05-18 17:11 ` Paul Jackson
2007-05-18 17:11 ` Christoph Lameter
2007-05-18 17:11 ` Christoph Lameter
2007-05-20 8:39 ` Peter Zijlstra
2007-05-20 8:39 ` Peter Zijlstra
2007-05-21 16:45 ` Christoph Lameter
2007-05-21 16:45 ` Christoph Lameter
2007-05-21 19:33 ` Peter Zijlstra
2007-05-21 19:33 ` Peter Zijlstra
2007-05-21 19:43 ` Christoph Lameter
2007-05-21 19:43 ` Christoph Lameter
2007-05-21 20:08 ` Peter Zijlstra
2007-05-21 20:08 ` Peter Zijlstra
2007-05-21 20:32 ` Christoph Lameter
2007-05-21 20:32 ` Christoph Lameter
2007-05-21 20:54 ` Peter Zijlstra
2007-05-21 20:54 ` Peter Zijlstra
2007-05-21 21:04 ` Christoph Lameter
2007-05-21 21:04 ` Christoph Lameter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1179250036.7173.7.camel@twins \
--to=a.p.zijlstra@chello.nl \
--cc=akpm@linux-foundation.org \
--cc=clameter@sgi.com \
--cc=davem@davemloft.net \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mpm@selenic.com \
--cc=penberg@cs.helsinki.fi \
--cc=phillips@google.com \
--cc=tgraf@suug.ch \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.