* [patch 01/10] SLUB: Consolidate add_partial and add_partial_tail to one function
2007-10-28 3:31 [patch 00/10] SLUB: SMP regression tests on Dual Xeon E5345 (8p) and new performance patches Christoph Lameter
@ 2007-10-28 3:31 ` Christoph Lameter
2007-10-28 13:07 ` Pekka J Enberg
2007-10-28 3:31 ` [patch 02/10] SLUB: Noinline some functions to avoid them being folded into alloc/free Christoph Lameter
` (8 subsequent siblings)
9 siblings, 1 reply; 35+ messages in thread
From: Christoph Lameter @ 2007-10-28 3:31 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: akpm, linux-kernel, linux-mm, Pekka Enberg
[-- Attachment #1: slab_defrag_add_partial_tail --]
[-- Type: text/plain, Size: 3601 bytes --]
Add a parameter to add_partial instead of having separate functions.
That allows detailed control from multiple places when putting
slabs back onto the partial list. If we put slabs back at the front
then they are likely to be used immediately for allocations. If they are
put at the end then we can maximize the time that the partial slabs
spend without allocations.
When deactivating a slab we can put the slabs that had remote objects freed
to them at the end of the list so that the cache lines can cool down.
Slabs that had objects freed to them by the local cpu are put at the
front of the list to be reused ASAP in order to exploit the cache-hot state.
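For reference, the consolidated helper reads as follows after this patch
(reconstructed from the diff below, with comments added):

static void add_partial(struct kmem_cache_node *n,
                                struct page *page, int tail)
{
        spin_lock(&n->list_lock);
        n->nr_partial++;
        if (tail)
                /* Cold slab: queue at the tail so its cache lines can cool down */
                list_add_tail(&page->lru, &n->partial);
        else
                /* Hot slab: queue at the head so it is reused ASAP */
                list_add(&page->lru, &n->partial);
        spin_unlock(&n->list_lock);
}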
[This patch is already in mm]
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
mm/slub.c | 31 +++++++++++++++----------------
1 file changed, 15 insertions(+), 16 deletions(-)
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c 2007-10-24 08:33:01.000000000 -0700
+++ linux-2.6/mm/slub.c 2007-10-24 09:19:52.000000000 -0700
@@ -1197,19 +1197,15 @@
/*
* Management of partially allocated slabs
*/
-static void add_partial_tail(struct kmem_cache_node *n, struct page *page)
+static void add_partial(struct kmem_cache_node *n,
+ struct page *page, int tail)
{
spin_lock(&n->list_lock);
n->nr_partial++;
- list_add_tail(&page->lru, &n->partial);
- spin_unlock(&n->list_lock);
-}
-
-static void add_partial(struct kmem_cache_node *n, struct page *page)
-{
- spin_lock(&n->list_lock);
- n->nr_partial++;
- list_add(&page->lru, &n->partial);
+ if (tail)
+ list_add_tail(&page->lru, &n->partial);
+ else
+ list_add(&page->lru, &n->partial);
spin_unlock(&n->list_lock);
}
@@ -1337,7 +1333,7 @@
*
* On exit the slab lock will have been dropped.
*/
-static void unfreeze_slab(struct kmem_cache *s, struct page *page)
+static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
{
struct kmem_cache_node *n = get_node(s, page_to_nid(page));
@@ -1345,7 +1341,7 @@
if (page->inuse) {
if (page->freelist)
- add_partial(n, page);
+ add_partial(n, page, tail);
else if (SlabDebug(page) && (s->flags & SLAB_STORE_USER))
add_full(n, page);
slab_unlock(page);
@@ -1360,7 +1356,7 @@
* partial list stays small. kmem_cache_shrink can
* reclaim empty slabs from the partial list.
*/
- add_partial_tail(n, page);
+ add_partial(n, page, 1);
slab_unlock(page);
} else {
slab_unlock(page);
@@ -1375,6 +1371,7 @@
static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
struct page *page = c->page;
+ int tail = 1;
/*
* Merge cpu freelist into freelist. Typically we get here
* because both freelists are empty. So this is unlikely
@@ -1383,6 +1380,8 @@
while (unlikely(c->freelist)) {
void **object;
+ tail = 0; /* Hot objects. Put the slab first */
+
/* Retrieve object from cpu_freelist */
object = c->freelist;
c->freelist = c->freelist[c->offset];
@@ -1393,7 +1392,7 @@
page->inuse--;
}
c->page = NULL;
- unfreeze_slab(s, page);
+ unfreeze_slab(s, page, tail);
}
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
@@ -1633,7 +1632,7 @@
* then add it.
*/
if (unlikely(!prior))
- add_partial(get_node(s, page_to_nid(page)), page);
+ add_partial(get_node(s, page_to_nid(page)), page, 0);
out_unlock:
slab_unlock(page);
@@ -2041,7 +2040,7 @@
#endif
init_kmem_cache_node(n);
atomic_long_inc(&n->nr_slabs);
- add_partial(n, page);
+ add_partial(n, page, 0);
return n;
}
--
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [patch 01/10] SLUB: Consolidate add_partial and add_partial_tail to one function
2007-10-28 3:31 ` [patch 01/10] SLUB: Consolidate add_partial and add_partial_tail to one function Christoph Lameter
@ 2007-10-28 13:07 ` Pekka J Enberg
0 siblings, 0 replies; 35+ messages in thread
From: Pekka J Enberg @ 2007-10-28 13:07 UTC (permalink / raw)
To: Christoph Lameter; +Cc: Matthew Wilcox, akpm, linux-kernel, linux-mm
On Sat, 27 Oct 2007, Christoph Lameter wrote:
> Add a parameter to add_partial instead of having separate functions.
> That allows detailed control from multiple places when putting
> slabs back onto the partial list. If we put slabs back at the front
> then they are likely to be used immediately for allocations. If they are
> put at the end then we can maximize the time that the partial slabs
> spend without allocations.
Reviewed-by: Pekka Enberg <penberg@cs.helsinki.fi>
--
^ permalink raw reply [flat|nested] 35+ messages in thread
* [patch 02/10] SLUB: Noinline some functions to avoid them being folded into alloc/free
2007-10-28 3:31 [patch 00/10] SLUB: SMP regression tests on Dual Xeon E5345 (8p) and new performance patches Christoph Lameter
2007-10-28 3:31 ` [patch 01/10] SLUB: Consolidate add_partial and add_partial_tail to one function Christoph Lameter
@ 2007-10-28 3:31 ` Christoph Lameter
2007-10-28 13:08 ` Pekka J Enberg
2007-10-29 23:25 ` Matt Mackall
2007-10-28 3:31 ` [patch 03/10] SLUB: Move kmem_cache_node determination into add_full and add_partial Christoph Lameter
` (7 subsequent siblings)
9 siblings, 2 replies; 35+ messages in thread
From: Christoph Lameter @ 2007-10-28 3:31 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: akpm, linux-kernel, linux-mm, Pekka Enberg
[-- Attachment #1: slub_noinline --]
[-- Type: text/plain, Size: 1807 bytes --]
Some functions tend to get folded into __slab_free and __slab_alloc
although they are rarely called. They cause register pressure that
leads to bad code generation.
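After the patch the rarely taken debug and slow-path helpers carry an
explicit noinline annotation. The affected declarations, reconstructed
from the diff below (bodies elided):

static noinline int alloc_debug_processing(struct kmem_cache *s,
                struct page *page, void *object, void *addr);
static noinline int free_debug_processing(struct kmem_cache *s,
                struct page *page, void *object, void *addr);
static noinline struct page *new_slab(struct kmem_cache *s,
                gfp_t flags, int node);
static noinline void remove_partial(struct kmem_cache *s,
                struct page *page);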
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
mm/slub.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c 2007-10-25 19:36:10.000000000 -0700
+++ linux-2.6/mm/slub.c 2007-10-25 19:36:59.000000000 -0700
@@ -831,8 +831,8 @@ static void setup_object_debug(struct km
init_tracking(s, object);
}
-static int alloc_debug_processing(struct kmem_cache *s, struct page *page,
- void *object, void *addr)
+static noinline int alloc_debug_processing(struct kmem_cache *s,
+ struct page *page, void *object, void *addr)
{
if (!check_slab(s, page))
goto bad;
@@ -871,8 +871,8 @@ bad:
return 0;
}
-static int free_debug_processing(struct kmem_cache *s, struct page *page,
- void *object, void *addr)
+static noinline int free_debug_processing(struct kmem_cache *s,
+ struct page *page, void *object, void *addr)
{
if (!check_slab(s, page))
goto fail;
@@ -1075,7 +1075,8 @@ static void setup_object(struct kmem_cac
s->ctor(s, object);
}
-static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
+static noinline struct page *new_slab(struct kmem_cache *s,
+ gfp_t flags, int node)
{
struct page *page;
struct kmem_cache_node *n;
@@ -1209,7 +1210,7 @@ static void add_partial(struct kmem_cach
spin_unlock(&n->list_lock);
}
-static void remove_partial(struct kmem_cache *s,
+static noinline void remove_partial(struct kmem_cache *s,
struct page *page)
{
struct kmem_cache_node *n = get_node(s, page_to_nid(page));
--
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [patch 02/10] SLUB: Noinline some functions to avoid them being folded into alloc/free
2007-10-28 3:31 ` [patch 02/10] SLUB: Noinline some functions to avoid them being folded into alloc/free Christoph Lameter
@ 2007-10-28 13:08 ` Pekka J Enberg
2007-10-29 23:25 ` Matt Mackall
1 sibling, 0 replies; 35+ messages in thread
From: Pekka J Enberg @ 2007-10-28 13:08 UTC (permalink / raw)
To: Christoph Lameter; +Cc: Matthew Wilcox, akpm, linux-kernel, linux-mm
On Sat, 27 Oct 2007, Christoph Lameter wrote:
> Some functions tend to get folded into __slab_free and __slab_alloc
> although they are rarely called. They cause register pressure that
> leads to bad code generation.
Reviewed-by: Pekka Enberg <penberg@cs.helsinki.fi>
--
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [patch 02/10] SLUB: Noinline some functions to avoid them being folded into alloc/free
2007-10-28 3:31 ` [patch 02/10] SLUB: Noinline some functions to avoid them being folded into alloc/free Christoph Lameter
2007-10-28 13:08 ` Pekka J Enberg
@ 2007-10-29 23:25 ` Matt Mackall
1 sibling, 0 replies; 35+ messages in thread
From: Matt Mackall @ 2007-10-29 23:25 UTC (permalink / raw)
To: Christoph Lameter
Cc: Matthew Wilcox, akpm, linux-kernel, linux-mm, Pekka Enberg
On Sat, Oct 27, 2007 at 08:31:58PM -0700, Christoph Lameter wrote:
> Some functions tend to get folded into __slab_free and __slab_alloc
> although they are rarely called. They cause register pressure that
> leads to bad code generation.
Nice - an example of uninlining to directly improve performance!
--
Mathematics is the supreme nostalgia of our time.
--
^ permalink raw reply [flat|nested] 35+ messages in thread
* [patch 03/10] SLUB: Move kmem_cache_node determination into add_full and add_partial
2007-10-28 3:31 [patch 00/10] SLUB: SMP regression tests on Dual Xeon E5345 (8p) and new performance patches Christoph Lameter
2007-10-28 3:31 ` [patch 01/10] SLUB: Consolidate add_partial and add_partial_tail to one function Christoph Lameter
2007-10-28 3:31 ` [patch 02/10] SLUB: Noinline some functions to avoid them being folded into alloc/free Christoph Lameter
@ 2007-10-28 3:31 ` Christoph Lameter
2007-10-28 13:09 ` Pekka J Enberg
2007-10-28 3:32 ` [patch 04/10] SLUB: Avoid checking for a valid object before zeroing on the fast path Christoph Lameter
` (6 subsequent siblings)
9 siblings, 1 reply; 35+ messages in thread
From: Christoph Lameter @ 2007-10-28 3:31 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: akpm, linux-kernel, linux-mm, Pekka Enberg
[-- Attachment #1: slub_add_partial_kmem_cache_parameter --]
[-- Type: text/plain, Size: 3409 bytes --]
The kmem_cache_node determination can be moved into add_full()
and add_partial(). This removes some code from the slab_free()
slow path and reduces the register overhead that it has to manage.
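With the lookup folded in, add_partial() after this patch looks like this
(reconstructed from the diff below, comments added):

static void add_partial(struct kmem_cache *s,
                        struct page *page, int tail)
{
        /* Determine the per-node structure here instead of in every caller */
        struct kmem_cache_node *n = get_node(s, page_to_nid(page));

        spin_lock(&n->list_lock);
        n->nr_partial++;
        if (tail)
                list_add_tail(&page->lru, &n->partial);
        else
                list_add(&page->lru, &n->partial);
        spin_unlock(&n->list_lock);
}

Callers then shrink accordingly, e.g. the slab_free() slow path simply does
"add_partial(s, page, 0);" instead of passing the node explicitly.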
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
mm/slub.c | 29 +++++++++++++++++------------
1 file changed, 17 insertions(+), 12 deletions(-)
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c 2007-10-25 19:36:59.000000000 -0700
+++ linux-2.6/mm/slub.c 2007-10-25 19:37:38.000000000 -0700
@@ -800,8 +800,12 @@ static void trace(struct kmem_cache *s,
/*
* Tracking of fully allocated slabs for debugging purposes.
*/
-static void add_full(struct kmem_cache_node *n, struct page *page)
+static void add_full(struct kmem_cache *s, struct page *page)
{
+ struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+
+ if (!SlabDebug(page) || !(s->flags & SLAB_STORE_USER))
+ return;
spin_lock(&n->list_lock);
list_add(&page->lru, &n->full);
spin_unlock(&n->list_lock);
@@ -1025,7 +1029,7 @@ static inline int slab_pad_check(struct
{ return 1; }
static inline int check_object(struct kmem_cache *s, struct page *page,
void *object, int active) { return 1; }
-static inline void add_full(struct kmem_cache_node *n, struct page *page) {}
+static inline void add_full(struct kmem_cache *s, struct page *page) {}
static inline unsigned long kmem_cache_flags(unsigned long objsize,
unsigned long flags, const char *name,
void (*ctor)(struct kmem_cache *, void *))
@@ -1198,9 +1202,11 @@ static __always_inline int slab_trylock(
/*
* Management of partially allocated slabs
*/
-static void add_partial(struct kmem_cache_node *n,
+static void add_partial(struct kmem_cache *s,
struct page *page, int tail)
{
+ struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+
spin_lock(&n->list_lock);
n->nr_partial++;
if (tail)
@@ -1336,19 +1342,18 @@ static struct page *get_partial(struct k
*/
static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
{
- struct kmem_cache_node *n = get_node(s, page_to_nid(page));
-
ClearSlabFrozen(page);
if (page->inuse) {
if (page->freelist)
- add_partial(n, page, tail);
- else if (SlabDebug(page) && (s->flags & SLAB_STORE_USER))
- add_full(n, page);
+ add_partial(s, page, tail);
+ else
+ add_full(s, page);
slab_unlock(page);
} else {
- if (n->nr_partial < MIN_PARTIAL) {
+ if (get_node(s, page_to_nid(page))->nr_partial
+ < MIN_PARTIAL) {
/*
* Adding an empty slab to the partial slabs in order
* to avoid page allocator overhead. This slab needs
@@ -1357,7 +1362,7 @@ static void unfreeze_slab(struct kmem_ca
* partial list stays small. kmem_cache_shrink can
* reclaim empty slabs from the partial list.
*/
- add_partial(n, page, 1);
+ add_partial(s, page, 1);
slab_unlock(page);
} else {
slab_unlock(page);
@@ -1633,7 +1638,7 @@ checks_ok:
* then add it.
*/
if (unlikely(!prior))
- add_partial(get_node(s, page_to_nid(page)), page, 0);
+ add_partial(s, page, 0);
out_unlock:
slab_unlock(page);
@@ -2041,7 +2046,7 @@ static struct kmem_cache_node *early_kme
#endif
init_kmem_cache_node(n);
atomic_long_inc(&n->nr_slabs);
- add_partial(n, page, 0);
+ add_partial(kmalloc_caches, page, 0);
return n;
}
--
^ permalink raw reply [flat|nested] 35+ messages in thread
* [patch 04/10] SLUB: Avoid checking for a valid object before zeroing on the fast path
2007-10-28 3:31 [patch 00/10] SLUB: SMP regression tests on Dual Xeon E5345 (8p) and new performance patches Christoph Lameter
` (2 preceding siblings ...)
2007-10-28 3:31 ` [patch 03/10] SLUB: Move kmem_cache_node determination into add_full and add_partial Christoph Lameter
@ 2007-10-28 3:32 ` Christoph Lameter
2007-10-28 13:10 ` Pekka J Enberg
2007-10-28 3:32 ` [patch 05/10] SLUB: __slab_alloc() exit path consolidation Christoph Lameter
` (5 subsequent siblings)
9 siblings, 1 reply; 35+ messages in thread
From: Christoph Lameter @ 2007-10-28 3:32 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: akpm, linux-kernel, linux-mm, Pekka Enberg
[-- Attachment #1: slub_slab_alloc_move_oom --]
[-- Type: text/plain, Size: 1253 bytes --]
The fast path always results in a valid object. Move the check
for the NULL pointer into the slow branch that calls
__slab_alloc(). Only __slab_alloc() can return NULL, namely when
no more memory is available, and that case is exceedingly rare.
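The resulting allocation path, reconstructed from the diff below; the NULL
test is only performed when the slow path had to be taken:

        if (unlikely(!c->freelist || !node_match(c, node))) {
                object = __slab_alloc(s, gfpflags, node, addr, c);
                if (unlikely(!object)) {
                        /* Out of memory: only possible on the slow branch */
                        local_irq_restore(flags);
                        goto out;
                }
        } else {
                /* Fast path: always yields a valid object */
                object = c->freelist;
                c->freelist = object[c->offset];
        }
        local_irq_restore(flags);

        if (unlikely(gfpflags & __GFP_ZERO))
                memset(object, 0, c->objsize);
out:
        return object;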
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
mm/slub.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c 2007-10-25 19:37:38.000000000 -0700
+++ linux-2.6/mm/slub.c 2007-10-25 19:38:14.000000000 -0700
@@ -1573,19 +1573,22 @@ static void __always_inline *slab_alloc(
local_irq_save(flags);
c = get_cpu_slab(s, smp_processor_id());
- if (unlikely(!c->freelist || !node_match(c, node)))
+ if (unlikely(!c->freelist || !node_match(c, node))) {
object = __slab_alloc(s, gfpflags, node, addr, c);
-
- else {
+ if (unlikely(!object)) {
+ local_irq_restore(flags);
+ goto out;
+ }
+ } else {
object = c->freelist;
c->freelist = object[c->offset];
}
local_irq_restore(flags);
- if (unlikely((gfpflags & __GFP_ZERO) && object))
+ if (unlikely((gfpflags & __GFP_ZERO)))
memset(object, 0, c->objsize);
-
+out:
return object;
}
--
^ permalink raw reply [flat|nested] 35+ messages in thread
* [patch 05/10] SLUB: __slab_alloc() exit path consolidation
2007-10-28 3:31 [patch 00/10] SLUB: SMP regression tests on Dual Xeon E5345 (8p) and new performance patches Christoph Lameter
` (3 preceding siblings ...)
2007-10-28 3:32 ` [patch 04/10] SLUB: Avoid checking for a valid object before zeroing on the fast path Christoph Lameter
@ 2007-10-28 3:32 ` Christoph Lameter
2007-10-28 13:11 ` Pekka J Enberg
2007-10-28 3:32 ` [patch 06/10] SLUB: Provide unique end marker for each slab Christoph Lameter
` (4 subsequent siblings)
9 siblings, 1 reply; 35+ messages in thread
From: Christoph Lameter @ 2007-10-28 3:32 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: akpm, linux-kernel, linux-mm, Pekka Enberg
[-- Attachment #1: slub_slab_alloc_exit_paths --]
[-- Type: text/plain, Size: 1023 bytes --]
Consolidate the exit paths by using gotos that branch to the hottest exit path.
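The shared exit, as it reads after the change (reconstructed from the diff
below):

unlock_out:
        slab_unlock(c->page);
out:
        return object;

The allocation-failure path now does "object = NULL; goto out;" and the
debug path ends with "goto unlock_out;" instead of duplicating the unlock
and return.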
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
mm/slub.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c 2007-10-25 19:38:14.000000000 -0700
+++ linux-2.6/mm/slub.c 2007-10-25 19:38:47.000000000 -0700
@@ -1493,7 +1493,9 @@ load_freelist:
c->page->inuse = s->objects;
c->page->freelist = NULL;
c->node = page_to_nid(c->page);
+unlock_out:
slab_unlock(c->page);
+out:
return object;
another_slab:
@@ -1541,7 +1543,8 @@ new_slab:
c->page = new;
goto load_freelist;
}
- return NULL;
+ object = NULL;
+ goto out;
debug:
object = c->page->freelist;
if (!alloc_debug_processing(s, c->page, object, addr))
@@ -1550,8 +1553,7 @@ debug:
c->page->inuse++;
c->page->freelist = object[c->offset];
c->node = -1;
- slab_unlock(c->page);
- return object;
+ goto unlock_out;
}
/*
--
^ permalink raw reply [flat|nested] 35+ messages in thread
* [patch 06/10] SLUB: Provide unique end marker for each slab
2007-10-28 3:31 [patch 00/10] SLUB: SMP regression tests on Dual Xeon E5345 (8p) and new performance patches Christoph Lameter
` (4 preceding siblings ...)
2007-10-28 3:32 ` [patch 05/10] SLUB: __slab_alloc() exit path consolidation Christoph Lameter
@ 2007-10-28 3:32 ` Christoph Lameter
2007-10-28 3:32 ` [patch 07/10] SLUB: Avoid referencing kmem_cache structure in __slab_alloc Christoph Lameter
` (3 subsequent siblings)
9 siblings, 0 replies; 35+ messages in thread
From: Christoph Lameter @ 2007-10-28 3:32 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: akpm, linux-kernel, linux-mm, Pekka Enberg
[-- Attachment #1: slub_end_marker --]
[-- Type: text/plain, Size: 9168 bytes --]
Currently we use a NULL pointer to signal that there are no more objects.
However, the NULL end pointers of all slabs are identical, in contrast to
the pointers to the real objects, which are distinct for each slab page.
Change the end pointer to simply be a pointer to the first object with
bit 0 set. That way all end pointers differ between slabs. This
is necessary to allow reliable end marker identification for the
next patch, which implements a fast path without the need to disable interrupts.
Bring back SLUB's use of the mapping field, since we would otherwise
have to call the relatively expensive page_address() in __slab_alloc().
Using the mapping field also avoids calling page_address() in various
other functions.
There is no need to change the page_mapping() function since bit 0 is
set in the mapping, just as it is for anonymous pages. page_mapping(slab_page)
will therefore still return NULL although the mapping field is overloaded.
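The end-marker helpers introduced by this patch, reconstructed from the
diff below; page->end points at the first object with bit 0
(PAGE_MAPPING_ANON) set, so every slab gets a distinct end pointer:

/*
 * The end pointer in a slab is special. It points to the first object in
 * the slab but has bit 0 set to mark it.
 */
static inline int is_end(void *addr)
{
        return (unsigned long)addr & PAGE_MAPPING_ANON;
}

void *slab_address(struct page *page)
{
        /* Strip the marker bit to recover the slab's base address */
        return page->end - PAGE_MAPPING_ANON;
}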
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
include/linux/mm_types.h | 5 ++-
mm/slub.c | 72 ++++++++++++++++++++++++++++++-----------------
2 files changed, 51 insertions(+), 26 deletions(-)
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c 2007-10-26 19:09:03.000000000 -0700
+++ linux-2.6/mm/slub.c 2007-10-27 07:52:12.000000000 -0700
@@ -277,15 +277,32 @@ static inline struct kmem_cache_cpu *get
#endif
}
+/*
+ * The end pointer in a slab is special. It points to the first object in the
+ * slab but has bit 0 set to mark it.
+ *
+ * Note that SLUB relies on page_mapping returning NULL for pages with bit 0
+ * in the mapping set.
+ */
+static inline int is_end(void *addr)
+{
+ return (unsigned long)addr & PAGE_MAPPING_ANON;
+}
+
+void *slab_address(struct page *page)
+{
+ return page->end - PAGE_MAPPING_ANON;
+}
+
static inline int check_valid_pointer(struct kmem_cache *s,
struct page *page, const void *object)
{
void *base;
- if (!object)
+ if (object == page->end)
return 1;
- base = page_address(page);
+ base = slab_address(page);
if (object < base || object >= base + s->objects * s->size ||
(object - base) % s->size) {
return 0;
@@ -318,7 +335,8 @@ static inline void set_freepointer(struc
/* Scan freelist */
#define for_each_free_object(__p, __s, __free) \
- for (__p = (__free); __p; __p = get_freepointer((__s), __p))
+ for (__p = (__free); (__p) != page->end; __p = get_freepointer((__s),\
+ __p))
/* Determine object index from a given position */
static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
@@ -470,7 +488,7 @@ static void slab_fix(struct kmem_cache *
static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
{
unsigned int off; /* Offset of last byte */
- u8 *addr = page_address(page);
+ u8 *addr = slab_address(page);
print_tracking(s, p);
@@ -648,7 +666,7 @@ static int slab_pad_check(struct kmem_ca
if (!(s->flags & SLAB_POISON))
return 1;
- start = page_address(page);
+ start = slab_address(page);
end = start + (PAGE_SIZE << s->order);
length = s->objects * s->size;
remainder = end - (start + length);
@@ -715,7 +733,7 @@ static int check_object(struct kmem_cach
* of the free objects in this slab. May cause
* another error because the object count is now wrong.
*/
- set_freepointer(s, p, NULL);
+ set_freepointer(s, p, page->end);
return 0;
}
return 1;
@@ -749,18 +767,18 @@ static int on_freelist(struct kmem_cache
void *fp = page->freelist;
void *object = NULL;
- while (fp && nr <= s->objects) {
+ while (fp != page->end && nr <= s->objects) {
if (fp == search)
return 1;
if (!check_valid_pointer(s, page, fp)) {
if (object) {
object_err(s, page, object,
"Freechain corrupt");
- set_freepointer(s, object, NULL);
+ set_freepointer(s, object, page->end);
break;
} else {
slab_err(s, page, "Freepointer corrupt");
- page->freelist = NULL;
+ page->freelist = page->end;
page->inuse = s->objects;
slab_fix(s, "Freelist cleared");
return 0;
@@ -870,7 +888,7 @@ bad:
*/
slab_fix(s, "Marking all objects used");
page->inuse = s->objects;
- page->freelist = NULL;
+ page->freelist = page->end;
}
return 0;
}
@@ -912,7 +930,7 @@ static noinline int free_debug_processin
}
/* Special debug activities for freeing objects */
- if (!SlabFrozen(page) && !page->freelist)
+ if (!SlabFrozen(page) && page->freelist == page->end)
remove_full(s, page);
if (s->flags & SLAB_STORE_USER)
set_track(s, object, TRACK_FREE, addr);
@@ -1085,7 +1103,6 @@ static noinline struct page *new_slab(st
struct page *page;
struct kmem_cache_node *n;
void *start;
- void *end;
void *last;
void *p;
@@ -1106,7 +1123,7 @@ static noinline struct page *new_slab(st
SetSlabDebug(page);
start = page_address(page);
- end = start + s->objects * s->size;
+ page->end = start + 1;
if (unlikely(s->flags & SLAB_POISON))
memset(start, POISON_INUSE, PAGE_SIZE << s->order);
@@ -1118,7 +1135,7 @@ static noinline struct page *new_slab(st
last = p;
}
setup_object(s, page, last);
- set_freepointer(s, last, NULL);
+ set_freepointer(s, last, page->end);
page->freelist = start;
page->inuse = 0;
@@ -1134,7 +1151,7 @@ static void __free_slab(struct kmem_cach
void *p;
slab_pad_check(s, page);
- for_each_object(p, s, page_address(page))
+ for_each_object(p, s, slab_address(page))
check_object(s, page, p, 0);
ClearSlabDebug(page);
}
@@ -1144,6 +1161,7 @@ static void __free_slab(struct kmem_cach
NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
- pages);
+ page->mapping = NULL;
__free_pages(page, s->order);
}
@@ -1345,7 +1363,7 @@ static void unfreeze_slab(struct kmem_ca
ClearSlabFrozen(page);
if (page->inuse) {
- if (page->freelist)
+ if (page->freelist != page->end)
add_partial(s, page, tail);
else
add_full(s, page);
@@ -1382,8 +1400,12 @@ static void deactivate_slab(struct kmem_
* Merge cpu freelist into freelist. Typically we get here
* because both freelists are empty. So this is unlikely
* to occur.
+ *
+ * We need to use _is_end here because deactivate slab may
+ * be called for a debug slab. Then c->freelist may contain
+ * a dummy pointer.
*/
- while (unlikely(c->freelist)) {
+ while (unlikely(!is_end(c->freelist))) {
void **object;
tail = 0; /* Hot objects. Put the slab first */
@@ -1483,7 +1505,7 @@ static void *__slab_alloc(struct kmem_ca
goto another_slab;
load_freelist:
object = c->page->freelist;
- if (unlikely(!object))
+ if (unlikely(object == c->page->end))
goto another_slab;
if (unlikely(SlabDebug(c->page)))
goto debug;
@@ -1491,7 +1513,7 @@ load_freelist:
object = c->page->freelist;
c->freelist = object[c->offset];
c->page->inuse = s->objects;
- c->page->freelist = NULL;
+ c->page->freelist = c->page->end;
c->node = page_to_nid(c->page);
unlock_out:
slab_unlock(c->page);
@@ -1575,7 +1597,7 @@ static void __always_inline *slab_alloc(
local_irq_save(flags);
c = get_cpu_slab(s, smp_processor_id());
- if (unlikely(!c->freelist || !node_match(c, node))) {
+ if (unlikely((is_end(c->freelist)) || !node_match(c, node))) {
object = __slab_alloc(s, gfpflags, node, addr, c);
if (unlikely(!object)) {
@@ -1642,7 +1664,7 @@ checks_ok:
* was not on the partial list before
* then add it.
*/
- if (unlikely(!prior))
+ if (unlikely(prior == page->end))
add_partial(s, page, 0);
out_unlock:
@@ -1650,7 +1672,7 @@ out_unlock:
return;
slab_empty:
- if (prior)
+ if (prior != page->end)
/*
* Slab still on the partial list.
*/
@@ -1870,7 +1892,7 @@ static void init_kmem_cache_cpu(struct k
struct kmem_cache_cpu *c)
{
c->page = NULL;
- c->freelist = NULL;
+ c->freelist = (void *)PAGE_MAPPING_ANON;
c->node = 0;
c->offset = s->offset / sizeof(void *);
c->objsize = s->objsize;
@@ -3107,7 +3129,7 @@ static int validate_slab(struct kmem_cac
unsigned long *map)
{
void *p;
- void *addr = page_address(page);
+ void *addr = slab_address(page);
if (!check_slab(s, page) ||
!on_freelist(s, page, NULL))
@@ -3387,7 +3409,7 @@ static int add_location(struct loc_track
static void process_slab(struct loc_track *t, struct kmem_cache *s,
struct page *page, enum track_item alloc)
{
- void *addr = page_address(page);
+ void *addr = slab_address(page);
DECLARE_BITMAP(map, s->objects);
void *p;
Index: linux-2.6/include/linux/mm_types.h
===================================================================
--- linux-2.6.orig/include/linux/mm_types.h 2007-10-26 19:06:30.000000000 -0700
+++ linux-2.6/include/linux/mm_types.h 2007-10-26 19:09:04.000000000 -0700
@@ -64,7 +64,10 @@ struct page {
#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
spinlock_t ptl;
#endif
- struct kmem_cache *slab; /* SLUB: Pointer to slab */
+ struct {
+ struct kmem_cache *slab; /* SLUB: Pointer to slab */
+ void *end; /* SLUB: end marker */
+ };
struct page *first_page; /* Compound tail pages */
};
union {
--
^ permalink raw reply [flat|nested] 35+ messages in thread
* [patch 07/10] SLUB: Avoid referencing kmem_cache structure in __slab_alloc
2007-10-28 3:31 [patch 00/10] SLUB: SMP regression tests on Dual Xeon E5345 (8p) and new performance patches Christoph Lameter
` (5 preceding siblings ...)
2007-10-28 3:32 ` [patch 06/10] SLUB: Provide unique end marker for each slab Christoph Lameter
@ 2007-10-28 3:32 ` Christoph Lameter
2007-10-28 13:12 ` Pekka J Enberg
2007-10-30 18:38 ` Andrew Morton
2007-10-28 3:32 ` [patch 08/10] SLUB: Optional fast path using cmpxchg_local Christoph Lameter
` (2 subsequent siblings)
9 siblings, 2 replies; 35+ messages in thread
From: Christoph Lameter @ 2007-10-28 3:32 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: akpm, linux-kernel, linux-mm, Pekka Enberg
[-- Attachment #1: slub_objects_in_per_cpu --]
[-- Type: text/plain, Size: 1742 bytes --]
We need to use the number of objects per slab in the first part of
__slab_alloc(), which is still pretty hot. Copy the number of objects
per slab into the kmem_cache_cpu structure. That way we can get the
value from a cache line that we already need to touch. This brings
the kmem_cache_cpu structure up to 4 even words.
There is no increase in the size of kmem_cache_cpu since the size of
the structure is rounded up to the next word.
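The per-cpu structure after the change; the hunk below only shows the tail
of the definition, so the leading fields here are inferred from their use
elsewhere in this series:

struct kmem_cache_cpu {
        void **freelist;        /* next free object on the cpu slab */
        struct page *page;      /* the cpu slab itself */
        int node;
        unsigned int offset;    /* free pointer offset in words */
        unsigned int objsize;   /* object size for this cache */
        unsigned int objects;   /* objects per slab, copied from kmem_cache */
};

The hot path can then use "c->page->inuse = c->objects;" without touching
struct kmem_cache.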
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
include/linux/slub_def.h | 1 +
mm/slub.c | 3 ++-
2 files changed, 3 insertions(+), 1 deletion(-)
Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h 2007-10-26 19:09:16.000000000 -0700
+++ linux-2.6/include/linux/slub_def.h 2007-10-27 07:55:12.000000000 -0700
@@ -17,6 +17,7 @@ struct kmem_cache_cpu {
int node;
unsigned int offset;
unsigned int objsize;
+ unsigned int objects;
};
struct kmem_cache_node {
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c 2007-10-27 07:52:12.000000000 -0700
+++ linux-2.6/mm/slub.c 2007-10-27 07:55:12.000000000 -0700
@@ -1512,7 +1512,7 @@ load_freelist:
object = c->page->freelist;
c->freelist = object[c->offset];
- c->page->inuse = s->objects;
+ c->page->inuse = c->objects;
c->page->freelist = c->page->end;
c->node = page_to_nid(c->page);
unlock_out:
@@ -1896,6 +1896,7 @@ static void init_kmem_cache_cpu(struct k
c->node = 0;
c->offset = s->offset / sizeof(void *);
c->objsize = s->objsize;
+ c->objects = s->objects;
}
static void init_kmem_cache_node(struct kmem_cache_node *n)
--
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [patch 07/10] SLUB: Avoid referencing kmem_cache structure in __slab_alloc
2007-10-28 3:32 ` [patch 07/10] SLUB: Avoid referencing kmem_cache structure in __slab_alloc Christoph Lameter
@ 2007-10-28 13:12 ` Pekka J Enberg
2007-10-30 18:38 ` Andrew Morton
1 sibling, 0 replies; 35+ messages in thread
From: Pekka J Enberg @ 2007-10-28 13:12 UTC (permalink / raw)
To: Christoph Lameter; +Cc: Matthew Wilcox, akpm, linux-kernel, linux-mm
On Sat, 27 Oct 2007, Christoph Lameter wrote:
> There is the need to use the objects per slab in the first part of
> __slab_alloc() which is still pretty hot. Copy the number of objects
> per slab into the kmem_cache_cpu structure. That way we can get the
> value from a cache line that we already need to touch. This brings
> the kmem_cache_cpu structure up to 4 even words.
Reviewed-by: Pekka Enberg <penberg@cs.helsinki.fi>
--
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [patch 07/10] SLUB: Avoid referencing kmem_cache structure in __slab_alloc
2007-10-28 3:32 ` [patch 07/10] SLUB: Avoid referencing kmem_cache structure in __slab_alloc Christoph Lameter
2007-10-28 13:12 ` Pekka J Enberg
@ 2007-10-30 18:38 ` Andrew Morton
1 sibling, 0 replies; 35+ messages in thread
From: Andrew Morton @ 2007-10-30 18:38 UTC (permalink / raw)
To: Christoph Lameter; +Cc: matthew, linux-kernel, linux-mm, penberg
On Sat, 27 Oct 2007 20:32:03 -0700
Christoph Lameter <clameter@sgi.com> wrote:
> There is the need to use the objects per slab in the first part of
> __slab_alloc() which is still pretty hot. Copy the number of objects
> per slab into the kmem_cache_cpu structure. That way we can get the
> value from a cache line that we already need to touch. This brings
> the kmem_cache_cpu structure up to 4 even words.
>
> There is no increase in the size of kmem_cache_cpu since the size of object
> is rounded to the next word.
>
> Signed-off-by: Christoph Lameter <clameter@sgi.com>
>
> ---
> include/linux/slub_def.h | 1 +
> mm/slub.c | 3 ++-
> 2 files changed, 3 insertions(+), 1 deletion(-)
>
> Index: linux-2.6/include/linux/slub_def.h
> ===================================================================
> --- linux-2.6.orig/include/linux/slub_def.h 2007-10-26 19:09:16.000000000 -0700
> +++ linux-2.6/include/linux/slub_def.h 2007-10-27 07:55:12.000000000 -0700
> @@ -17,6 +17,7 @@ struct kmem_cache_cpu {
> int node;
> unsigned int offset;
> unsigned int objsize;
> + unsigned int objects;
> };
mutter. nr_objects would be a better name, but then one should rename
kmem_cache.objects too.
Better would be to comment the field. Please devote extra care to commenting
data structures.
--
^ permalink raw reply [flat|nested] 35+ messages in thread
* [patch 08/10] SLUB: Optional fast path using cmpxchg_local
2007-10-28 3:31 [patch 00/10] SLUB: SMP regression tests on Dual Xeon E5345 (8p) and new performance patches Christoph Lameter
` (6 preceding siblings ...)
2007-10-28 3:32 ` [patch 07/10] SLUB: Avoid referencing kmem_cache structure in __slab_alloc Christoph Lameter
@ 2007-10-28 3:32 ` Christoph Lameter
2007-10-28 13:05 ` Pekka J Enberg
` (2 more replies)
2007-10-28 3:32 ` [patch 09/10] SLUB: Do our own locking via slab_lock and slab_unlock Christoph Lameter
2007-10-28 3:32 ` [patch 10/10] SLUB: Restructure slab alloc Christoph Lameter
9 siblings, 3 replies; 35+ messages in thread
From: Christoph Lameter @ 2007-10-28 3:32 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: akpm, linux-kernel, linux-mm, Pekka Enberg
[-- Attachment #1: slub_cmpxchg_local --]
[-- Type: text/plain, Size: 6352 bytes --]
Provide an alternate implementation of the SLUB fast paths for alloc
and free using cmpxchg_local. The cmpxchg_local fast path is selected
for arches that have CONFIG_FAST_CMPXCHG_LOCAL set. An arch should only
set CONFIG_FAST_CMPXCHG_LOCAL if cmpxchg_local is faster than an
interrupt enable/disable sequence. This is known to be true for both
x86 platforms, so set FAST_CMPXCHG_LOCAL for both arches.
Not all arches can support fast cmpxchg operations. Typically the
architecture must have an optimized cmpxchg instruction. The
cmpxchg fast path makes no sense on platforms whose cmpxchg is
slower than interrupt enable/disable (e.g. IA64).
The advantages of a cmpxchg_local based fast path are:
1. Lower cycle count (30%-60% faster)
2. There is no need to disable and enable interrupts on the fast path.
Currently interrupts have to be disabled and enabled on every
slab operation. This is likely saving a significant percentage
of interrupt off / on sequences in the kernel.
3. The disposal of freed slabs can occur with interrupts enabled.
The alternate path is realized using #ifdefs. Several attempts to do the
same with macros and inline functions resulted in a mess (in particular due
to the strange way that local_irq_save() handles its argument and due
to the need to define macros/functions that sometimes disable interrupts
and sometimes do something else. The macro-based approaches also made it
difficult to preserve the optimizations for the non-cmpxchg paths).
#ifdef seems to be the way to go here to keep the source readable.
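The cmpxchg_local allocation fast path, reconstructed from the diff below;
the loop simply retries if an interrupt changed c->freelist between the
load and the cmpxchg_local:

#ifdef CONFIG_FAST_CMPXCHG_LOCAL
        c = get_cpu_slab(s, get_cpu());
        do {
                object = c->freelist;
                if (unlikely(is_end(object) || !node_match(c, node))) {
                        /* Slow path: refill or allocate a new slab */
                        object = __slab_alloc(s, gfpflags, node, addr, c);
                        if (unlikely(!object)) {
                                put_cpu();
                                goto out;
                        }
                        break;
                }
        } while (cmpxchg_local(&c->freelist, object, object[c->offset])
                                                        != object);
        put_cpu();
#endif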
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
arch/x86/Kconfig.i386 | 4 ++
arch/x86/Kconfig.x86_64 | 4 ++
mm/slub.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++--
3 files changed, 77 insertions(+), 2 deletions(-)
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c 2007-10-27 10:39:07.583665939 -0700
+++ linux-2.6/mm/slub.c 2007-10-27 10:40:19.710415861 -0700
@@ -1496,7 +1496,12 @@ static void *__slab_alloc(struct kmem_ca
{
void **object;
struct page *new;
+#ifdef CONFIG_FAST_CMPXCHG_LOCAL
+ unsigned long flags;
+ local_irq_save(flags);
+ preempt_enable_no_resched();
+#endif
if (!c->page)
goto new_slab;
@@ -1518,6 +1523,10 @@ load_freelist:
unlock_out:
slab_unlock(c->page);
out:
+#ifdef CONFIG_FAST_CMPXCHG_LOCAL
+ preempt_disable();
+ local_irq_restore(flags);
+#endif
return object;
another_slab:
@@ -1592,9 +1601,26 @@ static void __always_inline *slab_alloc(
gfp_t gfpflags, int node, void *addr)
{
void **object;
- unsigned long flags;
struct kmem_cache_cpu *c;
+#ifdef CONFIG_FAST_CMPXCHG_LOCAL
+ c = get_cpu_slab(s, get_cpu());
+ do {
+ object = c->freelist;
+ if (unlikely(is_end(object) || !node_match(c, node))) {
+ object = __slab_alloc(s, gfpflags, node, addr, c);
+ if (unlikely(!object)) {
+ put_cpu();
+ goto out;
+ }
+ break;
+ }
+ } while (cmpxchg_local(&c->freelist, object, object[c->offset])
+ != object);
+ put_cpu();
+#else
+ unsigned long flags;
+
local_irq_save(flags);
c = get_cpu_slab(s, smp_processor_id());
if (unlikely((is_end(c->freelist)) || !node_match(c, node))) {
@@ -1609,6 +1635,7 @@ static void __always_inline *slab_alloc(
c->freelist = object[c->offset];
}
local_irq_restore(flags);
+#endif
if (unlikely((gfpflags & __GFP_ZERO)))
memset(object, 0, c->objsize);
@@ -1644,6 +1671,11 @@ static void __slab_free(struct kmem_cach
void *prior;
void **object = (void *)x;
+#ifdef CONFIG_FAST_CMPXCHG_LOCAL
+ unsigned long flags;
+
+ local_irq_save(flags);
+#endif
slab_lock(page);
if (unlikely(SlabDebug(page)))
@@ -1669,6 +1701,9 @@ checks_ok:
out_unlock:
slab_unlock(page);
+#ifdef CONFIG_FAST_CMPXCHG_LOCAL
+ local_irq_restore(flags);
+#endif
return;
slab_empty:
@@ -1679,6 +1714,9 @@ slab_empty:
remove_partial(s, page);
slab_unlock(page);
+#ifdef CONFIG_FAST_CMPXCHG_LOCAL
+ local_irq_restore(flags);
+#endif
discard_slab(s, page);
return;
@@ -1703,9 +1741,37 @@ static void __always_inline slab_free(st
struct page *page, void *x, void *addr)
{
void **object = (void *)x;
- unsigned long flags;
struct kmem_cache_cpu *c;
+#ifdef CONFIG_FAST_CMPXCHG_LOCAL
+ void **freelist;
+
+ c = get_cpu_slab(s, get_cpu());
+ debug_check_no_locks_freed(object, s->objsize);
+ do {
+ freelist = c->freelist;
+ barrier();
+ /*
+ * If the compiler would reorder the retrieval of c->page to
+ * come before c->freelist then an interrupt could
+ * change the cpu slab before we retrieve c->freelist. We
+ * could be matching on a page no longer active and put the
+ * object onto the freelist of the wrong slab.
+ *
+ * On the other hand: If we already have the freelist pointer
+ * then any change of cpu_slab will cause the cmpxchg to fail
+ * since the freelist pointers are unique per slab.
+ */
+ if (unlikely(page != c->page || c->node < 0)) {
+ __slab_free(s, page, x, addr, c->offset);
+ break;
+ }
+ object[c->offset] = freelist;
+ } while (cmpxchg_local(&c->freelist, freelist, object) != freelist);
+ put_cpu();
+#else
+ unsigned long flags;
+
local_irq_save(flags);
debug_check_no_locks_freed(object, s->objsize);
c = get_cpu_slab(s, smp_processor_id());
@@ -1716,6 +1782,7 @@ static void __always_inline slab_free(st
__slab_free(s, page, x, addr, c->offset);
local_irq_restore(flags);
+#endif
}
void kmem_cache_free(struct kmem_cache *s, void *x)
Index: linux-2.6/arch/x86/Kconfig.i386
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig.i386 2007-10-27 10:38:33.630415778 -0700
+++ linux-2.6/arch/x86/Kconfig.i386 2007-10-27 10:40:19.710415861 -0700
@@ -51,6 +51,10 @@ config X86
bool
default y
+config FAST_CMPXCHG_LOCAL
+ bool
+ default y
+
config MMU
bool
default y
Index: linux-2.6/arch/x86/Kconfig.x86_64
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig.x86_64 2007-10-27 10:38:33.630415778 -0700
+++ linux-2.6/arch/x86/Kconfig.x86_64 2007-10-27 10:40:19.710415861 -0700
@@ -97,6 +97,10 @@ config X86_CMPXCHG
bool
default y
+config FAST_CMPXCHG_LOCAL
+ bool
+ default y
+
config EARLY_PRINTK
bool
default y
--
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [patch 08/10] SLUB: Optional fast path using cmpxchg_local
2007-10-28 3:32 ` [patch 08/10] SLUB: Optional fast path using cmpxchg_local Christoph Lameter
@ 2007-10-28 13:05 ` Pekka J Enberg
2007-10-29 2:59 ` Christoph Lameter
` (2 more replies)
2007-10-30 18:49 ` Andrew Morton
2007-10-31 2:28 ` [patch 08/10] SLUB: Optional fast path using cmpxchg_local Mathieu Desnoyers
2 siblings, 3 replies; 35+ messages in thread
From: Pekka J Enberg @ 2007-10-28 13:05 UTC (permalink / raw)
To: Christoph Lameter; +Cc: Matthew Wilcox, akpm, linux-kernel, linux-mm
On Sat, 27 Oct 2007, Christoph Lameter wrote:
> The alternate path is realized using #ifdef's. Several attempts to do the
> same with macros and in line functions resulted in a mess (in particular due
> to the strange way that local_interrupt_save() handles its argument and due
> to the need to define macros/functions that sometimes disable interrupts
> and sometimes do something else. The macro based approaches made it also
> difficult to preserve the optimizations for the non cmpxchg paths).
I think at least slub_alloc() and slub_free() can be made simpler. See the
included patch below.
> @@ -1496,7 +1496,12 @@ static void *__slab_alloc(struct kmem_ca
> {
> void **object;
> struct page *new;
> +#ifdef CONFIG_FAST_CMPXCHG_LOCAL
> + unsigned long flags;
>
> + local_irq_save(flags);
> + preempt_enable_no_resched();
> +#endif
> if (!c->page)
> goto new_slab;
>
> @@ -1518,6 +1523,10 @@ load_freelist:
> unlock_out:
> slab_unlock(c->page);
> out:
> +#ifdef CONFIG_FAST_CMPXCHG_LOCAL
> + preempt_disable();
> + local_irq_restore(flags);
> +#endif
> return object;
Can you please write a comment of the locking rules when cmpxchg_local()
is used? Looks as if we could push that local_irq_save() to slub_lock()
and local_irq_restore() to slub_unlock() and deal with the unused flags
variable for the non-CONFIG_FAST_CMPXCHG_LOCAL case with a macro, no?
Pekka
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
arch/x86/Kconfig.i386 | 4 +
arch/x86/Kconfig.x86_64 | 4 +
mm/slub.c | 140 +++++++++++++++++++++++++++++++++++++++---------
3 files changed, 122 insertions(+), 26 deletions(-)
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c
+++ linux-2.6/mm/slub.c
@@ -1496,7 +1496,12 @@ static void *__slab_alloc(struct kmem_ca
{
void **object;
struct page *new;
+#ifdef CONFIG_FAST_CMPXCHG_LOCAL
+ unsigned long flags;
+ local_irq_save(flags);
+ preempt_enable_no_resched();
+#endif
if (!c->page)
goto new_slab;
@@ -1518,6 +1523,10 @@ load_freelist:
unlock_out:
slab_unlock(c->page);
out:
+#ifdef CONFIG_FAST_CMPXCHG_LOCAL
+ preempt_disable();
+ local_irq_restore(flags);
+#endif
return object;
another_slab:
@@ -1578,6 +1587,45 @@ debug:
goto unlock_out;
}
+#ifdef CONFIG_FAST_CMPXCHG_LOCAL
+static __always_inline void *do_slab_alloc(struct kmem_cache *s,
+ struct kmem_cache_cpu *c, gfp_t gfpflags, int node, void *addr)
+{
+ unsigned long flags;
+ void **object;
+
+ do {
+ object = c->freelist;
+ if (unlikely(is_end(object) || !node_match(c, node))) {
+ object = __slab_alloc(s, gfpflags, node, addr, c);
+ break;
+ }
+ } while (cmpxchg_local(&c->freelist, object, object[c->offset])
+ != object);
+ put_cpu();
+
+ return object;
+}
+#else
+
+static __always_inline void *do_slab_alloc(struct kmem_cache *s,
+ struct kmem_cache_cpu *c, gfp_t gfpflags, int node, void *addr)
+{
+ unsigned long flags;
+ void **object;
+
+ local_irq_save(flags);
+ if (unlikely((is_end(c->freelist)) || !node_match(c, node))) {
+ object = __slab_alloc(s, gfpflags, node, addr, c);
+ } else {
+ object = c->freelist;
+ c->freelist = object[c->offset];
+ }
+ local_irq_restore(flags);
+ return object;
+}
+#endif
+
/*
* Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
* have the fastpath folded into their functions. So no function call
@@ -1591,24 +1639,13 @@ debug:
static void __always_inline *slab_alloc(struct kmem_cache *s,
gfp_t gfpflags, int node, void *addr)
{
- void **object;
- unsigned long flags;
struct kmem_cache_cpu *c;
+ void **object;
- local_irq_save(flags);
c = get_cpu_slab(s, smp_processor_id());
- if (unlikely((is_end(c->freelist)) || !node_match(c, node))) {
-
- object = __slab_alloc(s, gfpflags, node, addr, c);
- if (unlikely(!object)) {
- local_irq_restore(flags);
- goto out;
- }
- } else {
- object = c->freelist;
- c->freelist = object[c->offset];
- }
- local_irq_restore(flags);
+ object = do_slab_alloc(s, c, gfpflags, node, addr);
+ if (unlikely(!object))
+ goto out;
if (unlikely((gfpflags & __GFP_ZERO)))
memset(object, 0, c->objsize);
@@ -1644,6 +1681,11 @@ static void __slab_free(struct kmem_cach
void *prior;
void **object = (void *)x;
+#ifdef CONFIG_FAST_CMPXCHG_LOCAL
+ unsigned long flags;
+
+ local_irq_save(flags);
+#endif
slab_lock(page);
if (unlikely(SlabDebug(page)))
@@ -1669,6 +1711,9 @@ checks_ok:
out_unlock:
slab_unlock(page);
+#ifdef CONFIG_FAST_CMPXCHG_LOCAL
+ local_irq_restore(flags);
+#endif
return;
slab_empty:
@@ -1679,6 +1724,9 @@ slab_empty:
remove_partial(s, page);
slab_unlock(page);
+#ifdef CONFIG_FAST_CMPXCHG_LOCAL
+ local_irq_restore(flags);
+#endif
discard_slab(s, page);
return;
@@ -1688,6 +1736,56 @@ debug:
goto checks_ok;
}
+#ifdef CONFIG_FAST_CMPXCHG_LOCAL
+static __always_inline void do_slab_free(struct kmem_cache *s,
+ struct page *page, void **object, void *addr)
+{
+ struct kmem_cache_cpu *c;
+ void **freelist;
+
+ c = get_cpu_slab(s, get_cpu());
+ do {
+ freelist = c->freelist;
+ barrier();
+ /*
+ * If the compiler would reorder the retrieval of c->page to
+ * come before c->freelist then an interrupt could
+ * change the cpu slab before we retrieve c->freelist. We
+ * could be matching on a page no longer active and put the
+ * object onto the freelist of the wrong slab.
+ *
+ * On the other hand: If we already have the freelist pointer
+ * then any change of cpu_slab will cause the cmpxchg to fail
+ * since the freelist pointers are unique per slab.
+ */
+ if (unlikely(page != c->page || c->node < 0)) {
+ __slab_free(s, page, object, addr, c->offset);
+ break;
+ }
+ object[c->offset] = freelist;
+ } while (cmpxchg_local(&c->freelist, freelist, object) != freelist);
+ put_cpu();
+}
+#else
+
+static __always_inline void do_slab_free(struct kmem_cache *s,
+ struct page *page, void **object, void *addr)
+{
+ struct kmem_cache_cpu *c;
+ unsigned long flags;
+
+ c = get_cpu_slab(s, smp_processor_id());
+ local_irq_save(flags);
+ if (likely(page == c->page && c->node >= 0)) {
+ object[c->offset] = c->freelist;
+ c->freelist = object;
+ } else
+ __slab_free(s, page, object, addr, c->offset);
+
+ local_irq_restore(flags);
+}
+#endif
+
/*
* Fastpath with forced inlining to produce a kfree and kmem_cache_free that
* can perform fastpath freeing without additional function calls.
@@ -1703,19 +1801,9 @@ static void __always_inline slab_free(st
struct page *page, void *x, void *addr)
{
void **object = (void *)x;
- unsigned long flags;
- struct kmem_cache_cpu *c;
- local_irq_save(flags);
debug_check_no_locks_freed(object, s->objsize);
- c = get_cpu_slab(s, smp_processor_id());
- if (likely(page == c->page && c->node >= 0)) {
- object[c->offset] = c->freelist;
- c->freelist = object;
- } else
- __slab_free(s, page, x, addr, c->offset);
-
- local_irq_restore(flags);
+ do_slab_free(s, page, object, addr);
}
void kmem_cache_free(struct kmem_cache *s, void *x)
Index: linux-2.6/arch/x86/Kconfig.i386
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig.i386
+++ linux-2.6/arch/x86/Kconfig.i386
@@ -51,6 +51,10 @@ config X86
bool
default y
+config FAST_CMPXCHG_LOCAL
+ bool
+ default y
+
config MMU
bool
default y
Index: linux-2.6/arch/x86/Kconfig.x86_64
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig.x86_64
+++ linux-2.6/arch/x86/Kconfig.x86_64
@@ -97,6 +97,10 @@ config X86_CMPXCHG
bool
default y
+config FAST_CMPXCHG_LOCAL
+ bool
+ default y
+
config EARLY_PRINTK
bool
default y
--
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [patch 08/10] SLUB: Optional fast path using cmpxchg_local
2007-10-28 13:05 ` Pekka J Enberg
@ 2007-10-29 2:59 ` Christoph Lameter
2007-10-29 3:34 ` Christoph Lameter
2007-10-30 18:30 ` Andrew Morton
2 siblings, 0 replies; 35+ messages in thread
From: Christoph Lameter @ 2007-10-29 2:59 UTC (permalink / raw)
To: Pekka J Enberg; +Cc: Matthew Wilcox, akpm, linux-kernel, linux-mm
On Sun, 28 Oct 2007, Pekka J Enberg wrote:
> Can you please write a comment of the locking rules when cmpxchg_local()
> is used? Looks as if we could push that local_irq_save() to slub_lock()
> and local_irq_restore() to slub_unlock() and deal with the unused flags
> variable for the non-CONFIG_FAST_CMPXCHG_LOCAL case with a macro, no?
Hmmmm... Maybe .. The locking rules are not changed at all by this patch.
The cmpxchg_local is only used for the per cpu object list. The per cpu
slabs are frozen.
--
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [patch 08/10] SLUB: Optional fast path using cmpxchg_local
2007-10-28 13:05 ` Pekka J Enberg
2007-10-29 2:59 ` Christoph Lameter
@ 2007-10-29 3:34 ` Christoph Lameter
2007-10-30 18:30 ` Andrew Morton
2 siblings, 0 replies; 35+ messages in thread
From: Christoph Lameter @ 2007-10-29 3:34 UTC (permalink / raw)
To: Pekka J Enberg; +Cc: Matthew Wilcox, akpm, linux-kernel, linux-mm
On Sun, 28 Oct 2007, Pekka J Enberg wrote:
> - local_irq_restore(flags);
> + object = do_slab_alloc(s, c, gfpflags, node, addr);
> + if (unlikely(!object))
> + goto out;
Undoing the optimization that one of the earlier patches added.
The #ifdef version is for me at least easier to read. The code there is a
special unit that has to deal with the most performance critical piece of
the slab allocator. And the #ifdef there clarifies that any changes have
to be done to both branches.
--
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [patch 08/10] SLUB: Optional fast path using cmpxchg_local
2007-10-28 13:05 ` Pekka J Enberg
2007-10-29 2:59 ` Christoph Lameter
2007-10-29 3:34 ` Christoph Lameter
@ 2007-10-30 18:30 ` Andrew Morton
2 siblings, 0 replies; 35+ messages in thread
From: Andrew Morton @ 2007-10-30 18:30 UTC (permalink / raw)
To: Pekka J Enberg; +Cc: clameter, matthew, linux-kernel, linux-mm
On Sun, 28 Oct 2007 15:05:50 +0200 (EET)
Pekka J Enberg <penberg@cs.helsinki.fi> wrote:
> On Sat, 27 Oct 2007, Christoph Lameter wrote:
> > The alternate path is realized using #ifdef's. Several attempts to do the
> > same with macros and in line functions resulted in a mess (in particular due
> > to the strange way that local_interrupt_save() handles its argument and due
> > to the need to define macros/functions that sometimes disable interrupts
> > and sometimes do something else. The macro based approaches made it also
> > difficult to preserve the optimizations for the non cmpxchg paths).
>
> I think at least slub_alloc() and slub_free() can be made simpler. See the
> included patch below.
Both versions look pretty crappy to me. The code duplication in the two
versions of do_slab_alloc() could be tidied up considerably.
> +#ifdef CONFIG_FAST_CMPXHG_LOCAL
> +static __always_inline void *do_slab_alloc(struct kmem_cache *s,
> + struct kmem_cache_cpu *c, gfp_t gfpflags, int node, void *addr)
> +{
> + unsigned long flags;
> + void **object;
> +
> + do {
> + object = c->freelist;
> + if (unlikely(is_end(object) || !node_match(c, node))) {
> + object = __slab_alloc(s, gfpflags, node, addr, c);
> + break;
> + }
> + } while (cmpxchg_local(&c->freelist, object, object[c->offset])
> + != object);
> + put_cpu();
> +
> + return object;
> +}
Unmatched put_cpu()
> +
> +static __always_inline void *do_slab_alloc(struct kmem_cache *s,
> + struct kmem_cache_cpu *c, gfp_t gfpflags, int node, void *addr)
> +{
> + unsigned long flags;
> + void **object;
> +
> + local_irq_save(flags);
> + if (unlikely((is_end(c->freelist)) || !node_match(c, node))) {
> + object = __slab_alloc(s, gfpflags, node, addr, c);
> + } else {
> + object = c->freelist;
> + c->freelist = object[c->offset];
> + }
> + local_irq_restore(flags);
> + return object;
> +}
> +#endif
> +
> /*
> * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
> * have the fastpath folded into their functions. So no function call
> @@ -1591,24 +1639,13 @@ debug:
> static void __always_inline *slab_alloc(struct kmem_cache *s,
> gfp_t gfpflags, int node, void *addr)
> {
> - void **object;
> - unsigned long flags;
> struct kmem_cache_cpu *c;
> + void **object;
>
> - local_irq_save(flags);
> c = get_cpu_slab(s, smp_processor_id());
smp_processor_id() in preemptible code.
--
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [patch 08/10] SLUB: Optional fast path using cmpxchg_local
2007-10-28 3:32 ` [patch 08/10] SLUB: Optional fast path using cmpxchg_local Christoph Lameter
2007-10-28 13:05 ` Pekka J Enberg
@ 2007-10-30 18:49 ` Andrew Morton
2007-10-30 18:58 ` Christoph Lameter
2007-10-31 2:28 ` [patch 08/10] SLUB: Optional fast path using cmpxchg_local Mathieu Desnoyers
2 siblings, 1 reply; 35+ messages in thread
From: Andrew Morton @ 2007-10-30 18:49 UTC (permalink / raw)
To: Christoph Lameter; +Cc: matthew, linux-kernel, linux-mm, penberg, linux-arch
On Sat, 27 Oct 2007 20:32:04 -0700
Christoph Lameter <clameter@sgi.com> wrote:
> Provide an alternate implementation of the SLUB fast paths for alloc
> and free using cmpxchg_local. The cmpxchg_local fast path is selected
> for arches that have CONFIG_FAST_CMPXCHG_LOCAL set. An arch should only
> set CONFIG_FAST_CMPXCHG_LOCAL if the cmpxchg_local is faster than an
> interrupt enable/disable sequence. This is known to be true for both
> x86 platforms so set FAST_CMPXCHG_LOCAL for both arches.
>
> Not all arches can support fast cmpxchg operations. Typically the
> architecture must have an optimized cmpxchg instruction. The
> cmpxchg fast path makes no sense on platforms whose cmpxchg is
> slower than interrupt enable/disable (like f.e. IA64).
>
> The advantages of a cmpxchg_local based fast path are:
>
> 1. Lower cycle count (30%-60% faster)
>
> 2. There is no need to disable and enable interrupts on the fast path.
> Currently interrupts have to be disabled and enabled on every
> slab operation. This is likely saving a significant percentage
> of interrupt off / on sequences in the kernel.
>
> 3. The disposal of freed slabs can occur with interrupts enabled.
>
> The alternate path is realized using #ifdef's. Several attempts to do the
> same with macros and in line functions resulted in a mess (in particular due
> to the strange way that local_interrupt_save() handles its argument and due
> to the need to define macros/functions that sometimes disable interrupts
> and sometimes do something else. The macro based approaches made it also
> difficult to preserve the optimizations for the non cmpxchg paths).
>
> #ifdef seems to be the way to go here to have a readable source.
>
>
> ---
> arch/x86/Kconfig.i386 | 4 ++
> arch/x86/Kconfig.x86_64 | 4 ++
> mm/slub.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++--
Let's cc linux-arch: presumably other architectures can implement cpu-local
cmpxchg and would see some benefit from doing so.
The semantics are "atomic wrt interrupts on this cpu, not atomic wrt other
cpus", yes?
Do you have a feel for how useful it would be for arch maintainers to implement
this? IOW, is it worth their time?
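Those semantics are easiest to picture from the fallback an architecture
without a cheap atomic instruction can use: disable interrupts locally around
a plain compare-and-exchange. A minimal sketch of that idea (kernel context
assumed; the name is made up, this is not any particular arch's
implementation):

/*
 * Atomic with respect to interrupts on the local CPU only: nothing
 * here orders or excludes accesses from other CPUs.
 */
static inline unsigned long cmpxchg_local_sketch(unsigned long *ptr,
		unsigned long old, unsigned long new)
{
	unsigned long flags, prev;

	local_irq_save(flags);		/* shut out local interrupts */
	prev = *ptr;
	if (prev == old)
		*ptr = new;		/* plain store, no LOCK prefix, no barrier */
	local_irq_restore(flags);
	return prev;
}

An arch with a fast native cmpxchg instruction replaces the irq save/restore
with that single instruction, which is where the fast-path win comes from.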
>
> Index: linux-2.6/mm/slub.c
> ===================================================================
> --- linux-2.6.orig/mm/slub.c 2007-10-27 10:39:07.583665939 -0700
> +++ linux-2.6/mm/slub.c 2007-10-27 10:40:19.710415861 -0700
> @@ -1496,7 +1496,12 @@ static void *__slab_alloc(struct kmem_ca
> {
> void **object;
> struct page *new;
> +#ifdef CONFIG_FAST_CMPXCHG_LOCAL
> + unsigned long flags;
>
> + local_irq_save(flags);
> + preempt_enable_no_resched();
> +#endif
> if (!c->page)
> goto new_slab;
>
> @@ -1518,6 +1523,10 @@ load_freelist:
> unlock_out:
> slab_unlock(c->page);
> out:
> +#ifdef CONFIG_FAST_CMPXCHG_LOCAL
> + preempt_disable();
> + local_irq_restore(flags);
> +#endif
> return object;
>
> another_slab:
> @@ -1592,9 +1601,26 @@ static void __always_inline *slab_alloc(
> gfp_t gfpflags, int node, void *addr)
> {
> void **object;
> - unsigned long flags;
> struct kmem_cache_cpu *c;
>
> +#ifdef CONFIG_FAST_CMPXCHG_LOCAL
> + c = get_cpu_slab(s, get_cpu());
> + do {
> + object = c->freelist;
> + if (unlikely(is_end(object) || !node_match(c, node))) {
> + object = __slab_alloc(s, gfpflags, node, addr, c);
> + if (unlikely(!object)) {
> + put_cpu();
> + goto out;
> + }
> + break;
> + }
> + } while (cmpxchg_local(&c->freelist, object, object[c->offset])
> + != object);
> + put_cpu();
> +#else
> + unsigned long flags;
> +
> local_irq_save(flags);
> c = get_cpu_slab(s, smp_processor_id());
> if (unlikely((is_end(c->freelist)) || !node_match(c, node))) {
> @@ -1609,6 +1635,7 @@ static void __always_inline *slab_alloc(
> c->freelist = object[c->offset];
> }
> local_irq_restore(flags);
> +#endif
>
> if (unlikely((gfpflags & __GFP_ZERO)))
> memset(object, 0, c->objsize);
> @@ -1644,6 +1671,11 @@ static void __slab_free(struct kmem_cach
> void *prior;
> void **object = (void *)x;
>
> +#ifdef CONFIG_FAST_CMPXCHG_LOCAL
> + unsigned long flags;
> +
> + local_irq_save(flags);
> +#endif
> slab_lock(page);
>
> if (unlikely(SlabDebug(page)))
> @@ -1669,6 +1701,9 @@ checks_ok:
>
> out_unlock:
> slab_unlock(page);
> +#ifdef CONFIG_FAST_CMPXCHG_LOCAL
> + local_irq_restore(flags);
> +#endif
> return;
>
> slab_empty:
> @@ -1679,6 +1714,9 @@ slab_empty:
> remove_partial(s, page);
>
> slab_unlock(page);
> +#ifdef CONFIG_FAST_CMPXCHG_LOCAL
> + local_irq_restore(flags);
> +#endif
> discard_slab(s, page);
> return;
>
> @@ -1703,9 +1741,37 @@ static void __always_inline slab_free(st
> struct page *page, void *x, void *addr)
> {
> void **object = (void *)x;
> - unsigned long flags;
> struct kmem_cache_cpu *c;
>
> +#ifdef CONFIG_FAST_CMPXCHG_LOCAL
> + void **freelist;
> +
> + c = get_cpu_slab(s, get_cpu());
> + debug_check_no_locks_freed(object, s->objsize);
> + do {
> + freelist = c->freelist;
> + barrier();
> + /*
> + * If the compiler would reorder the retrieval of c->page to
> + * come before c->freelist then an interrupt could
> + * change the cpu slab before we retrieve c->freelist. We
> + * could be matching on a page no longer active and put the
> + * object onto the freelist of the wrong slab.
> + *
> + * On the other hand: If we already have the freelist pointer
> + * then any change of cpu_slab will cause the cmpxchg to fail
> + * since the freelist pointers are unique per slab.
> + */
> + if (unlikely(page != c->page || c->node < 0)) {
> + __slab_free(s, page, x, addr, c->offset);
> + break;
> + }
> + object[c->offset] = freelist;
> + } while (cmpxchg_local(&c->freelist, freelist, object) != freelist);
> + put_cpu();
> +#else
> + unsigned long flags;
> +
> local_irq_save(flags);
> debug_check_no_locks_freed(object, s->objsize);
> c = get_cpu_slab(s, smp_processor_id());
> @@ -1716,6 +1782,7 @@ static void __always_inline slab_free(st
> __slab_free(s, page, x, addr, c->offset);
>
> local_irq_restore(flags);
> +#endif
> }
>
> void kmem_cache_free(struct kmem_cache *s, void *x)
> Index: linux-2.6/arch/x86/Kconfig.i386
> ===================================================================
> --- linux-2.6.orig/arch/x86/Kconfig.i386 2007-10-27 10:38:33.630415778 -0700
> +++ linux-2.6/arch/x86/Kconfig.i386 2007-10-27 10:40:19.710415861 -0700
> @@ -51,6 +51,10 @@ config X86
> bool
> default y
>
> +config FAST_CMPXCHG_LOCAL
> + bool
> + default y
> +
> config MMU
> bool
> default y
> Index: linux-2.6/arch/x86/Kconfig.x86_64
> ===================================================================
> --- linux-2.6.orig/arch/x86/Kconfig.x86_64 2007-10-27 10:38:33.630415778 -0700
> +++ linux-2.6/arch/x86/Kconfig.x86_64 2007-10-27 10:40:19.710415861 -0700
> @@ -97,6 +97,10 @@ config X86_CMPXCHG
> bool
> default y
>
> +config FAST_CMPXCHG_LOCAL
> + bool
> + default y
> +
> config EARLY_PRINTK
> bool
> default y
>
> --
* Re: [patch 08/10] SLUB: Optional fast path using cmpxchg_local
2007-10-30 18:49 ` Andrew Morton
@ 2007-10-30 18:58 ` Christoph Lameter
2007-10-30 19:12 ` Mathieu Desnoyers
2007-10-31 1:52 ` [PATCH] local_t Documentation update 2 Mathieu Desnoyers
0 siblings, 2 replies; 35+ messages in thread
From: Christoph Lameter @ 2007-10-30 18:58 UTC (permalink / raw)
To: Andrew Morton
Cc: matthew, linux-kernel, linux-mm, penberg, Mathieu Desnoyers,
linux-arch
On Tue, 30 Oct 2007, Andrew Morton wrote:
> Let's cc linux-arch: presumably other architectures can implement cpu-local
> cmpxchg and would see some benefit from doing so.
Mathieu had a whole series of cmpxchg_local patches. Ccing him too. I
think he has some numbers for other architectures.
> The semantics are "atomic wrt interrupts on this cpu, not atomic wrt other
> cpus", yes?
Right.
> Do you have a feel for how useful it would be for arch maintainers to implement
> this? IOW, is it worth their time?
That depends on the efficiency of a cmpxchg_local vs. the interrupt
enable/disable sequence on a particular arch. On x86 this yields about a
50% saving in cycles, so it doubles the speed of the fastpath. On other
architectures the cmpxchg is so slow that it is not worth it (ia64 f.e.)
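One way for an arch maintainer to get a feel for that is a throwaway module
that times both sequences with get_cycles(). A rough sketch (it assumes
cmpxchg_local() is available on the arch, per the patches under discussion;
exact headers may differ per arch and the names are made up):

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/irqflags.h>
#include <linux/timex.h>

#define LOOPS	1000000

static unsigned long dummy;

static int __init cmpxchg_local_cost_init(void)
{
	cycles_t t0, t1, t2;
	unsigned long flags;
	int i;

	t0 = get_cycles();
	for (i = 0; i < LOOPS; i++) {
		local_irq_save(flags);
		local_irq_restore(flags);
	}
	t1 = get_cycles();
	for (i = 0; i < LOOPS; i++)
		cmpxchg_local(&dummy, 0UL, 0UL);
	t2 = get_cycles();

	printk(KERN_INFO "%d irq save/restore: %llu cycles, "
		"%d cmpxchg_local: %llu cycles\n",
		LOOPS, (unsigned long long)(t1 - t0),
		LOOPS, (unsigned long long)(t2 - t1));
	return -EAGAIN;		/* report and refuse to stay loaded */
}

module_init(cmpxchg_local_cost_init);
MODULE_LICENSE("GPL");

Whatever the absolute numbers are, the ratio of the two totals is roughly the
answer to whether it is worth the arch maintainer's time.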
* Re: [patch 08/10] SLUB: Optional fast path using cmpxchg_local
2007-10-30 18:58 ` Christoph Lameter
@ 2007-10-30 19:12 ` Mathieu Desnoyers
2007-10-31 1:52 ` [PATCH] local_t Documentation update 2 Mathieu Desnoyers
1 sibling, 0 replies; 35+ messages in thread
From: Mathieu Desnoyers @ 2007-10-30 19:12 UTC (permalink / raw)
To: Christoph Lameter
Cc: Andrew Morton, matthew, linux-kernel, linux-mm, penberg,
linux-arch
* Christoph Lameter (clameter@sgi.com) wrote:
> On Tue, 30 Oct 2007, Andrew Morton wrote:
>
> > Let's cc linux-arch: presumably other architectures can implement cpu-local
> > cmpxchg and would see some benefit from doing so.
>
> Mathieu had a whole series of cmpxchg_local patches. Ccing him too. I
> think he has some numbers for other architectures.
>
Well, I tested it on x86 and AMD64 only. For slub:
Using cmpxchg_local shows a performance improvement of the fast path
that goes from a 66% speedup on a Pentium 4 to a 14% speedup on AMD64.
It really depends on how fast cmpxchg_local is vs disabling interrupts.
> > The semantics are "atomic wrt interrupts on this cpu, not atomic wrt other
> > cpus", yes?
>
> Right.
>
> > Do you have a feel for how useful it would be for arch maintainers to implement
> > this? IOW, is it worth their time?
>
> That depends on the efficiency of a cmpxchg_local vs. the interrupt
> enable/disable sequence on a particular arch. On x86 this yields about a
> 50% saving in cycles, so it doubles the speed of the fastpath. On other
> architectures the cmpxchg is so slow that it is not worth it (ia64 f.e.)
As Christoph pointed out, we even saw a small slowdown on ia64 because
there is no concept of atomicity wrt only one CPU. Emulating this with
irq disable has been tried, but just the supplementary memory barriers
hurt performance a bit. We tried to come up with clever macros that
switch between irq disable and cmpxchg_local depending on the
architecture, but all the results were awkward.
I guess it's time for me to repost my patchset. I use interrupt disable
to emulate the cmpxchg_local on architectures that lack atomic ops.
# cmpxchg_local and cmpxchg64_local standardization
add-cmpxchg-local-to-generic-for-up.patch
i386-cmpxchg64-80386-80486-fallback.patch
add-cmpxchg64-to-alpha.patch
add-cmpxchg64-to-mips.patch
add-cmpxchg64-to-powerpc.patch
add-cmpxchg64-to-x86_64.patch
#
add-cmpxchg-local-to-arm.patch
add-cmpxchg-local-to-avr32.patch
add-cmpxchg-local-to-blackfin.patch
add-cmpxchg-local-to-cris.patch
add-cmpxchg-local-to-frv.patch
add-cmpxchg-local-to-h8300.patch
add-cmpxchg-local-to-ia64.patch
add-cmpxchg-local-to-m32r.patch
fix-m32r-__xchg.patch
fix-m32r-include-sched-h-in-smpboot.patch
local_t_m32r_optimized.patch
add-cmpxchg-local-to-m68k.patch
add-cmpxchg-local-to-m68knommu.patch
add-cmpxchg-local-to-parisc.patch
add-cmpxchg-local-to-ppc.patch
add-cmpxchg-local-to-s390.patch
add-cmpxchg-local-to-sh.patch
add-cmpxchg-local-to-sh64.patch
add-cmpxchg-local-to-sparc.patch
add-cmpxchg-local-to-sparc64.patch
add-cmpxchg-local-to-v850.patch
add-cmpxchg-local-to-xtensa.patch
#
slub-use-cmpxchg-local.patch
Mathieu
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
* [PATCH] local_t Documentation update 2
2007-10-30 18:58 ` Christoph Lameter
2007-10-30 19:12 ` Mathieu Desnoyers
@ 2007-10-31 1:52 ` Mathieu Desnoyers
1 sibling, 0 replies; 35+ messages in thread
From: Mathieu Desnoyers @ 2007-10-31 1:52 UTC (permalink / raw)
To: Andrew Morton
Cc: matthew, linux-kernel, linux-mm, penberg, linux-arch,
Christoph Lameter
local_t Documentation update 2
(this patch seems to have fallen off the grid, but is still providing
useful information. It applies to 2.6.23-mm1.)
Grant Grundler was asking for more detail about correct usage of local atomic
operations and suggested adding the resulting summary to local_ops.txt.
"Please add a bit more detail. If DaveM is correct (he normally is), then
there must be limits on how the local_t can be used in the kernel process
and interrupt contexts. I'd like those rules spelled out very clearly
since it's easy to get wrong and tracking down such a bug is quite painful."
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Grant Grundler <grundler@parisc-linux.org>
---
Documentation/local_ops.txt | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
Index: linux-2.6-lttng/Documentation/local_ops.txt
===================================================================
--- linux-2.6-lttng.orig/Documentation/local_ops.txt 2007-09-04 11:53:23.000000000 -0400
+++ linux-2.6-lttng/Documentation/local_ops.txt 2007-09-04 12:19:31.000000000 -0400
@@ -68,6 +68,29 @@ typedef struct { atomic_long_t a; } loca
variable can be read when reading some _other_ cpu's variables.
+* Rules to follow when using local atomic operations
+
+- Variables touched by local ops must be per cpu variables.
+- _Only_ the CPU owner of these variables must write to them.
+- This CPU can use local ops from any context (process, irq, softirq, nmi, ...)
+ to update its local_t variables.
+- Preemption (or interrupts) must be disabled when using local ops in
+ process context to make sure the process won't be migrated to a
+ different CPU between getting the per-cpu variable and doing the
+ actual local op.
+- When using local ops in interrupt context, no special care must be
+ taken on a mainline kernel, since they will run on the local CPU with
+ preemption already disabled. I suggest, however, to explicitly
+ disable preemption anyway to make sure it will still work correctly on
+ -rt kernels.
+- Reading the local cpu variable will provide the current copy of the
+ variable.
+- Reads of these variables can be done from any CPU, because updates to
+ "long", aligned, variables are always atomic. Since no memory
+ synchronization is done by the writer CPU, an outdated copy of the
+ variable can be read when reading some _other_ cpu's variables.
+
+
* How to use local atomic operations
#include <linux/percpu.h>
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
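As a concrete companion to the rules above, a minimal usage sketch that stays
within them (illustrative only, names made up, not part of the patch): a
per-cpu counter that only the owning CPU increments, while any CPU may read
it.

#include <linux/percpu.h>
#include <asm/local.h>

static DEFINE_PER_CPU(local_t, hit_count);

/* Process context: pin the CPU while touching its own counter. */
static void count_hit(void)
{
	local_inc(&get_cpu_var(hit_count));	/* get_cpu_var() disables preemption */
	put_cpu_var(hit_count);
}

/* Any CPU may read; another CPU's counter may be slightly stale. */
static long read_hits(int cpu)
{
	return local_read(&per_cpu(hit_count, cpu));
}

In interrupt context the get_cpu_var()/put_cpu_var() pair is not strictly
required on mainline, but as the rules note, keeping preemption explicitly
disabled also keeps the code correct on -rt.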
* Re: [patch 08/10] SLUB: Optional fast path using cmpxchg_local
2007-10-28 3:32 ` [patch 08/10] SLUB: Optional fast path using cmpxchg_local Christoph Lameter
2007-10-28 13:05 ` Pekka J Enberg
2007-10-30 18:49 ` Andrew Morton
@ 2007-10-31 2:28 ` Mathieu Desnoyers
2 siblings, 0 replies; 35+ messages in thread
From: Mathieu Desnoyers @ 2007-10-31 2:28 UTC (permalink / raw)
To: Christoph Lameter
Cc: Matthew Wilcox, akpm, linux-kernel, linux-mm, Pekka Enberg
* Christoph Lameter (clameter@sgi.com) wrote:
> Provide an alternate implementation of the SLUB fast paths for alloc
> and free using cmpxchg_local. The cmpxchg_local fast path is selected
> for arches that have CONFIG_FAST_CMPXCHG_LOCAL set. An arch should only
> set CONFIG_FAST_CMPXCHG_LOCAL if the cmpxchg_local is faster than an
> interrupt enable/disable sequence. This is known to be true for both
> x86 platforms so set FAST_CMPXCHG_LOCAL for both arches.
>
> Not all arches can support fast cmpxchg operations. Typically the
> architecture must have an optimized cmpxchg instruction. The
> cmpxchg fast path makes no sense on platforms whose cmpxchg is
> slower than interrupt enable/disable (like f.e. IA64).
>
> The advantages of a cmpxchg_local based fast path are:
>
> 1. Lower cycle count (30%-60% faster)
>
> 2. There is no need to disable and enable interrupts on the fast path.
> Currently interrupts have to be disabled and enabled on every
> slab operation. This is likely saving a significant percentage
> of interrupt off / on sequences in the kernel.
>
> 3. The disposal of freed slabs can occur with interrupts enabled.
>
It would require some testing, but I suspect that powerpc, mips and m32r
are three other architectures that could benefit from this (off the top
of my head).
Mathieu
> The alternate path is realized using #ifdef's. Several attempts to do the
> same with macros and in line functions resulted in a mess (in particular due
> to the strange way that local_interrupt_save() handles its argument and due
> to the need to define macros/functions that sometimes disable interrupts
> and sometimes do something else. The macro based approaches made it also
> difficult to preserve the optimizations for the non cmpxchg paths).
>
> #ifdef seems to be the way to go here to have a readable source.
>
> Signed-off-by: Christoph Lameter <clameter@sgi.com>
>
> ---
> arch/x86/Kconfig.i386 | 4 ++
> arch/x86/Kconfig.x86_64 | 4 ++
> mm/slub.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++--
> 3 files changed, 77 insertions(+), 2 deletions(-)
>
> Index: linux-2.6/mm/slub.c
> ===================================================================
> --- linux-2.6.orig/mm/slub.c 2007-10-27 10:39:07.583665939 -0700
> +++ linux-2.6/mm/slub.c 2007-10-27 10:40:19.710415861 -0700
> @@ -1496,7 +1496,12 @@ static void *__slab_alloc(struct kmem_ca
> {
> void **object;
> struct page *new;
> +#ifdef CONFIG_FAST_CMPXCHG_LOCAL
> + unsigned long flags;
>
> + local_irq_save(flags);
> + preempt_enable_no_resched();
> +#endif
> if (!c->page)
> goto new_slab;
>
> @@ -1518,6 +1523,10 @@ load_freelist:
> unlock_out:
> slab_unlock(c->page);
> out:
> +#ifdef CONFIG_FAST_CMPXCHG_LOCAL
> + preempt_disable();
> + local_irq_restore(flags);
> +#endif
> return object;
>
> another_slab:
> @@ -1592,9 +1601,26 @@ static void __always_inline *slab_alloc(
> gfp_t gfpflags, int node, void *addr)
> {
> void **object;
> - unsigned long flags;
> struct kmem_cache_cpu *c;
>
> +#ifdef CONFIG_FAST_CMPXCHG_LOCAL
> + c = get_cpu_slab(s, get_cpu());
> + do {
> + object = c->freelist;
> + if (unlikely(is_end(object) || !node_match(c, node))) {
> + object = __slab_alloc(s, gfpflags, node, addr, c);
> + if (unlikely(!object)) {
> + put_cpu();
> + goto out;
> + }
> + break;
> + }
> + } while (cmpxchg_local(&c->freelist, object, object[c->offset])
> + != object);
> + put_cpu();
> +#else
> + unsigned long flags;
> +
> local_irq_save(flags);
> c = get_cpu_slab(s, smp_processor_id());
> if (unlikely((is_end(c->freelist)) || !node_match(c, node))) {
> @@ -1609,6 +1635,7 @@ static void __always_inline *slab_alloc(
> c->freelist = object[c->offset];
> }
> local_irq_restore(flags);
> +#endif
>
> if (unlikely((gfpflags & __GFP_ZERO)))
> memset(object, 0, c->objsize);
> @@ -1644,6 +1671,11 @@ static void __slab_free(struct kmem_cach
> void *prior;
> void **object = (void *)x;
>
> +#ifdef CONFIG_FAST_CMPXCHG_LOCAL
> + unsigned long flags;
> +
> + local_irq_save(flags);
> +#endif
> slab_lock(page);
>
> if (unlikely(SlabDebug(page)))
> @@ -1669,6 +1701,9 @@ checks_ok:
>
> out_unlock:
> slab_unlock(page);
> +#ifdef CONFIG_FAST_CMPXCHG_LOCAL
> + local_irq_restore(flags);
> +#endif
> return;
>
> slab_empty:
> @@ -1679,6 +1714,9 @@ slab_empty:
> remove_partial(s, page);
>
> slab_unlock(page);
> +#ifdef CONFIG_FAST_CMPXCHG_LOCAL
> + local_irq_restore(flags);
> +#endif
> discard_slab(s, page);
> return;
>
> @@ -1703,9 +1741,37 @@ static void __always_inline slab_free(st
> struct page *page, void *x, void *addr)
> {
> void **object = (void *)x;
> - unsigned long flags;
> struct kmem_cache_cpu *c;
>
> +#ifdef CONFIG_FAST_CMPXCHG_LOCAL
> + void **freelist;
> +
> + c = get_cpu_slab(s, get_cpu());
> + debug_check_no_locks_freed(object, s->objsize);
> + do {
> + freelist = c->freelist;
> + barrier();
> + /*
> + * If the compiler would reorder the retrieval of c->page to
> + * come before c->freelist then an interrupt could
> + * change the cpu slab before we retrieve c->freelist. We
> + * could be matching on a page no longer active and put the
> + * object onto the freelist of the wrong slab.
> + *
> + * On the other hand: If we already have the freelist pointer
> + * then any change of cpu_slab will cause the cmpxchg to fail
> + * since the freelist pointers are unique per slab.
> + */
> + if (unlikely(page != c->page || c->node < 0)) {
> + __slab_free(s, page, x, addr, c->offset);
> + break;
> + }
> + object[c->offset] = freelist;
> + } while (cmpxchg_local(&c->freelist, freelist, object) != freelist);
> + put_cpu();
> +#else
> + unsigned long flags;
> +
> local_irq_save(flags);
> debug_check_no_locks_freed(object, s->objsize);
> c = get_cpu_slab(s, smp_processor_id());
> @@ -1716,6 +1782,7 @@ static void __always_inline slab_free(st
> __slab_free(s, page, x, addr, c->offset);
>
> local_irq_restore(flags);
> +#endif
> }
>
> void kmem_cache_free(struct kmem_cache *s, void *x)
> Index: linux-2.6/arch/x86/Kconfig.i386
> ===================================================================
> --- linux-2.6.orig/arch/x86/Kconfig.i386 2007-10-27 10:38:33.630415778 -0700
> +++ linux-2.6/arch/x86/Kconfig.i386 2007-10-27 10:40:19.710415861 -0700
> @@ -51,6 +51,10 @@ config X86
> bool
> default y
>
> +config FAST_CMPXCHG_LOCAL
> + bool
> + default y
> +
> config MMU
> bool
> default y
> Index: linux-2.6/arch/x86/Kconfig.x86_64
> ===================================================================
> --- linux-2.6.orig/arch/x86/Kconfig.x86_64 2007-10-27 10:38:33.630415778 -0700
> +++ linux-2.6/arch/x86/Kconfig.x86_64 2007-10-27 10:40:19.710415861 -0700
> @@ -97,6 +97,10 @@ config X86_CMPXCHG
> bool
> default y
>
> +config FAST_CMPXCHG_LOCAL
> + bool
> + default y
> +
> config EARLY_PRINTK
> bool
> default y
>
> --
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
* [patch 09/10] SLUB: Do our own locking via slab_lock and slab_unlock.
2007-10-28 3:31 [patch 00/10] SLUB: SMP regression tests on Dual Xeon E5345 (8p) and new performance patches Christoph Lameter
` (7 preceding siblings ...)
2007-10-28 3:32 ` [patch 08/10] SLUB: Optional fast path using cmpxchg_local Christoph Lameter
@ 2007-10-28 3:32 ` Christoph Lameter
2007-10-28 15:10 ` Pekka J Enberg
2007-10-30 4:50 ` Nick Piggin
2007-10-28 3:32 ` [patch 10/10] SLUB: Restructure slab alloc Christoph Lameter
9 siblings, 2 replies; 35+ messages in thread
From: Christoph Lameter @ 2007-10-28 3:32 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: akpm, linux-kernel, linux-mm, Pekka Enberg
[-- Attachment #1: slub_ownlocking --]
[-- Type: text/plain, Size: 17467 bytes --]
Too many troubles with the bitlocks and we really do not need
to do any bitops. Bitops do not effectively retrieve the old
value which we want. So use a cmpxchg instead on the arches
that allow it.
Instead of modifying the page->flags with fake atomic operations
we pass the page state as a parameter to functions. No function uses
the slab state if the page lock is held. Function must wait until the
lock is cleared. Thus we can defer the update of page->flags until slab
processing is complete. The "unlock" operation is then simply updating
page->flags.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
mm/slub.c | 324 +++++++++++++++++++++++++++++++++++---------------------------
1 file changed, 187 insertions(+), 137 deletions(-)
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c 2007-10-27 07:56:03.000000000 -0700
+++ linux-2.6/mm/slub.c 2007-10-27 07:56:52.000000000 -0700
@@ -101,6 +101,7 @@
*/
#define FROZEN (1 << PG_active)
+#define LOCKED (1 << PG_locked)
#ifdef CONFIG_SLUB_DEBUG
#define SLABDEBUG (1 << PG_error)
@@ -108,36 +109,6 @@
#define SLABDEBUG 0
#endif
-static inline int SlabFrozen(struct page *page)
-{
- return page->flags & FROZEN;
-}
-
-static inline void SetSlabFrozen(struct page *page)
-{
- page->flags |= FROZEN;
-}
-
-static inline void ClearSlabFrozen(struct page *page)
-{
- page->flags &= ~FROZEN;
-}
-
-static inline int SlabDebug(struct page *page)
-{
- return page->flags & SLABDEBUG;
-}
-
-static inline void SetSlabDebug(struct page *page)
-{
- page->flags |= SLABDEBUG;
-}
-
-static inline void ClearSlabDebug(struct page *page)
-{
- page->flags &= ~SLABDEBUG;
-}
-
/*
* Issues still to be resolved:
*
@@ -818,11 +789,12 @@ static void trace(struct kmem_cache *s,
/*
* Tracking of fully allocated slabs for debugging purposes.
*/
-static void add_full(struct kmem_cache *s, struct page *page)
+static void add_full(struct kmem_cache *s, struct page *page,
+ unsigned long state)
{
struct kmem_cache_node *n = get_node(s, page_to_nid(page));
- if (!SlabDebug(page) || !(s->flags & SLAB_STORE_USER))
+ if (!(state & SLABDEBUG) || !(s->flags & SLAB_STORE_USER))
return;
spin_lock(&n->list_lock);
list_add(&page->lru, &n->full);
@@ -894,7 +866,7 @@ bad:
}
static noinline int free_debug_processing(struct kmem_cache *s,
- struct page *page, void *object, void *addr)
+ struct page *page, void *object, void *addr, unsigned long state)
{
if (!check_slab(s, page))
goto fail;
@@ -930,7 +902,7 @@ static noinline int free_debug_processin
}
/* Special debug activities for freeing objects */
- if (!SlabFrozen(page) && page->freelist == page->end)
+ if (!(state & FROZEN) && page->freelist == page->end)
remove_full(s, page);
if (s->flags & SLAB_STORE_USER)
set_track(s, object, TRACK_FREE, addr);
@@ -1047,7 +1019,8 @@ static inline int slab_pad_check(struct
{ return 1; }
static inline int check_object(struct kmem_cache *s, struct page *page,
void *object, int active) { return 1; }
-static inline void add_full(struct kmem_cache *s, struct page *page) {}
+static inline void add_full(struct kmem_cache *s, struct page *page,
+ unsigned long state) {}
static inline unsigned long kmem_cache_flags(unsigned long objsize,
unsigned long flags, const char *name,
void (*ctor)(struct kmem_cache *, void *))
@@ -1105,6 +1078,7 @@ static noinline struct page *new_slab(st
void *start;
void *last;
void *p;
+ unsigned long state;
BUG_ON(flags & GFP_SLAB_BUG_MASK);
@@ -1117,11 +1091,12 @@ static noinline struct page *new_slab(st
if (n)
atomic_long_inc(&n->nr_slabs);
page->slab = s;
- page->flags |= 1 << PG_slab;
+ state = 1 << PG_slab;
if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
SLAB_STORE_USER | SLAB_TRACE))
- SetSlabDebug(page);
+ state |= SLABDEBUG;
+ page->flags |= state;
start = page_address(page);
page->end = start + 1;
@@ -1147,13 +1122,13 @@ static void __free_slab(struct kmem_cach
{
int pages = 1 << s->order;
- if (unlikely(SlabDebug(page))) {
+ if (unlikely(page->flags & SLABDEBUG)) {
void *p;
slab_pad_check(s, page);
for_each_object(p, s, slab_address(page))
check_object(s, page, p, 0);
- ClearSlabDebug(page);
+ page->flags &= ~SLABDEBUG;
}
mod_zone_page_state(page_zone(page),
@@ -1196,27 +1171,73 @@ static void discard_slab(struct kmem_cac
free_slab(s, page);
}
+#ifdef __HAVE_ARCH_CMPXCHG
/*
* Per slab locking using the pagelock
*/
-static __always_inline void slab_lock(struct page *page)
+static __always_inline void slab_unlock(struct page *page,
+ unsigned long state)
{
- bit_spin_lock(PG_locked, &page->flags);
+ smp_wmb();
+ page->flags = state;
+ preempt_enable();
+ __release(bitlock);
+}
+
+static __always_inline unsigned long slab_trylock(struct page *page)
+{
+ unsigned long state;
+
+ preempt_disable();
+ state = page->flags & ~LOCKED;
+#ifdef CONFIG_SMP
+ if (cmpxchg(&page->flags, state, state | LOCKED) != state) {
+ preempt_enable();
+ return 0;
+ }
+#endif
+ __acquire(bitlock);
+ return state;
}
-static __always_inline void slab_unlock(struct page *page)
+static __always_inline unsigned long slab_lock(struct page *page)
{
- bit_spin_unlock(PG_locked, &page->flags);
+ unsigned long state;
+
+ preempt_disable();
+#ifdef CONFIG_SMP
+ do {
+ state = page->flags & ~LOCKED;
+ } while (cmpxchg(&page->flags, state, state | LOCKED) != state);
+#else
+ state = page->flags & ~LOCKED;
+#endif
+ __acquire(bitlock);
+ return state;
}
-static __always_inline int slab_trylock(struct page *page)
+#else
+static __always_inline void slab_unlock(struct page *page,
+ unsigned long state)
{
- int rc = 1;
+ page->flags = state;
+ __bit_spin_unlock(PG_locked, &page->flags);
+}
- rc = bit_spin_trylock(PG_locked, &page->flags);
- return rc;
+static __always_inline unsigned long slab_trylock(struct page *page)
+{
+ if (!bit_spin_trylock(PG_locked, &page->flags))
+ return 0;
+ return page->flags;
}
+static __always_inline unsigned long slab_lock(struct page *page)
+{
+ bit_spin_lock(PG_locked, &page->flags);
+ return page->flags;
+}
+#endif
+
/*
* Management of partially allocated slabs
*/
@@ -1250,13 +1271,17 @@ static noinline void remove_partial(stru
*
* Must hold list_lock.
*/
-static inline int lock_and_freeze_slab(struct kmem_cache_node *n, struct page *page)
+static inline unsigned long lock_and_freeze_slab(struct kmem_cache_node *n,
+ struct kmem_cache_cpu *c, struct page *page)
{
- if (slab_trylock(page)) {
+ unsigned long state;
+
+ state = slab_trylock(page);
+ if (state) {
list_del(&page->lru);
n->nr_partial--;
- SetSlabFrozen(page);
- return 1;
+ c->page = page;
+ return state | FROZEN;
}
return 0;
}
@@ -1264,9 +1289,11 @@ static inline int lock_and_freeze_slab(s
/*
* Try to allocate a partial slab from a specific node.
*/
-static struct page *get_partial_node(struct kmem_cache_node *n)
+static unsigned long get_partial_node(struct kmem_cache_node *n,
+ struct kmem_cache_cpu *c)
{
struct page *page;
+ unsigned long state;
/*
* Racy check. If we mistakenly see no partial slabs then we
@@ -1275,27 +1302,30 @@ static struct page *get_partial_node(str
* will return NULL.
*/
if (!n || !n->nr_partial)
- return NULL;
+ return 0;
spin_lock(&n->list_lock);
- list_for_each_entry(page, &n->partial, lru)
- if (lock_and_freeze_slab(n, page))
+ list_for_each_entry(page, &n->partial, lru) {
+ state = lock_and_freeze_slab(n, c, page);
+ if (state)
goto out;
- page = NULL;
+ }
+ state = 0;
out:
spin_unlock(&n->list_lock);
- return page;
+ return state;
}
/*
* Get a page from somewhere. Search in increasing NUMA distances.
*/
-static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
+static unsigned long get_any_partial(struct kmem_cache *s,
+ struct kmem_cache_cpu *c, gfp_t flags)
{
#ifdef CONFIG_NUMA
struct zonelist *zonelist;
struct zone **z;
- struct page *page;
+ unsigned long state;
/*
* The defrag ratio allows a configuration of the tradeoffs between
@@ -1316,7 +1346,7 @@ static struct page *get_any_partial(stru
* with available objects.
*/
if (!s->defrag_ratio || get_cycles() % 1024 > s->defrag_ratio)
- return NULL;
+ return 0;
zonelist = &NODE_DATA(slab_node(current->mempolicy))
->node_zonelists[gfp_zone(flags)];
@@ -1327,28 +1357,30 @@ static struct page *get_any_partial(stru
if (n && cpuset_zone_allowed_hardwall(*z, flags) &&
n->nr_partial > MIN_PARTIAL) {
- page = get_partial_node(n);
- if (page)
- return page;
+ state = get_partial_node(n, c);
+ if (state)
+ return state;
}
}
#endif
- return NULL;
+ return 0;
}
/*
- * Get a partial page, lock it and return it.
+ * Get a partial page, lock it and make it the current cpu slab.
*/
-static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
+static noinline unsigned long get_partial(struct kmem_cache *s,
+ struct kmem_cache_cpu *c, gfp_t flags, int node)
{
- struct page *page;
+ unsigned long state;
int searchnode = (node == -1) ? numa_node_id() : node;
- page = get_partial_node(get_node(s, searchnode));
- if (page || (flags & __GFP_THISNODE))
- return page;
-
- return get_any_partial(s, flags);
+ state = get_partial_node(get_node(s, searchnode), c);
+ if (!state && !(flags & __GFP_THISNODE))
+ state = get_any_partial(s, c, flags);
+ if (!state)
+ return 0;
+ return state;
}
/*
@@ -1358,16 +1390,17 @@ static struct page *get_partial(struct k
*
* On exit the slab lock will have been dropped.
*/
-static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
+static void unfreeze_slab(struct kmem_cache *s, struct page *page,
+ int tail, unsigned long state)
{
- ClearSlabFrozen(page);
+ state &= ~FROZEN;
if (page->inuse) {
if (page->freelist != page->end)
add_partial(s, page, tail);
else
- add_full(s, page);
- slab_unlock(page);
+ add_full(s, page, state);
+ slab_unlock(page, state);
} else {
if (get_node(s, page_to_nid(page))->nr_partial
@@ -1381,9 +1414,9 @@ static void unfreeze_slab(struct kmem_ca
* reclaim empty slabs from the partial list.
*/
add_partial(s, page, 1);
- slab_unlock(page);
+ slab_unlock(page, state);
} else {
- slab_unlock(page);
+ slab_unlock(page, state);
discard_slab(s, page);
}
}
@@ -1392,7 +1425,8 @@ static void unfreeze_slab(struct kmem_ca
/*
* Remove the cpu slab
*/
-static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
+static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c,
+ unsigned long state)
{
struct page *page = c->page;
int tail = 1;
@@ -1420,13 +1454,15 @@ static void deactivate_slab(struct kmem_
page->inuse--;
}
c->page = NULL;
- unfreeze_slab(s, page, tail);
+ unfreeze_slab(s, page, tail, state);
}
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
- slab_lock(c->page);
- deactivate_slab(s, c);
+ unsigned long state;
+
+ state = slab_lock(c->page);
+ deactivate_slab(s, c, state);
}
/*
@@ -1474,6 +1510,48 @@ static inline int node_match(struct kmem
return 1;
}
+/* Allocate a new slab and make it the current cpu slab */
+static noinline unsigned long get_new_slab(struct kmem_cache *s,
+ struct kmem_cache_cpu **pc, gfp_t gfpflags, int node)
+{
+ struct kmem_cache_cpu *c = *pc;
+ struct page *page;
+
+ if (gfpflags & __GFP_WAIT)
+ local_irq_enable();
+
+ page = new_slab(s, gfpflags, node);
+
+ if (gfpflags & __GFP_WAIT)
+ local_irq_disable();
+
+ if (!page)
+ return 0;
+
+ *pc = c = get_cpu_slab(s, smp_processor_id());
+ if (c->page) {
+ /*
+ * Someone else populated the cpu_slab while we
+ * enabled interrupts, or we have gotten scheduled
+ * on another cpu. The page may not be on the
+ * requested node even if __GFP_THISNODE was
+ * specified. So we need to recheck.
+ */
+ if (node_match(c, node)) {
+ /*
+ * Current cpuslab is acceptable and we
+ * want the current one since its cache hot
+ */
+ discard_slab(s, page);
+ return slab_lock(c->page);
+ }
+ /* New slab does not fit our expectations */
+ flush_slab(s, c);
+ }
+ c->page = page;
+ return slab_lock(page) | FROZEN;
+}
+
/*
* Slow path. The lockless freelist is empty or we need to perform
* debugging duties.
@@ -1495,7 +1573,7 @@ static void *__slab_alloc(struct kmem_ca
gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c)
{
void **object;
- struct page *new;
+ unsigned long state;
#ifdef CONFIG_FAST_CMPXCHG_LOCAL
unsigned long flags;
@@ -1505,14 +1583,14 @@ static void *__slab_alloc(struct kmem_ca
if (!c->page)
goto new_slab;
- slab_lock(c->page);
+ state = slab_lock(c->page);
if (unlikely(!node_match(c, node)))
goto another_slab;
load_freelist:
object = c->page->freelist;
if (unlikely(object == c->page->end))
goto another_slab;
- if (unlikely(SlabDebug(c->page)))
+ if (unlikely(state & SLABDEBUG))
goto debug;
object = c->page->freelist;
@@ -1521,7 +1599,7 @@ load_freelist:
c->page->freelist = c->page->end;
c->node = page_to_nid(c->page);
unlock_out:
- slab_unlock(c->page);
+ slab_unlock(c->page, state);
out:
#ifdef CONFIG_FAST_CMPXCHG_LOCAL
preempt_disable();
@@ -1530,50 +1608,17 @@ out:
return object;
another_slab:
- deactivate_slab(s, c);
+ deactivate_slab(s, c, state);
new_slab:
- new = get_partial(s, gfpflags, node);
- if (new) {
- c->page = new;
+ state = get_partial(s, c, gfpflags, node);
+ if (state)
goto load_freelist;
- }
-
- if (gfpflags & __GFP_WAIT)
- local_irq_enable();
-
- new = new_slab(s, gfpflags, node);
- if (gfpflags & __GFP_WAIT)
- local_irq_disable();
-
- if (new) {
- c = get_cpu_slab(s, smp_processor_id());
- if (c->page) {
- /*
- * Someone else populated the cpu_slab while we
- * enabled interrupts, or we have gotten scheduled
- * on another cpu. The page may not be on the
- * requested node even if __GFP_THISNODE was
- * specified. So we need to recheck.
- */
- if (node_match(c, node)) {
- /*
- * Current cpuslab is acceptable and we
- * want the current one since its cache hot
- */
- discard_slab(s, new);
- slab_lock(c->page);
- goto load_freelist;
- }
- /* New slab does not fit our expectations */
- flush_slab(s, c);
- }
- slab_lock(new);
- SetSlabFrozen(new);
- c->page = new;
+ state = get_new_slab(s, &c, gfpflags, node);
+ if (state)
goto load_freelist;
- }
+
object = NULL;
goto out;
debug:
@@ -1670,22 +1715,23 @@ static void __slab_free(struct kmem_cach
{
void *prior;
void **object = (void *)x;
+ unsigned long state;
#ifdef CONFIG_FAST_CMPXCHG_LOCAL
unsigned long flags;
local_irq_save(flags);
#endif
- slab_lock(page);
+ state = slab_lock(page);
- if (unlikely(SlabDebug(page)))
+ if (unlikely(state & SLABDEBUG))
goto debug;
checks_ok:
prior = object[offset] = page->freelist;
page->freelist = object;
page->inuse--;
- if (unlikely(SlabFrozen(page)))
+ if (unlikely(state & FROZEN))
goto out_unlock;
if (unlikely(!page->inuse))
@@ -1700,7 +1746,7 @@ checks_ok:
add_partial(s, page, 0);
out_unlock:
- slab_unlock(page);
+ slab_unlock(page, state);
#ifdef CONFIG_FAST_CMPXCHG_LOCAL
local_irq_restore(flags);
#endif
@@ -1713,7 +1759,7 @@ slab_empty:
*/
remove_partial(s, page);
- slab_unlock(page);
+ slab_unlock(page, state);
#ifdef CONFIG_FAST_CMPXCHG_LOCAL
local_irq_restore(flags);
#endif
@@ -1721,7 +1767,7 @@ slab_empty:
return;
debug:
- if (!free_debug_processing(s, page, x, addr))
+ if (!free_debug_processing(s, page, x, addr, state))
goto out_unlock;
goto checks_ok;
}
@@ -2741,6 +2787,7 @@ int kmem_cache_shrink(struct kmem_cache
struct list_head *slabs_by_inuse =
kmalloc(sizeof(struct list_head) * s->objects, GFP_KERNEL);
unsigned long flags;
+ unsigned long state;
if (!slabs_by_inuse)
return -ENOMEM;
@@ -2764,7 +2811,7 @@ int kmem_cache_shrink(struct kmem_cache
* list_lock. page->inuse here is the upper limit.
*/
list_for_each_entry_safe(page, t, &n->partial, lru) {
- if (!page->inuse && slab_trylock(page)) {
+ if (!page->inuse && (state = slab_trylock(page))) {
/*
* Must hold slab lock here because slab_free
* may have freed the last object and be
@@ -2772,7 +2819,7 @@ int kmem_cache_shrink(struct kmem_cache
*/
list_del(&page->lru);
n->nr_partial--;
- slab_unlock(page);
+ slab_unlock(page, state);
discard_slab(s, page);
} else {
list_move(&page->lru,
@@ -3222,19 +3269,22 @@ static int validate_slab(struct kmem_cac
static void validate_slab_slab(struct kmem_cache *s, struct page *page,
unsigned long *map)
{
- if (slab_trylock(page)) {
+ unsigned long state;
+
+ state = slab_trylock(page);
+ if (state) {
validate_slab(s, page, map);
- slab_unlock(page);
+ slab_unlock(page, state);
} else
printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n",
s->name, page);
if (s->flags & DEBUG_DEFAULT_FLAGS) {
- if (!SlabDebug(page))
+ if (!(state & SLABDEBUG))
printk(KERN_ERR "SLUB %s: SlabDebug not set "
"on slab 0x%p\n", s->name, page);
} else {
- if (SlabDebug(page))
+ if (state & SLABDEBUG)
printk(KERN_ERR "SLUB %s: SlabDebug set on "
"slab 0x%p\n", s->name, page);
}
--
* Re: [patch 09/10] SLUB: Do our own locking via slab_lock and slab_unlock.
2007-10-28 3:32 ` [patch 09/10] SLUB: Do our own locking via slab_lock and slab_unlock Christoph Lameter
@ 2007-10-28 15:10 ` Pekka J Enberg
2007-10-28 15:14 ` Pekka J Enberg
2007-10-29 3:03 ` Christoph Lameter
2007-10-30 4:50 ` Nick Piggin
1 sibling, 2 replies; 35+ messages in thread
From: Pekka J Enberg @ 2007-10-28 15:10 UTC (permalink / raw)
To: Christoph Lameter; +Cc: Matthew Wilcox, akpm, linux-kernel, linux-mm
Hi Christoph,
On Sat, 27 Oct 2007, Christoph Lameter wrote:
> Too many troubles with the bitlocks and we really do not need
> to do any bitops. Bitops do not effectively retrieve the old
> value which we want. So use a cmpxchg instead on the arches
> that allow it.
> -static inline int SlabFrozen(struct page *page)
> -{
> - return page->flags & FROZEN;
> -}
> -
> -static inline void SetSlabFrozen(struct page *page)
> -{
> - page->flags |= FROZEN;
> -}
[snip]
It would be easier to review the actual locking changes if you did the
SlabXXX removal in a separate patch.
> +#ifdef __HAVE_ARCH_CMPXCHG
> /*
> * Per slab locking using the pagelock
> */
> -static __always_inline void slab_lock(struct page *page)
> +static __always_inline void slab_unlock(struct page *page,
> + unsigned long state)
> {
> - bit_spin_lock(PG_locked, &page->flags);
> + smp_wmb();
Memory barriers deserve a comment. I suppose this is protecting
page->flags but against what?
> + page->flags = state;
> + preempt_enable();
We don't need preempt_enable for CONFIG_SMP, right?
> + __release(bitlock);
This needs a less generic name and maybe a comment explaining that it's
not annotating a proper lock? Or maybe we can drop it completely?
> +static __always_inline unsigned long slab_trylock(struct page *page)
> +{
> + unsigned long state;
> +
> + preempt_disable();
> + state = page->flags & ~LOCKED;
> +#ifdef CONFIG_SMP
> + if (cmpxchg(&page->flags, state, state | LOCKED) != state) {
> + preempt_enable();
> + return 0;
> + }
> +#endif
This is hairy. Perhaps it would be cleaner to have totally separate
functions for SMP and UP instead?
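For illustration, one possible shape of that split (a sketch derived from the
hunk above, not code from the patch):

#ifdef CONFIG_SMP
static __always_inline unsigned long slab_trylock(struct page *page)
{
	unsigned long state;

	preempt_disable();
	state = page->flags & ~LOCKED;
	if (cmpxchg(&page->flags, state, state | LOCKED) != state) {
		preempt_enable();
		return 0;
	}
	__acquire(bitlock);
	return state;
}
#else
static __always_inline unsigned long slab_trylock(struct page *page)
{
	preempt_disable();
	__acquire(bitlock);
	/* UP: no other CPU, preemption is off, so the trylock always succeeds */
	return page->flags & ~LOCKED;
}
#endif

The behaviour is the same as the combined version; the question is only
whether the duplication reads better than the #ifdef in the middle of one
function.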
> -static __always_inline void slab_unlock(struct page *page)
> +static __always_inline unsigned long slab_lock(struct page *page)
> {
> - bit_spin_unlock(PG_locked, &page->flags);
> + unsigned long state;
> +
> + preempt_disable();
> +#ifdef CONFIG_SMP
> + do {
> + state = page->flags & ~LOCKED;
> + } while (cmpxchg(&page->flags, state, state | LOCKED) != state);
> +#else
> + state = page->flags & ~LOCKED;
> +#endif
Same here.
Pekka
2007-10-28 15:10 ` Pekka J Enberg
@ 2007-10-28 15:14 ` Pekka J Enberg
2007-10-29 3:03 ` Christoph Lameter
1 sibling, 0 replies; 35+ messages in thread
From: Pekka J Enberg @ 2007-10-28 15:14 UTC (permalink / raw)
To: Christoph Lameter; +Cc: Matthew Wilcox, akpm, linux-kernel, linux-mm
On Sun, 28 Oct 2007, Pekka J Enberg wrote:
> > + __release(bitlock);
>
> This needs a less generic name and maybe a comment explaining that it's
> not annotating a proper lock? Or maybe we can drop it completely?
Ah, I see that <linux/bit_spinlock.h> does the same thing, so strike that.
* Re: [patch 09/10] SLUB: Do our own locking via slab_lock and slab_unlock.
2007-10-28 15:10 ` Pekka J Enberg
2007-10-28 15:14 ` Pekka J Enberg
@ 2007-10-29 3:03 ` Christoph Lameter
2007-10-29 6:30 ` Pekka Enberg
1 sibling, 1 reply; 35+ messages in thread
From: Christoph Lameter @ 2007-10-29 3:03 UTC (permalink / raw)
To: Pekka J Enberg; +Cc: Matthew Wilcox, akpm, linux-kernel, linux-mm
On Sun, 28 Oct 2007, Pekka J Enberg wrote:
> It would be easier to review the actual locking changes if you did the
> SlabXXX removal in a separate patch.
There are no locking changes.
> > -static __always_inline void slab_lock(struct page *page)
> > +static __always_inline void slab_unlock(struct page *page,
> > + unsigned long state)
> > {
> > - bit_spin_lock(PG_locked, &page->flags);
> > + smp_wmb();
>
> Memory barriers deserve a comment. I suppose this is protecting
> page->flags but against what?
It's making sure that the changes to page flags are made visible after all
other changes.
>
> > + page->flags = state;
> > + preempt_enable();
>
> We don't need preempt_enable for CONFIG_SMP, right?
preempt_enable is needed if preemption is enabled.
>
> > + __release(bitlock);
>
> This needs a less generic name and maybe a comment explaining that it's
> not annotating a proper lock? Or maybe we can drop it completely?
Probably.
> > +static __always_inline unsigned long slab_trylock(struct page *page)
> > +{
> > + unsigned long state;
> > +
> > + preempt_disable();
> > + state = page->flags & ~LOCKED;
> > +#ifdef CONFIG_SMP
> > + if (cmpxchg(&page->flags, state, state | LOCKED) != state) {
> > + preempt_enable();
> > + return 0;
> > + }
> > +#endif
>
> This is hairy. Perhaps it would be cleaner to have totally separate
> functions for SMP and UP instead?
I think that is reasonably clear. Having code duplicated is not good
either. Well we may have to clean this up a bit.
2007-10-29 3:03 ` Christoph Lameter
@ 2007-10-29 6:30 ` Pekka Enberg
0 siblings, 0 replies; 35+ messages in thread
From: Pekka Enberg @ 2007-10-29 6:30 UTC (permalink / raw)
To: Christoph Lameter; +Cc: Matthew Wilcox, akpm, linux-kernel, linux-mm
Hi,
On 10/29/07, Christoph Lameter <clameter@sgi.com> wrote:
> > We don't need preempt_enable for CONFIG_SMP, right?
>
> preempt_enable is needed if preemption is enabled.
Disabled? But yeah, I see that slab_trylock() can leave preemption
disabled if cmpxchg fails. Was confused by the #ifdefs... :-)
* Re: [patch 09/10] SLUB: Do our own locking via slab_lock and slab_unlock.
2007-10-28 3:32 ` [patch 09/10] SLUB: Do our own locking via slab_lock and slab_unlock Christoph Lameter
2007-10-28 15:10 ` Pekka J Enberg
@ 2007-10-30 4:50 ` Nick Piggin
2007-10-30 18:32 ` Christoph Lameter
1 sibling, 1 reply; 35+ messages in thread
From: Nick Piggin @ 2007-10-30 4:50 UTC (permalink / raw)
To: Christoph Lameter
Cc: Matthew Wilcox, akpm, linux-kernel, linux-mm, Pekka Enberg
On Sunday 28 October 2007 14:32, Christoph Lameter wrote:
> Too many troubles with the bitlocks and we really do not need
> to do any bitops. Bitops do not effectively retrieve the old
> value which we want. So use a cmpxchg instead on the arches
> that allow it.
>
> Instead of modifying the page->flags with fake atomic operations
> we pass the page state as a parameter to functions. No function uses
> the slab state if the page lock is held. Function must wait until the
> lock is cleared. Thus we can defer the update of page->flags until slab
> processing is complete. The "unlock" operation is then simply updating
> page->flags.
Is this actually a speedup on any architecture to roll your own locking
rather than using bit spinlock?
I am not exactly convinced that smp_wmb() is a good idea to have in your
unlock, rather than the normally required smp_mb() that every other open
coded lock in the kernel is using today. If you comment every code path
where a load leaking out of the critical section would not be a problem,
then OK it may be correct, but I still don't think it is worth the
maintenance overhead.
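To make the load-leak case concrete, an annotated sketch of the pattern in
question (simplified and with a made-up name, not the actual mm/slub.c code):

/*
 * smp_wmb() orders prior STORES against the releasing store, but it
 * does not order prior LOADS against it.
 */
static void unlock_sketch(struct page *page, unsigned long state)
{
	int busy = page->inuse;		/* load inside the critical section  */

	page->freelist = NULL;		/* store inside the critical section */

	smp_wmb();			/* orders the store above ...        */
	page->flags = state;		/* ... against this releasing store  */

	/*
	 * Nothing orders the 'busy' load against the releasing store, so
	 * it may effectively complete after the lock has been dropped and
	 * observe an update made by the next lock holder.  That is the
	 * "load leaking out of the critical section" case; a full smp_mb()
	 * before the flags store closes it.
	 */
	(void)busy;
}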
>
> Signed-off-by: Christoph Lameter <clameter@sgi.com>
>
>
> ---
> mm/slub.c | 324
> +++++++++++++++++++++++++++++++++++--------------------------- 1 file
> changed, 187 insertions(+), 137 deletions(-)
>
> Index: linux-2.6/mm/slub.c
> ===================================================================
> --- linux-2.6.orig/mm/slub.c 2007-10-27 07:56:03.000000000 -0700
> +++ linux-2.6/mm/slub.c 2007-10-27 07:56:52.000000000 -0700
> @@ -101,6 +101,7 @@
> */
>
> #define FROZEN (1 << PG_active)
> +#define LOCKED (1 << PG_locked)
>
> #ifdef CONFIG_SLUB_DEBUG
> #define SLABDEBUG (1 << PG_error)
> @@ -108,36 +109,6 @@
> #define SLABDEBUG 0
> #endif
>
> -static inline int SlabFrozen(struct page *page)
> -{
> - return page->flags & FROZEN;
> -}
> -
> -static inline void SetSlabFrozen(struct page *page)
> -{
> - page->flags |= FROZEN;
> -}
> -
> -static inline void ClearSlabFrozen(struct page *page)
> -{
> - page->flags &= ~FROZEN;
> -}
> -
> -static inline int SlabDebug(struct page *page)
> -{
> - return page->flags & SLABDEBUG;
> -}
> -
> -static inline void SetSlabDebug(struct page *page)
> -{
> - page->flags |= SLABDEBUG;
> -}
> -
> -static inline void ClearSlabDebug(struct page *page)
> -{
> - page->flags &= ~SLABDEBUG;
> -}
> -
> /*
> * Issues still to be resolved:
> *
> @@ -818,11 +789,12 @@ static void trace(struct kmem_cache *s,
> /*
> * Tracking of fully allocated slabs for debugging purposes.
> */
> -static void add_full(struct kmem_cache *s, struct page *page)
> +static void add_full(struct kmem_cache *s, struct page *page,
> + unsigned long state)
> {
> struct kmem_cache_node *n = get_node(s, page_to_nid(page));
>
> - if (!SlabDebug(page) || !(s->flags & SLAB_STORE_USER))
> + if (!(state & SLABDEBUG) || !(s->flags & SLAB_STORE_USER))
> return;
> spin_lock(&n->list_lock);
> list_add(&page->lru, &n->full);
> @@ -894,7 +866,7 @@ bad:
> }
>
> static noinline int free_debug_processing(struct kmem_cache *s,
> - struct page *page, void *object, void *addr)
> + struct page *page, void *object, void *addr, unsigned long state)
> {
> if (!check_slab(s, page))
> goto fail;
> @@ -930,7 +902,7 @@ static noinline int free_debug_processin
> }
>
> /* Special debug activities for freeing objects */
> - if (!SlabFrozen(page) && page->freelist == page->end)
> + if (!(state & FROZEN) && page->freelist == page->end)
> remove_full(s, page);
> if (s->flags & SLAB_STORE_USER)
> set_track(s, object, TRACK_FREE, addr);
> @@ -1047,7 +1019,8 @@ static inline int slab_pad_check(struct
> { return 1; }
> static inline int check_object(struct kmem_cache *s, struct page *page,
> void *object, int active) { return 1; }
> -static inline void add_full(struct kmem_cache *s, struct page *page) {}
> +static inline void add_full(struct kmem_cache *s, struct page *page,
> + unsigned long state) {}
> static inline unsigned long kmem_cache_flags(unsigned long objsize,
> unsigned long flags, const char *name,
> void (*ctor)(struct kmem_cache *, void *))
> @@ -1105,6 +1078,7 @@ static noinline struct page *new_slab(st
> void *start;
> void *last;
> void *p;
> + unsigned long state;
>
> BUG_ON(flags & GFP_SLAB_BUG_MASK);
>
> @@ -1117,11 +1091,12 @@ static noinline struct page *new_slab(st
> if (n)
> atomic_long_inc(&n->nr_slabs);
> page->slab = s;
> - page->flags |= 1 << PG_slab;
> + state = 1 << PG_slab;
> if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
> SLAB_STORE_USER | SLAB_TRACE))
> - SetSlabDebug(page);
> + state |= SLABDEBUG;
>
> + page->flags |= state;
> start = page_address(page);
> page->end = start + 1;
>
> @@ -1147,13 +1122,13 @@ static void __free_slab(struct kmem_cach
> {
> int pages = 1 << s->order;
>
> - if (unlikely(SlabDebug(page))) {
> + if (unlikely(page->flags & SLABDEBUG)) {
> void *p;
>
> slab_pad_check(s, page);
> for_each_object(p, s, slab_address(page))
> check_object(s, page, p, 0);
> - ClearSlabDebug(page);
> + page->flags &= ~SLABDEBUG;
> }
>
> mod_zone_page_state(page_zone(page),
> @@ -1196,27 +1171,73 @@ static void discard_slab(struct kmem_cac
> free_slab(s, page);
> }
>
> +#ifdef __HAVE_ARCH_CMPXCHG
> /*
> * Per slab locking using the pagelock
> */
> -static __always_inline void slab_lock(struct page *page)
> +static __always_inline void slab_unlock(struct page *page,
> + unsigned long state)
> {
> - bit_spin_lock(PG_locked, &page->flags);
> + smp_wmb();
> + page->flags = state;
> + preempt_enable();
> + __release(bitlock);
> +}
> +
> +static __always_inline unsigned long slab_trylock(struct page *page)
> +{
> + unsigned long state;
> +
> + preempt_disable();
> + state = page->flags & ~LOCKED;
> +#ifdef CONFIG_SMP
> + if (cmpxchg(&page->flags, state, state | LOCKED) != state) {
> + preempt_enable();
> + return 0;
> + }
> +#endif
> + __acquire(bitlock);
> + return state;
> }
>
> -static __always_inline void slab_unlock(struct page *page)
> +static __always_inline unsigned long slab_lock(struct page *page)
> {
> - bit_spin_unlock(PG_locked, &page->flags);
> + unsigned long state;
> +
> + preempt_disable();
> +#ifdef CONFIG_SMP
> + do {
> + state = page->flags & ~LOCKED;
> + } while (cmpxchg(&page->flags, state, state | LOCKED) != state);
> +#else
> + state = page->flags & ~LOCKED;
> +#endif
> + __acquire(bitlock);
> + return state;
> }
>
> -static __always_inline int slab_trylock(struct page *page)
> +#else
> +static __always_inline void slab_unlock(struct page *page,
> + unsigned long state)
> {
> - int rc = 1;
> + page->flags = state;
> + __bit_spin_unlock(PG_locked, &page->flags);
> +}
>
> - rc = bit_spin_trylock(PG_locked, &page->flags);
> - return rc;
> +static __always_inline unsigned long slab_trylock(struct page *page)
> +{
> + if (!bit_spin_trylock(PG_locked, &page->flags))
> + return 0;
> + return page->flags;
> }
>
> +static __always_inline unsigned long slab_lock(struct page *page)
> +{
> + bit_spin_lock(PG_locked, &page->flags);
> + return page->flags;
> +}
> +#endif
> +
> /*
> * Management of partially allocated slabs
> */
> @@ -1250,13 +1271,17 @@ static noinline void remove_partial(stru
> *
> * Must hold list_lock.
> */
> -static inline int lock_and_freeze_slab(struct kmem_cache_node *n, struct
> page *page) +static inline unsigned long lock_and_freeze_slab(struct
> kmem_cache_node *n, + struct kmem_cache_cpu *c, struct page *page)
> {
> - if (slab_trylock(page)) {
> + unsigned long state;
> +
> + state = slab_trylock(page);
> + if (state) {
> list_del(&page->lru);
> n->nr_partial--;
> - SetSlabFrozen(page);
> - return 1;
> + c->page = page;
> + return state | FROZEN;
> }
> return 0;
> }
> @@ -1264,9 +1289,11 @@ static inline int lock_and_freeze_slab(s
> /*
> * Try to allocate a partial slab from a specific node.
> */
> -static struct page *get_partial_node(struct kmem_cache_node *n)
> +static unsigned long get_partial_node(struct kmem_cache_node *n,
> + struct kmem_cache_cpu *c)
> {
> struct page *page;
> + unsigned long state;
>
> /*
> * Racy check. If we mistakenly see no partial slabs then we
> @@ -1275,27 +1302,30 @@ static struct page *get_partial_node(str
> * will return NULL.
> */
> if (!n || !n->nr_partial)
> - return NULL;
> + return 0;
>
> spin_lock(&n->list_lock);
> - list_for_each_entry(page, &n->partial, lru)
> - if (lock_and_freeze_slab(n, page))
> + list_for_each_entry(page, &n->partial, lru) {
> + state = lock_and_freeze_slab(n, c, page);
> + if (state)
> goto out;
> - page = NULL;
> + }
> + state = 0;
> out:
> spin_unlock(&n->list_lock);
> - return page;
> + return state;
> }
>
> /*
> * Get a page from somewhere. Search in increasing NUMA distances.
> */
> -static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
> +static unsigned long get_any_partial(struct kmem_cache *s,
> + struct kmem_cache_cpu *c, gfp_t flags)
> {
> #ifdef CONFIG_NUMA
> struct zonelist *zonelist;
> struct zone **z;
> - struct page *page;
> + unsigned long state;
>
> /*
> * The defrag ratio allows a configuration of the tradeoffs between
> @@ -1316,7 +1346,7 @@ static struct page *get_any_partial(stru
> * with available objects.
> */
> if (!s->defrag_ratio || get_cycles() % 1024 > s->defrag_ratio)
> - return NULL;
> + return 0;
>
> zonelist = &NODE_DATA(slab_node(current->mempolicy))
> ->node_zonelists[gfp_zone(flags)];
> @@ -1327,28 +1357,30 @@ static struct page *get_any_partial(stru
>
> if (n && cpuset_zone_allowed_hardwall(*z, flags) &&
> n->nr_partial > MIN_PARTIAL) {
> - page = get_partial_node(n);
> - if (page)
> - return page;
> + state = get_partial_node(n, c);
> + if (state)
> + return state;
> }
> }
> #endif
> - return NULL;
> + return 0;
> }
>
> /*
> - * Get a partial page, lock it and return it.
> + * Get a partial page, lock it and make it the current cpu slab.
> */
> -static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int
> node) +static noinline unsigned long get_partial(struct kmem_cache *s,
> + struct kmem_cache_cpu *c, gfp_t flags, int node)
> {
> - struct page *page;
> + unsigned long state;
> int searchnode = (node == -1) ? numa_node_id() : node;
>
> - page = get_partial_node(get_node(s, searchnode));
> - if (page || (flags & __GFP_THISNODE))
> - return page;
> -
> - return get_any_partial(s, flags);
> + state = get_partial_node(get_node(s, searchnode), c);
> + if (!state && !(flags & __GFP_THISNODE))
> + state = get_any_partial(s, c, flags);
> + if (!state)
> + return 0;
> + return state;
> }
>
> /*
> @@ -1358,16 +1390,17 @@ static struct page *get_partial(struct k
> *
> * On exit the slab lock will have been dropped.
> */
> -static void unfreeze_slab(struct kmem_cache *s, struct page *page, int
> tail) +static void unfreeze_slab(struct kmem_cache *s, struct page *page,
> + int tail, unsigned long state)
> {
> - ClearSlabFrozen(page);
> + state &= ~FROZEN;
> if (page->inuse) {
>
> if (page->freelist != page->end)
> add_partial(s, page, tail);
> else
> - add_full(s, page);
> - slab_unlock(page);
> + add_full(s, page, state);
> + slab_unlock(page, state);
>
> } else {
> if (get_node(s, page_to_nid(page))->nr_partial
> @@ -1381,9 +1414,9 @@ static void unfreeze_slab(struct kmem_ca
> * reclaim empty slabs from the partial list.
> */
> add_partial(s, page, 1);
> - slab_unlock(page);
> + slab_unlock(page, state);
> } else {
> - slab_unlock(page);
> + slab_unlock(page, state);
> discard_slab(s, page);
> }
> }
> @@ -1392,7 +1425,8 @@ static void unfreeze_slab(struct kmem_ca
> /*
> * Remove the cpu slab
> */
> -static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
> +static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c,
> + unsigned long state)
> {
> struct page *page = c->page;
> int tail = 1;
> @@ -1420,13 +1454,15 @@ static void deactivate_slab(struct kmem_
> page->inuse--;
> }
> c->page = NULL;
> - unfreeze_slab(s, page, tail);
> + unfreeze_slab(s, page, tail, state);
> }
>
> static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
> {
> - slab_lock(c->page);
> - deactivate_slab(s, c);
> + unsigned long state;
> +
> + state = slab_lock(c->page);
> + deactivate_slab(s, c, state);
> }
>
> /*
> @@ -1474,6 +1510,48 @@ static inline int node_match(struct kmem
> return 1;
> }
>
> +/* Allocate a new slab and make it the current cpu slab */
> +static noinline unsigned long get_new_slab(struct kmem_cache *s,
> + struct kmem_cache_cpu **pc, gfp_t gfpflags, int node)
> +{
> + struct kmem_cache_cpu *c = *pc;
> + struct page *page;
> +
> + if (gfpflags & __GFP_WAIT)
> + local_irq_enable();
> +
> + page = new_slab(s, gfpflags, node);
> +
> + if (gfpflags & __GFP_WAIT)
> + local_irq_disable();
> +
> + if (!page)
> + return 0;
> +
> + *pc = c = get_cpu_slab(s, smp_processor_id());
> + if (c->page) {
> + /*
> + * Someone else populated the cpu_slab while we
> + * enabled interrupts, or we have gotten scheduled
> + * on another cpu. The page may not be on the
> + * requested node even if __GFP_THISNODE was
> + * specified. So we need to recheck.
> + */
> + if (node_match(c, node)) {
> + /*
> + * Current cpuslab is acceptable and we
> + * want the current one since its cache hot
> + */
> + discard_slab(s, page);
> + return slab_lock(c->page);
> + }
> + /* New slab does not fit our expectations */
> + flush_slab(s, c);
> + }
> + c->page = page;
> + return slab_lock(page) | FROZEN;
> +}
> +
> /*
> * Slow path. The lockless freelist is empty or we need to perform
> * debugging duties.
> @@ -1495,7 +1573,7 @@ static void *__slab_alloc(struct kmem_ca
> gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c)
> {
> void **object;
> - struct page *new;
> + unsigned long state;
> #ifdef CONFIG_FAST_CMPXCHG_LOCAL
> unsigned long flags;
>
> @@ -1505,14 +1583,14 @@ static void *__slab_alloc(struct kmem_ca
> if (!c->page)
> goto new_slab;
>
> - slab_lock(c->page);
> + state = slab_lock(c->page);
> if (unlikely(!node_match(c, node)))
> goto another_slab;
> load_freelist:
> object = c->page->freelist;
> if (unlikely(object == c->page->end))
> goto another_slab;
> - if (unlikely(SlabDebug(c->page)))
> + if (unlikely(state & SLABDEBUG))
> goto debug;
>
> object = c->page->freelist;
> @@ -1521,7 +1599,7 @@ load_freelist:
> c->page->freelist = c->page->end;
> c->node = page_to_nid(c->page);
> unlock_out:
> - slab_unlock(c->page);
> + slab_unlock(c->page, state);
> out:
> #ifdef CONFIG_FAST_CMPXCHG_LOCAL
> preempt_disable();
> @@ -1530,50 +1608,17 @@ out:
> return object;
>
> another_slab:
> - deactivate_slab(s, c);
> + deactivate_slab(s, c, state);
>
> new_slab:
> - new = get_partial(s, gfpflags, node);
> - if (new) {
> - c->page = new;
> + state = get_partial(s, c, gfpflags, node);
> + if (state)
> goto load_freelist;
> - }
> -
> - if (gfpflags & __GFP_WAIT)
> - local_irq_enable();
> -
> - new = new_slab(s, gfpflags, node);
>
> - if (gfpflags & __GFP_WAIT)
> - local_irq_disable();
> -
> - if (new) {
> - c = get_cpu_slab(s, smp_processor_id());
> - if (c->page) {
> - /*
> - * Someone else populated the cpu_slab while we
> - * enabled interrupts, or we have gotten scheduled
> - * on another cpu. The page may not be on the
> - * requested node even if __GFP_THISNODE was
> - * specified. So we need to recheck.
> - */
> - if (node_match(c, node)) {
> - /*
> - * Current cpuslab is acceptable and we
> - * want the current one since its cache hot
> - */
> - discard_slab(s, new);
> - slab_lock(c->page);
> - goto load_freelist;
> - }
> - /* New slab does not fit our expectations */
> - flush_slab(s, c);
> - }
> - slab_lock(new);
> - SetSlabFrozen(new);
> - c->page = new;
> + state = get_new_slab(s, &c, gfpflags, node);
> + if (state)
> goto load_freelist;
> - }
> +
> object = NULL;
> goto out;
> debug:
> @@ -1670,22 +1715,23 @@ static void __slab_free(struct kmem_cach
> {
> void *prior;
> void **object = (void *)x;
> + unsigned long state;
>
> #ifdef CONFIG_FAST_CMPXCHG_LOCAL
> unsigned long flags;
>
> local_irq_save(flags);
> #endif
> - slab_lock(page);
> + state = slab_lock(page);
>
> - if (unlikely(SlabDebug(page)))
> + if (unlikely(state & SLABDEBUG))
> goto debug;
> checks_ok:
> prior = object[offset] = page->freelist;
> page->freelist = object;
> page->inuse--;
>
> - if (unlikely(SlabFrozen(page)))
> + if (unlikely(state & FROZEN))
> goto out_unlock;
>
> if (unlikely(!page->inuse))
> @@ -1700,7 +1746,7 @@ checks_ok:
> add_partial(s, page, 0);
>
> out_unlock:
> - slab_unlock(page);
> + slab_unlock(page, state);
> #ifdef CONFIG_FAST_CMPXCHG_LOCAL
> local_irq_restore(flags);
> #endif
> @@ -1713,7 +1759,7 @@ slab_empty:
> */
> remove_partial(s, page);
>
> - slab_unlock(page);
> + slab_unlock(page, state);
> #ifdef CONFIG_FAST_CMPXCHG_LOCAL
> local_irq_restore(flags);
> #endif
> @@ -1721,7 +1767,7 @@ slab_empty:
> return;
>
> debug:
> - if (!free_debug_processing(s, page, x, addr))
> + if (!free_debug_processing(s, page, x, addr, state))
> goto out_unlock;
> goto checks_ok;
> }
> @@ -2741,6 +2787,7 @@ int kmem_cache_shrink(struct kmem_cache
> struct list_head *slabs_by_inuse =
> kmalloc(sizeof(struct list_head) * s->objects, GFP_KERNEL);
> unsigned long flags;
> + unsigned long state;
>
> if (!slabs_by_inuse)
> return -ENOMEM;
> @@ -2764,7 +2811,7 @@ int kmem_cache_shrink(struct kmem_cache
> * list_lock. page->inuse here is the upper limit.
> */
> list_for_each_entry_safe(page, t, &n->partial, lru) {
> - if (!page->inuse && slab_trylock(page)) {
> + if (!page->inuse && (state = slab_trylock(page))) {
> /*
> * Must hold slab lock here because slab_free
> * may have freed the last object and be
> @@ -2772,7 +2819,7 @@ int kmem_cache_shrink(struct kmem_cache
> */
> list_del(&page->lru);
> n->nr_partial--;
> - slab_unlock(page);
> + slab_unlock(page, state);
> discard_slab(s, page);
> } else {
> list_move(&page->lru,
> @@ -3222,19 +3269,22 @@ static int validate_slab(struct kmem_cac
> static void validate_slab_slab(struct kmem_cache *s, struct page *page,
> unsigned long *map)
> {
> - if (slab_trylock(page)) {
> + unsigned long state;
> +
> + state = slab_trylock(page);
> + if (state) {
> validate_slab(s, page, map);
> - slab_unlock(page);
> + slab_unlock(page, state);
> } else
> printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n",
> s->name, page);
>
> if (s->flags & DEBUG_DEFAULT_FLAGS) {
> - if (!SlabDebug(page))
> + if (!(state & SLABDEBUG))
> printk(KERN_ERR "SLUB %s: SlabDebug not set "
> "on slab 0x%p\n", s->name, page);
> } else {
> - if (SlabDebug(page))
> + if (state & SLABDEBUG)
> printk(KERN_ERR "SLUB %s: SlabDebug set on "
> "slab 0x%p\n", s->name, page);
> }
* Re: [patch 09/10] SLUB: Do our own locking via slab_lock and slab_unlock.
2007-10-30 4:50 ` Nick Piggin
@ 2007-10-30 18:32 ` Christoph Lameter
2007-10-31 1:17 ` Nick Piggin
0 siblings, 1 reply; 35+ messages in thread
From: Christoph Lameter @ 2007-10-30 18:32 UTC (permalink / raw)
To: Nick Piggin; +Cc: Matthew Wilcox, akpm, linux-kernel, linux-mm, Pekka Enberg
On Tue, 30 Oct 2007, Nick Piggin wrote:
> Is this actually a speedup on any architecture to roll your own locking
> rather than using bit spinlock?
It avoids one load from memory when allocating, and the release is simply
writing page->flags back. Fewer instructions.
> I am not exactly convinced that smp_wmb() is a good idea to have in your
> unlock, rather than the normally required smp_mb() that every other open
> coded lock in the kernel is using today. If you comment every code path
> where a load leaking out of the critical section would not be a problem,
> then OK it may be correct, but I still don't think it is worth the
> maintenance overhead.
I thought you agreed that release semantics only require a write barrier?
The issue here is that other processors must see the updates made under the
lock before they see the update to page->flags.
A load leaking out of a critical section is only a problem if the result of
the load is used to update other information before the slab_unlock, or if
the source of the load is overwritten in the critical
section. Neither happens in SLUB.
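To make the argument concrete, here is a hedged sketch of the pattern in question, using the state-returning helpers sketched earlier in this thread; the function name is made up for illustration.

/*
 * Illustration only: what smp_wmb() in the unlock does and does not cover.
 */
static void slab_free_sketch(struct page *page, void **object, int offset)
{
	unsigned long state = slab_lock(page);	/* acquire */

	object[offset] = page->freelist;	/* stores inside the	*/
	page->freelist = object;		/* critical section	*/
	page->inuse--;

	/*
	 * slab_unlock() issues smp_wmb() and writes page->flags back, so the
	 * three stores above become visible before other CPUs see the lock
	 * bit clear.  What smp_wmb() does not forbid is a load from inside
	 * this section being reordered past the releasing store -- the case
	 * debated here, argued above to be harmless for SLUB.
	 */
	slab_unlock(page, state);
}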
* Re: [patch 09/10] SLUB: Do our own locking via slab_lock and slab_unlock.
2007-10-30 18:32 ` Christoph Lameter
@ 2007-10-31 1:17 ` Nick Piggin
0 siblings, 0 replies; 35+ messages in thread
From: Nick Piggin @ 2007-10-31 1:17 UTC (permalink / raw)
To: Christoph Lameter
Cc: Matthew Wilcox, akpm, linux-kernel, linux-mm, Pekka Enberg
On Wednesday 31 October 2007 05:32, Christoph Lameter wrote:
> On Tue, 30 Oct 2007, Nick Piggin wrote:
> > Is this actually a speedup on any architecture to roll your own locking
> > rather than using bit spinlock?
>
> It avoids one load from memory when allocating, and the release is simply
> writing page->flags back. Fewer instructions.
OK, but it probably isn't a measurable speedup, even on microbenchmarks,
right? And many architectures need more barriers around cmpxchg
than they do around a test_and_set_bit_lock, so it may even be slower
on some.
> > I am not exactly convinced that smp_wmb() is a good idea to have in your
> > unlock, rather than the normally required smp_mb() that every other open
> > coded lock in the kernel is using today. If you comment every code path
> > where a load leaking out of the critical section would not be a problem,
> > then OK it may be correct, but I still don't think it is worth the
> > maintenance overhead.
>
> I thought you agreed that release semantics only require a write barrier?
Not in general.
> The issue here is that other processors must see the updates made under the
> lock before they see the update to page->flags.
>
> A load leaking out of a critical section is only a problem if the result of
> the load is used to update other information before the slab_unlock, or if
> the source of the load is overwritten in the critical
> section. Neither happens in SLUB.
That may be the case, but I don't think there is enough performance
justification to add a hack like this. ia64, for example, is going to
do an mf for smp_wmb(), so I doubt it is a clear win.
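For reference, the locking being replaced is (roughly) a bit spinlock on PG_locked; this is a from-memory sketch, so treat the details as approximate.

/* Approximate sketch of the pre-patch slab locking being compared against. */
static inline void slab_lock_old(struct page *page)
{
	bit_spin_lock(PG_locked, &page->flags);
}

static inline void slab_unlock_old(struct page *page)
{
	bit_spin_unlock(PG_locked, &page->flags);
}

bit_spin_unlock() gives the stronger ordering on release that Nick refers to, whereas the smp_wmb()-only unlock orders only the stores; and since ia64 implements smp_wmb() as an mf anyway, the instruction-count saving may not translate into a win there.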
* [patch 10/10] SLUB: Restructure slab alloc
2007-10-28 3:31 [patch 00/10] SLUB: SMP regression tests on Dual Xeon E5345 (8p) and new performance patches Christoph Lameter
` (8 preceding siblings ...)
2007-10-28 3:32 ` [patch 09/10] SLUB: Do our own locking via slab_lock and slab_unlock Christoph Lameter
@ 2007-10-28 3:32 ` Christoph Lameter
9 siblings, 0 replies; 35+ messages in thread
From: Christoph Lameter @ 2007-10-28 3:32 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: akpm, linux-kernel, linux-mm, Pekka Enberg
[-- Attachment #1: slub_restruct_alloc --]
[-- Type: text/plain, Size: 1817 bytes --]
Restructure slab_alloc so that the code flows in the sequence
in which it is usually executed.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
mm/slub.c | 40 ++++++++++++++++++++++++----------------
1 file changed, 24 insertions(+), 16 deletions(-)
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c 2007-10-27 07:58:07.000000000 -0700
+++ linux-2.6/mm/slub.c 2007-10-27 07:58:36.000000000 -0700
@@ -1580,16 +1580,28 @@ static void *__slab_alloc(struct kmem_ca
local_irq_save(flags);
preempt_enable_no_resched();
#endif
- if (!c->page)
- goto new_slab;
+ if (likely(c->page)) {
+ state = slab_lock(c->page);
+
+ if (unlikely(node_match(c, node) &&
+ c->page->freelist != c->page->end))
+ goto load_freelist;
+
+ deactivate_slab(s, c, state);
+ }
+
+another_slab:
+ state = get_partial(s, c, gfpflags, node);
+ if (!state)
+ goto grow_slab;
- state = slab_lock(c->page);
- if (unlikely(!node_match(c, node)))
- goto another_slab;
load_freelist:
- object = c->page->freelist;
- if (unlikely(object == c->page->end))
- goto another_slab;
+ /*
+ * slabs from the partial list must have at least
+ * one free object.
+ */
+ VM_BUG_ON(c->page->freelist == c->page->end);
+
if (unlikely(state & SLABDEBUG))
goto debug;
@@ -1607,20 +1619,16 @@ out:
#endif
return object;
-another_slab:
- deactivate_slab(s, c, state);
-
-new_slab:
- state = get_partial(s, c, gfpflags, node);
- if (state)
- goto load_freelist;
-
+/* Extend the slabcache with a new slab */
+grow_slab:
state = get_new_slab(s, &c, gfpflags, node);
if (state)
goto load_freelist;
object = NULL;
goto out;
+
+/* Perform debugging */
debug:
object = c->page->freelist;
if (!alloc_debug_processing(s, c->page, object, addr))
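As a reading aid, here is a condensed outline of the slow-path order the restructured __slab_alloc above aims for. It omits the debug path and the CONFIG_FAST_CMPXCHG_LOCAL irq/preempt handling, and it is only an outline under the assumptions of the earlier patches, not the actual kernel source.

/* Outline only: intended order of the restructured allocation slow path. */
static void *slab_alloc_slowpath_outline(struct kmem_cache *s,
		gfp_t gfpflags, int node, struct kmem_cache_cpu *c)
{
	void **object;
	unsigned long state;

	/* 1. Reuse the current cpu slab if it matches and has objects. */
	if (likely(c->page)) {
		state = slab_lock(c->page);
		if (node_match(c, node) && c->page->freelist != c->page->end)
			goto load_freelist;
		deactivate_slab(s, c, state);
	}

	/* 2. Otherwise pick up a partial slab ... */
	state = get_partial(s, c, gfpflags, node);
	if (!state) {
		/* 3. ... and only as a last resort grow the cache. */
		state = get_new_slab(s, &c, gfpflags, node);
		if (!state)
			return NULL;
	}

load_freelist:
	/* 4. Hand out one object and refill the per-cpu lockless freelist. */
	object = c->page->freelist;
	c->freelist = object[c->offset];
	c->page->freelist = c->page->end;
	c->node = page_to_nid(c->page);
	slab_unlock(c->page, state);
	return object;
}

The common cases now read top to bottom, which is the stated point of the patch.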
--