* Re: lockdep complaints in slab allocator
@ 2009-11-23 19:00 ` Pekka Enberg
0 siblings, 0 replies; 121+ messages in thread
From: Pekka Enberg @ 2009-11-23 19:00 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: paulmck, linux-mm, cl, mpm, LKML, Nick Piggin
Hi Peter,
On Fri, 2009-11-20 at 16:09 +0100, Peter Zijlstra wrote:
> > Uh, ok, so apparently I was right after all. There's a comment in
> > free_block() above the slab_destroy() call that refers to the comment
> > above alloc_slabmgmt() function definition which explains it all.
> >
> > Long story short: ->slab_cachep never points to the same kmalloc cache
> > we're allocating or freeing from. Where do we need to put the
> > spin_lock_nested() annotation? Would it be enough to just use it in
> > cache_free_alien() for alien->lock or do we need it in
> > cache_flusharray() as well?
>
> You'd have to somehow push the nested state down from the
> kmem_cache_free() call in slab_destroy() to all nc->lock sites below.
That turns out to be _very_ hard. How about something like the following
untested patch which delays slab_destroy() while we're under nc->lock.
Pekka
diff --git a/mm/slab.c b/mm/slab.c
index 7dfa481..6f522e3 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -316,7 +316,7 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
static int drain_freelist(struct kmem_cache *cache,
struct kmem_list3 *l3, int tofree);
static void free_block(struct kmem_cache *cachep, void **objpp, int len,
- int node);
+ int node, struct list_head *to_destroy);
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
static void cache_reap(struct work_struct *unused);
@@ -1002,7 +1002,8 @@ static void free_alien_cache(struct array_cache **ac_ptr)
}
static void __drain_alien_cache(struct kmem_cache *cachep,
- struct array_cache *ac, int node)
+ struct array_cache *ac, int node,
+ struct list_head *to_destroy)
{
struct kmem_list3 *rl3 = cachep->nodelists[node];
@@ -1016,12 +1017,22 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
if (rl3->shared)
transfer_objects(rl3->shared, ac, ac->limit);
- free_block(cachep, ac->entry, ac->avail, node);
+ free_block(cachep, ac->entry, ac->avail, node, to_destroy);
ac->avail = 0;
spin_unlock(&rl3->list_lock);
}
}
+static void slab_destroy(struct kmem_cache *, struct slab *);
+
+static void destroy_slabs(struct kmem_cache *cache, struct list_head *to_destroy)
+{
+ struct slab *slab, *tmp;
+
+ list_for_each_entry_safe(slab, tmp, to_destroy, list)
+ slab_destroy(cache, slab);
+}
+
/*
* Called from cache_reap() to regularly drain alien caches round robin.
*/
@@ -1033,8 +1044,11 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
struct array_cache *ac = l3->alien[node];
if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
- __drain_alien_cache(cachep, ac, node);
+ LIST_HEAD(to_destroy);
+
+ __drain_alien_cache(cachep, ac, node, &to_destroy);
spin_unlock_irq(&ac->lock);
+ destroy_slabs(cachep, &to_destroy);
}
}
}
@@ -1049,9 +1063,12 @@ static void drain_alien_cache(struct kmem_cache *cachep,
for_each_online_node(i) {
ac = alien[i];
if (ac) {
+ LIST_HEAD(to_destroy);
+
spin_lock_irqsave(&ac->lock, flags);
- __drain_alien_cache(cachep, ac, i);
+ __drain_alien_cache(cachep, ac, i, &to_destroy);
spin_unlock_irqrestore(&ac->lock, flags);
+ destroy_slabs(cachep, &to_destroy);
}
}
}
@@ -1076,17 +1093,20 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
l3 = cachep->nodelists[node];
STATS_INC_NODEFREES(cachep);
if (l3->alien && l3->alien[nodeid]) {
+ LIST_HEAD(to_destroy);
+
alien = l3->alien[nodeid];
spin_lock(&alien->lock);
if (unlikely(alien->avail == alien->limit)) {
STATS_INC_ACOVERFLOW(cachep);
- __drain_alien_cache(cachep, alien, nodeid);
+ __drain_alien_cache(cachep, alien, nodeid, &to_destroy);
}
alien->entry[alien->avail++] = objp;
spin_unlock(&alien->lock);
+ destroy_slabs(cachep, &to_destroy);
} else {
spin_lock(&(cachep->nodelists[nodeid])->list_lock);
- free_block(cachep, &objp, 1, nodeid);
+ free_block(cachep, &objp, 1, nodeid, NULL);
spin_unlock(&(cachep->nodelists[nodeid])->list_lock);
}
return 1;
@@ -1118,7 +1138,7 @@ static void __cpuinit cpuup_canceled(long cpu)
/* Free limit for this kmem_list3 */
l3->free_limit -= cachep->batchcount;
if (nc)
- free_block(cachep, nc->entry, nc->avail, node);
+ free_block(cachep, nc->entry, nc->avail, node, NULL);
if (!cpus_empty(*mask)) {
spin_unlock_irq(&l3->list_lock);
@@ -1128,7 +1148,7 @@ static void __cpuinit cpuup_canceled(long cpu)
shared = l3->shared;
if (shared) {
free_block(cachep, shared->entry,
- shared->avail, node);
+ shared->avail, node, NULL);
l3->shared = NULL;
}
@@ -2402,7 +2422,7 @@ static void do_drain(void *arg)
check_irq_off();
ac = cpu_cache_get(cachep);
spin_lock(&cachep->nodelists[node]->list_lock);
- free_block(cachep, ac->entry, ac->avail, node);
+ free_block(cachep, ac->entry, ac->avail, node, NULL);
spin_unlock(&cachep->nodelists[node]->list_lock);
ac->avail = 0;
}
@@ -3410,7 +3430,7 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
* Caller needs to acquire correct kmem_list's list_lock
*/
static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
- int node)
+ int node, struct list_head *to_destroy)
{
int i;
struct kmem_list3 *l3;
@@ -3439,7 +3459,10 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
* a different cache, refer to comments before
* alloc_slabmgmt.
*/
- slab_destroy(cachep, slabp);
+ if (to_destroy)
+ list_add(&slabp->list, to_destroy);
+ else
+ slab_destroy(cachep, slabp);
} else {
list_add(&slabp->list, &l3->slabs_free);
}
@@ -3479,7 +3502,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
}
}
- free_block(cachep, ac->entry, batchcount, node);
+ free_block(cachep, ac->entry, batchcount, node, NULL);
free_done:
#if STATS
{
@@ -3822,7 +3845,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
if (shared)
free_block(cachep, shared->entry,
- shared->avail, node);
+ shared->avail, node, NULL);
l3->shared = new_shared;
if (!l3->alien) {
@@ -3925,7 +3948,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
if (!ccold)
continue;
spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
- free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
+ free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i), NULL);
spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
kfree(ccold);
}
@@ -4007,7 +4030,7 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
tofree = force ? ac->avail : (ac->limit + 4) / 5;
if (tofree > ac->avail)
tofree = (ac->avail + 1) / 2;
- free_block(cachep, ac->entry, tofree, node);
+ free_block(cachep, ac->entry, tofree, node, NULL);
ac->avail -= tofree;
memmove(ac->entry, &(ac->entry[tofree]),
sizeof(void *) * ac->avail);
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 121+ messages in thread* Re: lockdep complaints in slab allocator
2009-11-23 19:00 ` Pekka Enberg
@ 2009-11-23 19:10 ` Matt Mackall
-1 siblings, 0 replies; 121+ messages in thread
From: Matt Mackall @ 2009-11-23 19:10 UTC (permalink / raw)
To: Pekka Enberg; +Cc: Peter Zijlstra, paulmck, linux-mm, cl, LKML, Nick Piggin
On Mon, 2009-11-23 at 21:00 +0200, Pekka Enberg wrote:
> Hi Peter,
>
> On Fri, 2009-11-20 at 16:09 +0100, Peter Zijlstra wrote:
> > > Uh, ok, so apparently I was right after all. There's a comment in
> > > free_block() above the slab_destroy() call that refers to the comment
> > > above alloc_slabmgmt() function definition which explains it all.
> > >
> > > Long story short: ->slab_cachep never points to the same kmalloc cache
> > > we're allocating or freeing from. Where do we need to put the
> > > spin_lock_nested() annotation? Would it be enough to just use it in
> > > cache_free_alien() for alien->lock or do we need it in
> > > cache_flusharray() as well?
> >
> > You'd have to somehow push the nested state down from the
> > kmem_cache_free() call in slab_destroy() to all nc->lock sites below.
>
> That turns out to be _very_ hard. How about something like the following
> untested patch which delays slab_destroy() while we're under nc->lock.
>
> Pekka
This seems like a lot of work to paper over a lockdep false positive in
code that should be firmly in the maintenance end of its lifecycle? I'd
rather the fix or papering over happen in lockdep.
Introducing extra cacheline pressure by passing to_destroy around also
seems like a good way to trickle away SLAB's narrow remaining
performance advantages.
>
> diff --git a/mm/slab.c b/mm/slab.c
> index 7dfa481..6f522e3 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -316,7 +316,7 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
> static int drain_freelist(struct kmem_cache *cache,
> struct kmem_list3 *l3, int tofree);
> static void free_block(struct kmem_cache *cachep, void **objpp, int len,
> - int node);
> + int node, struct list_head *to_destroy);
> static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
> static void cache_reap(struct work_struct *unused);
>
> @@ -1002,7 +1002,8 @@ static void free_alien_cache(struct array_cache **ac_ptr)
> }
>
> static void __drain_alien_cache(struct kmem_cache *cachep,
> - struct array_cache *ac, int node)
> + struct array_cache *ac, int node,
> + struct list_head *to_destroy)
> {
> struct kmem_list3 *rl3 = cachep->nodelists[node];
>
> @@ -1016,12 +1017,22 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
> if (rl3->shared)
> transfer_objects(rl3->shared, ac, ac->limit);
>
> - free_block(cachep, ac->entry, ac->avail, node);
> + free_block(cachep, ac->entry, ac->avail, node, to_destroy);
> ac->avail = 0;
> spin_unlock(&rl3->list_lock);
> }
> }
>
> +static void slab_destroy(struct kmem_cache *, struct slab *);
> +
> +static void destroy_slabs(struct kmem_cache *cache, struct list_head *to_destroy)
> +{
> + struct slab *slab, *tmp;
> +
> + list_for_each_entry_safe(slab, tmp, to_destroy, list)
> + slab_destroy(cache, slab);
> +}
> +
> /*
> * Called from cache_reap() to regularly drain alien caches round robin.
> */
> @@ -1033,8 +1044,11 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
> struct array_cache *ac = l3->alien[node];
>
> if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
> - __drain_alien_cache(cachep, ac, node);
> + LIST_HEAD(to_destroy);
> +
> + __drain_alien_cache(cachep, ac, node, &to_destroy);
> spin_unlock_irq(&ac->lock);
> + destroy_slabs(cachep, &to_destroy);
> }
> }
> }
> @@ -1049,9 +1063,12 @@ static void drain_alien_cache(struct kmem_cache *cachep,
> for_each_online_node(i) {
> ac = alien[i];
> if (ac) {
> + LIST_HEAD(to_destroy);
> +
> spin_lock_irqsave(&ac->lock, flags);
> - __drain_alien_cache(cachep, ac, i);
> + __drain_alien_cache(cachep, ac, i, &to_destroy);
> spin_unlock_irqrestore(&ac->lock, flags);
> + destroy_slabs(cachep, &to_destroy);
> }
> }
> }
> @@ -1076,17 +1093,20 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
> l3 = cachep->nodelists[node];
> STATS_INC_NODEFREES(cachep);
> if (l3->alien && l3->alien[nodeid]) {
> + LIST_HEAD(to_destroy);
> +
> alien = l3->alien[nodeid];
> spin_lock(&alien->lock);
> if (unlikely(alien->avail == alien->limit)) {
> STATS_INC_ACOVERFLOW(cachep);
> - __drain_alien_cache(cachep, alien, nodeid);
> + __drain_alien_cache(cachep, alien, nodeid, &to_destroy);
> }
> alien->entry[alien->avail++] = objp;
> spin_unlock(&alien->lock);
> + destroy_slabs(cachep, &to_destroy);
> } else {
> spin_lock(&(cachep->nodelists[nodeid])->list_lock);
> - free_block(cachep, &objp, 1, nodeid);
> + free_block(cachep, &objp, 1, nodeid, NULL);
> spin_unlock(&(cachep->nodelists[nodeid])->list_lock);
> }
> return 1;
> @@ -1118,7 +1138,7 @@ static void __cpuinit cpuup_canceled(long cpu)
> /* Free limit for this kmem_list3 */
> l3->free_limit -= cachep->batchcount;
> if (nc)
> - free_block(cachep, nc->entry, nc->avail, node);
> + free_block(cachep, nc->entry, nc->avail, node, NULL);
>
> if (!cpus_empty(*mask)) {
> spin_unlock_irq(&l3->list_lock);
> @@ -1128,7 +1148,7 @@ static void __cpuinit cpuup_canceled(long cpu)
> shared = l3->shared;
> if (shared) {
> free_block(cachep, shared->entry,
> - shared->avail, node);
> + shared->avail, node, NULL);
> l3->shared = NULL;
> }
>
> @@ -2402,7 +2422,7 @@ static void do_drain(void *arg)
> check_irq_off();
> ac = cpu_cache_get(cachep);
> spin_lock(&cachep->nodelists[node]->list_lock);
> - free_block(cachep, ac->entry, ac->avail, node);
> + free_block(cachep, ac->entry, ac->avail, node, NULL);
> spin_unlock(&cachep->nodelists[node]->list_lock);
> ac->avail = 0;
> }
> @@ -3410,7 +3430,7 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
> * Caller needs to acquire correct kmem_list's list_lock
> */
> static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
> - int node)
> + int node, struct list_head *to_destroy)
> {
> int i;
> struct kmem_list3 *l3;
> @@ -3439,7 +3459,10 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
> * a different cache, refer to comments before
> * alloc_slabmgmt.
> */
> - slab_destroy(cachep, slabp);
> + if (to_destroy)
> + list_add(&slabp->list, to_destroy);
> + else
> + slab_destroy(cachep, slabp);
> } else {
> list_add(&slabp->list, &l3->slabs_free);
> }
> @@ -3479,7 +3502,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
> }
> }
>
> - free_block(cachep, ac->entry, batchcount, node);
> + free_block(cachep, ac->entry, batchcount, node, NULL);
> free_done:
> #if STATS
> {
> @@ -3822,7 +3845,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
>
> if (shared)
> free_block(cachep, shared->entry,
> - shared->avail, node);
> + shared->avail, node, NULL);
>
> l3->shared = new_shared;
> if (!l3->alien) {
> @@ -3925,7 +3948,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
> if (!ccold)
> continue;
> spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
> - free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
> + free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i), NULL);
> spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
> kfree(ccold);
> }
> @@ -4007,7 +4030,7 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
> tofree = force ? ac->avail : (ac->limit + 4) / 5;
> if (tofree > ac->avail)
> tofree = (ac->avail + 1) / 2;
> - free_block(cachep, ac->entry, tofree, node);
> + free_block(cachep, ac->entry, tofree, node, NULL);
> ac->avail -= tofree;
> memmove(ac->entry, &(ac->entry[tofree]),
> sizeof(void *) * ac->avail);
>
--
http://selenic.com : development and support for Mercurial and Linux
^ permalink raw reply [flat|nested] 121+ messages in thread* Re: lockdep complaints in slab allocator
@ 2009-11-23 19:10 ` Matt Mackall
0 siblings, 0 replies; 121+ messages in thread
From: Matt Mackall @ 2009-11-23 19:10 UTC (permalink / raw)
To: Pekka Enberg; +Cc: Peter Zijlstra, paulmck, linux-mm, cl, LKML, Nick Piggin
On Mon, 2009-11-23 at 21:00 +0200, Pekka Enberg wrote:
> Hi Peter,
>
> On Fri, 2009-11-20 at 16:09 +0100, Peter Zijlstra wrote:
> > > Uh, ok, so apparently I was right after all. There's a comment in
> > > free_block() above the slab_destroy() call that refers to the comment
> > > above alloc_slabmgmt() function definition which explains it all.
> > >
> > > Long story short: ->slab_cachep never points to the same kmalloc cache
> > > we're allocating or freeing from. Where do we need to put the
> > > spin_lock_nested() annotation? Would it be enough to just use it in
> > > cache_free_alien() for alien->lock or do we need it in
> > > cache_flusharray() as well?
> >
> > You'd have to somehow push the nested state down from the
> > kmem_cache_free() call in slab_destroy() to all nc->lock sites below.
>
> That turns out to be _very_ hard. How about something like the following
> untested patch which delays slab_destroy() while we're under nc->lock.
>
> Pekka
This seems like a lot of work to paper over a lockdep false positive in
code that should be firmly in the maintenance end of its lifecycle? I'd
rather the fix or papering over happen in lockdep.
Introducing extra cacheline pressure by passing to_destroy around also
seems like a good way to trickle away SLAB's narrow remaining
performance advantages.
>
> diff --git a/mm/slab.c b/mm/slab.c
> index 7dfa481..6f522e3 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -316,7 +316,7 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
> static int drain_freelist(struct kmem_cache *cache,
> struct kmem_list3 *l3, int tofree);
> static void free_block(struct kmem_cache *cachep, void **objpp, int len,
> - int node);
> + int node, struct list_head *to_destroy);
> static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
> static void cache_reap(struct work_struct *unused);
>
> @@ -1002,7 +1002,8 @@ static void free_alien_cache(struct array_cache **ac_ptr)
> }
>
> static void __drain_alien_cache(struct kmem_cache *cachep,
> - struct array_cache *ac, int node)
> + struct array_cache *ac, int node,
> + struct list_head *to_destroy)
> {
> struct kmem_list3 *rl3 = cachep->nodelists[node];
>
> @@ -1016,12 +1017,22 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
> if (rl3->shared)
> transfer_objects(rl3->shared, ac, ac->limit);
>
> - free_block(cachep, ac->entry, ac->avail, node);
> + free_block(cachep, ac->entry, ac->avail, node, to_destroy);
> ac->avail = 0;
> spin_unlock(&rl3->list_lock);
> }
> }
>
> +static void slab_destroy(struct kmem_cache *, struct slab *);
> +
> +static void destroy_slabs(struct kmem_cache *cache, struct list_head *to_destroy)
> +{
> + struct slab *slab, *tmp;
> +
> + list_for_each_entry_safe(slab, tmp, to_destroy, list)
> + slab_destroy(cache, slab);
> +}
> +
> /*
> * Called from cache_reap() to regularly drain alien caches round robin.
> */
> @@ -1033,8 +1044,11 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
> struct array_cache *ac = l3->alien[node];
>
> if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
> - __drain_alien_cache(cachep, ac, node);
> + LIST_HEAD(to_destroy);
> +
> + __drain_alien_cache(cachep, ac, node, &to_destroy);
> spin_unlock_irq(&ac->lock);
> + destroy_slabs(cachep, &to_destroy);
> }
> }
> }
> @@ -1049,9 +1063,12 @@ static void drain_alien_cache(struct kmem_cache *cachep,
> for_each_online_node(i) {
> ac = alien[i];
> if (ac) {
> + LIST_HEAD(to_destroy);
> +
> spin_lock_irqsave(&ac->lock, flags);
> - __drain_alien_cache(cachep, ac, i);
> + __drain_alien_cache(cachep, ac, i, &to_destroy);
> spin_unlock_irqrestore(&ac->lock, flags);
> + destroy_slabs(cachep, &to_destroy);
> }
> }
> }
> @@ -1076,17 +1093,20 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
> l3 = cachep->nodelists[node];
> STATS_INC_NODEFREES(cachep);
> if (l3->alien && l3->alien[nodeid]) {
> + LIST_HEAD(to_destroy);
> +
> alien = l3->alien[nodeid];
> spin_lock(&alien->lock);
> if (unlikely(alien->avail == alien->limit)) {
> STATS_INC_ACOVERFLOW(cachep);
> - __drain_alien_cache(cachep, alien, nodeid);
> + __drain_alien_cache(cachep, alien, nodeid, &to_destroy);
> }
> alien->entry[alien->avail++] = objp;
> spin_unlock(&alien->lock);
> + destroy_slabs(cachep, &to_destroy);
> } else {
> spin_lock(&(cachep->nodelists[nodeid])->list_lock);
> - free_block(cachep, &objp, 1, nodeid);
> + free_block(cachep, &objp, 1, nodeid, NULL);
> spin_unlock(&(cachep->nodelists[nodeid])->list_lock);
> }
> return 1;
> @@ -1118,7 +1138,7 @@ static void __cpuinit cpuup_canceled(long cpu)
> /* Free limit for this kmem_list3 */
> l3->free_limit -= cachep->batchcount;
> if (nc)
> - free_block(cachep, nc->entry, nc->avail, node);
> + free_block(cachep, nc->entry, nc->avail, node, NULL);
>
> if (!cpus_empty(*mask)) {
> spin_unlock_irq(&l3->list_lock);
> @@ -1128,7 +1148,7 @@ static void __cpuinit cpuup_canceled(long cpu)
> shared = l3->shared;
> if (shared) {
> free_block(cachep, shared->entry,
> - shared->avail, node);
> + shared->avail, node, NULL);
> l3->shared = NULL;
> }
>
> @@ -2402,7 +2422,7 @@ static void do_drain(void *arg)
> check_irq_off();
> ac = cpu_cache_get(cachep);
> spin_lock(&cachep->nodelists[node]->list_lock);
> - free_block(cachep, ac->entry, ac->avail, node);
> + free_block(cachep, ac->entry, ac->avail, node, NULL);
> spin_unlock(&cachep->nodelists[node]->list_lock);
> ac->avail = 0;
> }
> @@ -3410,7 +3430,7 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
> * Caller needs to acquire correct kmem_list's list_lock
> */
> static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
> - int node)
> + int node, struct list_head *to_destroy)
> {
> int i;
> struct kmem_list3 *l3;
> @@ -3439,7 +3459,10 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
> * a different cache, refer to comments before
> * alloc_slabmgmt.
> */
> - slab_destroy(cachep, slabp);
> + if (to_destroy)
> + list_add(&slabp->list, to_destroy);
> + else
> + slab_destroy(cachep, slabp);
> } else {
> list_add(&slabp->list, &l3->slabs_free);
> }
> @@ -3479,7 +3502,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
> }
> }
>
> - free_block(cachep, ac->entry, batchcount, node);
> + free_block(cachep, ac->entry, batchcount, node, NULL);
> free_done:
> #if STATS
> {
> @@ -3822,7 +3845,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
>
> if (shared)
> free_block(cachep, shared->entry,
> - shared->avail, node);
> + shared->avail, node, NULL);
>
> l3->shared = new_shared;
> if (!l3->alien) {
> @@ -3925,7 +3948,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
> if (!ccold)
> continue;
> spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
> - free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
> + free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i), NULL);
> spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
> kfree(ccold);
> }
> @@ -4007,7 +4030,7 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
> tofree = force ? ac->avail : (ac->limit + 4) / 5;
> if (tofree > ac->avail)
> tofree = (ac->avail + 1) / 2;
> - free_block(cachep, ac->entry, tofree, node);
> + free_block(cachep, ac->entry, tofree, node, NULL);
> ac->avail -= tofree;
> memmove(ac->entry, &(ac->entry[tofree]),
> sizeof(void *) * ac->avail);
>
--
http://selenic.com : development and support for Mercurial and Linux
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread* Re: lockdep complaints in slab allocator
2009-11-23 19:10 ` Matt Mackall
@ 2009-11-23 19:13 ` Pekka Enberg
-1 siblings, 0 replies; 121+ messages in thread
From: Pekka Enberg @ 2009-11-23 19:13 UTC (permalink / raw)
To: Matt Mackall; +Cc: Peter Zijlstra, paulmck, linux-mm, cl, LKML, Nick Piggin
Matt Mackall wrote:
> This seems like a lot of work to paper over a lockdep false positive in
> code that should be firmly in the maintenance end of its lifecycle? I'd
> rather the fix or papering over happen in lockdep.
True that. Is __raw_spin_lock() out of question, Peter?-) Passing the
state is pretty invasive because of the kmem_cache_free() call in
slab_destroy(). We re-enter the slab allocator from the outer edges
which makes spin_lock_nested() very inconvenient.
> Introducing extra cacheline pressure by passing to_destroy around also
> seems like a good way to trickle away SLAB's narrow remaining
> performance advantages.
We can probably fix that to affect CONFIG_NUMA only which sucks already.
Pekka
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-23 19:13 ` Pekka Enberg
0 siblings, 0 replies; 121+ messages in thread
From: Pekka Enberg @ 2009-11-23 19:13 UTC (permalink / raw)
To: Matt Mackall; +Cc: Peter Zijlstra, paulmck, linux-mm, cl, LKML, Nick Piggin
Matt Mackall wrote:
> This seems like a lot of work to paper over a lockdep false positive in
> code that should be firmly in the maintenance end of its lifecycle? I'd
> rather the fix or papering over happen in lockdep.
True that. Is __raw_spin_lock() out of question, Peter?-) Passing the
state is pretty invasive because of the kmem_cache_free() call in
slab_destroy(). We re-enter the slab allocator from the outer edges
which makes spin_lock_nested() very inconvenient.
> Introducing extra cacheline pressure by passing to_destroy around also
> seems like a good way to trickle away SLAB's narrow remaining
> performance advantages.
We can probably fix that to affect CONFIG_NUMA only which sucks already.
Pekka
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-23 19:13 ` Pekka Enberg
@ 2009-11-24 16:33 ` Peter Zijlstra
-1 siblings, 0 replies; 121+ messages in thread
From: Peter Zijlstra @ 2009-11-24 16:33 UTC (permalink / raw)
To: Pekka Enberg; +Cc: Matt Mackall, paulmck, linux-mm, cl, LKML, Nick Piggin
On Mon, 2009-11-23 at 21:13 +0200, Pekka Enberg wrote:
> Matt Mackall wrote:
> > This seems like a lot of work to paper over a lockdep false positive in
> > code that should be firmly in the maintenance end of its lifecycle? I'd
> > rather the fix or papering over happen in lockdep.
>
> True that. Is __raw_spin_lock() out of question, Peter?-) Passing the
> state is pretty invasive because of the kmem_cache_free() call in
> slab_destroy(). We re-enter the slab allocator from the outer edges
> which makes spin_lock_nested() very inconvenient.
I'm perfectly fine with letting the thing be as it is, its apparently
not something that triggers very often, and since slab will be killed
off soon, who cares.
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 16:33 ` Peter Zijlstra
0 siblings, 0 replies; 121+ messages in thread
From: Peter Zijlstra @ 2009-11-24 16:33 UTC (permalink / raw)
To: Pekka Enberg; +Cc: Matt Mackall, paulmck, linux-mm, cl, LKML, Nick Piggin
On Mon, 2009-11-23 at 21:13 +0200, Pekka Enberg wrote:
> Matt Mackall wrote:
> > This seems like a lot of work to paper over a lockdep false positive in
> > code that should be firmly in the maintenance end of its lifecycle? I'd
> > rather the fix or papering over happen in lockdep.
>
> True that. Is __raw_spin_lock() out of question, Peter?-) Passing the
> state is pretty invasive because of the kmem_cache_free() call in
> slab_destroy(). We re-enter the slab allocator from the outer edges
> which makes spin_lock_nested() very inconvenient.
I'm perfectly fine with letting the thing be as it is, its apparently
not something that triggers very often, and since slab will be killed
off soon, who cares.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 16:33 ` Peter Zijlstra
@ 2009-11-24 17:00 ` Paul E. McKenney
-1 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-24 17:00 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Pekka Enberg, Matt Mackall, linux-mm, cl, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 05:33:26PM +0100, Peter Zijlstra wrote:
> On Mon, 2009-11-23 at 21:13 +0200, Pekka Enberg wrote:
> > Matt Mackall wrote:
> > > This seems like a lot of work to paper over a lockdep false positive in
> > > code that should be firmly in the maintenance end of its lifecycle? I'd
> > > rather the fix or papering over happen in lockdep.
> >
> > True that. Is __raw_spin_lock() out of question, Peter?-) Passing the
> > state is pretty invasive because of the kmem_cache_free() call in
> > slab_destroy(). We re-enter the slab allocator from the outer edges
> > which makes spin_lock_nested() very inconvenient.
>
> I'm perfectly fine with letting the thing be as it is, its apparently
> not something that triggers very often, and since slab will be killed
> off soon, who cares.
Which of the alternatives to slab should I be testing with, then?
[Ducks, runs away.]
Thanx, Paul
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 17:00 ` Paul E. McKenney
0 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-24 17:00 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Pekka Enberg, Matt Mackall, linux-mm, cl, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 05:33:26PM +0100, Peter Zijlstra wrote:
> On Mon, 2009-11-23 at 21:13 +0200, Pekka Enberg wrote:
> > Matt Mackall wrote:
> > > This seems like a lot of work to paper over a lockdep false positive in
> > > code that should be firmly in the maintenance end of its lifecycle? I'd
> > > rather the fix or papering over happen in lockdep.
> >
> > True that. Is __raw_spin_lock() out of question, Peter?-) Passing the
> > state is pretty invasive because of the kmem_cache_free() call in
> > slab_destroy(). We re-enter the slab allocator from the outer edges
> > which makes spin_lock_nested() very inconvenient.
>
> I'm perfectly fine with letting the thing be as it is, its apparently
> not something that triggers very often, and since slab will be killed
> off soon, who cares.
Which of the alternatives to slab should I be testing with, then?
[Ducks, runs away.]
Thanx, Paul
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 17:00 ` Paul E. McKenney
@ 2009-11-24 17:12 ` Matt Mackall
-1 siblings, 0 replies; 121+ messages in thread
From: Matt Mackall @ 2009-11-24 17:12 UTC (permalink / raw)
To: paulmck; +Cc: Peter Zijlstra, Pekka Enberg, linux-mm, cl, LKML, Nick Piggin
On Tue, 2009-11-24 at 09:00 -0800, Paul E. McKenney wrote:
> On Tue, Nov 24, 2009 at 05:33:26PM +0100, Peter Zijlstra wrote:
> > On Mon, 2009-11-23 at 21:13 +0200, Pekka Enberg wrote:
> > > Matt Mackall wrote:
> > > > This seems like a lot of work to paper over a lockdep false positive in
> > > > code that should be firmly in the maintenance end of its lifecycle? I'd
> > > > rather the fix or papering over happen in lockdep.
> > >
> > > True that. Is __raw_spin_lock() out of question, Peter?-) Passing the
> > > state is pretty invasive because of the kmem_cache_free() call in
> > > slab_destroy(). We re-enter the slab allocator from the outer edges
> > > which makes spin_lock_nested() very inconvenient.
> >
> > I'm perfectly fine with letting the thing be as it is, its apparently
> > not something that triggers very often, and since slab will be killed
> > off soon, who cares.
>
> Which of the alternatives to slab should I be testing with, then?
I'm guessing your system is in the minority that has more than $10 worth
of RAM, which means you should probably be evaluating SLUB.
--
http://selenic.com : development and support for Mercurial and Linux
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 17:12 ` Matt Mackall
0 siblings, 0 replies; 121+ messages in thread
From: Matt Mackall @ 2009-11-24 17:12 UTC (permalink / raw)
To: paulmck; +Cc: Peter Zijlstra, Pekka Enberg, linux-mm, cl, LKML, Nick Piggin
On Tue, 2009-11-24 at 09:00 -0800, Paul E. McKenney wrote:
> On Tue, Nov 24, 2009 at 05:33:26PM +0100, Peter Zijlstra wrote:
> > On Mon, 2009-11-23 at 21:13 +0200, Pekka Enberg wrote:
> > > Matt Mackall wrote:
> > > > This seems like a lot of work to paper over a lockdep false positive in
> > > > code that should be firmly in the maintenance end of its lifecycle? I'd
> > > > rather the fix or papering over happen in lockdep.
> > >
> > > True that. Is __raw_spin_lock() out of question, Peter?-) Passing the
> > > state is pretty invasive because of the kmem_cache_free() call in
> > > slab_destroy(). We re-enter the slab allocator from the outer edges
> > > which makes spin_lock_nested() very inconvenient.
> >
> > I'm perfectly fine with letting the thing be as it is, its apparently
> > not something that triggers very often, and since slab will be killed
> > off soon, who cares.
>
> Which of the alternatives to slab should I be testing with, then?
I'm guessing your system is in the minority that has more than $10 worth
of RAM, which means you should probably be evaluating SLUB.
--
http://selenic.com : development and support for Mercurial and Linux
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 17:12 ` Matt Mackall
@ 2009-11-24 17:58 ` Paul E. McKenney
-1 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-24 17:58 UTC (permalink / raw)
To: Matt Mackall
Cc: Peter Zijlstra, Pekka Enberg, linux-mm, cl, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 11:12:36AM -0600, Matt Mackall wrote:
> On Tue, 2009-11-24 at 09:00 -0800, Paul E. McKenney wrote:
> > On Tue, Nov 24, 2009 at 05:33:26PM +0100, Peter Zijlstra wrote:
> > > On Mon, 2009-11-23 at 21:13 +0200, Pekka Enberg wrote:
> > > > Matt Mackall wrote:
> > > > > This seems like a lot of work to paper over a lockdep false positive in
> > > > > code that should be firmly in the maintenance end of its lifecycle? I'd
> > > > > rather the fix or papering over happen in lockdep.
> > > >
> > > > True that. Is __raw_spin_lock() out of question, Peter?-) Passing the
> > > > state is pretty invasive because of the kmem_cache_free() call in
> > > > slab_destroy(). We re-enter the slab allocator from the outer edges
> > > > which makes spin_lock_nested() very inconvenient.
> > >
> > > I'm perfectly fine with letting the thing be as it is, its apparently
> > > not something that triggers very often, and since slab will be killed
> > > off soon, who cares.
> >
> > Which of the alternatives to slab should I be testing with, then?
>
> I'm guessing your system is in the minority that has more than $10 worth
> of RAM, which means you should probably be evaluating SLUB.
I have one nomination for SLUB. I have started a short test run.
Thanx, Paul
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 17:58 ` Paul E. McKenney
0 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-24 17:58 UTC (permalink / raw)
To: Matt Mackall
Cc: Peter Zijlstra, Pekka Enberg, linux-mm, cl, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 11:12:36AM -0600, Matt Mackall wrote:
> On Tue, 2009-11-24 at 09:00 -0800, Paul E. McKenney wrote:
> > On Tue, Nov 24, 2009 at 05:33:26PM +0100, Peter Zijlstra wrote:
> > > On Mon, 2009-11-23 at 21:13 +0200, Pekka Enberg wrote:
> > > > Matt Mackall wrote:
> > > > > This seems like a lot of work to paper over a lockdep false positive in
> > > > > code that should be firmly in the maintenance end of its lifecycle? I'd
> > > > > rather the fix or papering over happen in lockdep.
> > > >
> > > > True that. Is __raw_spin_lock() out of question, Peter?-) Passing the
> > > > state is pretty invasive because of the kmem_cache_free() call in
> > > > slab_destroy(). We re-enter the slab allocator from the outer edges
> > > > which makes spin_lock_nested() very inconvenient.
> > >
> > > I'm perfectly fine with letting the thing be as it is, its apparently
> > > not something that triggers very often, and since slab will be killed
> > > off soon, who cares.
> >
> > Which of the alternatives to slab should I be testing with, then?
>
> I'm guessing your system is in the minority that has more than $10 worth
> of RAM, which means you should probably be evaluating SLUB.
I have one nomination for SLUB. I have started a short test run.
Thanx, Paul
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 17:12 ` Matt Mackall
@ 2009-11-24 18:14 ` Peter Zijlstra
-1 siblings, 0 replies; 121+ messages in thread
From: Peter Zijlstra @ 2009-11-24 18:14 UTC (permalink / raw)
To: Matt Mackall; +Cc: paulmck, Pekka Enberg, linux-mm, cl, LKML, Nick Piggin
On Tue, 2009-11-24 at 11:12 -0600, Matt Mackall wrote:
> On Tue, 2009-11-24 at 09:00 -0800, Paul E. McKenney wrote:
> > On Tue, Nov 24, 2009 at 05:33:26PM +0100, Peter Zijlstra wrote:
> > > On Mon, 2009-11-23 at 21:13 +0200, Pekka Enberg wrote:
> > > > Matt Mackall wrote:
> > > > > This seems like a lot of work to paper over a lockdep false positive in
> > > > > code that should be firmly in the maintenance end of its lifecycle? I'd
> > > > > rather the fix or papering over happen in lockdep.
> > > >
> > > > True that. Is __raw_spin_lock() out of question, Peter?-) Passing the
> > > > state is pretty invasive because of the kmem_cache_free() call in
> > > > slab_destroy(). We re-enter the slab allocator from the outer edges
> > > > which makes spin_lock_nested() very inconvenient.
> > >
> > > I'm perfectly fine with letting the thing be as it is, its apparently
> > > not something that triggers very often, and since slab will be killed
> > > off soon, who cares.
> >
> > Which of the alternatives to slab should I be testing with, then?
>
> I'm guessing your system is in the minority that has more than $10 worth
> of RAM, which means you should probably be evaluating SLUB.
Well, I was rather hoping that'd die too ;-)
Weren't we going to go with SLQB?
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 18:14 ` Peter Zijlstra
0 siblings, 0 replies; 121+ messages in thread
From: Peter Zijlstra @ 2009-11-24 18:14 UTC (permalink / raw)
To: Matt Mackall; +Cc: paulmck, Pekka Enberg, linux-mm, cl, LKML, Nick Piggin
On Tue, 2009-11-24 at 11:12 -0600, Matt Mackall wrote:
> On Tue, 2009-11-24 at 09:00 -0800, Paul E. McKenney wrote:
> > On Tue, Nov 24, 2009 at 05:33:26PM +0100, Peter Zijlstra wrote:
> > > On Mon, 2009-11-23 at 21:13 +0200, Pekka Enberg wrote:
> > > > Matt Mackall wrote:
> > > > > This seems like a lot of work to paper over a lockdep false positive in
> > > > > code that should be firmly in the maintenance end of its lifecycle? I'd
> > > > > rather the fix or papering over happen in lockdep.
> > > >
> > > > True that. Is __raw_spin_lock() out of question, Peter?-) Passing the
> > > > state is pretty invasive because of the kmem_cache_free() call in
> > > > slab_destroy(). We re-enter the slab allocator from the outer edges
> > > > which makes spin_lock_nested() very inconvenient.
> > >
> > > I'm perfectly fine with letting the thing be as it is, its apparently
> > > not something that triggers very often, and since slab will be killed
> > > off soon, who cares.
> >
> > Which of the alternatives to slab should I be testing with, then?
>
> I'm guessing your system is in the minority that has more than $10 worth
> of RAM, which means you should probably be evaluating SLUB.
Well, I was rather hoping that'd die too ;-)
Weren't we going to go with SLQB?
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 18:14 ` Peter Zijlstra
@ 2009-11-24 18:25 ` Paul E. McKenney
-1 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-24 18:25 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Matt Mackall, Pekka Enberg, linux-mm, cl, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 07:14:19PM +0100, Peter Zijlstra wrote:
> On Tue, 2009-11-24 at 11:12 -0600, Matt Mackall wrote:
> > On Tue, 2009-11-24 at 09:00 -0800, Paul E. McKenney wrote:
> > > On Tue, Nov 24, 2009 at 05:33:26PM +0100, Peter Zijlstra wrote:
> > > > On Mon, 2009-11-23 at 21:13 +0200, Pekka Enberg wrote:
> > > > > Matt Mackall wrote:
> > > > > > This seems like a lot of work to paper over a lockdep false positive in
> > > > > > code that should be firmly in the maintenance end of its lifecycle? I'd
> > > > > > rather the fix or papering over happen in lockdep.
> > > > >
> > > > > True that. Is __raw_spin_lock() out of question, Peter?-) Passing the
> > > > > state is pretty invasive because of the kmem_cache_free() call in
> > > > > slab_destroy(). We re-enter the slab allocator from the outer edges
> > > > > which makes spin_lock_nested() very inconvenient.
> > > >
> > > > I'm perfectly fine with letting the thing be as it is, its apparently
> > > > not something that triggers very often, and since slab will be killed
> > > > off soon, who cares.
> > >
> > > Which of the alternatives to slab should I be testing with, then?
> >
> > I'm guessing your system is in the minority that has more than $10 worth
> > of RAM, which means you should probably be evaluating SLUB.
>
> Well, I was rather hoping that'd die too ;-)
>
> Weren't we going to go with SLQB?
Well, I suppose I could make my scripts randomly choose the memory
allocator, but I would rather not. ;-)
More seriously, I do have a number of configurations that I test, and I
suppose I can chose different allocators for the different configurations.
Thanx, Paul
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 18:25 ` Paul E. McKenney
0 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-24 18:25 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Matt Mackall, Pekka Enberg, linux-mm, cl, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 07:14:19PM +0100, Peter Zijlstra wrote:
> On Tue, 2009-11-24 at 11:12 -0600, Matt Mackall wrote:
> > On Tue, 2009-11-24 at 09:00 -0800, Paul E. McKenney wrote:
> > > On Tue, Nov 24, 2009 at 05:33:26PM +0100, Peter Zijlstra wrote:
> > > > On Mon, 2009-11-23 at 21:13 +0200, Pekka Enberg wrote:
> > > > > Matt Mackall wrote:
> > > > > > This seems like a lot of work to paper over a lockdep false positive in
> > > > > > code that should be firmly in the maintenance end of its lifecycle? I'd
> > > > > > rather the fix or papering over happen in lockdep.
> > > > >
> > > > > True that. Is __raw_spin_lock() out of question, Peter?-) Passing the
> > > > > state is pretty invasive because of the kmem_cache_free() call in
> > > > > slab_destroy(). We re-enter the slab allocator from the outer edges
> > > > > which makes spin_lock_nested() very inconvenient.
> > > >
> > > > I'm perfectly fine with letting the thing be as it is, its apparently
> > > > not something that triggers very often, and since slab will be killed
> > > > off soon, who cares.
> > >
> > > Which of the alternatives to slab should I be testing with, then?
> >
> > I'm guessing your system is in the minority that has more than $10 worth
> > of RAM, which means you should probably be evaluating SLUB.
>
> Well, I was rather hoping that'd die too ;-)
>
> Weren't we going to go with SLQB?
Well, I suppose I could make my scripts randomly choose the memory
allocator, but I would rather not. ;-)
More seriously, I do have a number of configurations that I test, and I
suppose I can chose different allocators for the different configurations.
Thanx, Paul
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 18:25 ` Paul E. McKenney
@ 2009-11-24 18:31 ` Peter Zijlstra
-1 siblings, 0 replies; 121+ messages in thread
From: Peter Zijlstra @ 2009-11-24 18:31 UTC (permalink / raw)
To: paulmck; +Cc: Matt Mackall, Pekka Enberg, linux-mm, cl, LKML, Nick Piggin
On Tue, 2009-11-24 at 10:25 -0800, Paul E. McKenney wrote:
> Well, I suppose I could make my scripts randomly choose the memory
> allocator, but I would rather not. ;-)
Which is why I hope we'll soon be down to 2, SLOB for tiny systems and
SLQB for the rest of us, having 3 in-tree and 1 pending is pure and
simple insanity.
Preferably SLQB will be small enough to also be able to get rid of SLOB,
but I've not recently seen any data on that particular issue.
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 18:31 ` Peter Zijlstra
0 siblings, 0 replies; 121+ messages in thread
From: Peter Zijlstra @ 2009-11-24 18:31 UTC (permalink / raw)
To: paulmck; +Cc: Matt Mackall, Pekka Enberg, linux-mm, cl, LKML, Nick Piggin
On Tue, 2009-11-24 at 10:25 -0800, Paul E. McKenney wrote:
> Well, I suppose I could make my scripts randomly choose the memory
> allocator, but I would rather not. ;-)
Which is why I hope we'll soon be down to 2, SLOB for tiny systems and
SLQB for the rest of us, having 3 in-tree and 1 pending is pure and
simple insanity.
Preferably SLQB will be small enough to also be able to get rid of SLOB,
but I've not recently seen any data on that particular issue.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 18:31 ` Peter Zijlstra
@ 2009-11-24 18:53 ` Christoph Lameter
-1 siblings, 0 replies; 121+ messages in thread
From: Christoph Lameter @ 2009-11-24 18:53 UTC (permalink / raw)
To: Peter Zijlstra
Cc: paulmck, Matt Mackall, Pekka Enberg, linux-mm, LKML, Nick Piggin
On Tue, 24 Nov 2009, Peter Zijlstra wrote:
> On Tue, 2009-11-24 at 10:25 -0800, Paul E. McKenney wrote:
>
> > Well, I suppose I could make my scripts randomly choose the memory
> > allocator, but I would rather not. ;-)
>
> Which is why I hope we'll soon be down to 2, SLOB for tiny systems and
> SLQB for the rest of us, having 3 in-tree and 1 pending is pure and
> simple insanity.
>
> Preferably SLQB will be small enough to also be able to get rid of SLOB,
> but I've not recently seen any data on that particular issue.
We have some issues with NUMA in SLQB. Memoryless node support needs to
get some work. The fixes of memoryless node support to SLAB by Lee
create another case where SLQB will be regressing against SLAB.
Multiple modifications of per cpu variables in allocators other than SLUB
means that interruptless fastpath is going to be difficult to realize and
may continue to cause rt issues with preemption and per cpu handling.
Memory consumption of SLQB is better??
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 18:53 ` Christoph Lameter
0 siblings, 0 replies; 121+ messages in thread
From: Christoph Lameter @ 2009-11-24 18:53 UTC (permalink / raw)
To: Peter Zijlstra
Cc: paulmck, Matt Mackall, Pekka Enberg, linux-mm, LKML, Nick Piggin
On Tue, 24 Nov 2009, Peter Zijlstra wrote:
> On Tue, 2009-11-24 at 10:25 -0800, Paul E. McKenney wrote:
>
> > Well, I suppose I could make my scripts randomly choose the memory
> > allocator, but I would rather not. ;-)
>
> Which is why I hope we'll soon be down to 2, SLOB for tiny systems and
> SLQB for the rest of us, having 3 in-tree and 1 pending is pure and
> simple insanity.
>
> Preferably SLQB will be small enough to also be able to get rid of SLOB,
> but I've not recently seen any data on that particular issue.
We have some issues with NUMA in SLQB. Memoryless node support needs to
get some work. The fixes of memoryless node support to SLAB by Lee
create another case where SLQB will be regressing against SLAB.
Multiple modifications of per cpu variables in allocators other than SLUB
means that interruptless fastpath is going to be difficult to realize and
may continue to cause rt issues with preemption and per cpu handling.
Memory consumption of SLQB is better??
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 18:31 ` Peter Zijlstra
@ 2009-11-24 18:54 ` Paul E. McKenney
-1 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-24 18:54 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Matt Mackall, Pekka Enberg, linux-mm, cl, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 07:31:51PM +0100, Peter Zijlstra wrote:
> On Tue, 2009-11-24 at 10:25 -0800, Paul E. McKenney wrote:
>
> > Well, I suppose I could make my scripts randomly choose the memory
> > allocator, but I would rather not. ;-)
>
> Which is why I hope we'll soon be down to 2, SLOB for tiny systems and
> SLQB for the rest of us, having 3 in-tree and 1 pending is pure and
> simple insanity.
So I should start specifying SLOB for my TINY_RCU tests, then.
> Preferably SLQB will be small enough to also be able to get rid of SLOB,
> but I've not recently seen any data on that particular issue.
Given the existence of TINY_RCU, I would look pretty funny if I insisted
on but a single implementation of core subsystems. ;-)
Thanx, Paul
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 18:54 ` Paul E. McKenney
0 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-24 18:54 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Matt Mackall, Pekka Enberg, linux-mm, cl, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 07:31:51PM +0100, Peter Zijlstra wrote:
> On Tue, 2009-11-24 at 10:25 -0800, Paul E. McKenney wrote:
>
> > Well, I suppose I could make my scripts randomly choose the memory
> > allocator, but I would rather not. ;-)
>
> Which is why I hope we'll soon be down to 2, SLOB for tiny systems and
> SLQB for the rest of us, having 3 in-tree and 1 pending is pure and
> simple insanity.
So I should start specifying SLOB for my TINY_RCU tests, then.
> Preferably SLQB will be small enough to also be able to get rid of SLOB,
> but I've not recently seen any data on that particular issue.
Given the existence of TINY_RCU, I would look pretty funny if I insisted
on but a single implementation of core subsystems. ;-)
Thanx, Paul
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 18:14 ` Peter Zijlstra
@ 2009-11-24 19:23 ` Matt Mackall
-1 siblings, 0 replies; 121+ messages in thread
From: Matt Mackall @ 2009-11-24 19:23 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: paulmck, Pekka Enberg, linux-mm, cl, LKML, Nick Piggin
On Tue, 2009-11-24 at 19:14 +0100, Peter Zijlstra wrote:
> On Tue, 2009-11-24 at 11:12 -0600, Matt Mackall wrote:
> > On Tue, 2009-11-24 at 09:00 -0800, Paul E. McKenney wrote:
> > > On Tue, Nov 24, 2009 at 05:33:26PM +0100, Peter Zijlstra wrote:
> > > > On Mon, 2009-11-23 at 21:13 +0200, Pekka Enberg wrote:
> > > > > Matt Mackall wrote:
> > > > > > This seems like a lot of work to paper over a lockdep false positive in
> > > > > > code that should be firmly in the maintenance end of its lifecycle? I'd
> > > > > > rather the fix or papering over happen in lockdep.
> > > > >
> > > > > True that. Is __raw_spin_lock() out of question, Peter?-) Passing the
> > > > > state is pretty invasive because of the kmem_cache_free() call in
> > > > > slab_destroy(). We re-enter the slab allocator from the outer edges
> > > > > which makes spin_lock_nested() very inconvenient.
> > > >
> > > > I'm perfectly fine with letting the thing be as it is, its apparently
> > > > not something that triggers very often, and since slab will be killed
> > > > off soon, who cares.
> > >
> > > Which of the alternatives to slab should I be testing with, then?
> >
> > I'm guessing your system is in the minority that has more than $10 worth
> > of RAM, which means you should probably be evaluating SLUB.
>
> Well, I was rather hoping that'd die too ;-)
>
> Weren't we going to go with SLQB?
News to me. Perhaps it was discussed at KS.
My understanding of the current state of play is:
SLUB: default allocator
SLAB: deep maintenance, will be removed if SLUB ever covers remaining
performance regressions
SLOB: useful for low-end (but high-volume!) embedded
SLQB: sitting in slab.git#for-next for months, has some ground to cover
SLQB and SLUB have pretty similar target audiences, so I agree we should
eventually have only one of them. But I strongly expect performance
results to be mixed, just as they have been comparing SLUB/SLAB.
Similarly, SLQB still has of room for tuning left compared to SLUB, as
SLUB did compared to SLAB when it first emerged. It might be a while
before a clear winner emerges.
--
http://selenic.com : development and support for Mercurial and Linux
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 19:23 ` Matt Mackall
0 siblings, 0 replies; 121+ messages in thread
From: Matt Mackall @ 2009-11-24 19:23 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: paulmck, Pekka Enberg, linux-mm, cl, LKML, Nick Piggin
On Tue, 2009-11-24 at 19:14 +0100, Peter Zijlstra wrote:
> On Tue, 2009-11-24 at 11:12 -0600, Matt Mackall wrote:
> > On Tue, 2009-11-24 at 09:00 -0800, Paul E. McKenney wrote:
> > > On Tue, Nov 24, 2009 at 05:33:26PM +0100, Peter Zijlstra wrote:
> > > > On Mon, 2009-11-23 at 21:13 +0200, Pekka Enberg wrote:
> > > > > Matt Mackall wrote:
> > > > > > This seems like a lot of work to paper over a lockdep false positive in
> > > > > > code that should be firmly in the maintenance end of its lifecycle? I'd
> > > > > > rather the fix or papering over happen in lockdep.
> > > > >
> > > > > True that. Is __raw_spin_lock() out of question, Peter?-) Passing the
> > > > > state is pretty invasive because of the kmem_cache_free() call in
> > > > > slab_destroy(). We re-enter the slab allocator from the outer edges
> > > > > which makes spin_lock_nested() very inconvenient.
> > > >
> > > > I'm perfectly fine with letting the thing be as it is, its apparently
> > > > not something that triggers very often, and since slab will be killed
> > > > off soon, who cares.
> > >
> > > Which of the alternatives to slab should I be testing with, then?
> >
> > I'm guessing your system is in the minority that has more than $10 worth
> > of RAM, which means you should probably be evaluating SLUB.
>
> Well, I was rather hoping that'd die too ;-)
>
> Weren't we going to go with SLQB?
News to me. Perhaps it was discussed at KS.
My understanding of the current state of play is:
SLUB: default allocator
SLAB: deep maintenance, will be removed if SLUB ever covers remaining
performance regressions
SLOB: useful for low-end (but high-volume!) embedded
SLQB: sitting in slab.git#for-next for months, has some ground to cover
SLQB and SLUB have pretty similar target audiences, so I agree we should
eventually have only one of them. But I strongly expect performance
results to be mixed, just as they have been comparing SLUB/SLAB.
Similarly, SLQB still has of room for tuning left compared to SLUB, as
SLUB did compared to SLAB when it first emerged. It might be a while
before a clear winner emerges.
--
http://selenic.com : development and support for Mercurial and Linux
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 19:23 ` Matt Mackall
@ 2009-11-24 19:50 ` Paul E. McKenney
-1 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-24 19:50 UTC (permalink / raw)
To: Matt Mackall
Cc: Peter Zijlstra, Pekka Enberg, linux-mm, cl, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 01:23:35PM -0600, Matt Mackall wrote:
> On Tue, 2009-11-24 at 19:14 +0100, Peter Zijlstra wrote:
> > On Tue, 2009-11-24 at 11:12 -0600, Matt Mackall wrote:
> > > On Tue, 2009-11-24 at 09:00 -0800, Paul E. McKenney wrote:
> > > > On Tue, Nov 24, 2009 at 05:33:26PM +0100, Peter Zijlstra wrote:
> > > > > On Mon, 2009-11-23 at 21:13 +0200, Pekka Enberg wrote:
> > > > > > Matt Mackall wrote:
> > > > > > > This seems like a lot of work to paper over a lockdep false positive in
> > > > > > > code that should be firmly in the maintenance end of its lifecycle? I'd
> > > > > > > rather the fix or papering over happen in lockdep.
> > > > > >
> > > > > > True that. Is __raw_spin_lock() out of question, Peter?-) Passing the
> > > > > > state is pretty invasive because of the kmem_cache_free() call in
> > > > > > slab_destroy(). We re-enter the slab allocator from the outer edges
> > > > > > which makes spin_lock_nested() very inconvenient.
> > > > >
> > > > > I'm perfectly fine with letting the thing be as it is, its apparently
> > > > > not something that triggers very often, and since slab will be killed
> > > > > off soon, who cares.
> > > >
> > > > Which of the alternatives to slab should I be testing with, then?
> > >
> > > I'm guessing your system is in the minority that has more than $10 worth
> > > of RAM, which means you should probably be evaluating SLUB.
> >
> > Well, I was rather hoping that'd die too ;-)
> >
> > Weren't we going to go with SLQB?
>
> News to me. Perhaps it was discussed at KS.
>
> My understanding of the current state of play is:
>
> SLUB: default allocator
Not on all architectures, it appears.
> SLAB: deep maintenance, will be removed if SLUB ever covers remaining
> performance regressions
;-)
> SLOB: useful for low-end (but high-volume!) embedded
And unfortunately also depends on CONFIG_EMBEDDED, making it difficult
for me to test on the available machines. My usual workaround is to
patch Kconfig to remove the dependency.
> SLQB: sitting in slab.git#for-next for months, has some ground to cover
I will hold off testing this until it hits mainline, especially if it is
where KS decided to go.
> SLQB and SLUB have pretty similar target audiences, so I agree we should
> eventually have only one of them. But I strongly expect performance
> results to be mixed, just as they have been comparing SLUB/SLAB.
> Similarly, SLQB still has of room for tuning left compared to SLUB, as
> SLUB did compared to SLAB when it first emerged. It might be a while
> before a clear winner emerges.
Those how live by the heuristic, die by the heuristic!!! ;-)
Thanx, Paul
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 19:50 ` Paul E. McKenney
0 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-24 19:50 UTC (permalink / raw)
To: Matt Mackall
Cc: Peter Zijlstra, Pekka Enberg, linux-mm, cl, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 01:23:35PM -0600, Matt Mackall wrote:
> On Tue, 2009-11-24 at 19:14 +0100, Peter Zijlstra wrote:
> > On Tue, 2009-11-24 at 11:12 -0600, Matt Mackall wrote:
> > > On Tue, 2009-11-24 at 09:00 -0800, Paul E. McKenney wrote:
> > > > On Tue, Nov 24, 2009 at 05:33:26PM +0100, Peter Zijlstra wrote:
> > > > > On Mon, 2009-11-23 at 21:13 +0200, Pekka Enberg wrote:
> > > > > > Matt Mackall wrote:
> > > > > > > This seems like a lot of work to paper over a lockdep false positive in
> > > > > > > code that should be firmly in the maintenance end of its lifecycle? I'd
> > > > > > > rather the fix or papering over happen in lockdep.
> > > > > >
> > > > > > True that. Is __raw_spin_lock() out of question, Peter?-) Passing the
> > > > > > state is pretty invasive because of the kmem_cache_free() call in
> > > > > > slab_destroy(). We re-enter the slab allocator from the outer edges
> > > > > > which makes spin_lock_nested() very inconvenient.
> > > > >
> > > > > I'm perfectly fine with letting the thing be as it is, its apparently
> > > > > not something that triggers very often, and since slab will be killed
> > > > > off soon, who cares.
> > > >
> > > > Which of the alternatives to slab should I be testing with, then?
> > >
> > > I'm guessing your system is in the minority that has more than $10 worth
> > > of RAM, which means you should probably be evaluating SLUB.
> >
> > Well, I was rather hoping that'd die too ;-)
> >
> > Weren't we going to go with SLQB?
>
> News to me. Perhaps it was discussed at KS.
>
> My understanding of the current state of play is:
>
> SLUB: default allocator
Not on all architectures, it appears.
> SLAB: deep maintenance, will be removed if SLUB ever covers remaining
> performance regressions
;-)
> SLOB: useful for low-end (but high-volume!) embedded
And unfortunately also depends on CONFIG_EMBEDDED, making it difficult
for me to test on the available machines. My usual workaround is to
patch Kconfig to remove the dependency.
> SLQB: sitting in slab.git#for-next for months, has some ground to cover
I will hold off testing this until it hits mainline, especially if it is
where KS decided to go.
> SLQB and SLUB have pretty similar target audiences, so I agree we should
> eventually have only one of them. But I strongly expect performance
> results to be mixed, just as they have been comparing SLUB/SLAB.
> Similarly, SLQB still has of room for tuning left compared to SLUB, as
> SLUB did compared to SLAB when it first emerged. It might be a while
> before a clear winner emerges.
Those how live by the heuristic, die by the heuristic!!! ;-)
Thanx, Paul
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 19:23 ` Matt Mackall
@ 2009-11-24 20:46 ` Peter Zijlstra
-1 siblings, 0 replies; 121+ messages in thread
From: Peter Zijlstra @ 2009-11-24 20:46 UTC (permalink / raw)
To: Matt Mackall; +Cc: paulmck, Pekka Enberg, linux-mm, cl, LKML, Nick Piggin
On Tue, 2009-11-24 at 13:23 -0600, Matt Mackall wrote:
> My understanding of the current state of play is:
>
> SLUB: default allocator
> SLAB: deep maintenance, will be removed if SLUB ever covers remaining
> performance regressions
> SLOB: useful for low-end (but high-volume!) embedded
> SLQB: sitting in slab.git#for-next for months, has some ground to cover
>
> SLQB and SLUB have pretty similar target audiences, so I agree we should
> eventually have only one of them. But I strongly expect performance
> results to be mixed, just as they have been comparing SLUB/SLAB.
> Similarly, SLQB still has of room for tuning left compared to SLUB, as
> SLUB did compared to SLAB when it first emerged. It might be a while
> before a clear winner emerges.
And as long as we drag out this madness nothing will change I suspect.
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 20:46 ` Peter Zijlstra
0 siblings, 0 replies; 121+ messages in thread
From: Peter Zijlstra @ 2009-11-24 20:46 UTC (permalink / raw)
To: Matt Mackall; +Cc: paulmck, Pekka Enberg, linux-mm, cl, LKML, Nick Piggin
On Tue, 2009-11-24 at 13:23 -0600, Matt Mackall wrote:
> My understanding of the current state of play is:
>
> SLUB: default allocator
> SLAB: deep maintenance, will be removed if SLUB ever covers remaining
> performance regressions
> SLOB: useful for low-end (but high-volume!) embedded
> SLQB: sitting in slab.git#for-next for months, has some ground to cover
>
> SLQB and SLUB have pretty similar target audiences, so I agree we should
> eventually have only one of them. But I strongly expect performance
> results to be mixed, just as they have been comparing SLUB/SLAB.
> Similarly, SLQB still has of room for tuning left compared to SLUB, as
> SLUB did compared to SLAB when it first emerged. It might be a while
> before a clear winner emerges.
And as long as we drag out this madness nothing will change I suspect.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 20:46 ` Peter Zijlstra
@ 2009-11-24 20:53 ` Matt Mackall
-1 siblings, 0 replies; 121+ messages in thread
From: Matt Mackall @ 2009-11-24 20:53 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: paulmck, Pekka Enberg, linux-mm, cl, LKML, Nick Piggin
On Tue, 2009-11-24 at 21:46 +0100, Peter Zijlstra wrote:
> On Tue, 2009-11-24 at 13:23 -0600, Matt Mackall wrote:
>
> > My understanding of the current state of play is:
> >
> > SLUB: default allocator
> > SLAB: deep maintenance, will be removed if SLUB ever covers remaining
> > performance regressions
> > SLOB: useful for low-end (but high-volume!) embedded
> > SLQB: sitting in slab.git#for-next for months, has some ground to cover
> >
> > SLQB and SLUB have pretty similar target audiences, so I agree we should
> > eventually have only one of them. But I strongly expect performance
> > results to be mixed, just as they have been comparing SLUB/SLAB.
> > Similarly, SLQB still has of room for tuning left compared to SLUB, as
> > SLUB did compared to SLAB when it first emerged. It might be a while
> > before a clear winner emerges.
>
> And as long as we drag out this madness nothing will change I suspect.
If there's a proposal here, it's not clear what it is.
--
http://selenic.com : development and support for Mercurial and Linux
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 20:53 ` Matt Mackall
0 siblings, 0 replies; 121+ messages in thread
From: Matt Mackall @ 2009-11-24 20:53 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: paulmck, Pekka Enberg, linux-mm, cl, LKML, Nick Piggin
On Tue, 2009-11-24 at 21:46 +0100, Peter Zijlstra wrote:
> On Tue, 2009-11-24 at 13:23 -0600, Matt Mackall wrote:
>
> > My understanding of the current state of play is:
> >
> > SLUB: default allocator
> > SLAB: deep maintenance, will be removed if SLUB ever covers remaining
> > performance regressions
> > SLOB: useful for low-end (but high-volume!) embedded
> > SLQB: sitting in slab.git#for-next for months, has some ground to cover
> >
> > SLQB and SLUB have pretty similar target audiences, so I agree we should
> > eventually have only one of them. But I strongly expect performance
> > results to be mixed, just as they have been comparing SLUB/SLAB.
> > Similarly, SLQB still has of room for tuning left compared to SLUB, as
> > SLUB did compared to SLAB when it first emerged. It might be a while
> > before a clear winner emerges.
>
> And as long as we drag out this madness nothing will change I suspect.
If there's a proposal here, it's not clear what it is.
--
http://selenic.com : development and support for Mercurial and Linux
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 20:53 ` Matt Mackall
@ 2009-11-24 21:01 ` Peter Zijlstra
-1 siblings, 0 replies; 121+ messages in thread
From: Peter Zijlstra @ 2009-11-24 21:01 UTC (permalink / raw)
To: Matt Mackall; +Cc: paulmck, Pekka Enberg, linux-mm, cl, LKML, Nick Piggin
On Tue, 2009-11-24 at 14:53 -0600, Matt Mackall wrote:
> On Tue, 2009-11-24 at 21:46 +0100, Peter Zijlstra wrote:
> > On Tue, 2009-11-24 at 13:23 -0600, Matt Mackall wrote:
> >
> > > My understanding of the current state of play is:
> > >
> > > SLUB: default allocator
> > > SLAB: deep maintenance, will be removed if SLUB ever covers remaining
> > > performance regressions
> > > SLOB: useful for low-end (but high-volume!) embedded
> > > SLQB: sitting in slab.git#for-next for months, has some ground to cover
> > >
> > > SLQB and SLUB have pretty similar target audiences, so I agree we should
> > > eventually have only one of them. But I strongly expect performance
> > > results to be mixed, just as they have been comparing SLUB/SLAB.
> > > Similarly, SLQB still has of room for tuning left compared to SLUB, as
> > > SLUB did compared to SLAB when it first emerged. It might be a while
> > > before a clear winner emerges.
> >
> > And as long as we drag out this madness nothing will change I suspect.
>
> If there's a proposal here, it's not clear what it is.
Merge SLQB and rm mm/sl[ua]b.c include/linux/sl[ua]b.h for .33-rc1
As long as people have a choice they'll not even try new stuff and if
they do they'll change to the old one as soon as they find an issue, not
even bothering to report, let alone expend effort fixing it.
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 21:01 ` Peter Zijlstra
0 siblings, 0 replies; 121+ messages in thread
From: Peter Zijlstra @ 2009-11-24 21:01 UTC (permalink / raw)
To: Matt Mackall; +Cc: paulmck, Pekka Enberg, linux-mm, cl, LKML, Nick Piggin
On Tue, 2009-11-24 at 14:53 -0600, Matt Mackall wrote:
> On Tue, 2009-11-24 at 21:46 +0100, Peter Zijlstra wrote:
> > On Tue, 2009-11-24 at 13:23 -0600, Matt Mackall wrote:
> >
> > > My understanding of the current state of play is:
> > >
> > > SLUB: default allocator
> > > SLAB: deep maintenance, will be removed if SLUB ever covers remaining
> > > performance regressions
> > > SLOB: useful for low-end (but high-volume!) embedded
> > > SLQB: sitting in slab.git#for-next for months, has some ground to cover
> > >
> > > SLQB and SLUB have pretty similar target audiences, so I agree we should
> > > eventually have only one of them. But I strongly expect performance
> > > results to be mixed, just as they have been comparing SLUB/SLAB.
> > > Similarly, SLQB still has of room for tuning left compared to SLUB, as
> > > SLUB did compared to SLAB when it first emerged. It might be a while
> > > before a clear winner emerges.
> >
> > And as long as we drag out this madness nothing will change I suspect.
>
> If there's a proposal here, it's not clear what it is.
Merge SLQB and rm mm/sl[ua]b.c include/linux/sl[ua]b.h for .33-rc1
As long as people have a choice they'll not even try new stuff and if
they do they'll change to the old one as soon as they find an issue, not
even bothering to report, let alone expend effort fixing it.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 21:01 ` Peter Zijlstra
@ 2009-11-24 21:03 ` David Rientjes
-1 siblings, 0 replies; 121+ messages in thread
From: David Rientjes @ 2009-11-24 21:03 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Matt Mackall, paulmck, Pekka Enberg, linux-mm, Christoph Lameter,
LKML, Nick Piggin
On Tue, 24 Nov 2009, Peter Zijlstra wrote:
> Merge SLQB and rm mm/sl[ua]b.c include/linux/sl[ua]b.h for .33-rc1
>
slqb still has a 5-10% performance regression compared to slab for
benchmarks such as netperf TCP_RR on machines with high cpu counts,
forcing that type of regression isn't acceptable.
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 21:03 ` David Rientjes
0 siblings, 0 replies; 121+ messages in thread
From: David Rientjes @ 2009-11-24 21:03 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Matt Mackall, paulmck, Pekka Enberg, linux-mm, Christoph Lameter,
LKML, Nick Piggin
On Tue, 24 Nov 2009, Peter Zijlstra wrote:
> Merge SLQB and rm mm/sl[ua]b.c include/linux/sl[ua]b.h for .33-rc1
>
slqb still has a 5-10% performance regression compared to slab for
benchmarks such as netperf TCP_RR on machines with high cpu counts,
forcing that type of regression isn't acceptable.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 21:03 ` David Rientjes
@ 2009-11-24 21:12 ` Peter Zijlstra
-1 siblings, 0 replies; 121+ messages in thread
From: Peter Zijlstra @ 2009-11-24 21:12 UTC (permalink / raw)
To: David Rientjes
Cc: Matt Mackall, paulmck, Pekka Enberg, linux-mm, Christoph Lameter,
LKML, Nick Piggin
On Tue, 2009-11-24 at 13:03 -0800, David Rientjes wrote:
> On Tue, 24 Nov 2009, Peter Zijlstra wrote:
>
> > Merge SLQB and rm mm/sl[ua]b.c include/linux/sl[ua]b.h for .33-rc1
> >
>
> slqb still has a 5-10% performance regression compared to slab for
> benchmarks such as netperf TCP_RR on machines with high cpu counts,
> forcing that type of regression isn't acceptable.
Having _4_ slab allocators is equally unacceptable.
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 21:12 ` Peter Zijlstra
0 siblings, 0 replies; 121+ messages in thread
From: Peter Zijlstra @ 2009-11-24 21:12 UTC (permalink / raw)
To: David Rientjes
Cc: Matt Mackall, paulmck, Pekka Enberg, linux-mm, Christoph Lameter,
LKML, Nick Piggin
On Tue, 2009-11-24 at 13:03 -0800, David Rientjes wrote:
> On Tue, 24 Nov 2009, Peter Zijlstra wrote:
>
> > Merge SLQB and rm mm/sl[ua]b.c include/linux/sl[ua]b.h for .33-rc1
> >
>
> slqb still has a 5-10% performance regression compared to slab for
> benchmarks such as netperf TCP_RR on machines with high cpu counts,
> forcing that type of regression isn't acceptable.
Having _4_ slab allocators is equally unacceptable.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 21:12 ` Peter Zijlstra
@ 2009-11-24 21:19 ` Pekka Enberg
-1 siblings, 0 replies; 121+ messages in thread
From: Pekka Enberg @ 2009-11-24 21:19 UTC (permalink / raw)
To: Peter Zijlstra
Cc: David Rientjes, Matt Mackall, paulmck, linux-mm,
Christoph Lameter, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 11:12 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Tue, 2009-11-24 at 13:03 -0800, David Rientjes wrote:
>> On Tue, 24 Nov 2009, Peter Zijlstra wrote:
>>
>> > Merge SLQB and rm mm/sl[ua]b.c include/linux/sl[ua]b.h for .33-rc1
>> >
>>
>> slqb still has a 5-10% performance regression compared to slab for
>> benchmarks such as netperf TCP_RR on machines with high cpu counts,
>> forcing that type of regression isn't acceptable.
>
> Having _4_ slab allocators is equally unacceptable.
The whole idea behind merging SLQB is to see if it can replace SLAB.
If it can't do that in few kernel releases, we're pulling it out. It's
as simple as that.
And if SLQB can replace SLAB, then we start to talk about replacing SLUB too...
Pekka
^ permalink raw reply [flat|nested] 121+ messages in thread* Re: lockdep complaints in slab allocator
@ 2009-11-24 21:19 ` Pekka Enberg
0 siblings, 0 replies; 121+ messages in thread
From: Pekka Enberg @ 2009-11-24 21:19 UTC (permalink / raw)
To: Peter Zijlstra
Cc: David Rientjes, Matt Mackall, paulmck, linux-mm,
Christoph Lameter, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 11:12 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Tue, 2009-11-24 at 13:03 -0800, David Rientjes wrote:
>> On Tue, 24 Nov 2009, Peter Zijlstra wrote:
>>
>> > Merge SLQB and rm mm/sl[ua]b.c include/linux/sl[ua]b.h for .33-rc1
>> >
>>
>> slqb still has a 5-10% performance regression compared to slab for
>> benchmarks such as netperf TCP_RR on machines with high cpu counts,
>> forcing that type of regression isn't acceptable.
>
> Having _4_ slab allocators is equally unacceptable.
The whole idea behind merging SLQB is to see if it can replace SLAB.
If it can't do that in few kernel releases, we're pulling it out. It's
as simple as that.
And if SLQB can replace SLAB, then we start to talk about replacing SLUB too...
Pekka
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 21:12 ` Peter Zijlstra
@ 2009-11-24 21:22 ` David Rientjes
-1 siblings, 0 replies; 121+ messages in thread
From: David Rientjes @ 2009-11-24 21:22 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Matt Mackall, paulmck, Pekka Enberg, linux-mm, Christoph Lameter,
LKML, Nick Piggin
On Tue, 24 Nov 2009, Peter Zijlstra wrote:
> > slqb still has a 5-10% performance regression compared to slab for
> > benchmarks such as netperf TCP_RR on machines with high cpu counts,
> > forcing that type of regression isn't acceptable.
>
> Having _4_ slab allocators is equally unacceptable.
>
So you just advocated to merging slqb so that it gets more testing and
development, and then use its inclusion in a statistic to say we should
remove others solely because the space is too cluttered?
We use slab partially because the regression in slub was too severe for
some of our benchmarks, and while CONFIG_SLUB may be the kernel default
there are still distros that use slab as the default as well. We cannot
simply remove an allocator that is superior to others because it is old or
has increased complexity.
I'd suggest looking at how widely used slob is and whether it has a
significant advantage over slub. We'd then have two allocators for
specialized workloads (and slub is much better for diagnostics) and one in
development.
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 21:22 ` David Rientjes
0 siblings, 0 replies; 121+ messages in thread
From: David Rientjes @ 2009-11-24 21:22 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Matt Mackall, paulmck, Pekka Enberg, linux-mm, Christoph Lameter,
LKML, Nick Piggin
On Tue, 24 Nov 2009, Peter Zijlstra wrote:
> > slqb still has a 5-10% performance regression compared to slab for
> > benchmarks such as netperf TCP_RR on machines with high cpu counts,
> > forcing that type of regression isn't acceptable.
>
> Having _4_ slab allocators is equally unacceptable.
>
So you just advocated to merging slqb so that it gets more testing and
development, and then use its inclusion in a statistic to say we should
remove others solely because the space is too cluttered?
We use slab partially because the regression in slub was too severe for
some of our benchmarks, and while CONFIG_SLUB may be the kernel default
there are still distros that use slab as the default as well. We cannot
simply remove an allocator that is superior to others because it is old or
has increased complexity.
I'd suggest looking at how widely used slob is and whether it has a
significant advantage over slub. We'd then have two allocators for
specialized workloads (and slub is much better for diagnostics) and one in
development.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 21:22 ` David Rientjes
@ 2009-11-24 21:35 ` Peter Zijlstra
-1 siblings, 0 replies; 121+ messages in thread
From: Peter Zijlstra @ 2009-11-24 21:35 UTC (permalink / raw)
To: David Rientjes
Cc: Matt Mackall, paulmck, Pekka Enberg, linux-mm, Christoph Lameter,
LKML, Nick Piggin
On Tue, 2009-11-24 at 13:22 -0800, David Rientjes wrote:
> On Tue, 24 Nov 2009, Peter Zijlstra wrote:
>
> > > slqb still has a 5-10% performance regression compared to slab for
> > > benchmarks such as netperf TCP_RR on machines with high cpu counts,
> > > forcing that type of regression isn't acceptable.
> >
> > Having _4_ slab allocators is equally unacceptable.
> >
>
> So you just advocated to merging slqb so that it gets more testing and
> development, and then use its inclusion in a statistic to say we should
> remove others solely because the space is too cluttered?
We should cull something, just merging more and more of them is useless
and wastes everybody's time since you have to add features and
interfaces to all of them.
> We use slab partially because the regression in slub was too severe for
> some of our benchmarks, and while CONFIG_SLUB may be the kernel default
> there are still distros that use slab as the default as well. We cannot
> simply remove an allocator that is superior to others because it is old or
> has increased complexity.
Then maybe we should toss SLUB? But then there's people who say SLUB is
better for them. Without forcing something to happen we'll be stuck with
multiple allocators forever.
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 21:35 ` Peter Zijlstra
0 siblings, 0 replies; 121+ messages in thread
From: Peter Zijlstra @ 2009-11-24 21:35 UTC (permalink / raw)
To: David Rientjes
Cc: Matt Mackall, paulmck, Pekka Enberg, linux-mm, Christoph Lameter,
LKML, Nick Piggin
On Tue, 2009-11-24 at 13:22 -0800, David Rientjes wrote:
> On Tue, 24 Nov 2009, Peter Zijlstra wrote:
>
> > > slqb still has a 5-10% performance regression compared to slab for
> > > benchmarks such as netperf TCP_RR on machines with high cpu counts,
> > > forcing that type of regression isn't acceptable.
> >
> > Having _4_ slab allocators is equally unacceptable.
> >
>
> So you just advocated to merging slqb so that it gets more testing and
> development, and then use its inclusion in a statistic to say we should
> remove others solely because the space is too cluttered?
We should cull something, just merging more and more of them is useless
and wastes everybody's time since you have to add features and
interfaces to all of them.
> We use slab partially because the regression in slub was too severe for
> some of our benchmarks, and while CONFIG_SLUB may be the kernel default
> there are still distros that use slab as the default as well. We cannot
> simply remove an allocator that is superior to others because it is old or
> has increased complexity.
Then maybe we should toss SLUB? But then there's people who say SLUB is
better for them. Without forcing something to happen we'll be stuck with
multiple allocators forever.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 21:35 ` Peter Zijlstra
@ 2009-11-24 21:46 ` David Rientjes
-1 siblings, 0 replies; 121+ messages in thread
From: David Rientjes @ 2009-11-24 21:46 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Matt Mackall, paulmck, Pekka Enberg, linux-mm, Christoph Lameter,
LKML, Nick Piggin
On Tue, 24 Nov 2009, Peter Zijlstra wrote:
> We should cull something, just merging more and more of them is useless
> and wastes everybody's time since you have to add features and
> interfaces to all of them.
>
I agree, but it's difficult to get widespread testing or development
interest in an allocator that is sitting outside of mainline. I don't
think any allocator could suddenly be merged as the kernel default, it
seems like a prerequisite to go through the preliminary merging and
development. The severe netperf TCP_RR regression that slub has compared
to slab was never found before it became the default allocator, otherwise
there would probably have been more effort into its development as well.
Unfortunately, slub's design is such that it will probably never be able
to nullify the partial slab thrashing enough, even with the percpu counter
speedup that is now available because of Christoph's work, to make TCP_RR
perform as well as slab.
> Then maybe we should toss SLUB? But then there's people who say SLUB is
> better for them. Without forcing something to happen we'll be stuck with
> multiple allocators forever.
>
Slub is definitely superior in diagnostics and is a much simpler design
than slab. I think it would be much easier to remove slub than slab,
though, simply because there are no great slab performance degradations
compared to slub. I think the best candidate for removal might be slob,
however, because it hasn't been compared to slub and usage may not be as
widespread as expected for such a special case allocator.
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 21:46 ` David Rientjes
0 siblings, 0 replies; 121+ messages in thread
From: David Rientjes @ 2009-11-24 21:46 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Matt Mackall, paulmck, Pekka Enberg, linux-mm, Christoph Lameter,
LKML, Nick Piggin
On Tue, 24 Nov 2009, Peter Zijlstra wrote:
> We should cull something, just merging more and more of them is useless
> and wastes everybody's time since you have to add features and
> interfaces to all of them.
>
I agree, but it's difficult to get widespread testing or development
interest in an allocator that is sitting outside of mainline. I don't
think any allocator could suddenly be merged as the kernel default, it
seems like a prerequisite to go through the preliminary merging and
development. The severe netperf TCP_RR regression that slub has compared
to slab was never found before it became the default allocator, otherwise
there would probably have been more effort into its development as well.
Unfortunately, slub's design is such that it will probably never be able
to nullify the partial slab thrashing enough, even with the percpu counter
speedup that is now available because of Christoph's work, to make TCP_RR
perform as well as slab.
> Then maybe we should toss SLUB? But then there's people who say SLUB is
> better for them. Without forcing something to happen we'll be stuck with
> multiple allocators forever.
>
Slub is definitely superior in diagnostics and is a much simpler design
than slab. I think it would be much easier to remove slub than slab,
though, simply because there are no great slab performance degradations
compared to slub. I think the best candidate for removal might be slob,
however, because it hasn't been compared to slub and usage may not be as
widespread as expected for such a special case allocator.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 21:46 ` David Rientjes
@ 2009-11-24 22:23 ` Paul E. McKenney
-1 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-24 22:23 UTC (permalink / raw)
To: David Rientjes
Cc: Peter Zijlstra, Matt Mackall, Pekka Enberg, linux-mm,
Christoph Lameter, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 01:46:34PM -0800, David Rientjes wrote:
> On Tue, 24 Nov 2009, Peter Zijlstra wrote:
>
> > We should cull something, just merging more and more of them is useless
> > and wastes everybody's time since you have to add features and
> > interfaces to all of them.
>
> I agree, but it's difficult to get widespread testing or development
> interest in an allocator that is sitting outside of mainline. I don't
> think any allocator could suddenly be merged as the kernel default, it
> seems like a prerequisite to go through the preliminary merging and
> development. The severe netperf TCP_RR regression that slub has compared
> to slab was never found before it became the default allocator, otherwise
> there would probably have been more effort into its development as well.
> Unfortunately, slub's design is such that it will probably never be able
> to nullify the partial slab thrashing enough, even with the percpu counter
> speedup that is now available because of Christoph's work, to make TCP_RR
> perform as well as slab.
OK. I threatened this over IRC, and I never make threats that I am not
prepared to carry out.
I therefore propose creating a staging area for memory allocators,
similar to the one for device drivers. Have it in place for allocators
both coming and going.
> > Then maybe we should toss SLUB? But then there's people who say SLUB is
> > better for them. Without forcing something to happen we'll be stuck with
> > multiple allocators forever.
>
> Slub is definitely superior in diagnostics and is a much simpler design
> than slab. I think it would be much easier to remove slub than slab,
> though, simply because there are no great slab performance degradations
> compared to slub. I think the best candidate for removal might be slob,
> however, because it hasn't been compared to slub and usage may not be as
> widespread as expected for such a special case allocator.
And yes, the real problem is that each allocator has its advocates.
I would actually not be all that worried about a proliferation of
allocators if they were automatically selected based on machine
configuration, expected workload, or some such. But the fact is
that while 5% is a life-or-death matter to benchmarkers, it is of no
consequence to the typical Linux user/workload.
The concern with simpler allocators is that making them competitive
across the board with SLAB will make them just as complex as SLAB is.
As long as CONFIG_EMBEDDED remains a euphemism for "don't use me", SLOB
will not see much use or testing outside of those people who care
passionately about memory footprint. SLQB probably doesn't make it into
mainline until either Nick gets done with his VFS scalability work or
someone else starts pushing it. Allocator proliferation continues as
long as allocators are perceived to be easy to write. And so on...
As for me, as long as SLAB is in the kernel and is default for some
of the machines I use for testing, I will continue reporting any bugs
I find in it. ;-)
Thanx, Paul
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 22:23 ` Paul E. McKenney
0 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-24 22:23 UTC (permalink / raw)
To: David Rientjes
Cc: Peter Zijlstra, Matt Mackall, Pekka Enberg, linux-mm,
Christoph Lameter, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 01:46:34PM -0800, David Rientjes wrote:
> On Tue, 24 Nov 2009, Peter Zijlstra wrote:
>
> > We should cull something, just merging more and more of them is useless
> > and wastes everybody's time since you have to add features and
> > interfaces to all of them.
>
> I agree, but it's difficult to get widespread testing or development
> interest in an allocator that is sitting outside of mainline. I don't
> think any allocator could suddenly be merged as the kernel default, it
> seems like a prerequisite to go through the preliminary merging and
> development. The severe netperf TCP_RR regression that slub has compared
> to slab was never found before it became the default allocator, otherwise
> there would probably have been more effort into its development as well.
> Unfortunately, slub's design is such that it will probably never be able
> to nullify the partial slab thrashing enough, even with the percpu counter
> speedup that is now available because of Christoph's work, to make TCP_RR
> perform as well as slab.
OK. I threatened this over IRC, and I never make threats that I am not
prepared to carry out.
I therefore propose creating a staging area for memory allocators,
similar to the one for device drivers. Have it in place for allocators
both coming and going.
> > Then maybe we should toss SLUB? But then there's people who say SLUB is
> > better for them. Without forcing something to happen we'll be stuck with
> > multiple allocators forever.
>
> Slub is definitely superior in diagnostics and is a much simpler design
> than slab. I think it would be much easier to remove slub than slab,
> though, simply because there are no great slab performance degradations
> compared to slub. I think the best candidate for removal might be slob,
> however, because it hasn't been compared to slub and usage may not be as
> widespread as expected for such a special case allocator.
And yes, the real problem is that each allocator has its advocates.
I would actually not be all that worried about a proliferation of
allocators if they were automatically selected based on machine
configuration, expected workload, or some such. But the fact is
that while 5% is a life-or-death matter to benchmarkers, it is of no
consequence to the typical Linux user/workload.
The concern with simpler allocators is that making them competitive
across the board with SLAB will make them just as complex as SLAB is.
As long as CONFIG_EMBEDDED remains a euphemism for "don't use me", SLOB
will not see much use or testing outside of those people who care
passionately about memory footprint. SLQB probably doesn't make it into
mainline until either Nick gets done with his VFS scalability work or
someone else starts pushing it. Allocator proliferation continues as
long as allocators are perceived to be easy to write. And so on...
As for me, as long as SLAB is in the kernel and is default for some
of the machines I use for testing, I will continue reporting any bugs
I find in it. ;-)
Thanx, Paul
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 22:23 ` Paul E. McKenney
@ 2009-11-25 7:12 ` Pekka Enberg
-1 siblings, 0 replies; 121+ messages in thread
From: Pekka Enberg @ 2009-11-25 7:12 UTC (permalink / raw)
To: paulmck
Cc: David Rientjes, Peter Zijlstra, Matt Mackall, linux-mm,
Christoph Lameter, LKML, Nick Piggin
Paul E. McKenney kirjoitti:
> As for me, as long as SLAB is in the kernel and is default for some
> of the machines I use for testing, I will continue reporting any bugs
> I find in it. ;-)
Yes, thanks for doing that. As long as SLAB is in the tree, I'll do my
best to get them fixed.
Pekka
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-25 7:12 ` Pekka Enberg
0 siblings, 0 replies; 121+ messages in thread
From: Pekka Enberg @ 2009-11-25 7:12 UTC (permalink / raw)
To: paulmck
Cc: David Rientjes, Peter Zijlstra, Matt Mackall, linux-mm,
Christoph Lameter, LKML, Nick Piggin
Paul E. McKenney kirjoitti:
> As for me, as long as SLAB is in the kernel and is default for some
> of the machines I use for testing, I will continue reporting any bugs
> I find in it. ;-)
Yes, thanks for doing that. As long as SLAB is in the tree, I'll do my
best to get them fixed.
Pekka
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 21:35 ` Peter Zijlstra
@ 2009-11-25 7:25 ` Pekka Enberg
-1 siblings, 0 replies; 121+ messages in thread
From: Pekka Enberg @ 2009-11-25 7:25 UTC (permalink / raw)
To: Peter Zijlstra
Cc: David Rientjes, Matt Mackall, paulmck, linux-mm,
Christoph Lameter, LKML, Nick Piggin
Peter Zijlstra kirjoitti:
> Then maybe we should toss SLUB? But then there's people who say SLUB is
> better for them. Without forcing something to happen we'll be stuck with
> multiple allocators forever.
SLUB is good for NUMA, SLAB is pretty much a disaster with it's alien
tentacles^Hcaches. AFAIK, SLQB hasn't received much NUMA attention so
it's not obvious whether or not it will be able to perform as well as
SLUB or not.
The biggest problem with SLUB is that most of the people (excluding
Christoph and myself) seem to think the design is unfixable for their
favorite workload so they prefer to either stay with SLAB or work on SLQB.
I really couldn't care less which allocator we end up with as long as
it's not SLAB. I do think putting more performance tuning effort into
SLUB would give best results because the allocator is pretty rock solid
at this point. People seem underestimate the total effort needed to make
a slab allocator good enough for the general public (which is why I
think SLQB still has a long way to go).
Pekka
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-25 7:25 ` Pekka Enberg
0 siblings, 0 replies; 121+ messages in thread
From: Pekka Enberg @ 2009-11-25 7:25 UTC (permalink / raw)
To: Peter Zijlstra
Cc: David Rientjes, Matt Mackall, paulmck, linux-mm,
Christoph Lameter, LKML, Nick Piggin
Peter Zijlstra kirjoitti:
> Then maybe we should toss SLUB? But then there's people who say SLUB is
> better for them. Without forcing something to happen we'll be stuck with
> multiple allocators forever.
SLUB is good for NUMA, SLAB is pretty much a disaster with it's alien
tentacles^Hcaches. AFAIK, SLQB hasn't received much NUMA attention so
it's not obvious whether or not it will be able to perform as well as
SLUB or not.
The biggest problem with SLUB is that most of the people (excluding
Christoph and myself) seem to think the design is unfixable for their
favorite workload so they prefer to either stay with SLAB or work on SLQB.
I really couldn't care less which allocator we end up with as long as
it's not SLAB. I do think putting more performance tuning effort into
SLUB would give best results because the allocator is pretty rock solid
at this point. People seem underestimate the total effort needed to make
a slab allocator good enough for the general public (which is why I
think SLQB still has a long way to go).
Pekka
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-25 7:25 ` Pekka Enberg
@ 2009-11-27 17:22 ` Christoph Lameter
-1 siblings, 0 replies; 121+ messages in thread
From: Christoph Lameter @ 2009-11-27 17:22 UTC (permalink / raw)
To: Pekka Enberg
Cc: Peter Zijlstra, David Rientjes, Matt Mackall, paulmck, linux-mm,
LKML, Nick Piggin
On Wed, 25 Nov 2009, Pekka Enberg wrote:
> SLUB is good for NUMA, SLAB is pretty much a disaster with it's alien
> tentacles^Hcaches. AFAIK, SLQB hasn't received much NUMA attention so it's not
> obvious whether or not it will be able to perform as well as SLUB or not.
>
> The biggest problem with SLUB is that most of the people (excluding Christoph
> and myself) seem to think the design is unfixable for their favorite workload
> so they prefer to either stay with SLAB or work on SLQB.
The current design of each has its own strength and its weaknesses. A
queued design is not good for HPC and financial apps since it requires
periodic queue cleaning (therefore disturbing a latency critical
application path). Queue processing can go out of hand if there are
many different types of memory (SLAB in NUMA configurations). So a
queueless allocator design is good for some configurations. It is also
beneficial if the allocator must be frugal with memory allocations.
There is not much difference for most workloads in terms of memory
consumption between SLOB and SLUB.
> I really couldn't care less which allocator we end up with as long as it's not
> SLAB. I do think putting more performance tuning effort into SLUB would give
> best results because the allocator is pretty rock solid at this point. People
> seem underestimate the total effort needed to make a slab allocator good
> enough for the general public (which is why I think SLQB still has a long way
> to go).
There are still patches queued here for SLUB that depend on other per cpu
work to be merged in .33. These do not address the caching issues that
people focus on for networking and enterprise apps but they decrease the
minimum latency important for HPC and financial apps. The SLUB fastpath is
the lowest latency allocation path that exists.
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-27 17:22 ` Christoph Lameter
0 siblings, 0 replies; 121+ messages in thread
From: Christoph Lameter @ 2009-11-27 17:22 UTC (permalink / raw)
To: Pekka Enberg
Cc: Peter Zijlstra, David Rientjes, Matt Mackall, paulmck, linux-mm,
LKML, Nick Piggin
On Wed, 25 Nov 2009, Pekka Enberg wrote:
> SLUB is good for NUMA, SLAB is pretty much a disaster with it's alien
> tentacles^Hcaches. AFAIK, SLQB hasn't received much NUMA attention so it's not
> obvious whether or not it will be able to perform as well as SLUB or not.
>
> The biggest problem with SLUB is that most of the people (excluding Christoph
> and myself) seem to think the design is unfixable for their favorite workload
> so they prefer to either stay with SLAB or work on SLQB.
The current design of each has its own strength and its weaknesses. A
queued design is not good for HPC and financial apps since it requires
periodic queue cleaning (therefore disturbing a latency critical
application path). Queue processing can go out of hand if there are
many different types of memory (SLAB in NUMA configurations). So a
queueless allocator design is good for some configurations. It is also
beneficial if the allocator must be frugal with memory allocations.
There is not much difference for most workloads in terms of memory
consumption between SLOB and SLUB.
> I really couldn't care less which allocator we end up with as long as it's not
> SLAB. I do think putting more performance tuning effort into SLUB would give
> best results because the allocator is pretty rock solid at this point. People
> seem underestimate the total effort needed to make a slab allocator good
> enough for the general public (which is why I think SLQB still has a long way
> to go).
There are still patches queued here for SLUB that depend on other per cpu
work to be merged in .33. These do not address the caching issues that
people focus on for networking and enterprise apps but they decrease the
minimum latency important for HPC and financial apps. The SLUB fastpath is
the lowest latency allocation path that exists.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 21:12 ` Peter Zijlstra
@ 2009-11-24 21:48 ` Paul E. McKenney
-1 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-24 21:48 UTC (permalink / raw)
To: Peter Zijlstra
Cc: David Rientjes, Matt Mackall, Pekka Enberg, linux-mm,
Christoph Lameter, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 10:12:30PM +0100, Peter Zijlstra wrote:
> On Tue, 2009-11-24 at 13:03 -0800, David Rientjes wrote:
> > On Tue, 24 Nov 2009, Peter Zijlstra wrote:
> >
> > > Merge SLQB and rm mm/sl[ua]b.c include/linux/sl[ua]b.h for .33-rc1
> > >
> >
> > slqb still has a 5-10% performance regression compared to slab for
> > benchmarks such as netperf TCP_RR on machines with high cpu counts,
> > forcing that type of regression isn't acceptable.
>
> Having _4_ slab allocators is equally unacceptable.
I completely agree. We need at least ten. ;-)
Thanx, Paul
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 21:48 ` Paul E. McKenney
0 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-24 21:48 UTC (permalink / raw)
To: Peter Zijlstra
Cc: David Rientjes, Matt Mackall, Pekka Enberg, linux-mm,
Christoph Lameter, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 10:12:30PM +0100, Peter Zijlstra wrote:
> On Tue, 2009-11-24 at 13:03 -0800, David Rientjes wrote:
> > On Tue, 24 Nov 2009, Peter Zijlstra wrote:
> >
> > > Merge SLQB and rm mm/sl[ua]b.c include/linux/sl[ua]b.h for .33-rc1
> > >
> >
> > slqb still has a 5-10% performance regression compared to slab for
> > benchmarks such as netperf TCP_RR on machines with high cpu counts,
> > forcing that type of regression isn't acceptable.
>
> Having _4_ slab allocators is equally unacceptable.
I completely agree. We need at least ten. ;-)
Thanx, Paul
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 21:01 ` Peter Zijlstra
@ 2009-11-24 21:16 ` Pekka Enberg
-1 siblings, 0 replies; 121+ messages in thread
From: Pekka Enberg @ 2009-11-24 21:16 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Matt Mackall, paulmck, linux-mm, cl, LKML, Nick Piggin,
David Rientjes
On Tue, Nov 24, 2009 at 11:01 PM, Peter Zijlstra <peterz@infradead.org> wrote:
>> If there's a proposal here, it's not clear what it is.
>
> Merge SLQB and rm mm/sl[ua]b.c include/linux/sl[ua]b.h for .33-rc1
>
> As long as people have a choice they'll not even try new stuff and if
> they do they'll change to the old one as soon as they find an issue, not
> even bothering to report, let alone expend effort fixing it.
Oh, no, SLQB is by no means stable enough for the general public. And
it doesn't even have all the functionality SLAB and SLUB does (cpusets
come to mind).
If people want to really help us getting out of this mess, please take
a stab at fixing any of the outstanding performance regressions for
either SLQB or SLUB. David's a great source if you're interested in
knowing where to look. The only big regression for SLUB is the Intel
TPC benchmark thingy that nobody (except Intel folks) really has
access to. SLQB doesn't suffer from that because Nick had some
performance testing help from Intel IIRC.
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 21:16 ` Pekka Enberg
0 siblings, 0 replies; 121+ messages in thread
From: Pekka Enberg @ 2009-11-24 21:16 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Matt Mackall, paulmck, linux-mm, cl, LKML, Nick Piggin,
David Rientjes
On Tue, Nov 24, 2009 at 11:01 PM, Peter Zijlstra <peterz@infradead.org> wrote:
>> If there's a proposal here, it's not clear what it is.
>
> Merge SLQB and rm mm/sl[ua]b.c include/linux/sl[ua]b.h for .33-rc1
>
> As long as people have a choice they'll not even try new stuff and if
> they do they'll change to the old one as soon as they find an issue, not
> even bothering to report, let alone expend effort fixing it.
Oh, no, SLQB is by no means stable enough for the general public. And
it doesn't even have all the functionality SLAB and SLUB does (cpusets
come to mind).
If people want to really help us getting out of this mess, please take
a stab at fixing any of the outstanding performance regressions for
either SLQB or SLUB. David's a great source if you're interested in
knowing where to look. The only big regression for SLUB is the Intel
TPC benchmark thingy that nobody (except Intel folks) really has
access to. SLQB doesn't suffer from that because Nick had some
performance testing help from Intel IIRC.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 19:23 ` Matt Mackall
@ 2009-11-24 21:07 ` Pekka Enberg
-1 siblings, 0 replies; 121+ messages in thread
From: Pekka Enberg @ 2009-11-24 21:07 UTC (permalink / raw)
To: Matt Mackall; +Cc: Peter Zijlstra, paulmck, linux-mm, cl, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 9:23 PM, Matt Mackall <mpm@selenic.com> wrote:
> On Tue, 2009-11-24 at 19:14 +0100, Peter Zijlstra wrote:
>> On Tue, 2009-11-24 at 11:12 -0600, Matt Mackall wrote:
>> > On Tue, 2009-11-24 at 09:00 -0800, Paul E. McKenney wrote:
>> > > On Tue, Nov 24, 2009 at 05:33:26PM +0100, Peter Zijlstra wrote:
>> > > > On Mon, 2009-11-23 at 21:13 +0200, Pekka Enberg wrote:
>> > > > > Matt Mackall wrote:
>> > > > > > This seems like a lot of work to paper over a lockdep false positive in
>> > > > > > code that should be firmly in the maintenance end of its lifecycle? I'd
>> > > > > > rather the fix or papering over happen in lockdep.
>> > > > >
>> > > > > True that. Is __raw_spin_lock() out of question, Peter?-) Passing the
>> > > > > state is pretty invasive because of the kmem_cache_free() call in
>> > > > > slab_destroy(). We re-enter the slab allocator from the outer edges
>> > > > > which makes spin_lock_nested() very inconvenient.
>> > > >
>> > > > I'm perfectly fine with letting the thing be as it is, its apparently
>> > > > not something that triggers very often, and since slab will be killed
>> > > > off soon, who cares.
>> > >
>> > > Which of the alternatives to slab should I be testing with, then?
>> >
>> > I'm guessing your system is in the minority that has more than $10 worth
>> > of RAM, which means you should probably be evaluating SLUB.
>>
>> Well, I was rather hoping that'd die too ;-)
>>
>> Weren't we going to go with SLQB?
>
> News to me. Perhaps it was discussed at KS.
Yes, we discussed this at KS. The plan was to merge SLQB to mainline
so people can test it more easily but unfortunately it hasn't gotten
any loving from Nick recently which makes me think it's going to miss
the merge window for .33 as well.
> My understanding of the current state of play is:
>
> SLUB: default allocator
> SLAB: deep maintenance, will be removed if SLUB ever covers remaining
> performance regressions
> SLOB: useful for low-end (but high-volume!) embedded
> SLQB: sitting in slab.git#for-next for months, has some ground to cover
>
> SLQB and SLUB have pretty similar target audiences, so I agree we should
> eventually have only one of them. But I strongly expect performance
> results to be mixed, just as they have been comparing SLUB/SLAB.
> Similarly, SLQB still has of room for tuning left compared to SLUB, as
> SLUB did compared to SLAB when it first emerged. It might be a while
> before a clear winner emerges.
Yeah, something like that. I don't think we were really able to decide
anything at the KS. IIRC Christoph was in favor of having multiple
slab allocators in the tree whereas I, for example, would rather have
only one. The SLOB allocator is bit special here because it's for
embedded. However, I also talked to some embedded folks at the summit
and none of them were using SLOB because the gains weren't big enough.
So I don't know if it's being used that widely.
I personally was hoping for SLUB or SLQB to emerge as a clear winner
so we could delete the rest but that hasn't really happened.
Pekka
^ permalink raw reply [flat|nested] 121+ messages in thread* Re: lockdep complaints in slab allocator
@ 2009-11-24 21:07 ` Pekka Enberg
0 siblings, 0 replies; 121+ messages in thread
From: Pekka Enberg @ 2009-11-24 21:07 UTC (permalink / raw)
To: Matt Mackall; +Cc: Peter Zijlstra, paulmck, linux-mm, cl, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 9:23 PM, Matt Mackall <mpm@selenic.com> wrote:
> On Tue, 2009-11-24 at 19:14 +0100, Peter Zijlstra wrote:
>> On Tue, 2009-11-24 at 11:12 -0600, Matt Mackall wrote:
>> > On Tue, 2009-11-24 at 09:00 -0800, Paul E. McKenney wrote:
>> > > On Tue, Nov 24, 2009 at 05:33:26PM +0100, Peter Zijlstra wrote:
>> > > > On Mon, 2009-11-23 at 21:13 +0200, Pekka Enberg wrote:
>> > > > > Matt Mackall wrote:
>> > > > > > This seems like a lot of work to paper over a lockdep false positive in
>> > > > > > code that should be firmly in the maintenance end of its lifecycle? I'd
>> > > > > > rather the fix or papering over happen in lockdep.
>> > > > >
>> > > > > True that. Is __raw_spin_lock() out of question, Peter?-) Passing the
>> > > > > state is pretty invasive because of the kmem_cache_free() call in
>> > > > > slab_destroy(). We re-enter the slab allocator from the outer edges
>> > > > > which makes spin_lock_nested() very inconvenient.
>> > > >
>> > > > I'm perfectly fine with letting the thing be as it is, its apparently
>> > > > not something that triggers very often, and since slab will be killed
>> > > > off soon, who cares.
>> > >
>> > > Which of the alternatives to slab should I be testing with, then?
>> >
>> > I'm guessing your system is in the minority that has more than $10 worth
>> > of RAM, which means you should probably be evaluating SLUB.
>>
>> Well, I was rather hoping that'd die too ;-)
>>
>> Weren't we going to go with SLQB?
>
> News to me. Perhaps it was discussed at KS.
Yes, we discussed this at KS. The plan was to merge SLQB to mainline
so people can test it more easily but unfortunately it hasn't gotten
any loving from Nick recently which makes me think it's going to miss
the merge window for .33 as well.
> My understanding of the current state of play is:
>
> SLUB: default allocator
> SLAB: deep maintenance, will be removed if SLUB ever covers remaining
> performance regressions
> SLOB: useful for low-end (but high-volume!) embedded
> SLQB: sitting in slab.git#for-next for months, has some ground to cover
>
> SLQB and SLUB have pretty similar target audiences, so I agree we should
> eventually have only one of them. But I strongly expect performance
> results to be mixed, just as they have been comparing SLUB/SLAB.
> Similarly, SLQB still has of room for tuning left compared to SLUB, as
> SLUB did compared to SLAB when it first emerged. It might be a while
> before a clear winner emerges.
Yeah, something like that. I don't think we were really able to decide
anything at the KS. IIRC Christoph was in favor of having multiple
slab allocators in the tree whereas I, for example, would rather have
only one. The SLOB allocator is bit special here because it's for
embedded. However, I also talked to some embedded folks at the summit
and none of them were using SLOB because the gains weren't big enough.
So I don't know if it's being used that widely.
I personally was hoping for SLUB or SLQB to emerge as a clear winner
so we could delete the rest but that hasn't really happened.
Pekka
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread* Re: lockdep complaints in slab allocator
2009-11-24 21:07 ` Pekka Enberg
@ 2009-11-24 22:55 ` Matt Mackall
-1 siblings, 0 replies; 121+ messages in thread
From: Matt Mackall @ 2009-11-24 22:55 UTC (permalink / raw)
To: Pekka Enberg; +Cc: Peter Zijlstra, paulmck, linux-mm, cl, LKML, Nick Piggin
On Tue, 2009-11-24 at 23:07 +0200, Pekka Enberg wrote:
> On Tue, Nov 24, 2009 at 9:23 PM, Matt Mackall <mpm@selenic.com> wrote:
> > On Tue, 2009-11-24 at 19:14 +0100, Peter Zijlstra wrote:
> >> On Tue, 2009-11-24 at 11:12 -0600, Matt Mackall wrote:
> >> > On Tue, 2009-11-24 at 09:00 -0800, Paul E. McKenney wrote:
> >> > > On Tue, Nov 24, 2009 at 05:33:26PM +0100, Peter Zijlstra wrote:
> >> > > > On Mon, 2009-11-23 at 21:13 +0200, Pekka Enberg wrote:
> >> > > > > Matt Mackall wrote:
> >> > > > > > This seems like a lot of work to paper over a lockdep false positive in
> >> > > > > > code that should be firmly in the maintenance end of its lifecycle? I'd
> >> > > > > > rather the fix or papering over happen in lockdep.
> >> > > > >
> >> > > > > True that. Is __raw_spin_lock() out of question, Peter?-) Passing the
> >> > > > > state is pretty invasive because of the kmem_cache_free() call in
> >> > > > > slab_destroy(). We re-enter the slab allocator from the outer edges
> >> > > > > which makes spin_lock_nested() very inconvenient.
> >> > > >
> >> > > > I'm perfectly fine with letting the thing be as it is, its apparently
> >> > > > not something that triggers very often, and since slab will be killed
> >> > > > off soon, who cares.
> >> > >
> >> > > Which of the alternatives to slab should I be testing with, then?
> >> >
> >> > I'm guessing your system is in the minority that has more than $10 worth
> >> > of RAM, which means you should probably be evaluating SLUB.
> >>
> >> Well, I was rather hoping that'd die too ;-)
> >>
> >> Weren't we going to go with SLQB?
> >
> > News to me. Perhaps it was discussed at KS.
>
> Yes, we discussed this at KS. The plan was to merge SLQB to mainline
> so people can test it more easily but unfortunately it hasn't gotten
> any loving from Nick recently which makes me think it's going to miss
> the merge window for .33 as well.
>
> > My understanding of the current state of play is:
> >
> > SLUB: default allocator
> > SLAB: deep maintenance, will be removed if SLUB ever covers remaining
> > performance regressions
> > SLOB: useful for low-end (but high-volume!) embedded
> > SLQB: sitting in slab.git#for-next for months, has some ground to cover
> >
> > SLQB and SLUB have pretty similar target audiences, so I agree we should
> > eventually have only one of them. But I strongly expect performance
> > results to be mixed, just as they have been comparing SLUB/SLAB.
> > Similarly, SLQB still has of room for tuning left compared to SLUB, as
> > SLUB did compared to SLAB when it first emerged. It might be a while
> > before a clear winner emerges.
>
> Yeah, something like that. I don't think we were really able to decide
> anything at the KS. IIRC Christoph was in favor of having multiple
> slab allocators in the tree whereas I, for example, would rather have
> only one. The SLOB allocator is bit special here because it's for
> embedded. However, I also talked to some embedded folks at the summit
> and none of them were using SLOB because the gains weren't big enough.
> So I don't know if it's being used that widely.
I'm afraid I have only anecdotal reports from SLOB users, and embedded
folks are notorious for lack of feedback, but I only need a few people
to tell me they're shipping 100k units/mo to be confident that SLOB is
in use in millions of devices.
--
http://selenic.com : development and support for Mercurial and Linux
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 22:55 ` Matt Mackall
0 siblings, 0 replies; 121+ messages in thread
From: Matt Mackall @ 2009-11-24 22:55 UTC (permalink / raw)
To: Pekka Enberg; +Cc: Peter Zijlstra, paulmck, linux-mm, cl, LKML, Nick Piggin
On Tue, 2009-11-24 at 23:07 +0200, Pekka Enberg wrote:
> On Tue, Nov 24, 2009 at 9:23 PM, Matt Mackall <mpm@selenic.com> wrote:
> > On Tue, 2009-11-24 at 19:14 +0100, Peter Zijlstra wrote:
> >> On Tue, 2009-11-24 at 11:12 -0600, Matt Mackall wrote:
> >> > On Tue, 2009-11-24 at 09:00 -0800, Paul E. McKenney wrote:
> >> > > On Tue, Nov 24, 2009 at 05:33:26PM +0100, Peter Zijlstra wrote:
> >> > > > On Mon, 2009-11-23 at 21:13 +0200, Pekka Enberg wrote:
> >> > > > > Matt Mackall wrote:
> >> > > > > > This seems like a lot of work to paper over a lockdep false positive in
> >> > > > > > code that should be firmly in the maintenance end of its lifecycle? I'd
> >> > > > > > rather the fix or papering over happen in lockdep.
> >> > > > >
> >> > > > > True that. Is __raw_spin_lock() out of question, Peter?-) Passing the
> >> > > > > state is pretty invasive because of the kmem_cache_free() call in
> >> > > > > slab_destroy(). We re-enter the slab allocator from the outer edges
> >> > > > > which makes spin_lock_nested() very inconvenient.
> >> > > >
> >> > > > I'm perfectly fine with letting the thing be as it is, its apparently
> >> > > > not something that triggers very often, and since slab will be killed
> >> > > > off soon, who cares.
> >> > >
> >> > > Which of the alternatives to slab should I be testing with, then?
> >> >
> >> > I'm guessing your system is in the minority that has more than $10 worth
> >> > of RAM, which means you should probably be evaluating SLUB.
> >>
> >> Well, I was rather hoping that'd die too ;-)
> >>
> >> Weren't we going to go with SLQB?
> >
> > News to me. Perhaps it was discussed at KS.
>
> Yes, we discussed this at KS. The plan was to merge SLQB to mainline
> so people can test it more easily but unfortunately it hasn't gotten
> any loving from Nick recently which makes me think it's going to miss
> the merge window for .33 as well.
>
> > My understanding of the current state of play is:
> >
> > SLUB: default allocator
> > SLAB: deep maintenance, will be removed if SLUB ever covers remaining
> > performance regressions
> > SLOB: useful for low-end (but high-volume!) embedded
> > SLQB: sitting in slab.git#for-next for months, has some ground to cover
> >
> > SLQB and SLUB have pretty similar target audiences, so I agree we should
> > eventually have only one of them. But I strongly expect performance
> > results to be mixed, just as they have been comparing SLUB/SLAB.
> > Similarly, SLQB still has of room for tuning left compared to SLUB, as
> > SLUB did compared to SLAB when it first emerged. It might be a while
> > before a clear winner emerges.
>
> Yeah, something like that. I don't think we were really able to decide
> anything at the KS. IIRC Christoph was in favor of having multiple
> slab allocators in the tree whereas I, for example, would rather have
> only one. The SLOB allocator is bit special here because it's for
> embedded. However, I also talked to some embedded folks at the summit
> and none of them were using SLOB because the gains weren't big enough.
> So I don't know if it's being used that widely.
I'm afraid I have only anecdotal reports from SLOB users, and embedded
folks are notorious for lack of feedback, but I only need a few people
to tell me they're shipping 100k units/mo to be confident that SLOB is
in use in millions of devices.
--
http://selenic.com : development and support for Mercurial and Linux
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 22:55 ` Matt Mackall
@ 2009-11-25 21:59 ` David Rientjes
-1 siblings, 0 replies; 121+ messages in thread
From: David Rientjes @ 2009-11-25 21:59 UTC (permalink / raw)
To: Matt Mackall
Cc: Pekka Enberg, Peter Zijlstra, Paul E. McKenney, linux-mm,
Christoph Lameter, linux-kernel, Nick Piggin
On Tue, 24 Nov 2009, Matt Mackall wrote:
> I'm afraid I have only anecdotal reports from SLOB users, and embedded
> folks are notorious for lack of feedback, but I only need a few people
> to tell me they're shipping 100k units/mo to be confident that SLOB is
> in use in millions of devices.
>
It's much more popular than I had expected; do you think it would be
possible to merge slob's core into another allocator or will it require
seperation forever?
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-25 21:59 ` David Rientjes
0 siblings, 0 replies; 121+ messages in thread
From: David Rientjes @ 2009-11-25 21:59 UTC (permalink / raw)
To: Matt Mackall
Cc: Pekka Enberg, Peter Zijlstra, Paul E. McKenney, linux-mm,
Christoph Lameter, linux-kernel, Nick Piggin
On Tue, 24 Nov 2009, Matt Mackall wrote:
> I'm afraid I have only anecdotal reports from SLOB users, and embedded
> folks are notorious for lack of feedback, but I only need a few people
> to tell me they're shipping 100k units/mo to be confident that SLOB is
> in use in millions of devices.
>
It's much more popular than I had expected; do you think it would be
possible to merge slob's core into another allocator or will it require
seperation forever?
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-25 21:59 ` David Rientjes
@ 2009-11-25 23:06 ` Matt Mackall
-1 siblings, 0 replies; 121+ messages in thread
From: Matt Mackall @ 2009-11-25 23:06 UTC (permalink / raw)
To: David Rientjes
Cc: Pekka Enberg, Peter Zijlstra, Paul E. McKenney, linux-mm,
Christoph Lameter, linux-kernel, Nick Piggin
On Wed, 2009-11-25 at 13:59 -0800, David Rientjes wrote:
> On Tue, 24 Nov 2009, Matt Mackall wrote:
>
> > I'm afraid I have only anecdotal reports from SLOB users, and embedded
> > folks are notorious for lack of feedback, but I only need a few people
> > to tell me they're shipping 100k units/mo to be confident that SLOB is
> > in use in millions of devices.
> >
>
> It's much more popular than I had expected; do you think it would be
> possible to merge slob's core into another allocator or will it require
> seperation forever?
Probably not. It's actually a completely different kind of allocator
than the rest as it doesn't actually use "slabs" at all. It's instead a
slab-like interface on a traditional heap allocator. SLAB/SLUB/SLQB have
much more in common - their biggest differences are about their approach
to scalability/locking issues.
On the upside, SLOB is easily the simplest of the bunch.
--
http://selenic.com : development and support for Mercurial and Linux
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-25 23:06 ` Matt Mackall
0 siblings, 0 replies; 121+ messages in thread
From: Matt Mackall @ 2009-11-25 23:06 UTC (permalink / raw)
To: David Rientjes
Cc: Pekka Enberg, Peter Zijlstra, Paul E. McKenney, linux-mm,
Christoph Lameter, linux-kernel, Nick Piggin
On Wed, 2009-11-25 at 13:59 -0800, David Rientjes wrote:
> On Tue, 24 Nov 2009, Matt Mackall wrote:
>
> > I'm afraid I have only anecdotal reports from SLOB users, and embedded
> > folks are notorious for lack of feedback, but I only need a few people
> > to tell me they're shipping 100k units/mo to be confident that SLOB is
> > in use in millions of devices.
> >
>
> It's much more popular than I had expected; do you think it would be
> possible to merge slob's core into another allocator or will it require
> seperation forever?
Probably not. It's actually a completely different kind of allocator
than the rest as it doesn't actually use "slabs" at all. It's instead a
slab-like interface on a traditional heap allocator. SLAB/SLUB/SLQB have
much more in common - their biggest differences are about their approach
to scalability/locking issues.
On the upside, SLOB is easily the simplest of the bunch.
--
http://selenic.com : development and support for Mercurial and Linux
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-25 21:59 ` David Rientjes
@ 2009-11-27 17:28 ` Christoph Lameter
-1 siblings, 0 replies; 121+ messages in thread
From: Christoph Lameter @ 2009-11-27 17:28 UTC (permalink / raw)
To: David Rientjes
Cc: Matt Mackall, Pekka Enberg, Peter Zijlstra, Paul E. McKenney,
linux-mm, linux-kernel, Nick Piggin
On Wed, 25 Nov 2009, David Rientjes wrote:
> On Tue, 24 Nov 2009, Matt Mackall wrote:
>
> > I'm afraid I have only anecdotal reports from SLOB users, and embedded
> > folks are notorious for lack of feedback, but I only need a few people
> > to tell me they're shipping 100k units/mo to be confident that SLOB is
> > in use in millions of devices.
> >
>
> It's much more popular than I had expected; do you think it would be
> possible to merge slob's core into another allocator or will it require
> seperation forever?
It would be possible to create a slab-common.c and isolate common handling
of all allocators. SLUB and SLQB share quite a lot of code and SLAB could
be cleaned up and made to fit into such a framework.
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-27 17:28 ` Christoph Lameter
0 siblings, 0 replies; 121+ messages in thread
From: Christoph Lameter @ 2009-11-27 17:28 UTC (permalink / raw)
To: David Rientjes
Cc: Matt Mackall, Pekka Enberg, Peter Zijlstra, Paul E. McKenney,
linux-mm, linux-kernel, Nick Piggin
On Wed, 25 Nov 2009, David Rientjes wrote:
> On Tue, 24 Nov 2009, Matt Mackall wrote:
>
> > I'm afraid I have only anecdotal reports from SLOB users, and embedded
> > folks are notorious for lack of feedback, but I only need a few people
> > to tell me they're shipping 100k units/mo to be confident that SLOB is
> > in use in millions of devices.
> >
>
> It's much more popular than I had expected; do you think it would be
> possible to merge slob's core into another allocator or will it require
> seperation forever?
It would be possible to create a slab-common.c and isolate common handling
of all allocators. SLUB and SLQB share quite a lot of code and SLAB could
be cleaned up and made to fit into such a framework.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-27 17:28 ` Christoph Lameter
@ 2009-11-30 23:14 ` David Rientjes
-1 siblings, 0 replies; 121+ messages in thread
From: David Rientjes @ 2009-11-30 23:14 UTC (permalink / raw)
To: Christoph Lameter
Cc: Matt Mackall, Pekka Enberg, Peter Zijlstra, Paul E. McKenney,
linux-mm, linux-kernel, Nick Piggin
On Fri, 27 Nov 2009, Christoph Lameter wrote:
> > > I'm afraid I have only anecdotal reports from SLOB users, and embedded
> > > folks are notorious for lack of feedback, but I only need a few people
> > > to tell me they're shipping 100k units/mo to be confident that SLOB is
> > > in use in millions of devices.
> > >
> >
> > It's much more popular than I had expected; do you think it would be
> > possible to merge slob's core into another allocator or will it require
> > seperation forever?
>
> It would be possible to create a slab-common.c and isolate common handling
> of all allocators. SLUB and SLQB share quite a lot of code and SLAB could
> be cleaned up and made to fit into such a framework.
>
Right, but the user is still left with a decision of which slab allocator
to compile into their kernel, each with distinct advantages and
disadvantages that get exploited for the wide range of workloads that it
runs. If slob could be merged into another allocator, it would be simple
to remove the distinction of it being seperate altogether, the differences
would depend on CONFIG_EMBEDDED instead.
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-30 23:14 ` David Rientjes
0 siblings, 0 replies; 121+ messages in thread
From: David Rientjes @ 2009-11-30 23:14 UTC (permalink / raw)
To: Christoph Lameter
Cc: Matt Mackall, Pekka Enberg, Peter Zijlstra, Paul E. McKenney,
linux-mm, linux-kernel, Nick Piggin
On Fri, 27 Nov 2009, Christoph Lameter wrote:
> > > I'm afraid I have only anecdotal reports from SLOB users, and embedded
> > > folks are notorious for lack of feedback, but I only need a few people
> > > to tell me they're shipping 100k units/mo to be confident that SLOB is
> > > in use in millions of devices.
> > >
> >
> > It's much more popular than I had expected; do you think it would be
> > possible to merge slob's core into another allocator or will it require
> > seperation forever?
>
> It would be possible to create a slab-common.c and isolate common handling
> of all allocators. SLUB and SLQB share quite a lot of code and SLAB could
> be cleaned up and made to fit into such a framework.
>
Right, but the user is still left with a decision of which slab allocator
to compile into their kernel, each with distinct advantages and
disadvantages that get exploited for the wide range of workloads that it
runs. If slob could be merged into another allocator, it would be simple
to remove the distinction of it being seperate altogether, the differences
would depend on CONFIG_EMBEDDED instead.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-30 23:14 ` David Rientjes
@ 2009-12-01 0:21 ` Matt Mackall
-1 siblings, 0 replies; 121+ messages in thread
From: Matt Mackall @ 2009-12-01 0:21 UTC (permalink / raw)
To: David Rientjes
Cc: Christoph Lameter, Pekka Enberg, Peter Zijlstra, Paul E. McKenney,
linux-mm, linux-kernel, Nick Piggin
On Mon, 2009-11-30 at 15:14 -0800, David Rientjes wrote:
> On Fri, 27 Nov 2009, Christoph Lameter wrote:
>
> > > > I'm afraid I have only anecdotal reports from SLOB users, and embedded
> > > > folks are notorious for lack of feedback, but I only need a few people
> > > > to tell me they're shipping 100k units/mo to be confident that SLOB is
> > > > in use in millions of devices.
> > > >
> > >
> > > It's much more popular than I had expected; do you think it would be
> > > possible to merge slob's core into another allocator or will it require
> > > seperation forever?
> >
> > It would be possible to create a slab-common.c and isolate common handling
> > of all allocators. SLUB and SLQB share quite a lot of code and SLAB could
> > be cleaned up and made to fit into such a framework.
> >
>
> Right, but the user is still left with a decision of which slab allocator
> to compile into their kernel, each with distinct advantages and
> disadvantages that get exploited for the wide range of workloads that it
> runs. If slob could be merged into another allocator, it would be simple
> to remove the distinction of it being seperate altogether, the differences
> would depend on CONFIG_EMBEDDED instead.
No no no wrong wrong wrong. Again, SLOB is the least mergeable of the
set. It has vastly different priorities, design, and code from the rest.
Literally the only thing it has in common with the other three is the
interface.
And it's not even something that -most- of embedded devices will want to
use, so it can't be keyed off CONFIG_EMBEDDED anyway. If you've got even
16MB of memory, you probably want to use a SLAB-like allocator (ie not
SLOB). But there are -millions- of devices being shipped that don't have
that much memory, a situation that's likely to continue until you can
fit a larger Linux system entirely in a <$1 microcontroller-sized device
(probably 5 years off still).
This thread is annoying. The problem that triggered this thread is not
in SLOB/SLUB/SLQB, nor even in our bog-standard 10yo deep-maintenance
known-to-work SLAB code. The problem was a FALSE POSITIVE from lockdep
on code that PREDATES lockdep itself. There is nothing in this thread to
indicate that there is a serious problem maintaining multiple
allocators. In fact, considerably more time has been spent (as usual)
debating non-existent problems than fixing real ones.
I agree that having only one of SLAB/SLUB/SLQB would be nice, but it's
going to take a lot of heavy lifting in the form of hacking and
benchmarking to have confidence that there's a clear performance winner.
Given the multiple dimensions of performance
(scalability/throughput/latency for starters), I don't even think
there's good a priori reason to believe that a clear winner CAN exist.
SLUB may always have better latency, and SLQB may always have better
throughput. If you're NYSE, you might have different performance
priorities than if you're Google or CERN or Sony that amount to millions
of dollars. Repeatedly saying "but we should have only one allocator"
isn't going to change that.
--
http://selenic.com : development and support for Mercurial and Linux
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-12-01 0:21 ` Matt Mackall
0 siblings, 0 replies; 121+ messages in thread
From: Matt Mackall @ 2009-12-01 0:21 UTC (permalink / raw)
To: David Rientjes
Cc: Christoph Lameter, Pekka Enberg, Peter Zijlstra, Paul E. McKenney,
linux-mm, linux-kernel, Nick Piggin
On Mon, 2009-11-30 at 15:14 -0800, David Rientjes wrote:
> On Fri, 27 Nov 2009, Christoph Lameter wrote:
>
> > > > I'm afraid I have only anecdotal reports from SLOB users, and embedded
> > > > folks are notorious for lack of feedback, but I only need a few people
> > > > to tell me they're shipping 100k units/mo to be confident that SLOB is
> > > > in use in millions of devices.
> > > >
> > >
> > > It's much more popular than I had expected; do you think it would be
> > > possible to merge slob's core into another allocator or will it require
> > > seperation forever?
> >
> > It would be possible to create a slab-common.c and isolate common handling
> > of all allocators. SLUB and SLQB share quite a lot of code and SLAB could
> > be cleaned up and made to fit into such a framework.
> >
>
> Right, but the user is still left with a decision of which slab allocator
> to compile into their kernel, each with distinct advantages and
> disadvantages that get exploited for the wide range of workloads that it
> runs. If slob could be merged into another allocator, it would be simple
> to remove the distinction of it being seperate altogether, the differences
> would depend on CONFIG_EMBEDDED instead.
No no no wrong wrong wrong. Again, SLOB is the least mergeable of the
set. It has vastly different priorities, design, and code from the rest.
Literally the only thing it has in common with the other three is the
interface.
And it's not even something that -most- of embedded devices will want to
use, so it can't be keyed off CONFIG_EMBEDDED anyway. If you've got even
16MB of memory, you probably want to use a SLAB-like allocator (ie not
SLOB). But there are -millions- of devices being shipped that don't have
that much memory, a situation that's likely to continue until you can
fit a larger Linux system entirely in a <$1 microcontroller-sized device
(probably 5 years off still).
This thread is annoying. The problem that triggered this thread is not
in SLOB/SLUB/SLQB, nor even in our bog-standard 10yo deep-maintenance
known-to-work SLAB code. The problem was a FALSE POSITIVE from lockdep
on code that PREDATES lockdep itself. There is nothing in this thread to
indicate that there is a serious problem maintaining multiple
allocators. In fact, considerably more time has been spent (as usual)
debating non-existent problems than fixing real ones.
I agree that having only one of SLAB/SLUB/SLQB would be nice, but it's
going to take a lot of heavy lifting in the form of hacking and
benchmarking to have confidence that there's a clear performance winner.
Given the multiple dimensions of performance
(scalability/throughput/latency for starters), I don't even think
there's good a priori reason to believe that a clear winner CAN exist.
SLUB may always have better latency, and SLQB may always have better
throughput. If you're NYSE, you might have different performance
priorities than if you're Google or CERN or Sony that amount to millions
of dollars. Repeatedly saying "but we should have only one allocator"
isn't going to change that.
--
http://selenic.com : development and support for Mercurial and Linux
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-12-01 0:21 ` Matt Mackall
@ 2009-12-01 22:41 ` David Rientjes
-1 siblings, 0 replies; 121+ messages in thread
From: David Rientjes @ 2009-12-01 22:41 UTC (permalink / raw)
To: Matt Mackall
Cc: Christoph Lameter, Pekka Enberg, Peter Zijlstra, Paul E. McKenney,
linux-mm, linux-kernel, Nick Piggin
On Mon, 30 Nov 2009, Matt Mackall wrote:
> And it's not even something that -most- of embedded devices will want to
> use, so it can't be keyed off CONFIG_EMBEDDED anyway. If you've got even
> 16MB of memory, you probably want to use a SLAB-like allocator (ie not
> SLOB). But there are -millions- of devices being shipped that don't have
> that much memory, a situation that's likely to continue until you can
> fit a larger Linux system entirely in a <$1 microcontroller-sized device
> (probably 5 years off still).
>
What qualifying criteria can we use to automatically select slob for a
kernel or the disqualifying criteria to automatically select slub as a
default, then? It currently depends on CONFIG_EMBEDDED, but it still
requires the user to specifically chose the allocator over another. Could
we base this decision on another config option enabled for systems with
less than 16MB?
> This thread is annoying. The problem that triggered this thread is not
> in SLOB/SLUB/SLQB, nor even in our bog-standard 10yo deep-maintenance
> known-to-work SLAB code. The problem was a FALSE POSITIVE from lockdep
> on code that PREDATES lockdep itself. There is nothing in this thread to
> indicate that there is a serious problem maintaining multiple
> allocators. In fact, considerably more time has been spent (as usual)
> debating non-existent problems than fixing real ones.
>
We could move the discussion on the long-term maintainable aspects of
multiple slab allocators to a new thread if you'd like.
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-12-01 22:41 ` David Rientjes
0 siblings, 0 replies; 121+ messages in thread
From: David Rientjes @ 2009-12-01 22:41 UTC (permalink / raw)
To: Matt Mackall
Cc: Christoph Lameter, Pekka Enberg, Peter Zijlstra, Paul E. McKenney,
linux-mm, linux-kernel, Nick Piggin
On Mon, 30 Nov 2009, Matt Mackall wrote:
> And it's not even something that -most- of embedded devices will want to
> use, so it can't be keyed off CONFIG_EMBEDDED anyway. If you've got even
> 16MB of memory, you probably want to use a SLAB-like allocator (ie not
> SLOB). But there are -millions- of devices being shipped that don't have
> that much memory, a situation that's likely to continue until you can
> fit a larger Linux system entirely in a <$1 microcontroller-sized device
> (probably 5 years off still).
>
What qualifying criteria can we use to automatically select slob for a
kernel or the disqualifying criteria to automatically select slub as a
default, then? It currently depends on CONFIG_EMBEDDED, but it still
requires the user to specifically chose the allocator over another. Could
we base this decision on another config option enabled for systems with
less than 16MB?
> This thread is annoying. The problem that triggered this thread is not
> in SLOB/SLUB/SLQB, nor even in our bog-standard 10yo deep-maintenance
> known-to-work SLAB code. The problem was a FALSE POSITIVE from lockdep
> on code that PREDATES lockdep itself. There is nothing in this thread to
> indicate that there is a serious problem maintaining multiple
> allocators. In fact, considerably more time has been spent (as usual)
> debating non-existent problems than fixing real ones.
>
We could move the discussion on the long-term maintainable aspects of
multiple slab allocators to a new thread if you'd like.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-30 23:14 ` David Rientjes
@ 2009-12-01 16:47 ` Christoph Lameter
-1 siblings, 0 replies; 121+ messages in thread
From: Christoph Lameter @ 2009-12-01 16:47 UTC (permalink / raw)
To: David Rientjes
Cc: Matt Mackall, Pekka Enberg, Peter Zijlstra, Paul E. McKenney,
linux-mm, linux-kernel, Nick Piggin
On Mon, 30 Nov 2009, David Rientjes wrote:
> Right, but the user is still left with a decision of which slab allocator
> to compile into their kernel, each with distinct advantages and
> disadvantages that get exploited for the wide range of workloads that it
> runs. If slob could be merged into another allocator, it would be simple
> to remove the distinction of it being seperate altogether, the differences
> would depend on CONFIG_EMBEDDED instead.
No embedded folks that I know are using SLOB. CONFIG_EMBEDDED still would
require a selection of allocators. I have no direct knowledge of anyone
using SLOB (despite traveling widely this year) aside from what Matt tells
me.
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-12-01 16:47 ` Christoph Lameter
0 siblings, 0 replies; 121+ messages in thread
From: Christoph Lameter @ 2009-12-01 16:47 UTC (permalink / raw)
To: David Rientjes
Cc: Matt Mackall, Pekka Enberg, Peter Zijlstra, Paul E. McKenney,
linux-mm, linux-kernel, Nick Piggin
On Mon, 30 Nov 2009, David Rientjes wrote:
> Right, but the user is still left with a decision of which slab allocator
> to compile into their kernel, each with distinct advantages and
> disadvantages that get exploited for the wide range of workloads that it
> runs. If slob could be merged into another allocator, it would be simple
> to remove the distinction of it being seperate altogether, the differences
> would depend on CONFIG_EMBEDDED instead.
No embedded folks that I know are using SLOB. CONFIG_EMBEDDED still would
require a selection of allocators. I have no direct knowledge of anyone
using SLOB (despite traveling widely this year) aside from what Matt tells
me.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 21:07 ` Pekka Enberg
@ 2009-11-27 17:26 ` Christoph Lameter
-1 siblings, 0 replies; 121+ messages in thread
From: Christoph Lameter @ 2009-11-27 17:26 UTC (permalink / raw)
To: Pekka Enberg
Cc: Matt Mackall, Peter Zijlstra, paulmck, linux-mm, LKML,
Nick Piggin
On Tue, 24 Nov 2009, Pekka Enberg wrote:
> Yeah, something like that. I don't think we were really able to decide
> anything at the KS. IIRC Christoph was in favor of having multiple
> slab allocators in the tree whereas I, for example, would rather have
> only one. The SLOB allocator is bit special here because it's for
> embedded. However, I also talked to some embedded folks at the summit
> and none of them were using SLOB because the gains weren't big enough.
> So I don't know if it's being used that widely.
Are there any current numbers on SLOB memory advantage vs the other
allcoators?
> I personally was hoping for SLUB or SLQB to emerge as a clear winner
> so we could delete the rest but that hasn't really happened.
I think having multiple allocators makes for a heathly competition between
them and stabillizes the allocator API. Frankly I would like to see more
exchangable subsystems in the core. The scheduler seems to be not
competitive for my current workloads running on 2.6.22 (we have not tried
2.6.32 yet) and I have a lot of concerns about the continual performance
deteriorations in the page allocator and the reclaim logic due to feature
bloat.
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-27 17:26 ` Christoph Lameter
0 siblings, 0 replies; 121+ messages in thread
From: Christoph Lameter @ 2009-11-27 17:26 UTC (permalink / raw)
To: Pekka Enberg
Cc: Matt Mackall, Peter Zijlstra, paulmck, linux-mm, LKML,
Nick Piggin
On Tue, 24 Nov 2009, Pekka Enberg wrote:
> Yeah, something like that. I don't think we were really able to decide
> anything at the KS. IIRC Christoph was in favor of having multiple
> slab allocators in the tree whereas I, for example, would rather have
> only one. The SLOB allocator is bit special here because it's for
> embedded. However, I also talked to some embedded folks at the summit
> and none of them were using SLOB because the gains weren't big enough.
> So I don't know if it's being used that widely.
Are there any current numbers on SLOB memory advantage vs the other
allcoators?
> I personally was hoping for SLUB or SLQB to emerge as a clear winner
> so we could delete the rest but that hasn't really happened.
I think having multiple allocators makes for a heathly competition between
them and stabillizes the allocator API. Frankly I would like to see more
exchangable subsystems in the core. The scheduler seems to be not
competitive for my current workloads running on 2.6.22 (we have not tried
2.6.32 yet) and I have a lot of concerns about the continual performance
deteriorations in the page allocator and the reclaim logic due to feature
bloat.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-23 19:00 ` Pekka Enberg
@ 2009-11-23 19:30 ` Christoph Lameter
-1 siblings, 0 replies; 121+ messages in thread
From: Christoph Lameter @ 2009-11-23 19:30 UTC (permalink / raw)
To: Pekka Enberg; +Cc: Peter Zijlstra, paulmck, linux-mm, mpm, LKML, Nick Piggin
On Mon, 23 Nov 2009, Pekka Enberg wrote:
> That turns out to be _very_ hard. How about something like the following
> untested patch which delays slab_destroy() while we're under nc->lock.
Code changes to deal with a diagnostic issue?
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-23 19:30 ` Christoph Lameter
0 siblings, 0 replies; 121+ messages in thread
From: Christoph Lameter @ 2009-11-23 19:30 UTC (permalink / raw)
To: Pekka Enberg; +Cc: Peter Zijlstra, paulmck, linux-mm, mpm, LKML, Nick Piggin
On Mon, 23 Nov 2009, Pekka Enberg wrote:
> That turns out to be _very_ hard. How about something like the following
> untested patch which delays slab_destroy() while we're under nc->lock.
Code changes to deal with a diagnostic issue?
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-23 19:30 ` Christoph Lameter
@ 2009-11-23 19:43 ` Paul E. McKenney
-1 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-23 19:43 UTC (permalink / raw)
To: Christoph Lameter
Cc: Pekka Enberg, Peter Zijlstra, linux-mm, mpm, LKML, Nick Piggin
On Mon, Nov 23, 2009 at 01:30:50PM -0600, Christoph Lameter wrote:
> On Mon, 23 Nov 2009, Pekka Enberg wrote:
>
> > That turns out to be _very_ hard. How about something like the following
> > untested patch which delays slab_destroy() while we're under nc->lock.
>
> Code changes to deal with a diagnostic issue?
Indeed! At least if we want the diagnostics to have any value, we do
need to avoid false alarms. Same reasoning as for gcc warnings, right?
Thanx, Paul
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-23 19:43 ` Paul E. McKenney
0 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-23 19:43 UTC (permalink / raw)
To: Christoph Lameter
Cc: Pekka Enberg, Peter Zijlstra, linux-mm, mpm, LKML, Nick Piggin
On Mon, Nov 23, 2009 at 01:30:50PM -0600, Christoph Lameter wrote:
> On Mon, 23 Nov 2009, Pekka Enberg wrote:
>
> > That turns out to be _very_ hard. How about something like the following
> > untested patch which delays slab_destroy() while we're under nc->lock.
>
> Code changes to deal with a diagnostic issue?
Indeed! At least if we want the diagnostics to have any value, we do
need to avoid false alarms. Same reasoning as for gcc warnings, right?
Thanx, Paul
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-23 19:30 ` Christoph Lameter
@ 2009-11-23 19:50 ` Pekka Enberg
-1 siblings, 0 replies; 121+ messages in thread
From: Pekka Enberg @ 2009-11-23 19:50 UTC (permalink / raw)
To: Christoph Lameter
Cc: Peter Zijlstra, paulmck, linux-mm, mpm, LKML, Nick Piggin
On Mon, 23 Nov 2009, Pekka Enberg wrote:
> > That turns out to be _very_ hard. How about something like the following
> > untested patch which delays slab_destroy() while we're under nc->lock.
On Mon, 2009-11-23 at 13:30 -0600, Christoph Lameter wrote:
> Code changes to deal with a diagnostic issue?
OK, fair enough. If I suffer permanent brain damage from staring at the
SLAB code for too long, I hope you and Matt will chip in to pay for my
medication.
I think I was looking at the wrong thing here. The problem is in
cache_free_alien() so the comment in slab_destroy() isn't relevant.
Looking at init_lock_keys() we already do special lockdep annotations
but there's a catch (as explained in a comment on top of
on_slab_alc_key):
* We set lock class for alien array caches which are up during init.
* The lock annotation will be lost if all cpus of a node goes down and
* then comes back up during hotplug
Paul said he was running CPU hotplug so maybe that explains the problem?
Pekka
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-23 19:50 ` Pekka Enberg
0 siblings, 0 replies; 121+ messages in thread
From: Pekka Enberg @ 2009-11-23 19:50 UTC (permalink / raw)
To: Christoph Lameter
Cc: Peter Zijlstra, paulmck, linux-mm, mpm, LKML, Nick Piggin
On Mon, 23 Nov 2009, Pekka Enberg wrote:
> > That turns out to be _very_ hard. How about something like the following
> > untested patch which delays slab_destroy() while we're under nc->lock.
On Mon, 2009-11-23 at 13:30 -0600, Christoph Lameter wrote:
> Code changes to deal with a diagnostic issue?
OK, fair enough. If I suffer permanent brain damage from staring at the
SLAB code for too long, I hope you and Matt will chip in to pay for my
medication.
I think I was looking at the wrong thing here. The problem is in
cache_free_alien() so the comment in slab_destroy() isn't relevant.
Looking at init_lock_keys() we already do special lockdep annotations
but there's a catch (as explained in a comment on top of
on_slab_alc_key):
* We set lock class for alien array caches which are up during init.
* The lock annotation will be lost if all cpus of a node goes down and
* then comes back up during hotplug
Paul said he was running CPU hotplug so maybe that explains the problem?
Pekka
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-23 19:50 ` Pekka Enberg
@ 2009-11-23 20:01 ` Pekka Enberg
-1 siblings, 0 replies; 121+ messages in thread
From: Pekka Enberg @ 2009-11-23 20:01 UTC (permalink / raw)
To: Christoph Lameter
Cc: Peter Zijlstra, paulmck, linux-mm, mpm, LKML, Nick Piggin
On Mon, 2009-11-23 at 21:50 +0200, Pekka Enberg wrote:
> On Mon, 23 Nov 2009, Pekka Enberg wrote:
> > > That turns out to be _very_ hard. How about something like the following
> > > untested patch which delays slab_destroy() while we're under nc->lock.
>
> On Mon, 2009-11-23 at 13:30 -0600, Christoph Lameter wrote:
> > Code changes to deal with a diagnostic issue?
>
> OK, fair enough. If I suffer permanent brain damage from staring at the
> SLAB code for too long, I hope you and Matt will chip in to pay for my
> medication.
>
> I think I was looking at the wrong thing here. The problem is in
> cache_free_alien() so the comment in slab_destroy() isn't relevant.
> Looking at init_lock_keys() we already do special lockdep annotations
> but there's a catch (as explained in a comment on top of
> on_slab_alc_key):
>
> * We set lock class for alien array caches which are up during init.
> * The lock annotation will be lost if all cpus of a node goes down and
> * then comes back up during hotplug
>
> Paul said he was running CPU hotplug so maybe that explains the problem?
Maybe something like this untested patch fixes the issue...
Pekka
diff --git a/mm/slab.c b/mm/slab.c
index 7dfa481..84de47e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -604,6 +604,26 @@ static struct kmem_cache cache_cache = {
#define BAD_ALIEN_MAGIC 0x01020304ul
+/*
+ * chicken and egg problem: delay the per-cpu array allocation
+ * until the general caches are up.
+ */
+static enum {
+ NONE,
+ PARTIAL_AC,
+ PARTIAL_L3,
+ EARLY,
+ FULL
+} g_cpucache_up;
+
+/*
+ * used by boot code to determine if it can use slab based allocator
+ */
+int slab_is_available(void)
+{
+ return g_cpucache_up >= EARLY;
+}
+
#ifdef CONFIG_LOCKDEP
/*
@@ -620,40 +640,52 @@ static struct kmem_cache cache_cache = {
static struct lock_class_key on_slab_l3_key;
static struct lock_class_key on_slab_alc_key;
-static inline void init_lock_keys(void)
-
+static void init_node_lock_keys(int q)
{
- int q;
struct cache_sizes *s = malloc_sizes;
- while (s->cs_size != ULONG_MAX) {
- for_each_node(q) {
- struct array_cache **alc;
- int r;
- struct kmem_list3 *l3 = s->cs_cachep->nodelists[q];
- if (!l3 || OFF_SLAB(s->cs_cachep))
- continue;
- lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
- alc = l3->alien;
- /*
- * FIXME: This check for BAD_ALIEN_MAGIC
- * should go away when common slab code is taught to
- * work even without alien caches.
- * Currently, non NUMA code returns BAD_ALIEN_MAGIC
- * for alloc_alien_cache,
- */
- if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
- continue;
- for_each_node(r) {
- if (alc[r])
- lockdep_set_class(&alc[r]->lock,
- &on_slab_alc_key);
- }
+ if (g_cpucache_up != FULL)
+ return;
+
+ for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
+ struct array_cache **alc;
+ struct kmem_list3 *l3;
+ int r;
+
+ l3 = s->cs_cachep->nodelists[q];
+ if (!l3 || OFF_SLAB(s->cs_cachep))
+ return;
+ lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
+ alc = l3->alien;
+ /*
+ * FIXME: This check for BAD_ALIEN_MAGIC
+ * should go away when common slab code is taught to
+ * work even without alien caches.
+ * Currently, non NUMA code returns BAD_ALIEN_MAGIC
+ * for alloc_alien_cache,
+ */
+ if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
+ return;
+ for_each_node(r) {
+ if (alc[r])
+ lockdep_set_class(&alc[r]->lock,
+ &on_slab_alc_key);
}
- s++;
}
}
+
+static inline void init_lock_keys(void)
+{
+ int node;
+
+ for_each_node(node)
+ init_node_lock_keys(node);
+}
#else
+static void init_node_lock_keys(int q)
+{
+}
+
static inline void init_lock_keys(void)
{
}
@@ -665,26 +697,6 @@ static inline void init_lock_keys(void)
static DEFINE_MUTEX(cache_chain_mutex);
static struct list_head cache_chain;
-/*
- * chicken and egg problem: delay the per-cpu array allocation
- * until the general caches are up.
- */
-static enum {
- NONE,
- PARTIAL_AC,
- PARTIAL_L3,
- EARLY,
- FULL
-} g_cpucache_up;
-
-/*
- * used by boot code to determine if it can use slab based allocator
- */
-int slab_is_available(void)
-{
- return g_cpucache_up >= EARLY;
-}
-
static DEFINE_PER_CPU(struct delayed_work, reap_work);
static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
@@ -1254,6 +1266,8 @@ static int __cpuinit cpuup_prepare(long cpu)
kfree(shared);
free_alien_cache(alien);
}
+ init_node_lock_keys(node);
+
return 0;
bad:
cpuup_canceled(cpu);
^ permalink raw reply related [flat|nested] 121+ messages in thread* Re: lockdep complaints in slab allocator
@ 2009-11-23 20:01 ` Pekka Enberg
0 siblings, 0 replies; 121+ messages in thread
From: Pekka Enberg @ 2009-11-23 20:01 UTC (permalink / raw)
To: Christoph Lameter
Cc: Peter Zijlstra, paulmck, linux-mm, mpm, LKML, Nick Piggin
On Mon, 2009-11-23 at 21:50 +0200, Pekka Enberg wrote:
> On Mon, 23 Nov 2009, Pekka Enberg wrote:
> > > That turns out to be _very_ hard. How about something like the following
> > > untested patch which delays slab_destroy() while we're under nc->lock.
>
> On Mon, 2009-11-23 at 13:30 -0600, Christoph Lameter wrote:
> > Code changes to deal with a diagnostic issue?
>
> OK, fair enough. If I suffer permanent brain damage from staring at the
> SLAB code for too long, I hope you and Matt will chip in to pay for my
> medication.
>
> I think I was looking at the wrong thing here. The problem is in
> cache_free_alien() so the comment in slab_destroy() isn't relevant.
> Looking at init_lock_keys() we already do special lockdep annotations
> but there's a catch (as explained in a comment on top of
> on_slab_alc_key):
>
> * We set lock class for alien array caches which are up during init.
> * The lock annotation will be lost if all cpus of a node goes down and
> * then comes back up during hotplug
>
> Paul said he was running CPU hotplug so maybe that explains the problem?
Maybe something like this untested patch fixes the issue...
Pekka
diff --git a/mm/slab.c b/mm/slab.c
index 7dfa481..84de47e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -604,6 +604,26 @@ static struct kmem_cache cache_cache = {
#define BAD_ALIEN_MAGIC 0x01020304ul
+/*
+ * chicken and egg problem: delay the per-cpu array allocation
+ * until the general caches are up.
+ */
+static enum {
+ NONE,
+ PARTIAL_AC,
+ PARTIAL_L3,
+ EARLY,
+ FULL
+} g_cpucache_up;
+
+/*
+ * used by boot code to determine if it can use slab based allocator
+ */
+int slab_is_available(void)
+{
+ return g_cpucache_up >= EARLY;
+}
+
#ifdef CONFIG_LOCKDEP
/*
@@ -620,40 +640,52 @@ static struct kmem_cache cache_cache = {
static struct lock_class_key on_slab_l3_key;
static struct lock_class_key on_slab_alc_key;
-static inline void init_lock_keys(void)
-
+static void init_node_lock_keys(int q)
{
- int q;
struct cache_sizes *s = malloc_sizes;
- while (s->cs_size != ULONG_MAX) {
- for_each_node(q) {
- struct array_cache **alc;
- int r;
- struct kmem_list3 *l3 = s->cs_cachep->nodelists[q];
- if (!l3 || OFF_SLAB(s->cs_cachep))
- continue;
- lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
- alc = l3->alien;
- /*
- * FIXME: This check for BAD_ALIEN_MAGIC
- * should go away when common slab code is taught to
- * work even without alien caches.
- * Currently, non NUMA code returns BAD_ALIEN_MAGIC
- * for alloc_alien_cache,
- */
- if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
- continue;
- for_each_node(r) {
- if (alc[r])
- lockdep_set_class(&alc[r]->lock,
- &on_slab_alc_key);
- }
+ if (g_cpucache_up != FULL)
+ return;
+
+ for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
+ struct array_cache **alc;
+ struct kmem_list3 *l3;
+ int r;
+
+ l3 = s->cs_cachep->nodelists[q];
+ if (!l3 || OFF_SLAB(s->cs_cachep))
+ return;
+ lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
+ alc = l3->alien;
+ /*
+ * FIXME: This check for BAD_ALIEN_MAGIC
+ * should go away when common slab code is taught to
+ * work even without alien caches.
+ * Currently, non NUMA code returns BAD_ALIEN_MAGIC
+ * for alloc_alien_cache,
+ */
+ if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
+ return;
+ for_each_node(r) {
+ if (alc[r])
+ lockdep_set_class(&alc[r]->lock,
+ &on_slab_alc_key);
}
- s++;
}
}
+
+static inline void init_lock_keys(void)
+{
+ int node;
+
+ for_each_node(node)
+ init_node_lock_keys(node);
+}
#else
+static void init_node_lock_keys(int q)
+{
+}
+
static inline void init_lock_keys(void)
{
}
@@ -665,26 +697,6 @@ static inline void init_lock_keys(void)
static DEFINE_MUTEX(cache_chain_mutex);
static struct list_head cache_chain;
-/*
- * chicken and egg problem: delay the per-cpu array allocation
- * until the general caches are up.
- */
-static enum {
- NONE,
- PARTIAL_AC,
- PARTIAL_L3,
- EARLY,
- FULL
-} g_cpucache_up;
-
-/*
- * used by boot code to determine if it can use slab based allocator
- */
-int slab_is_available(void)
-{
- return g_cpucache_up >= EARLY;
-}
-
static DEFINE_PER_CPU(struct delayed_work, reap_work);
static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
@@ -1254,6 +1266,8 @@ static int __cpuinit cpuup_prepare(long cpu)
kfree(shared);
free_alien_cache(alien);
}
+ init_node_lock_keys(node);
+
return 0;
bad:
cpuup_canceled(cpu);
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 121+ messages in thread* Re: lockdep complaints in slab allocator
2009-11-23 20:01 ` Pekka Enberg
@ 2009-11-23 20:57 ` Paul E. McKenney
-1 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-23 20:57 UTC (permalink / raw)
To: Pekka Enberg
Cc: Christoph Lameter, Peter Zijlstra, linux-mm, mpm, LKML,
Nick Piggin
On Mon, Nov 23, 2009 at 10:01:15PM +0200, Pekka Enberg wrote:
> On Mon, 2009-11-23 at 21:50 +0200, Pekka Enberg wrote:
> > On Mon, 23 Nov 2009, Pekka Enberg wrote:
> > > > That turns out to be _very_ hard. How about something like the following
> > > > untested patch which delays slab_destroy() while we're under nc->lock.
> >
> > On Mon, 2009-11-23 at 13:30 -0600, Christoph Lameter wrote:
> > > Code changes to deal with a diagnostic issue?
> >
> > OK, fair enough. If I suffer permanent brain damage from staring at the
> > SLAB code for too long, I hope you and Matt will chip in to pay for my
> > medication.
> >
> > I think I was looking at the wrong thing here. The problem is in
> > cache_free_alien() so the comment in slab_destroy() isn't relevant.
> > Looking at init_lock_keys() we already do special lockdep annotations
> > but there's a catch (as explained in a comment on top of
> > on_slab_alc_key):
> >
> > * We set lock class for alien array caches which are up during init.
> > * The lock annotation will be lost if all cpus of a node goes down and
> > * then comes back up during hotplug
> >
> > Paul said he was running CPU hotplug so maybe that explains the problem?
>
> Maybe something like this untested patch fixes the issue...
I will give it a go!
Thanx, Paul
> Pekka
>
> diff --git a/mm/slab.c b/mm/slab.c
> index 7dfa481..84de47e 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -604,6 +604,26 @@ static struct kmem_cache cache_cache = {
>
> #define BAD_ALIEN_MAGIC 0x01020304ul
>
> +/*
> + * chicken and egg problem: delay the per-cpu array allocation
> + * until the general caches are up.
> + */
> +static enum {
> + NONE,
> + PARTIAL_AC,
> + PARTIAL_L3,
> + EARLY,
> + FULL
> +} g_cpucache_up;
> +
> +/*
> + * used by boot code to determine if it can use slab based allocator
> + */
> +int slab_is_available(void)
> +{
> + return g_cpucache_up >= EARLY;
> +}
> +
> #ifdef CONFIG_LOCKDEP
>
> /*
> @@ -620,40 +640,52 @@ static struct kmem_cache cache_cache = {
> static struct lock_class_key on_slab_l3_key;
> static struct lock_class_key on_slab_alc_key;
>
> -static inline void init_lock_keys(void)
> -
> +static void init_node_lock_keys(int q)
> {
> - int q;
> struct cache_sizes *s = malloc_sizes;
>
> - while (s->cs_size != ULONG_MAX) {
> - for_each_node(q) {
> - struct array_cache **alc;
> - int r;
> - struct kmem_list3 *l3 = s->cs_cachep->nodelists[q];
> - if (!l3 || OFF_SLAB(s->cs_cachep))
> - continue;
> - lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
> - alc = l3->alien;
> - /*
> - * FIXME: This check for BAD_ALIEN_MAGIC
> - * should go away when common slab code is taught to
> - * work even without alien caches.
> - * Currently, non NUMA code returns BAD_ALIEN_MAGIC
> - * for alloc_alien_cache,
> - */
> - if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
> - continue;
> - for_each_node(r) {
> - if (alc[r])
> - lockdep_set_class(&alc[r]->lock,
> - &on_slab_alc_key);
> - }
> + if (g_cpucache_up != FULL)
> + return;
> +
> + for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
> + struct array_cache **alc;
> + struct kmem_list3 *l3;
> + int r;
> +
> + l3 = s->cs_cachep->nodelists[q];
> + if (!l3 || OFF_SLAB(s->cs_cachep))
> + return;
> + lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
> + alc = l3->alien;
> + /*
> + * FIXME: This check for BAD_ALIEN_MAGIC
> + * should go away when common slab code is taught to
> + * work even without alien caches.
> + * Currently, non NUMA code returns BAD_ALIEN_MAGIC
> + * for alloc_alien_cache,
> + */
> + if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
> + return;
> + for_each_node(r) {
> + if (alc[r])
> + lockdep_set_class(&alc[r]->lock,
> + &on_slab_alc_key);
> }
> - s++;
> }
> }
> +
> +static inline void init_lock_keys(void)
> +{
> + int node;
> +
> + for_each_node(node)
> + init_node_lock_keys(node);
> +}
> #else
> +static void init_node_lock_keys(int q)
> +{
> +}
> +
> static inline void init_lock_keys(void)
> {
> }
> @@ -665,26 +697,6 @@ static inline void init_lock_keys(void)
> static DEFINE_MUTEX(cache_chain_mutex);
> static struct list_head cache_chain;
>
> -/*
> - * chicken and egg problem: delay the per-cpu array allocation
> - * until the general caches are up.
> - */
> -static enum {
> - NONE,
> - PARTIAL_AC,
> - PARTIAL_L3,
> - EARLY,
> - FULL
> -} g_cpucache_up;
> -
> -/*
> - * used by boot code to determine if it can use slab based allocator
> - */
> -int slab_is_available(void)
> -{
> - return g_cpucache_up >= EARLY;
> -}
> -
> static DEFINE_PER_CPU(struct delayed_work, reap_work);
>
> static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
> @@ -1254,6 +1266,8 @@ static int __cpuinit cpuup_prepare(long cpu)
> kfree(shared);
> free_alien_cache(alien);
> }
> + init_node_lock_keys(node);
> +
> return 0;
> bad:
> cpuup_canceled(cpu);
>
>
^ permalink raw reply [flat|nested] 121+ messages in thread* Re: lockdep complaints in slab allocator
@ 2009-11-23 20:57 ` Paul E. McKenney
0 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-23 20:57 UTC (permalink / raw)
To: Pekka Enberg
Cc: Christoph Lameter, Peter Zijlstra, linux-mm, mpm, LKML,
Nick Piggin
On Mon, Nov 23, 2009 at 10:01:15PM +0200, Pekka Enberg wrote:
> On Mon, 2009-11-23 at 21:50 +0200, Pekka Enberg wrote:
> > On Mon, 23 Nov 2009, Pekka Enberg wrote:
> > > > That turns out to be _very_ hard. How about something like the following
> > > > untested patch which delays slab_destroy() while we're under nc->lock.
> >
> > On Mon, 2009-11-23 at 13:30 -0600, Christoph Lameter wrote:
> > > Code changes to deal with a diagnostic issue?
> >
> > OK, fair enough. If I suffer permanent brain damage from staring at the
> > SLAB code for too long, I hope you and Matt will chip in to pay for my
> > medication.
> >
> > I think I was looking at the wrong thing here. The problem is in
> > cache_free_alien() so the comment in slab_destroy() isn't relevant.
> > Looking at init_lock_keys() we already do special lockdep annotations
> > but there's a catch (as explained in a comment on top of
> > on_slab_alc_key):
> >
> > * We set lock class for alien array caches which are up during init.
> > * The lock annotation will be lost if all cpus of a node goes down and
> > * then comes back up during hotplug
> >
> > Paul said he was running CPU hotplug so maybe that explains the problem?
>
> Maybe something like this untested patch fixes the issue...
I will give it a go!
Thanx, Paul
> Pekka
>
> diff --git a/mm/slab.c b/mm/slab.c
> index 7dfa481..84de47e 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -604,6 +604,26 @@ static struct kmem_cache cache_cache = {
>
> #define BAD_ALIEN_MAGIC 0x01020304ul
>
> +/*
> + * chicken and egg problem: delay the per-cpu array allocation
> + * until the general caches are up.
> + */
> +static enum {
> + NONE,
> + PARTIAL_AC,
> + PARTIAL_L3,
> + EARLY,
> + FULL
> +} g_cpucache_up;
> +
> +/*
> + * used by boot code to determine if it can use slab based allocator
> + */
> +int slab_is_available(void)
> +{
> + return g_cpucache_up >= EARLY;
> +}
> +
> #ifdef CONFIG_LOCKDEP
>
> /*
> @@ -620,40 +640,52 @@ static struct kmem_cache cache_cache = {
> static struct lock_class_key on_slab_l3_key;
> static struct lock_class_key on_slab_alc_key;
>
> -static inline void init_lock_keys(void)
> -
> +static void init_node_lock_keys(int q)
> {
> - int q;
> struct cache_sizes *s = malloc_sizes;
>
> - while (s->cs_size != ULONG_MAX) {
> - for_each_node(q) {
> - struct array_cache **alc;
> - int r;
> - struct kmem_list3 *l3 = s->cs_cachep->nodelists[q];
> - if (!l3 || OFF_SLAB(s->cs_cachep))
> - continue;
> - lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
> - alc = l3->alien;
> - /*
> - * FIXME: This check for BAD_ALIEN_MAGIC
> - * should go away when common slab code is taught to
> - * work even without alien caches.
> - * Currently, non NUMA code returns BAD_ALIEN_MAGIC
> - * for alloc_alien_cache,
> - */
> - if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
> - continue;
> - for_each_node(r) {
> - if (alc[r])
> - lockdep_set_class(&alc[r]->lock,
> - &on_slab_alc_key);
> - }
> + if (g_cpucache_up != FULL)
> + return;
> +
> + for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
> + struct array_cache **alc;
> + struct kmem_list3 *l3;
> + int r;
> +
> + l3 = s->cs_cachep->nodelists[q];
> + if (!l3 || OFF_SLAB(s->cs_cachep))
> + return;
> + lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
> + alc = l3->alien;
> + /*
> + * FIXME: This check for BAD_ALIEN_MAGIC
> + * should go away when common slab code is taught to
> + * work even without alien caches.
> + * Currently, non NUMA code returns BAD_ALIEN_MAGIC
> + * for alloc_alien_cache,
> + */
> + if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
> + return;
> + for_each_node(r) {
> + if (alc[r])
> + lockdep_set_class(&alc[r]->lock,
> + &on_slab_alc_key);
> }
> - s++;
> }
> }
> +
> +static inline void init_lock_keys(void)
> +{
> + int node;
> +
> + for_each_node(node)
> + init_node_lock_keys(node);
> +}
> #else
> +static void init_node_lock_keys(int q)
> +{
> +}
> +
> static inline void init_lock_keys(void)
> {
> }
> @@ -665,26 +697,6 @@ static inline void init_lock_keys(void)
> static DEFINE_MUTEX(cache_chain_mutex);
> static struct list_head cache_chain;
>
> -/*
> - * chicken and egg problem: delay the per-cpu array allocation
> - * until the general caches are up.
> - */
> -static enum {
> - NONE,
> - PARTIAL_AC,
> - PARTIAL_L3,
> - EARLY,
> - FULL
> -} g_cpucache_up;
> -
> -/*
> - * used by boot code to determine if it can use slab based allocator
> - */
> -int slab_is_available(void)
> -{
> - return g_cpucache_up >= EARLY;
> -}
> -
> static DEFINE_PER_CPU(struct delayed_work, reap_work);
>
> static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
> @@ -1254,6 +1266,8 @@ static int __cpuinit cpuup_prepare(long cpu)
> kfree(shared);
> free_alien_cache(alien);
> }
> + init_node_lock_keys(node);
> +
> return 0;
> bad:
> cpuup_canceled(cpu);
>
>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-23 20:01 ` Pekka Enberg
@ 2009-11-23 21:01 ` Matt Mackall
-1 siblings, 0 replies; 121+ messages in thread
From: Matt Mackall @ 2009-11-23 21:01 UTC (permalink / raw)
To: Pekka Enberg
Cc: Christoph Lameter, Peter Zijlstra, paulmck, linux-mm, LKML,
Nick Piggin
On Mon, 2009-11-23 at 22:01 +0200, Pekka Enberg wrote:
> On Mon, 2009-11-23 at 21:50 +0200, Pekka Enberg wrote:
> > On Mon, 23 Nov 2009, Pekka Enberg wrote:
> > > > That turns out to be _very_ hard. How about something like the following
> > > > untested patch which delays slab_destroy() while we're under nc->lock.
> >
> > On Mon, 2009-11-23 at 13:30 -0600, Christoph Lameter wrote:
> > > Code changes to deal with a diagnostic issue?
> >
> > OK, fair enough. If I suffer permanent brain damage from staring at the
> > SLAB code for too long, I hope you and Matt will chip in to pay for my
> > medication.
You Europeans and your droll health care jokes.
> Maybe something like this untested patch fixes the issue...
This looks like a much better approach.
--
http://selenic.com : development and support for Mercurial and Linux
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-23 21:01 ` Matt Mackall
0 siblings, 0 replies; 121+ messages in thread
From: Matt Mackall @ 2009-11-23 21:01 UTC (permalink / raw)
To: Pekka Enberg
Cc: Christoph Lameter, Peter Zijlstra, paulmck, linux-mm, LKML,
Nick Piggin
On Mon, 2009-11-23 at 22:01 +0200, Pekka Enberg wrote:
> On Mon, 2009-11-23 at 21:50 +0200, Pekka Enberg wrote:
> > On Mon, 23 Nov 2009, Pekka Enberg wrote:
> > > > That turns out to be _very_ hard. How about something like the following
> > > > untested patch which delays slab_destroy() while we're under nc->lock.
> >
> > On Mon, 2009-11-23 at 13:30 -0600, Christoph Lameter wrote:
> > > Code changes to deal with a diagnostic issue?
> >
> > OK, fair enough. If I suffer permanent brain damage from staring at the
> > SLAB code for too long, I hope you and Matt will chip in to pay for my
> > medication.
You Europeans and your droll health care jokes.
> Maybe something like this untested patch fixes the issue...
This looks like a much better approach.
--
http://selenic.com : development and support for Mercurial and Linux
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-23 19:00 ` Pekka Enberg
@ 2009-11-24 16:23 ` Paul E. McKenney
-1 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-24 16:23 UTC (permalink / raw)
To: Pekka Enberg; +Cc: Peter Zijlstra, linux-mm, cl, mpm, LKML, Nick Piggin
On Mon, Nov 23, 2009 at 09:00:00PM +0200, Pekka Enberg wrote:
> Hi Peter,
>
> On Fri, 2009-11-20 at 16:09 +0100, Peter Zijlstra wrote:
> > > Uh, ok, so apparently I was right after all. There's a comment in
> > > free_block() above the slab_destroy() call that refers to the comment
> > > above alloc_slabmgmt() function definition which explains it all.
> > >
> > > Long story short: ->slab_cachep never points to the same kmalloc cache
> > > we're allocating or freeing from. Where do we need to put the
> > > spin_lock_nested() annotation? Would it be enough to just use it in
> > > cache_free_alien() for alien->lock or do we need it in
> > > cache_flusharray() as well?
> >
> > You'd have to somehow push the nested state down from the
> > kmem_cache_free() call in slab_destroy() to all nc->lock sites below.
>
> That turns out to be _very_ hard. How about something like the following
> untested patch which delays slab_destroy() while we're under nc->lock.
>
> Pekka
Preliminary tests look good! The test was a ten-hour rcutorture run on
an 8-CPU Power system with a half-second delay between randomly chosen
CPU-hotplug operations. No lockdep warnings. ;-)
Will keep hammering on it.
Thanx, Paul
> diff --git a/mm/slab.c b/mm/slab.c
> index 7dfa481..6f522e3 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -316,7 +316,7 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
> static int drain_freelist(struct kmem_cache *cache,
> struct kmem_list3 *l3, int tofree);
> static void free_block(struct kmem_cache *cachep, void **objpp, int len,
> - int node);
> + int node, struct list_head *to_destroy);
> static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
> static void cache_reap(struct work_struct *unused);
>
> @@ -1002,7 +1002,8 @@ static void free_alien_cache(struct array_cache **ac_ptr)
> }
>
> static void __drain_alien_cache(struct kmem_cache *cachep,
> - struct array_cache *ac, int node)
> + struct array_cache *ac, int node,
> + struct list_head *to_destroy)
> {
> struct kmem_list3 *rl3 = cachep->nodelists[node];
>
> @@ -1016,12 +1017,22 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
> if (rl3->shared)
> transfer_objects(rl3->shared, ac, ac->limit);
>
> - free_block(cachep, ac->entry, ac->avail, node);
> + free_block(cachep, ac->entry, ac->avail, node, to_destroy);
> ac->avail = 0;
> spin_unlock(&rl3->list_lock);
> }
> }
>
> +static void slab_destroy(struct kmem_cache *, struct slab *);
> +
> +static void destroy_slabs(struct kmem_cache *cache, struct list_head *to_destroy)
> +{
> + struct slab *slab, *tmp;
> +
> + list_for_each_entry_safe(slab, tmp, to_destroy, list)
> + slab_destroy(cache, slab);
> +}
> +
> /*
> * Called from cache_reap() to regularly drain alien caches round robin.
> */
> @@ -1033,8 +1044,11 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
> struct array_cache *ac = l3->alien[node];
>
> if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
> - __drain_alien_cache(cachep, ac, node);
> + LIST_HEAD(to_destroy);
> +
> + __drain_alien_cache(cachep, ac, node, &to_destroy);
> spin_unlock_irq(&ac->lock);
> + destroy_slabs(cachep, &to_destroy);
> }
> }
> }
> @@ -1049,9 +1063,12 @@ static void drain_alien_cache(struct kmem_cache *cachep,
> for_each_online_node(i) {
> ac = alien[i];
> if (ac) {
> + LIST_HEAD(to_destroy);
> +
> spin_lock_irqsave(&ac->lock, flags);
> - __drain_alien_cache(cachep, ac, i);
> + __drain_alien_cache(cachep, ac, i, &to_destroy);
> spin_unlock_irqrestore(&ac->lock, flags);
> + destroy_slabs(cachep, &to_destroy);
> }
> }
> }
> @@ -1076,17 +1093,20 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
> l3 = cachep->nodelists[node];
> STATS_INC_NODEFREES(cachep);
> if (l3->alien && l3->alien[nodeid]) {
> + LIST_HEAD(to_destroy);
> +
> alien = l3->alien[nodeid];
> spin_lock(&alien->lock);
> if (unlikely(alien->avail == alien->limit)) {
> STATS_INC_ACOVERFLOW(cachep);
> - __drain_alien_cache(cachep, alien, nodeid);
> + __drain_alien_cache(cachep, alien, nodeid, &to_destroy);
> }
> alien->entry[alien->avail++] = objp;
> spin_unlock(&alien->lock);
> + destroy_slabs(cachep, &to_destroy);
> } else {
> spin_lock(&(cachep->nodelists[nodeid])->list_lock);
> - free_block(cachep, &objp, 1, nodeid);
> + free_block(cachep, &objp, 1, nodeid, NULL);
> spin_unlock(&(cachep->nodelists[nodeid])->list_lock);
> }
> return 1;
> @@ -1118,7 +1138,7 @@ static void __cpuinit cpuup_canceled(long cpu)
> /* Free limit for this kmem_list3 */
> l3->free_limit -= cachep->batchcount;
> if (nc)
> - free_block(cachep, nc->entry, nc->avail, node);
> + free_block(cachep, nc->entry, nc->avail, node, NULL);
>
> if (!cpus_empty(*mask)) {
> spin_unlock_irq(&l3->list_lock);
> @@ -1128,7 +1148,7 @@ static void __cpuinit cpuup_canceled(long cpu)
> shared = l3->shared;
> if (shared) {
> free_block(cachep, shared->entry,
> - shared->avail, node);
> + shared->avail, node, NULL);
> l3->shared = NULL;
> }
>
> @@ -2402,7 +2422,7 @@ static void do_drain(void *arg)
> check_irq_off();
> ac = cpu_cache_get(cachep);
> spin_lock(&cachep->nodelists[node]->list_lock);
> - free_block(cachep, ac->entry, ac->avail, node);
> + free_block(cachep, ac->entry, ac->avail, node, NULL);
> spin_unlock(&cachep->nodelists[node]->list_lock);
> ac->avail = 0;
> }
> @@ -3410,7 +3430,7 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
> * Caller needs to acquire correct kmem_list's list_lock
> */
> static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
> - int node)
> + int node, struct list_head *to_destroy)
> {
> int i;
> struct kmem_list3 *l3;
> @@ -3439,7 +3459,10 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
> * a different cache, refer to comments before
> * alloc_slabmgmt.
> */
> - slab_destroy(cachep, slabp);
> + if (to_destroy)
> + list_add(&slabp->list, to_destroy);
> + else
> + slab_destroy(cachep, slabp);
> } else {
> list_add(&slabp->list, &l3->slabs_free);
> }
> @@ -3479,7 +3502,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
> }
> }
>
> - free_block(cachep, ac->entry, batchcount, node);
> + free_block(cachep, ac->entry, batchcount, node, NULL);
> free_done:
> #if STATS
> {
> @@ -3822,7 +3845,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
>
> if (shared)
> free_block(cachep, shared->entry,
> - shared->avail, node);
> + shared->avail, node, NULL);
>
> l3->shared = new_shared;
> if (!l3->alien) {
> @@ -3925,7 +3948,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
> if (!ccold)
> continue;
> spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
> - free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
> + free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i), NULL);
> spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
> kfree(ccold);
> }
> @@ -4007,7 +4030,7 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
> tofree = force ? ac->avail : (ac->limit + 4) / 5;
> if (tofree > ac->avail)
> tofree = (ac->avail + 1) / 2;
> - free_block(cachep, ac->entry, tofree, node);
> + free_block(cachep, ac->entry, tofree, node, NULL);
> ac->avail -= tofree;
> memmove(ac->entry, &(ac->entry[tofree]),
> sizeof(void *) * ac->avail);
>
>
^ permalink raw reply [flat|nested] 121+ messages in thread* Re: lockdep complaints in slab allocator
@ 2009-11-24 16:23 ` Paul E. McKenney
0 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-24 16:23 UTC (permalink / raw)
To: Pekka Enberg; +Cc: Peter Zijlstra, linux-mm, cl, mpm, LKML, Nick Piggin
On Mon, Nov 23, 2009 at 09:00:00PM +0200, Pekka Enberg wrote:
> Hi Peter,
>
> On Fri, 2009-11-20 at 16:09 +0100, Peter Zijlstra wrote:
> > > Uh, ok, so apparently I was right after all. There's a comment in
> > > free_block() above the slab_destroy() call that refers to the comment
> > > above alloc_slabmgmt() function definition which explains it all.
> > >
> > > Long story short: ->slab_cachep never points to the same kmalloc cache
> > > we're allocating or freeing from. Where do we need to put the
> > > spin_lock_nested() annotation? Would it be enough to just use it in
> > > cache_free_alien() for alien->lock or do we need it in
> > > cache_flusharray() as well?
> >
> > You'd have to somehow push the nested state down from the
> > kmem_cache_free() call in slab_destroy() to all nc->lock sites below.
>
> That turns out to be _very_ hard. How about something like the following
> untested patch which delays slab_destroy() while we're under nc->lock.
>
> Pekka
Preliminary tests look good! The test was a ten-hour rcutorture run on
an 8-CPU Power system with a half-second delay between randomly chosen
CPU-hotplug operations. No lockdep warnings. ;-)
Will keep hammering on it.
Thanx, Paul
> diff --git a/mm/slab.c b/mm/slab.c
> index 7dfa481..6f522e3 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -316,7 +316,7 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
> static int drain_freelist(struct kmem_cache *cache,
> struct kmem_list3 *l3, int tofree);
> static void free_block(struct kmem_cache *cachep, void **objpp, int len,
> - int node);
> + int node, struct list_head *to_destroy);
> static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
> static void cache_reap(struct work_struct *unused);
>
> @@ -1002,7 +1002,8 @@ static void free_alien_cache(struct array_cache **ac_ptr)
> }
>
> static void __drain_alien_cache(struct kmem_cache *cachep,
> - struct array_cache *ac, int node)
> + struct array_cache *ac, int node,
> + struct list_head *to_destroy)
> {
> struct kmem_list3 *rl3 = cachep->nodelists[node];
>
> @@ -1016,12 +1017,22 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
> if (rl3->shared)
> transfer_objects(rl3->shared, ac, ac->limit);
>
> - free_block(cachep, ac->entry, ac->avail, node);
> + free_block(cachep, ac->entry, ac->avail, node, to_destroy);
> ac->avail = 0;
> spin_unlock(&rl3->list_lock);
> }
> }
>
> +static void slab_destroy(struct kmem_cache *, struct slab *);
> +
> +static void destroy_slabs(struct kmem_cache *cache, struct list_head *to_destroy)
> +{
> + struct slab *slab, *tmp;
> +
> + list_for_each_entry_safe(slab, tmp, to_destroy, list)
> + slab_destroy(cache, slab);
> +}
> +
> /*
> * Called from cache_reap() to regularly drain alien caches round robin.
> */
> @@ -1033,8 +1044,11 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
> struct array_cache *ac = l3->alien[node];
>
> if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
> - __drain_alien_cache(cachep, ac, node);
> + LIST_HEAD(to_destroy);
> +
> + __drain_alien_cache(cachep, ac, node, &to_destroy);
> spin_unlock_irq(&ac->lock);
> + destroy_slabs(cachep, &to_destroy);
> }
> }
> }
> @@ -1049,9 +1063,12 @@ static void drain_alien_cache(struct kmem_cache *cachep,
> for_each_online_node(i) {
> ac = alien[i];
> if (ac) {
> + LIST_HEAD(to_destroy);
> +
> spin_lock_irqsave(&ac->lock, flags);
> - __drain_alien_cache(cachep, ac, i);
> + __drain_alien_cache(cachep, ac, i, &to_destroy);
> spin_unlock_irqrestore(&ac->lock, flags);
> + destroy_slabs(cachep, &to_destroy);
> }
> }
> }
> @@ -1076,17 +1093,20 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
> l3 = cachep->nodelists[node];
> STATS_INC_NODEFREES(cachep);
> if (l3->alien && l3->alien[nodeid]) {
> + LIST_HEAD(to_destroy);
> +
> alien = l3->alien[nodeid];
> spin_lock(&alien->lock);
> if (unlikely(alien->avail == alien->limit)) {
> STATS_INC_ACOVERFLOW(cachep);
> - __drain_alien_cache(cachep, alien, nodeid);
> + __drain_alien_cache(cachep, alien, nodeid, &to_destroy);
> }
> alien->entry[alien->avail++] = objp;
> spin_unlock(&alien->lock);
> + destroy_slabs(cachep, &to_destroy);
> } else {
> spin_lock(&(cachep->nodelists[nodeid])->list_lock);
> - free_block(cachep, &objp, 1, nodeid);
> + free_block(cachep, &objp, 1, nodeid, NULL);
> spin_unlock(&(cachep->nodelists[nodeid])->list_lock);
> }
> return 1;
> @@ -1118,7 +1138,7 @@ static void __cpuinit cpuup_canceled(long cpu)
> /* Free limit for this kmem_list3 */
> l3->free_limit -= cachep->batchcount;
> if (nc)
> - free_block(cachep, nc->entry, nc->avail, node);
> + free_block(cachep, nc->entry, nc->avail, node, NULL);
>
> if (!cpus_empty(*mask)) {
> spin_unlock_irq(&l3->list_lock);
> @@ -1128,7 +1148,7 @@ static void __cpuinit cpuup_canceled(long cpu)
> shared = l3->shared;
> if (shared) {
> free_block(cachep, shared->entry,
> - shared->avail, node);
> + shared->avail, node, NULL);
> l3->shared = NULL;
> }
>
> @@ -2402,7 +2422,7 @@ static void do_drain(void *arg)
> check_irq_off();
> ac = cpu_cache_get(cachep);
> spin_lock(&cachep->nodelists[node]->list_lock);
> - free_block(cachep, ac->entry, ac->avail, node);
> + free_block(cachep, ac->entry, ac->avail, node, NULL);
> spin_unlock(&cachep->nodelists[node]->list_lock);
> ac->avail = 0;
> }
> @@ -3410,7 +3430,7 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
> * Caller needs to acquire correct kmem_list's list_lock
> */
> static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
> - int node)
> + int node, struct list_head *to_destroy)
> {
> int i;
> struct kmem_list3 *l3;
> @@ -3439,7 +3459,10 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
> * a different cache, refer to comments before
> * alloc_slabmgmt.
> */
> - slab_destroy(cachep, slabp);
> + if (to_destroy)
> + list_add(&slabp->list, to_destroy);
> + else
> + slab_destroy(cachep, slabp);
> } else {
> list_add(&slabp->list, &l3->slabs_free);
> }
> @@ -3479,7 +3502,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
> }
> }
>
> - free_block(cachep, ac->entry, batchcount, node);
> + free_block(cachep, ac->entry, batchcount, node, NULL);
> free_done:
> #if STATS
> {
> @@ -3822,7 +3845,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
>
> if (shared)
> free_block(cachep, shared->entry,
> - shared->avail, node);
> + shared->avail, node, NULL);
>
> l3->shared = new_shared;
> if (!l3->alien) {
> @@ -3925,7 +3948,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
> if (!ccold)
> continue;
> spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
> - free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
> + free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i), NULL);
> spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
> kfree(ccold);
> }
> @@ -4007,7 +4030,7 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
> tofree = force ? ac->avail : (ac->limit + 4) / 5;
> if (tofree > ac->avail)
> tofree = (ac->avail + 1) / 2;
> - free_block(cachep, ac->entry, tofree, node);
> + free_block(cachep, ac->entry, tofree, node, NULL);
> ac->avail -= tofree;
> memmove(ac->entry, &(ac->entry[tofree]),
> sizeof(void *) * ac->avail);
>
>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread* Re: lockdep complaints in slab allocator
2009-11-24 16:23 ` Paul E. McKenney
@ 2009-11-24 20:59 ` Pekka Enberg
-1 siblings, 0 replies; 121+ messages in thread
From: Pekka Enberg @ 2009-11-24 20:59 UTC (permalink / raw)
To: paulmck; +Cc: Peter Zijlstra, linux-mm, cl, mpm, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 6:23 PM, Paul E. McKenney
<paulmck@linux.vnet.ibm.com> wrote:
> On Mon, Nov 23, 2009 at 09:00:00PM +0200, Pekka Enberg wrote:
>> Hi Peter,
>>
>> On Fri, 2009-11-20 at 16:09 +0100, Peter Zijlstra wrote:
>> > > Uh, ok, so apparently I was right after all. There's a comment in
>> > > free_block() above the slab_destroy() call that refers to the comment
>> > > above alloc_slabmgmt() function definition which explains it all.
>> > >
>> > > Long story short: ->slab_cachep never points to the same kmalloc cache
>> > > we're allocating or freeing from. Where do we need to put the
>> > > spin_lock_nested() annotation? Would it be enough to just use it in
>> > > cache_free_alien() for alien->lock or do we need it in
>> > > cache_flusharray() as well?
>> >
>> > You'd have to somehow push the nested state down from the
>> > kmem_cache_free() call in slab_destroy() to all nc->lock sites below.
>>
>> That turns out to be _very_ hard. How about something like the following
>> untested patch which delays slab_destroy() while we're under nc->lock.
>>
>> Pekka
>
> Preliminary tests look good! The test was a ten-hour rcutorture run on
> an 8-CPU Power system with a half-second delay between randomly chosen
> CPU-hotplug operations. No lockdep warnings. ;-)
>
> Will keep hammering on it.
Thanks! Please let me know when you're hammered it enough :-). Peter,
may I have your ACK or NAK on the patch, please?
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 20:59 ` Pekka Enberg
0 siblings, 0 replies; 121+ messages in thread
From: Pekka Enberg @ 2009-11-24 20:59 UTC (permalink / raw)
To: paulmck; +Cc: Peter Zijlstra, linux-mm, cl, mpm, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 6:23 PM, Paul E. McKenney
<paulmck@linux.vnet.ibm.com> wrote:
> On Mon, Nov 23, 2009 at 09:00:00PM +0200, Pekka Enberg wrote:
>> Hi Peter,
>>
>> On Fri, 2009-11-20 at 16:09 +0100, Peter Zijlstra wrote:
>> > > Uh, ok, so apparently I was right after all. There's a comment in
>> > > free_block() above the slab_destroy() call that refers to the comment
>> > > above alloc_slabmgmt() function definition which explains it all.
>> > >
>> > > Long story short: ->slab_cachep never points to the same kmalloc cache
>> > > we're allocating or freeing from. Where do we need to put the
>> > > spin_lock_nested() annotation? Would it be enough to just use it in
>> > > cache_free_alien() for alien->lock or do we need it in
>> > > cache_flusharray() as well?
>> >
>> > You'd have to somehow push the nested state down from the
>> > kmem_cache_free() call in slab_destroy() to all nc->lock sites below.
>>
>> That turns out to be _very_ hard. How about something like the following
>> untested patch which delays slab_destroy() while we're under nc->lock.
>>
>> Pekka
>
> Preliminary tests look good! The test was a ten-hour rcutorture run on
> an 8-CPU Power system with a half-second delay between randomly chosen
> CPU-hotplug operations. No lockdep warnings. ;-)
>
> Will keep hammering on it.
Thanks! Please let me know when you're hammered it enough :-). Peter,
may I have your ACK or NAK on the patch, please?
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 20:59 ` Pekka Enberg
@ 2009-11-24 21:26 ` Peter Zijlstra
-1 siblings, 0 replies; 121+ messages in thread
From: Peter Zijlstra @ 2009-11-24 21:26 UTC (permalink / raw)
To: Pekka Enberg; +Cc: paulmck, linux-mm, cl, mpm, LKML, Nick Piggin
On Tue, 2009-11-24 at 22:59 +0200, Pekka Enberg wrote:
> Thanks! Please let me know when you're hammered it enough :-). Peter,
> may I have your ACK or NAK on the patch, please?
Well, I'm not going to NAK it, for I think it does clean up that
recursion crap a little, but it should have more merit that
side-stepping lockdep.
If you too feel it make SLAB ever so slightly more palatable then ACK,
otherwise I'm perfectly fine with letting SLAB bitrot.
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 21:26 ` Peter Zijlstra
0 siblings, 0 replies; 121+ messages in thread
From: Peter Zijlstra @ 2009-11-24 21:26 UTC (permalink / raw)
To: Pekka Enberg; +Cc: paulmck, linux-mm, cl, mpm, LKML, Nick Piggin
On Tue, 2009-11-24 at 22:59 +0200, Pekka Enberg wrote:
> Thanks! Please let me know when you're hammered it enough :-). Peter,
> may I have your ACK or NAK on the patch, please?
Well, I'm not going to NAK it, for I think it does clean up that
recursion crap a little, but it should have more merit that
side-stepping lockdep.
If you too feel it make SLAB ever so slightly more palatable then ACK,
otherwise I'm perfectly fine with letting SLAB bitrot.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 21:26 ` Peter Zijlstra
@ 2009-11-25 10:42 ` Pekka Enberg
-1 siblings, 0 replies; 121+ messages in thread
From: Pekka Enberg @ 2009-11-25 10:42 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: paulmck, linux-mm, cl, mpm, LKML, Nick Piggin
Peter Zijlstra kirjoitti:
> On Tue, 2009-11-24 at 22:59 +0200, Pekka Enberg wrote:
>
>> Thanks! Please let me know when you're hammered it enough :-). Peter,
>> may I have your ACK or NAK on the patch, please?
>
> Well, I'm not going to NAK it, for I think it does clean up that
> recursion crap a little, but it should have more merit that
> side-stepping lockdep.
>
> If you too feel it make SLAB ever so slightly more palatable then ACK,
> otherwise I'm perfectly fine with letting SLAB bitrot.
I'll take that as an ACK ;-) Thanks!
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-25 10:42 ` Pekka Enberg
0 siblings, 0 replies; 121+ messages in thread
From: Pekka Enberg @ 2009-11-25 10:42 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: paulmck, linux-mm, cl, mpm, LKML, Nick Piggin
Peter Zijlstra kirjoitti:
> On Tue, 2009-11-24 at 22:59 +0200, Pekka Enberg wrote:
>
>> Thanks! Please let me know when you're hammered it enough :-). Peter,
>> may I have your ACK or NAK on the patch, please?
>
> Well, I'm not going to NAK it, for I think it does clean up that
> recursion crap a little, but it should have more merit that
> side-stepping lockdep.
>
> If you too feel it make SLAB ever so slightly more palatable then ACK,
> otherwise I'm perfectly fine with letting SLAB bitrot.
I'll take that as an ACK ;-) Thanks!
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 20:59 ` Pekka Enberg
@ 2009-11-24 21:47 ` Paul E. McKenney
-1 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-24 21:47 UTC (permalink / raw)
To: Pekka Enberg; +Cc: Peter Zijlstra, linux-mm, cl, mpm, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 10:59:44PM +0200, Pekka Enberg wrote:
> On Tue, Nov 24, 2009 at 6:23 PM, Paul E. McKenney
> <paulmck@linux.vnet.ibm.com> wrote:
> > On Mon, Nov 23, 2009 at 09:00:00PM +0200, Pekka Enberg wrote:
> >> Hi Peter,
> >>
> >> On Fri, 2009-11-20 at 16:09 +0100, Peter Zijlstra wrote:
> >> > > Uh, ok, so apparently I was right after all. There's a comment in
> >> > > free_block() above the slab_destroy() call that refers to the comment
> >> > > above alloc_slabmgmt() function definition which explains it all.
> >> > >
> >> > > Long story short: ->slab_cachep never points to the same kmalloc cache
> >> > > we're allocating or freeing from. Where do we need to put the
> >> > > spin_lock_nested() annotation? Would it be enough to just use it in
> >> > > cache_free_alien() for alien->lock or do we need it in
> >> > > cache_flusharray() as well?
> >> >
> >> > You'd have to somehow push the nested state down from the
> >> > kmem_cache_free() call in slab_destroy() to all nc->lock sites below.
> >>
> >> That turns out to be _very_ hard. How about something like the following
> >> untested patch which delays slab_destroy() while we're under nc->lock.
> >>
> >> Pekka
> >
> > Preliminary tests look good! The test was a ten-hour rcutorture run on
> > an 8-CPU Power system with a half-second delay between randomly chosen
> > CPU-hotplug operations. No lockdep warnings. ;-)
> >
> > Will keep hammering on it.
>
> Thanks! Please let me know when you're hammered it enough :-). Peter,
> may I have your ACK or NAK on the patch, please?
I expect to hammer it over the USA Thanksgiving holiday Thu-Sun this week.
It is like this, Pekka: since I don't drink, it is instead your code
that is going to get hammered this weekend!
Thanx, Paul
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-24 21:47 ` Paul E. McKenney
0 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-24 21:47 UTC (permalink / raw)
To: Pekka Enberg; +Cc: Peter Zijlstra, linux-mm, cl, mpm, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 10:59:44PM +0200, Pekka Enberg wrote:
> On Tue, Nov 24, 2009 at 6:23 PM, Paul E. McKenney
> <paulmck@linux.vnet.ibm.com> wrote:
> > On Mon, Nov 23, 2009 at 09:00:00PM +0200, Pekka Enberg wrote:
> >> Hi Peter,
> >>
> >> On Fri, 2009-11-20 at 16:09 +0100, Peter Zijlstra wrote:
> >> > > Uh, ok, so apparently I was right after all. There's a comment in
> >> > > free_block() above the slab_destroy() call that refers to the comment
> >> > > above alloc_slabmgmt() function definition which explains it all.
> >> > >
> >> > > Long story short: ->slab_cachep never points to the same kmalloc cache
> >> > > we're allocating or freeing from. Where do we need to put the
> >> > > spin_lock_nested() annotation? Would it be enough to just use it in
> >> > > cache_free_alien() for alien->lock or do we need it in
> >> > > cache_flusharray() as well?
> >> >
> >> > You'd have to somehow push the nested state down from the
> >> > kmem_cache_free() call in slab_destroy() to all nc->lock sites below.
> >>
> >> That turns out to be _very_ hard. How about something like the following
> >> untested patch which delays slab_destroy() while we're under nc->lock.
> >>
> >> Pekka
> >
> > Preliminary tests look good! The test was a ten-hour rcutorture run on
> > an 8-CPU Power system with a half-second delay between randomly chosen
> > CPU-hotplug operations. No lockdep warnings. ;-)
> >
> > Will keep hammering on it.
>
> Thanks! Please let me know when you're hammered it enough :-). Peter,
> may I have your ACK or NAK on the patch, please?
I expect to hammer it over the USA Thanksgiving holiday Thu-Sun this week.
It is like this, Pekka: since I don't drink, it is instead your code
that is going to get hammered this weekend!
Thanx, Paul
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
2009-11-24 21:47 ` Paul E. McKenney
@ 2009-11-30 16:18 ` Paul E. McKenney
-1 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-30 16:18 UTC (permalink / raw)
To: Pekka Enberg; +Cc: Peter Zijlstra, linux-mm, cl, mpm, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 01:47:40PM -0800, Paul E. McKenney wrote:
> On Tue, Nov 24, 2009 at 10:59:44PM +0200, Pekka Enberg wrote:
> > On Tue, Nov 24, 2009 at 6:23 PM, Paul E. McKenney
> > <paulmck@linux.vnet.ibm.com> wrote:
> > > On Mon, Nov 23, 2009 at 09:00:00PM +0200, Pekka Enberg wrote:
> > >> Hi Peter,
> > >>
> > >> On Fri, 2009-11-20 at 16:09 +0100, Peter Zijlstra wrote:
> > >> > > Uh, ok, so apparently I was right after all. There's a comment in
> > >> > > free_block() above the slab_destroy() call that refers to the comment
> > >> > > above alloc_slabmgmt() function definition which explains it all.
> > >> > >
> > >> > > Long story short: ->slab_cachep never points to the same kmalloc cache
> > >> > > we're allocating or freeing from. Where do we need to put the
> > >> > > spin_lock_nested() annotation? Would it be enough to just use it in
> > >> > > cache_free_alien() for alien->lock or do we need it in
> > >> > > cache_flusharray() as well?
> > >> >
> > >> > You'd have to somehow push the nested state down from the
> > >> > kmem_cache_free() call in slab_destroy() to all nc->lock sites below.
> > >>
> > >> That turns out to be _very_ hard. How about something like the following
> > >> untested patch which delays slab_destroy() while we're under nc->lock.
> > >>
> > >> Pekka
> > >
> > > Preliminary tests look good! The test was a ten-hour rcutorture run on
> > > an 8-CPU Power system with a half-second delay between randomly chosen
> > > CPU-hotplug operations. No lockdep warnings. ;-)
> > >
> > > Will keep hammering on it.
> >
> > Thanks! Please let me know when you're hammered it enough :-). Peter,
> > may I have your ACK or NAK on the patch, please?
>
> I expect to hammer it over the USA Thanksgiving holiday Thu-Sun this week.
> It is like this, Pekka: since I don't drink, it is instead your code
> that is going to get hammered this weekend!
And the runs with your patch were free from lockdep complaints, while
those runs lacking your patch did raise lockdep's ire. So feel free to
add "Tested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>" to the
patch.
And thank you for the fix!!!
Thanx, Paul
^ permalink raw reply [flat|nested] 121+ messages in thread
* Re: lockdep complaints in slab allocator
@ 2009-11-30 16:18 ` Paul E. McKenney
0 siblings, 0 replies; 121+ messages in thread
From: Paul E. McKenney @ 2009-11-30 16:18 UTC (permalink / raw)
To: Pekka Enberg; +Cc: Peter Zijlstra, linux-mm, cl, mpm, LKML, Nick Piggin
On Tue, Nov 24, 2009 at 01:47:40PM -0800, Paul E. McKenney wrote:
> On Tue, Nov 24, 2009 at 10:59:44PM +0200, Pekka Enberg wrote:
> > On Tue, Nov 24, 2009 at 6:23 PM, Paul E. McKenney
> > <paulmck@linux.vnet.ibm.com> wrote:
> > > On Mon, Nov 23, 2009 at 09:00:00PM +0200, Pekka Enberg wrote:
> > >> Hi Peter,
> > >>
> > >> On Fri, 2009-11-20 at 16:09 +0100, Peter Zijlstra wrote:
> > >> > > Uh, ok, so apparently I was right after all. There's a comment in
> > >> > > free_block() above the slab_destroy() call that refers to the comment
> > >> > > above alloc_slabmgmt() function definition which explains it all.
> > >> > >
> > >> > > Long story short: ->slab_cachep never points to the same kmalloc cache
> > >> > > we're allocating or freeing from. Where do we need to put the
> > >> > > spin_lock_nested() annotation? Would it be enough to just use it in
> > >> > > cache_free_alien() for alien->lock or do we need it in
> > >> > > cache_flusharray() as well?
> > >> >
> > >> > You'd have to somehow push the nested state down from the
> > >> > kmem_cache_free() call in slab_destroy() to all nc->lock sites below.
> > >>
> > >> That turns out to be _very_ hard. How about something like the following
> > >> untested patch which delays slab_destroy() while we're under nc->lock.
> > >>
> > >> Pekka
> > >
> > > Preliminary tests look good! The test was a ten-hour rcutorture run on
> > > an 8-CPU Power system with a half-second delay between randomly chosen
> > > CPU-hotplug operations. No lockdep warnings. ;-)
> > >
> > > Will keep hammering on it.
> >
> > Thanks! Please let me know when you're hammered it enough :-). Peter,
> > may I have your ACK or NAK on the patch, please?
>
> I expect to hammer it over the USA Thanksgiving holiday Thu-Sun this week.
> It is like this, Pekka: since I don't drink, it is instead your code
> that is going to get hammered this weekend!
And the runs with your patch were free from lockdep complaints, while
those runs lacking your patch did raise lockdep's ire. So feel free to
add "Tested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>" to the
patch.
And thank you for the fix!!!
Thanx, Paul
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 121+ messages in thread