From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
To: Christoph Lameter <clameter@sgi.com>
Cc: Martin Bligh <mbligh@mbligh.org>,
Andi Kleen <andi@firstfloor.org>,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
David Miller <davem@davemloft.net>
Subject: Re: [patch 00/10] [RFC] SLUB patches for more functionality, performance and maintenance
Date: Mon, 13 Aug 2007 18:18:47 -0400 [thread overview]
Message-ID: <20070813221847.GA20314@Krystal> (raw)
In-Reply-To: <Pine.LNX.4.64.0707091715450.2062@schroedinger.engr.sgi.com>
Some review here. I think we could do much better..
* Christoph Lameter (clameter@sgi.com) wrote:
> Index: linux-2.6.22-rc6-mm1/mm/slub.c
> ===================================================================
> --- linux-2.6.22-rc6-mm1.orig/mm/slub.c 2007-07-09 15:04:46.000000000 -0700
> +++ linux-2.6.22-rc6-mm1/mm/slub.c 2007-07-09 17:09:00.000000000 -0700
> @@ -1467,12 +1467,14 @@ static void *__slab_alloc(struct kmem_ca
> {
> void **object;
> struct page *new;
> + unsigned long flags;
>
> + local_irq_save(flags);
> if (!c->page)
> goto new_slab;
>
> slab_lock(c->page);
> - if (unlikely(!node_match(c, node)))
> + if (unlikely(!node_match(c, node) || c->freelist))
> goto another_slab;
> load_freelist:
> object = c->page->freelist;
> @@ -1486,7 +1488,14 @@ load_freelist:
> c->page->inuse = s->objects;
> c->page->freelist = NULL;
> c->node = page_to_nid(c->page);
> +out:
> slab_unlock(c->page);
> + local_irq_restore(flags);
> + preempt_enable();
> +
> + if (unlikely((gfpflags & __GFP_ZERO)))
> + memset(object, 0, c->objsize);
> +
> return object;
>
> another_slab:
> @@ -1527,6 +1536,8 @@ new_slab:
> c->page = new;
> goto load_freelist;
> }
> + local_irq_restore(flags);
> + preempt_enable();
> return NULL;
> debug:
> c->freelist = NULL;
> @@ -1536,8 +1547,7 @@ debug:
>
> c->page->inuse++;
> c->page->freelist = object[c->offset];
> - slab_unlock(c->page);
> - return object;
> + goto out;
> }
>
> /*
> @@ -1554,23 +1564,20 @@ static void __always_inline *slab_alloc(
> gfp_t gfpflags, int node, void *addr)
> {
> void **object;
> - unsigned long flags;
> struct kmem_cache_cpu *c;
>
What if we prefetch c->freelist here ? I see in this diff that the other
code just reads it sooner as a condition for the if().
> - local_irq_save(flags);
> + preempt_disable();
> c = get_cpu_slab(s, smp_processor_id());
> - if (unlikely(!c->page || !c->freelist ||
> - !node_match(c, node)))
> +redo:
> + object = c->freelist;
> + if (unlikely(!object || !node_match(c, node)))
> + return __slab_alloc(s, gfpflags, node, addr, c);
>
> - object = __slab_alloc(s, gfpflags, node, addr, c);
> + if (cmpxchg_local(&c->freelist, object, object[c->offset]) != object)
> + goto redo;
>
> - else {
> - object = c->freelist;
> - c->freelist = object[c->offset];
> - }
> - local_irq_restore(flags);
> -
> - if (unlikely((gfpflags & __GFP_ZERO) && object))
> + preempt_enable();
> + if (unlikely((gfpflags & __GFP_ZERO)))
> memset(object, 0, c->objsize);
>
> return object;
> @@ -1603,7 +1610,9 @@ static void __slab_free(struct kmem_cach
> {
> void *prior;
> void **object = (void *)x;
> + unsigned long flags;
>
> + local_irq_save(flags);
> slab_lock(page);
>
> if (unlikely(SlabDebug(page)))
> @@ -1629,6 +1638,8 @@ checks_ok:
>
> out_unlock:
> slab_unlock(page);
> + local_irq_restore(flags);
> + preempt_enable();
> return;
>
> slab_empty:
> @@ -1639,6 +1650,8 @@ slab_empty:
> remove_partial(s, page);
>
> slab_unlock(page);
> + local_irq_restore(flags);
> + preempt_enable();
> discard_slab(s, page);
> return;
>
> @@ -1663,18 +1676,31 @@ static void __always_inline slab_free(st
> struct page *page, void *x, void *addr)
> {
> void **object = (void *)x;
> - unsigned long flags;
> struct kmem_cache_cpu *c;
> + void **freelist;
>
Prefetching c->freelist would also make sense here.
> - local_irq_save(flags);
> + preempt_disable();
> c = get_cpu_slab(s, smp_processor_id());
> - if (likely(page == c->page && c->freelist)) {
> - object[c->offset] = c->freelist;
> - c->freelist = object;
> - } else
> - __slab_free(s, page, x, addr, c->offset);
> +redo:
> + freelist = c->freelist;
I suspect this smp_rmb() may be the cause of a major slowdown.
Therefore, I think we should try taking a copy of c->page and simply
check if it has changed right after the cmpxchg_local:
page = c->page;
> + /*
> + * Must read freelist before c->page. If a interrupt occurs and
> + * changes c->page after we have read it here then it
> + * will also have changed c->freelist and the cmpxchg will fail.
> + *
> + * If we would have checked c->page first then the freelist could
> + * have been changed under us before we read c->freelist and we
> + * would not be able to detect that situation.
> + */
> + smp_rmb();
> + if (unlikely(page != c->page || !freelist))
> + return __slab_free(s, page, x, addr, c->offset);
> +
> + object[c->offset] = freelist;
-> + if (cmpxchg_local(&c->freelist, freelist, object) != freelist)
+> + if (cmpxchg_local(&c->freelist, freelist, object) != freelist
|| page != c->page)
> + goto redo;
>
Therefore, in the scenario where:
1 - c->page is read
2 - Interrupt comes, changes c->page and c->freelist
3 - c->freelist is read
4 - cmpxchg c->freelist succeeds
5 - Then, page != c->page, so we goto redo.
It also works if 4 and 5 are swapped.
I could test the modification if you point to me which kernel version it
should apply to. However, I don't have the same hardware you use.
By the way, the smp_rmb() barrier does not make sense with the comment.
If it is _really_ protecting against reordering wrt interrupts, then it
should be a rmb(), not smp_rmb() (because it will be reordered on UP).
But I think the best would just be to work without rmb() at all, as
proposed here.
Mathieu
> - local_irq_restore(flags);
> + preempt_enable();
> }
>
> void kmem_cache_free(struct kmem_cache *s, void *x)
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
WARNING: multiple messages have this Message-ID (diff)
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
To: Christoph Lameter <clameter@sgi.com>
Cc: Martin Bligh <mbligh@mbligh.org>,
Andi Kleen <andi@firstfloor.org>,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
David Miller <davem@davemloft.net>
Subject: Re: [patch 00/10] [RFC] SLUB patches for more functionality, performance and maintenance
Date: Mon, 13 Aug 2007 18:18:47 -0400 [thread overview]
Message-ID: <20070813221847.GA20314@Krystal> (raw)
In-Reply-To: <Pine.LNX.4.64.0707091715450.2062@schroedinger.engr.sgi.com>
Some review here. I think we could do much better..
* Christoph Lameter (clameter@sgi.com) wrote:
> Index: linux-2.6.22-rc6-mm1/mm/slub.c
> ===================================================================
> --- linux-2.6.22-rc6-mm1.orig/mm/slub.c 2007-07-09 15:04:46.000000000 -0700
> +++ linux-2.6.22-rc6-mm1/mm/slub.c 2007-07-09 17:09:00.000000000 -0700
> @@ -1467,12 +1467,14 @@ static void *__slab_alloc(struct kmem_ca
> {
> void **object;
> struct page *new;
> + unsigned long flags;
>
> + local_irq_save(flags);
> if (!c->page)
> goto new_slab;
>
> slab_lock(c->page);
> - if (unlikely(!node_match(c, node)))
> + if (unlikely(!node_match(c, node) || c->freelist))
> goto another_slab;
> load_freelist:
> object = c->page->freelist;
> @@ -1486,7 +1488,14 @@ load_freelist:
> c->page->inuse = s->objects;
> c->page->freelist = NULL;
> c->node = page_to_nid(c->page);
> +out:
> slab_unlock(c->page);
> + local_irq_restore(flags);
> + preempt_enable();
> +
> + if (unlikely((gfpflags & __GFP_ZERO)))
> + memset(object, 0, c->objsize);
> +
> return object;
>
> another_slab:
> @@ -1527,6 +1536,8 @@ new_slab:
> c->page = new;
> goto load_freelist;
> }
> + local_irq_restore(flags);
> + preempt_enable();
> return NULL;
> debug:
> c->freelist = NULL;
> @@ -1536,8 +1547,7 @@ debug:
>
> c->page->inuse++;
> c->page->freelist = object[c->offset];
> - slab_unlock(c->page);
> - return object;
> + goto out;
> }
>
> /*
> @@ -1554,23 +1564,20 @@ static void __always_inline *slab_alloc(
> gfp_t gfpflags, int node, void *addr)
> {
> void **object;
> - unsigned long flags;
> struct kmem_cache_cpu *c;
>
What if we prefetch c->freelist here ? I see in this diff that the other
code just reads it sooner as a condition for the if().
> - local_irq_save(flags);
> + preempt_disable();
> c = get_cpu_slab(s, smp_processor_id());
> - if (unlikely(!c->page || !c->freelist ||
> - !node_match(c, node)))
> +redo:
> + object = c->freelist;
> + if (unlikely(!object || !node_match(c, node)))
> + return __slab_alloc(s, gfpflags, node, addr, c);
>
> - object = __slab_alloc(s, gfpflags, node, addr, c);
> + if (cmpxchg_local(&c->freelist, object, object[c->offset]) != object)
> + goto redo;
>
> - else {
> - object = c->freelist;
> - c->freelist = object[c->offset];
> - }
> - local_irq_restore(flags);
> -
> - if (unlikely((gfpflags & __GFP_ZERO) && object))
> + preempt_enable();
> + if (unlikely((gfpflags & __GFP_ZERO)))
> memset(object, 0, c->objsize);
>
> return object;
> @@ -1603,7 +1610,9 @@ static void __slab_free(struct kmem_cach
> {
> void *prior;
> void **object = (void *)x;
> + unsigned long flags;
>
> + local_irq_save(flags);
> slab_lock(page);
>
> if (unlikely(SlabDebug(page)))
> @@ -1629,6 +1638,8 @@ checks_ok:
>
> out_unlock:
> slab_unlock(page);
> + local_irq_restore(flags);
> + preempt_enable();
> return;
>
> slab_empty:
> @@ -1639,6 +1650,8 @@ slab_empty:
> remove_partial(s, page);
>
> slab_unlock(page);
> + local_irq_restore(flags);
> + preempt_enable();
> discard_slab(s, page);
> return;
>
> @@ -1663,18 +1676,31 @@ static void __always_inline slab_free(st
> struct page *page, void *x, void *addr)
> {
> void **object = (void *)x;
> - unsigned long flags;
> struct kmem_cache_cpu *c;
> + void **freelist;
>
Prefetching c->freelist would also make sense here.
> - local_irq_save(flags);
> + preempt_disable();
> c = get_cpu_slab(s, smp_processor_id());
> - if (likely(page == c->page && c->freelist)) {
> - object[c->offset] = c->freelist;
> - c->freelist = object;
> - } else
> - __slab_free(s, page, x, addr, c->offset);
> +redo:
> + freelist = c->freelist;
I suspect this smp_rmb() may be the cause of a major slowdown.
Therefore, I think we should try taking a copy of c->page and simply
check if it has changed right after the cmpxchg_local:
page = c->page;
> + /*
> + * Must read freelist before c->page. If a interrupt occurs and
> + * changes c->page after we have read it here then it
> + * will also have changed c->freelist and the cmpxchg will fail.
> + *
> + * If we would have checked c->page first then the freelist could
> + * have been changed under us before we read c->freelist and we
> + * would not be able to detect that situation.
> + */
> + smp_rmb();
> + if (unlikely(page != c->page || !freelist))
> + return __slab_free(s, page, x, addr, c->offset);
> +
> + object[c->offset] = freelist;
-> + if (cmpxchg_local(&c->freelist, freelist, object) != freelist)
+> + if (cmpxchg_local(&c->freelist, freelist, object) != freelist
|| page != c->page)
> + goto redo;
>
Therefore, in the scenario where:
1 - c->page is read
2 - Interrupt comes, changes c->page and c->freelist
3 - c->freelist is read
4 - cmpxchg c->freelist succeeds
5 - Then, page != c->page, so we goto redo.
It also works if 4 and 5 are swapped.
I could test the modification if you point to me which kernel version it
should apply to. However, I don't have the same hardware you use.
By the way, the smp_rmb() barrier does not make sense with the comment.
If it is _really_ protecting against reordering wrt interrupts, then it
should be a rmb(), not smp_rmb() (because it will be reordered on UP).
But I think the best would just be to work without rmb() at all, as
proposed here.
Mathieu
> - local_irq_restore(flags);
> + preempt_enable();
> }
>
> void kmem_cache_free(struct kmem_cache *s, void *x)
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2007-08-13 22:19 UTC|newest]
Thread overview: 111+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-07-08 3:49 [patch 00/10] [RFC] SLUB patches for more functionality, performance and maintenance Christoph Lameter
2007-07-08 3:49 ` [patch 01/10] SLUB: Direct pass through of page size or higher kmalloc requests Christoph Lameter
2007-07-08 3:49 ` [patch 02/10] SLUB: Avoid page struct cacheline bouncing due to remote frees to cpu slab Christoph Lameter
2007-07-08 3:49 ` [patch 03/10] SLUB: Do not use page->mapping Christoph Lameter
2007-07-08 3:49 ` [patch 04/10] SLUB: Move page->offset to kmem_cache_cpu->offset Christoph Lameter
2007-07-08 3:49 ` [patch 05/10] SLUB: Avoid touching page struct when freeing to per cpu slab Christoph Lameter
2007-07-08 3:49 ` [patch 06/10] SLUB: Place kmem_cache_cpu structures in a NUMA aware way Christoph Lameter
2007-07-08 3:49 ` [patch 07/10] SLUB: Optimize cacheline use for zeroing Christoph Lameter
2007-07-08 3:50 ` [patch 08/10] SLUB: Single atomic instruction alloc/free using cmpxchg Christoph Lameter
2007-07-08 3:50 ` [patch 09/10] Remove the SLOB allocator for 2.6.23 Christoph Lameter
2007-07-08 7:51 ` Ingo Molnar
2007-07-08 9:43 ` Nick Piggin
2007-07-08 9:54 ` Ingo Molnar
2007-07-08 10:23 ` Nick Piggin
2007-07-08 10:42 ` Ingo Molnar
2007-07-08 18:02 ` Andrew Morton
2007-07-09 2:57 ` Nick Piggin
2007-07-09 11:04 ` Pekka Enberg
2007-07-09 11:16 ` Nick Piggin
2007-07-09 12:47 ` Pekka Enberg
2007-07-09 13:46 ` Pekka J Enberg
2007-07-09 16:08 ` Christoph Lameter
2007-07-09 16:08 ` Christoph Lameter
2007-07-10 8:17 ` Pekka J Enberg
2007-07-10 8:17 ` Pekka J Enberg
2007-07-10 8:27 ` Nick Piggin
2007-07-10 8:27 ` Nick Piggin
2007-07-10 9:31 ` Pekka Enberg
2007-07-10 9:31 ` Pekka Enberg
2007-07-10 10:09 ` Nick Piggin
2007-07-10 10:09 ` Nick Piggin
2007-07-10 12:02 ` Matt Mackall
2007-07-10 12:02 ` Matt Mackall
2007-07-10 12:57 ` Pekka J Enberg
2007-07-10 12:57 ` Pekka J Enberg
2007-07-10 22:12 ` Christoph Lameter
2007-07-10 22:40 ` Matt Mackall
2007-07-10 22:40 ` Matt Mackall
2007-07-10 22:50 ` Christoph Lameter
2007-07-10 22:50 ` Christoph Lameter
2007-07-09 16:06 ` Christoph Lameter
2007-07-09 16:51 ` Andrew Morton
2007-07-09 17:26 ` Christoph Lameter
2007-07-09 18:00 ` Andrew Morton
2007-07-10 1:43 ` Nick Piggin
2007-07-10 1:56 ` Christoph Lameter
2007-07-10 2:02 ` Nick Piggin
2007-07-10 2:11 ` Christoph Lameter
2007-07-10 7:09 ` Nick Piggin
2007-07-10 22:09 ` Christoph Lameter
2007-07-10 23:12 ` Matt Mackall
2007-07-10 8:32 ` Matt Mackall
2007-07-10 9:01 ` Håvard Skinnemoen
2007-07-10 9:11 ` Nick Piggin
2007-07-10 9:21 ` Håvard Skinnemoen
2007-07-11 1:37 ` Christoph Lameter
2007-07-11 2:06 ` Matt Mackall
2007-07-11 18:06 ` Christoph Lameter
2007-07-11 18:25 ` Pekka J Enberg
2007-07-11 18:33 ` Christoph Lameter
2007-07-11 18:36 ` Pekka J Enberg
2007-07-12 0:33 ` Nick Piggin
2007-07-09 23:09 ` Matt Mackall
2007-07-10 1:41 ` Nick Piggin
2007-07-10 1:51 ` Christoph Lameter
2007-07-10 1:58 ` Nick Piggin
2007-07-10 6:22 ` Matt Mackall
2007-07-10 7:03 ` Nick Piggin
2007-07-10 2:32 ` Matt Mackall
2007-07-09 21:57 ` Matt Mackall
2007-07-09 12:31 ` Matthieu CASTET
2007-07-09 16:00 ` Christoph Lameter
2007-07-09 20:52 ` Matt Mackall
2007-07-08 3:50 ` [patch 10/10] Remove slab in 2.6.24 Christoph Lameter
2007-07-08 4:37 ` [patch 00/10] [RFC] SLUB patches for more functionality, performance and maintenance David Miller
2007-07-09 15:45 ` Christoph Lameter
2007-07-09 19:43 ` David Miller
2007-07-09 21:21 ` Christoph Lameter
2007-07-08 11:20 ` Andi Kleen
2007-07-09 15:50 ` Christoph Lameter
2007-07-09 15:50 ` Christoph Lameter
2007-07-09 15:59 ` Martin Bligh
2007-07-09 15:59 ` Martin Bligh
2007-07-09 18:11 ` Christoph Lameter
2007-07-09 18:11 ` Christoph Lameter
2007-07-09 21:00 ` Martin Bligh
2007-07-09 21:00 ` Martin Bligh
2007-07-09 21:44 ` Mathieu Desnoyers
2007-07-09 21:44 ` Mathieu Desnoyers
2007-07-09 21:55 ` Christoph Lameter
2007-07-09 21:55 ` Christoph Lameter
2007-07-09 22:58 ` Mathieu Desnoyers
2007-07-09 22:58 ` Mathieu Desnoyers
2007-07-09 23:08 ` Christoph Lameter
2007-07-09 23:08 ` Christoph Lameter
2007-07-10 5:16 ` [PATCH] x86_64 - Use non locked version for local_cmpxchg() Mathieu Desnoyers
2007-07-10 5:16 ` Mathieu Desnoyers
2007-07-10 20:46 ` Christoph Lameter
2007-07-10 20:46 ` Christoph Lameter
2007-07-10 0:55 ` [patch 00/10] [RFC] SLUB patches for more functionality, performance and maintenance Christoph Lameter
2007-07-10 0:55 ` Christoph Lameter
2007-07-10 8:27 ` Mathieu Desnoyers
2007-07-10 8:27 ` Mathieu Desnoyers
2007-07-10 18:38 ` Christoph Lameter
2007-07-10 18:38 ` Christoph Lameter
2007-07-10 20:59 ` Mathieu Desnoyers
2007-07-10 20:59 ` Mathieu Desnoyers
2007-08-13 22:18 ` Mathieu Desnoyers [this message]
2007-08-13 22:18 ` Mathieu Desnoyers
2007-08-13 22:28 ` Christoph Lameter
2007-08-13 22:28 ` Christoph Lameter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070813221847.GA20314@Krystal \
--to=mathieu.desnoyers@polymtl.ca \
--cc=andi@firstfloor.org \
--cc=clameter@sgi.com \
--cc=davem@davemloft.net \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mbligh@mbligh.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.