From: Andrew Morton <akpm@linux-foundation.org>
To: Christoph Lameter <clameter@sgi.com>
Cc: matthew@wil.cx, linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	penberg@cs.helsinki.fi, linux-arch@vger.kernel.org
Subject: Re: [patch 08/10] SLUB: Optional fast path using cmpxchg_local
Date: Tue, 30 Oct 2007 11:49:33 -0700
Message-ID: <20071030114933.904a4cf8.akpm@linux-foundation.org>
In-Reply-To: <20071028033300.240703208@sgi.com>

On Sat, 27 Oct 2007 20:32:04 -0700
Christoph Lameter <clameter@sgi.com> wrote:

> Provide an alternate implementation of the SLUB fast paths for alloc
> and free using cmpxchg_local. The cmpxchg_local fast path is selected
> for arches that have CONFIG_FAST_CMPXCHG_LOCAL set. An arch should only
> set CONFIG_FAST_CMPXCHG_LOCAL if cmpxchg_local is faster than an
> interrupt enable/disable sequence. This is known to be true for both
> x86 arches, so FAST_CMPXCHG_LOCAL is set for both.
> 
> Not all arches can support fast cmpxchg operations. Typically the
> architecture must have an optimized cmpxchg instruction. The
> cmpxchg fast path makes no sense on platforms whose cmpxchg is
> slower than an interrupt enable/disable sequence (e.g. IA64).
> 
> The advantages of a cmpxchg_local based fast path are:
> 
> 1. Lower cycle count (30%-60% faster)
> 
> 2. There is no need to disable and enable interrupts on the fast path.
>    Currently interrupts have to be disabled and enabled on every
>    slab operation. Avoiding that likely eliminates a significant
>    percentage of the interrupt off/on sequences in the kernel.
> 
> 3. The disposal of freed slabs can occur with interrupts enabled.
> 
> The alternate path is realized using #ifdef's. Several attempts to do the
> same with macros and inline functions resulted in a mess (in particular due
> to the strange way that local_irq_save() handles its argument and due
> to the need to define macros/functions that sometimes disable interrupts
> and sometimes do something else). The macro-based approaches also made it
> difficult to preserve the optimizations for the non-cmpxchg paths.
> 
> #ifdef seems to be the way to go here to keep the source readable.
> 
> 
> ---
>  arch/x86/Kconfig.i386   |    4 ++
>  arch/x86/Kconfig.x86_64 |    4 ++
>  mm/slub.c               |   71 ++++++++++++++++++++++++++++++++++++++++++++++--

Let's cc linux-arch: presumably other architectures can implement cpu-local
cmpxchg and would see some benefit from doing so.

The semantics are "atomic wrt interrupts on this cpu, not atomic wrt other
cpus", yes?

Do you have a feel for how useful it would be for arch maintainers to implement
this?  IOW, is it worth their time?
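
If so, it looks like the opt-in on their side would just be an efficient
cmpxchg_local plus a Kconfig stanza mirroring the x86 ones below:

config FAST_CMPXCHG_LOCAL
	bool
	default y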

> 
> Index: linux-2.6/mm/slub.c
> ===================================================================
> --- linux-2.6.orig/mm/slub.c	2007-10-27 10:39:07.583665939 -0700
> +++ linux-2.6/mm/slub.c	2007-10-27 10:40:19.710415861 -0700
> @@ -1496,7 +1496,12 @@ static void *__slab_alloc(struct kmem_ca
>  {
>  	void **object;
>  	struct page *new;
> +#ifdef CONFIG_FAST_CMPXCHG_LOCAL
> +	unsigned long flags;
>  
> +	local_irq_save(flags);
> +	preempt_enable_no_resched();
> +#endif
>  	if (!c->page)
>  		goto new_slab;
>  
> @@ -1518,6 +1523,10 @@ load_freelist:
>  unlock_out:
>  	slab_unlock(c->page);
>  out:
> +#ifdef CONFIG_FAST_CMPXCHG_LOCAL
> +	preempt_disable();
> +	local_irq_restore(flags);
> +#endif
>  	return object;
>  
>  another_slab:
> @@ -1592,9 +1601,26 @@ static void __always_inline *slab_alloc(
>  		gfp_t gfpflags, int node, void *addr)
>  {
>  	void **object;
> -	unsigned long flags;
>  	struct kmem_cache_cpu *c;
>  
> +#ifdef CONFIG_FAST_CMPXCHG_LOCAL
> +	c = get_cpu_slab(s, get_cpu());
> +	do {
> +		object = c->freelist;
> +		if (unlikely(is_end(object) || !node_match(c, node))) {
> +			object = __slab_alloc(s, gfpflags, node, addr, c);
> +			if (unlikely(!object)) {
> +				put_cpu();
> +				goto out;
> +			}
> +			break;
> +		}
> +	} while (cmpxchg_local(&c->freelist, object, object[c->offset])
> +								!= object);
> +	put_cpu();
> +#else
> +	unsigned long flags;
> +
>  	local_irq_save(flags);
>  	c = get_cpu_slab(s, smp_processor_id());
>  	if (unlikely((is_end(c->freelist)) || !node_match(c, node))) {
> @@ -1609,6 +1635,7 @@ static void __always_inline *slab_alloc(
>  		c->freelist = object[c->offset];
>  	}
>  	local_irq_restore(flags);
> +#endif
>  
>  	if (unlikely((gfpflags & __GFP_ZERO)))
>  		memset(object, 0, c->objsize);
> @@ -1644,6 +1671,11 @@ static void __slab_free(struct kmem_cach
>  	void *prior;
>  	void **object = (void *)x;
>  
> +#ifdef CONFIG_FAST_CMPXCHG_LOCAL
> +	unsigned long flags;
> +
> +	local_irq_save(flags);
> +#endif
>  	slab_lock(page);
>  
>  	if (unlikely(SlabDebug(page)))
> @@ -1669,6 +1701,9 @@ checks_ok:
>  
>  out_unlock:
>  	slab_unlock(page);
> +#ifdef CONFIG_FAST_CMPXCHG_LOCAL
> +	local_irq_restore(flags);
> +#endif
>  	return;
>  
>  slab_empty:
> @@ -1679,6 +1714,9 @@ slab_empty:
>  		remove_partial(s, page);
>  
>  	slab_unlock(page);
> +#ifdef CONFIG_FAST_CMPXCHG_LOCAL
> +	local_irq_restore(flags);
> +#endif
>  	discard_slab(s, page);
>  	return;
>  
> @@ -1703,9 +1741,37 @@ static void __always_inline slab_free(st
>  			struct page *page, void *x, void *addr)
>  {
>  	void **object = (void *)x;
> -	unsigned long flags;
>  	struct kmem_cache_cpu *c;
>  
> +#ifdef CONFIG_FAST_CMPXCHG_LOCAL
> +	void **freelist;
> +
> +	c = get_cpu_slab(s, get_cpu());
> +	debug_check_no_locks_freed(object, s->objsize);
> +	do {
> +		freelist = c->freelist;
> +		barrier();
> +		/*
> +		 * If the compiler would reorder the retrieval of c->page to
> +		 * come before c->freelist then an interrupt could
> +		 * change the cpu slab before we retrieve c->freelist. We
> +		 * could be matching on a page no longer active and put the
> +		 * object onto the freelist of the wrong slab.
> +		 *
> +		 * On the other hand: If we already have the freelist pointer
> +		 * then any change of cpu_slab will cause the cmpxchg to fail
> +		 * since the freelist pointers are unique per slab.
> +		 */
> +		if (unlikely(page != c->page || c->node < 0)) {
> +			__slab_free(s, page, x, addr, c->offset);
> +			break;
> +		}
> +		object[c->offset] = freelist;
> +	} while (cmpxchg_local(&c->freelist, freelist, object) != freelist);
> +	put_cpu();
> +#else
> +	unsigned long flags;
> +
>  	local_irq_save(flags);
>  	debug_check_no_locks_freed(object, s->objsize);
>  	c = get_cpu_slab(s, smp_processor_id());
> @@ -1716,6 +1782,7 @@ static void __always_inline slab_free(st
>  		__slab_free(s, page, x, addr, c->offset);
>  
>  	local_irq_restore(flags);
> +#endif
>  }
>  
>  void kmem_cache_free(struct kmem_cache *s, void *x)
> Index: linux-2.6/arch/x86/Kconfig.i386
> ===================================================================
> --- linux-2.6.orig/arch/x86/Kconfig.i386	2007-10-27 10:38:33.630415778 -0700
> +++ linux-2.6/arch/x86/Kconfig.i386	2007-10-27 10:40:19.710415861 -0700
> @@ -51,6 +51,10 @@ config X86
>  	bool
>  	default y
>  
> +config FAST_CMPXCHG_LOCAL
> +	bool
> +	default y
> +
>  config MMU
>  	bool
>  	default y
> Index: linux-2.6/arch/x86/Kconfig.x86_64
> ===================================================================
> --- linux-2.6.orig/arch/x86/Kconfig.x86_64	2007-10-27 10:38:33.630415778 -0700
> +++ linux-2.6/arch/x86/Kconfig.x86_64	2007-10-27 10:40:19.710415861 -0700
> @@ -97,6 +97,10 @@ config X86_CMPXCHG
>  	bool
>  	default y
>  
> +config FAST_CMPXCHG_LOCAL
> +	bool
> +	default y
> +
>  config EARLY_PRINTK
>  	bool
>  	default y
> 
> -- 


Thread overview: 35+ messages
2007-10-28  3:31 [patch 00/10] SLUB: SMP regression tests on Dual Xeon E5345 (8p) and new performance patches Christoph Lameter
2007-10-28  3:31 ` [patch 01/10] SLUB: Consolidate add_partial and add_partial_tail to one function Christoph Lameter
2007-10-28 13:07   ` Pekka J Enberg
2007-10-28  3:31 ` [patch 02/10] SLUB: Noinline some functions to avoid them being folded into alloc/free Christoph Lameter
2007-10-28 13:08   ` Pekka J Enberg
2007-10-29 23:25   ` Matt Mackall
2007-10-28  3:31 ` [patch 03/10] SLUB: Move kmem_cache_node determination into add_full and add_partial Christoph Lameter
2007-10-28 13:09   ` Pekka J Enberg
2007-10-28  3:32 ` [patch 04/10] SLUB: Avoid checking for a valid object before zeroing on the fast path Christoph Lameter
2007-10-28 13:10   ` Pekka J Enberg
2007-10-28  3:32 ` [patch 05/10] SLUB: __slab_alloc() exit path consolidation Christoph Lameter
2007-10-28 13:11   ` Pekka J Enberg
2007-10-28  3:32 ` [patch 06/10] SLUB: Provide unique end marker for each slab Christoph Lameter
2007-10-28  3:32 ` [patch 07/10] SLUB: Avoid referencing kmem_cache structure in __slab_alloc Christoph Lameter
2007-10-28 13:12   ` Pekka J Enberg
2007-10-30 18:38   ` Andrew Morton
2007-10-28  3:32 ` [patch 08/10] SLUB: Optional fast path using cmpxchg_local Christoph Lameter
2007-10-28 13:05   ` Pekka J Enberg
2007-10-29  2:59     ` Christoph Lameter
2007-10-29  3:34     ` Christoph Lameter
2007-10-30 18:30     ` Andrew Morton
2007-10-30 18:49   ` Andrew Morton [this message]
2007-10-30 18:58     ` Christoph Lameter
2007-10-30 19:12       ` Mathieu Desnoyers
2007-10-31  1:52       ` [PATCH] local_t Documentation update 2 Mathieu Desnoyers
2007-10-31  2:28   ` [patch 08/10] SLUB: Optional fast path using cmpxchg_local Mathieu Desnoyers
2007-10-28  3:32 ` [patch 09/10] SLUB: Do our own locking via slab_lock and slab_unlock Christoph Lameter
2007-10-28 15:10   ` Pekka J Enberg
2007-10-28 15:14     ` Pekka J Enberg
2007-10-29  3:03     ` Christoph Lameter
2007-10-29  6:30       ` Pekka Enberg
2007-10-30  4:50   ` Nick Piggin
2007-10-30 18:32     ` Christoph Lameter
2007-10-31  1:17       ` Nick Piggin
2007-10-28  3:32 ` [patch 10/10] SLUB: Restructure slab alloc Christoph Lameter
