All of lore.kernel.org
 help / color / mirror / Atom feed
From: Pekka Enberg <penberg@cs.helsinki.fi>
To: David Rientjes <rientjes@google.com>
Cc: Christoph Lameter <cl@linux-foundation.org>,
	linux-kernel@vger.kernel.org
Subject: Re: [patch] slub: default min_partial to at least highest cpus per node
Date: Tue, 07 Apr 2009 21:58:02 +0300	[thread overview]
Message-ID: <49DBA23A.3000106@cs.helsinki.fi> (raw)
In-Reply-To: <alpine.DEB.2.00.0904051653420.25339@chino.kir.corp.google.com>

Hi David,

David Rientjes wrote:
> The pre-defined MIN_PARTIAL value may not be suitable for machines with a
> large number of cpus per node.  To avoid excessively allocating new slabs
> because there is not at least the same number of slabs on a node's
> partial list as cpus, this will default a cache's min_partial value to be
> at least the highest number of cpus per node on the system.
> 
> This default will never exceed MAX_PARTIAL, however, so very large
> systems don't waste an excessive amount of memory.
> 
> When remote_node_defrag_ratio allows defragmenting remote nodes, it
> ensures that nr_partial exceeds min_partial so there will always be a
> local reserve when a cpu slab is filled to avoid allocating new slabs
> locally as a result of a remote cpu stealing a partial slab.
> 
> The cache's min_partial setting may still be changed by writing to
> /sys/kernel/slab/cache/min_partial.  The only restriction when doing so
> is that the value be within MIN_PARTIAL and MAX_PARTIAL.
> 
> Cc: Christoph Lameter <cl@linux-foundation.org>
> Signed-off-by: David Rientjes <rientjes@google.com>

Hmm, partial lists are per-node, so wouldn't it be better to do the 
adjustment for every struct kmem_cache_node separately? The 
'min_partial_per_node' global seems just too ugly and confusing to live 
with.

			Pekka

> ---
>  mm/slub.c |   50 ++++++++++++++++++++++++++++++++++++++++++--------
>  1 files changed, 42 insertions(+), 8 deletions(-)
> 
> diff --git a/mm/slub.c b/mm/slub.c
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1941,6 +1941,15 @@ static unsigned long calculate_alignment(unsigned long flags,
>  	return ALIGN(align, sizeof(void *));
>  }
>  
> +static void set_min_partial(struct kmem_cache *s, unsigned long min)
> +{
> +	if (min < MIN_PARTIAL)
> +		min = MIN_PARTIAL;
> +	else if (min > MAX_PARTIAL)
> +		min = MAX_PARTIAL;
> +	s->min_partial = min;
> +}
> +
>  static void init_kmem_cache_cpu(struct kmem_cache *s,
>  			struct kmem_cache_cpu *c)
>  {
> @@ -2093,6 +2102,8 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
>  #endif
>  
>  #ifdef CONFIG_NUMA
> +static unsigned long min_partial_per_node;
> +
>  /*
>   * No kmalloc_node yet so do it by hand. We know that this is the first
>   * slab on the node for this slabcache. There are no concurrent accesses
> @@ -2188,6 +2199,26 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
>  	}
>  	return 1;
>  }
> +
> +static void __cpuinit set_min_partial_per_node(int node)
> +{
> +	unsigned int nr_cpus;
> +
> +	/*
> +	 * Add one cpu since this is called for CPU_UP_PREPARE and this cpu is
> +	 * not online yet.  CPU_ONLINE would require taking slub_lock.
> +	 */
> +	nr_cpus = nr_cpus_node(node) + 1;
> +	if (nr_cpus > min_partial_per_node)
> +		min_partial_per_node = min_t(unsigned long,
> +					     nr_cpus, MAX_PARTIAL);
> +}
> +
> +static void set_cache_min_partial_per_node(struct kmem_cache *s)
> +{
> +	if (s->min_partial < min_partial_per_node)
> +		set_min_partial(s, min_partial_per_node);
> +}
>  #else
>  static void free_kmem_cache_nodes(struct kmem_cache *s)
>  {
> @@ -2198,16 +2229,15 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
>  	init_kmem_cache_node(&s->local_node, s);
>  	return 1;
>  }
> -#endif
>  
> -static void set_min_partial(struct kmem_cache *s, unsigned long min)
> +static inline void set_min_partial_per_node(int node)
> +{
> +}
> +
> +static inline void set_cache_min_partial_per_node(const struct kmem_cache *s)
>  {
> -	if (min < MIN_PARTIAL)
> -		min = MIN_PARTIAL;
> -	else if (min > MAX_PARTIAL)
> -		min = MAX_PARTIAL;
> -	s->min_partial = min;
>  }
> +#endif
>  
>  /*
>   * calculate_sizes() determines the order and the distribution of data within
> @@ -2352,6 +2382,7 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
>  	 * list to avoid pounding the page allocator excessively.
>  	 */
>  	set_min_partial(s, ilog2(s->size));
> +	set_cache_min_partial_per_node(s);
>  	s->refcount = 1;
>  #ifdef CONFIG_NUMA
>  	s->remote_node_defrag_ratio = 1000;
> @@ -3239,10 +3270,13 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
>  	case CPU_UP_PREPARE:
>  	case CPU_UP_PREPARE_FROZEN:
>  		init_alloc_cpu_cpu(cpu);
> +		set_min_partial_per_node(cpu_to_node(cpu));
>  		down_read(&slub_lock);
> -		list_for_each_entry(s, &slab_caches, list)
> +		list_for_each_entry(s, &slab_caches, list) {
>  			s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu,
>  							GFP_KERNEL);
> +			set_cache_min_partial_per_node(s);
> +		}
>  		up_read(&slub_lock);
>  		break;
>  


  reply	other threads:[~2009-04-07 19:02 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-04-06  0:52 [patch] slub: default min_partial to at least highest cpus per node David Rientjes
2009-04-07 18:58 ` Pekka Enberg [this message]
2009-04-07 19:09   ` Pekka Enberg
2009-04-07 19:44     ` David Rientjes
2009-04-07 19:54       ` Pekka Enberg
2009-04-07 20:09         ` David Rientjes
2009-04-07 20:11           ` Pekka Enberg
2009-04-07 20:22             ` David Rientjes
2009-04-07 20:35               ` Pekka Enberg
2009-04-07 20:43                 ` Christoph Lameter
2009-04-07 20:48                   ` Pekka Enberg
2009-04-07 21:31                     ` Christoph Lameter
2009-04-07 21:46                       ` David Rientjes
2009-04-07 21:48                         ` Christoph Lameter
2009-04-07 22:03                           ` David Rientjes
2009-04-13 18:07                             ` Christoph Lameter
2009-04-07 22:01                 ` David Rientjes

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=49DBA23A.3000106@cs.helsinki.fi \
    --to=penberg@cs.helsinki.fi \
    --cc=cl@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=rientjes@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.