[PATCH/RFC 6/6] numa: slab: use numa_mem_id() for slab local memory node

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Lee Schermerhorn <lee.schermerhorn@hp.com>
To: linux-arch@vger.kernel.org, linux-mm@kvack.org
Cc: akpm@linux-foundation.org, Mel Gorman <mel@csn.ul.ie>,
	Christoph Lameter <clameter@sgi.com>,
	Nick Piggin <npiggin@suse.de>,
	David Rientjes <rientjes@google.com>,
	eric.whitney@hp.com
Subject: [PATCH/RFC 6/6] numa: slab:  use numa_mem_id() for slab local memory node
Date: Fri, 13 Nov 2009 16:18:23 -0500	[thread overview]
Message-ID: <20091113211823.15074.1305.sendpatchset@localhost.localdomain> (raw)
In-Reply-To: <20091113211714.15074.29078.sendpatchset@localhost.localdomain>

[PATCH] numa:  Slab handle memoryless nodes

Against:  2.6.32-rc5-mmotm-091101-1001

Example usage of generic "numa_mem_id()":

The mainline slab code, since ~ 2.6.19, does not handle memoryless
nodes well.  Specifically, the "fast path"--____cache_alloc()--will
never succeed as slab doesn't cache offnode object on the per cpu
queues, and for memoryless nodes, all memory will be "off node"
relative to numa_node_id().  This adds significant overhead to all
kmem cache allocations, incurring a significant regression relative
to earlier kernels [from before slab.c was reorganized].

This patch uses the generic topology function "numa_mem_id()" to
return the "effective local memory node" for the calling context.
This is the first node in the local node's generic fallback zonelist--
i.e., the same node that "local" mempolicy-based allocations would
use.  This lets slab cache these "local" allocations and avoid 
fallback/refill on every allocation.

N.B.:  incomplete.  slab will need to handle node and memory hotplug
that could change the value returned by numa_mem_id() for any given
node.  This will be addressed by a subsequent patch, if we decide to
go this route.

Performance impact on "hackbench 400 process 200"

2.6.32-rc5+mmotm-091101		no-patch	this-patch
no memoryless nodes [avg of 10]:  12.700	  12.856  ~1.2%
cpus all on memless nodes  [20]: 261.530	  27.700 ~10x speedup

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>

---

 mm/slab.c |   27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

Index: linux-2.6.32-rc5-mmotm-091101-1001/mm/slab.c
===================================================================
--- linux-2.6.32-rc5-mmotm-091101-1001.orig/mm/slab.c
+++ linux-2.6.32-rc5-mmotm-091101-1001/mm/slab.c
@@ -1064,7 +1064,7 @@ static inline int cache_free_alien(struc
 	struct array_cache *alien = NULL;
 	int node;
 
-	node = numa_node_id();
+	node = numa_mem_id();
 
 	/*
 	 * Make sure we are not freeing a object from another node to the array
@@ -1407,7 +1407,7 @@ void __init kmem_cache_init(void)
 	 * 6) Resize the head arrays of the kmalloc caches to their final sizes.
 	 */
 
-	node = numa_node_id();
+	node = numa_mem_id();
 
 	/* 1) create the cache_cache */
 	INIT_LIST_HEAD(&cache_chain);
@@ -2041,7 +2041,7 @@ static int __init_refok setup_cpu_cache(
 			}
 		}
 	}
-	cachep->nodelists[numa_node_id()]->next_reap =
+	cachep->nodelists[numa_mem_id()]->next_reap =
 			jiffies + REAPTIMEOUT_LIST3 +
 			((unsigned long)cachep) % REAPTIMEOUT_LIST3;
 
@@ -2372,7 +2372,7 @@ static void check_spinlock_acquired(stru
 {
 #ifdef CONFIG_SMP
 	check_irq_off();
-	assert_spin_locked(&cachep->nodelists[numa_node_id()]->list_lock);
+	assert_spin_locked(&cachep->nodelists[numa_mem_id()]->list_lock);
 #endif
 }
 
@@ -2399,7 +2399,7 @@ static void do_drain(void *arg)
 {
 	struct kmem_cache *cachep = arg;
 	struct array_cache *ac;
-	int node = numa_node_id();
+	int node = numa_mem_id();
 
 	check_irq_off();
 	ac = cpu_cache_get(cachep);
@@ -2932,7 +2932,7 @@ static void *cache_alloc_refill(struct k
 
 retry:
 	check_irq_off();
-	node = numa_node_id();
+	node = numa_mem_id();
 	ac = cpu_cache_get(cachep);
 	batchcount = ac->batchcount;
 	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
@@ -3128,7 +3128,7 @@ static void *alternate_node_alloc(struct
 
 	if (in_interrupt() || (flags & __GFP_THISNODE))
 		return NULL;
-	nid_alloc = nid_here = numa_node_id();
+	nid_alloc = nid_here = numa_mem_id();
 	if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
 		nid_alloc = cpuset_mem_spread_node();
 	else if (current->mempolicy)
@@ -3297,6 +3297,7 @@ __cache_alloc_node(struct kmem_cache *ca
 {
 	unsigned long save_flags;
 	void *ptr;
+	int slab_node = numa_mem_id();
 
 	flags &= gfp_allowed_mask;
 
@@ -3309,7 +3310,7 @@ __cache_alloc_node(struct kmem_cache *ca
 	local_irq_save(save_flags);
 
 	if (unlikely(nodeid == -1))
-		nodeid = numa_node_id();
+		nodeid = slab_node;
 
 	if (unlikely(!cachep->nodelists[nodeid])) {
 		/* Node not bootstrapped yet */
@@ -3317,7 +3318,7 @@ __cache_alloc_node(struct kmem_cache *ca
 		goto out;
 	}
 
-	if (nodeid == numa_node_id()) {
+	if (nodeid == slab_node) {
 		/*
 		 * Use the locally cached objects if possible.
 		 * However ____cache_alloc does not allow fallback
@@ -3361,8 +3362,8 @@ __do_cache_alloc(struct kmem_cache *cach
 	 * We may just have run out of memory on the local node.
 	 * ____cache_alloc_node() knows how to locate memory on other nodes
 	 */
- 	if (!objp)
- 		objp = ____cache_alloc_node(cache, flags, numa_node_id());
+	if (!objp)
+		objp = ____cache_alloc_node(cache, flags, numa_mem_id());
 
   out:
 	return objp;
@@ -3459,7 +3460,7 @@ static void cache_flusharray(struct kmem
 {
 	int batchcount;
 	struct kmem_list3 *l3;
-	int node = numa_node_id();
+	int node = numa_mem_id();
 
 	batchcount = ac->batchcount;
 #if DEBUG
@@ -4034,7 +4035,7 @@ static void cache_reap(struct work_struc
 {
 	struct kmem_cache *searchp;
 	struct kmem_list3 *l3;
-	int node = numa_node_id();
+	int node = numa_mem_id();
 	struct delayed_work *work = to_delayed_work(w);
 
 	if (!mutex_trylock(&cache_chain_mutex))

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

WARNING: multiple messages have this Message-ID (diff)

From: Lee Schermerhorn <lee.schermerhorn@hp.com>
To: linux-arch@vger.kernel.org, linux-mm@kvack.org
Cc: akpm@linux-foundation.org, Mel Gorman <mel@csn.ul.ie>,
	Christoph Lameter <clameter@sgi.com>,
	Nick Piggin <npiggin@suse.de>,
	David Rientjes <rientjes@google.com>,
	eric.whitney@hp.com
Subject: [PATCH/RFC 6/6] numa: slab:  use numa_mem_id() for slab local memory node
Date: Fri, 13 Nov 2009 16:18:23 -0500	[thread overview]
Message-ID: <20091113211823.15074.1305.sendpatchset@localhost.localdomain> (raw)
Message-ID: <20091113211823.lSjKxJBw9591NVbRApJsCdaEQ7djsIDHKbehdlWLy-A@z> (raw)
In-Reply-To: <20091113211714.15074.29078.sendpatchset@localhost.localdomain>

[PATCH] numa:  Slab handle memoryless nodes

Against:  2.6.32-rc5-mmotm-091101-1001

Example usage of generic "numa_mem_id()":

The mainline slab code, since ~ 2.6.19, does not handle memoryless
nodes well.  Specifically, the "fast path"--____cache_alloc()--will
never succeed as slab doesn't cache offnode object on the per cpu
queues, and for memoryless nodes, all memory will be "off node"
relative to numa_node_id().  This adds significant overhead to all
kmem cache allocations, incurring a significant regression relative
to earlier kernels [from before slab.c was reorganized].

This patch uses the generic topology function "numa_mem_id()" to
return the "effective local memory node" for the calling context.
This is the first node in the local node's generic fallback zonelist--
i.e., the same node that "local" mempolicy-based allocations would
use.  This lets slab cache these "local" allocations and avoid 
fallback/refill on every allocation.

N.B.:  incomplete.  slab will need to handle node and memory hotplug
that could change the value returned by numa_mem_id() for any given
node.  This will be addressed by a subsequent patch, if we decide to
go this route.

Performance impact on "hackbench 400 process 200"

2.6.32-rc5+mmotm-091101		no-patch	this-patch
no memoryless nodes [avg of 10]:  12.700	  12.856  ~1.2%
cpus all on memless nodes  [20]: 261.530	  27.700 ~10x speedup

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>

---

 mm/slab.c |   27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

Index: linux-2.6.32-rc5-mmotm-091101-1001/mm/slab.c
===================================================================
--- linux-2.6.32-rc5-mmotm-091101-1001.orig/mm/slab.c
+++ linux-2.6.32-rc5-mmotm-091101-1001/mm/slab.c
@@ -1064,7 +1064,7 @@ static inline int cache_free_alien(struc
 	struct array_cache *alien = NULL;
 	int node;
 
-	node = numa_node_id();
+	node = numa_mem_id();
 
 	/*
 	 * Make sure we are not freeing a object from another node to the array
@@ -1407,7 +1407,7 @@ void __init kmem_cache_init(void)
 	 * 6) Resize the head arrays of the kmalloc caches to their final sizes.
 	 */
 
-	node = numa_node_id();
+	node = numa_mem_id();
 
 	/* 1) create the cache_cache */
 	INIT_LIST_HEAD(&cache_chain);
@@ -2041,7 +2041,7 @@ static int __init_refok setup_cpu_cache(
 			}
 		}
 	}
-	cachep->nodelists[numa_node_id()]->next_reap =
+	cachep->nodelists[numa_mem_id()]->next_reap =
 			jiffies + REAPTIMEOUT_LIST3 +
 			((unsigned long)cachep) % REAPTIMEOUT_LIST3;
 
@@ -2372,7 +2372,7 @@ static void check_spinlock_acquired(stru
 {
 #ifdef CONFIG_SMP
 	check_irq_off();
-	assert_spin_locked(&cachep->nodelists[numa_node_id()]->list_lock);
+	assert_spin_locked(&cachep->nodelists[numa_mem_id()]->list_lock);
 #endif
 }
 
@@ -2399,7 +2399,7 @@ static void do_drain(void *arg)
 {
 	struct kmem_cache *cachep = arg;
 	struct array_cache *ac;
-	int node = numa_node_id();
+	int node = numa_mem_id();
 
 	check_irq_off();
 	ac = cpu_cache_get(cachep);
@@ -2932,7 +2932,7 @@ static void *cache_alloc_refill(struct k
 
 retry:
 	check_irq_off();
-	node = numa_node_id();
+	node = numa_mem_id();
 	ac = cpu_cache_get(cachep);
 	batchcount = ac->batchcount;
 	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
@@ -3128,7 +3128,7 @@ static void *alternate_node_alloc(struct
 
 	if (in_interrupt() || (flags & __GFP_THISNODE))
 		return NULL;
-	nid_alloc = nid_here = numa_node_id();
+	nid_alloc = nid_here = numa_mem_id();
 	if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
 		nid_alloc = cpuset_mem_spread_node();
 	else if (current->mempolicy)
@@ -3297,6 +3297,7 @@ __cache_alloc_node(struct kmem_cache *ca
 {
 	unsigned long save_flags;
 	void *ptr;
+	int slab_node = numa_mem_id();
 
 	flags &= gfp_allowed_mask;
 
@@ -3309,7 +3310,7 @@ __cache_alloc_node(struct kmem_cache *ca
 	local_irq_save(save_flags);
 
 	if (unlikely(nodeid == -1))
-		nodeid = numa_node_id();
+		nodeid = slab_node;
 
 	if (unlikely(!cachep->nodelists[nodeid])) {
 		/* Node not bootstrapped yet */
@@ -3317,7 +3318,7 @@ __cache_alloc_node(struct kmem_cache *ca
 		goto out;
 	}
 
-	if (nodeid == numa_node_id()) {
+	if (nodeid == slab_node) {
 		/*
 		 * Use the locally cached objects if possible.
 		 * However ____cache_alloc does not allow fallback
@@ -3361,8 +3362,8 @@ __do_cache_alloc(struct kmem_cache *cach
 	 * We may just have run out of memory on the local node.
 	 * ____cache_alloc_node() knows how to locate memory on other nodes
 	 */
- 	if (!objp)
- 		objp = ____cache_alloc_node(cache, flags, numa_node_id());
+	if (!objp)
+		objp = ____cache_alloc_node(cache, flags, numa_mem_id());
 
   out:
 	return objp;
@@ -3459,7 +3460,7 @@ static void cache_flusharray(struct kmem
 {
 	int batchcount;
 	struct kmem_list3 *l3;
-	int node = numa_node_id();
+	int node = numa_mem_id();
 
 	batchcount = ac->batchcount;
 #if DEBUG
@@ -4034,7 +4035,7 @@ static void cache_reap(struct work_struc
 {
 	struct kmem_cache *searchp;
 	struct kmem_list3 *l3;
-	int node = numa_node_id();
+	int node = numa_mem_id();
 	struct delayed_work *work = to_delayed_work(w);
 
 	if (!mutex_trylock(&cache_chain_mutex))

next prev parent reply	other threads:[~2009-11-13 21:18 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-11-13 21:17 [PATCH/RFC 0/6] Numa: Use Generic Per-cpu Variables for numa_*_id() Lee Schermerhorn
2009-11-13 21:17 ` Lee Schermerhorn
2009-11-13 21:17 ` [PATCH/RFC 1/6] numa: Use Generic Per-cpu Variables for numa_node_id() Lee Schermerhorn
2009-11-13 21:17   ` Lee Schermerhorn
2009-11-20 15:46   ` Christoph Lameter
2009-11-20 15:46     ` Christoph Lameter
2009-11-30 20:28     ` Lee Schermerhorn
2009-11-30 20:28       ` Lee Schermerhorn
2009-11-30 20:40       ` Matthew Wilcox
2009-11-30 20:40         ` Matthew Wilcox
2009-11-30 23:43       ` Arnd Bergmann
2009-11-30 23:43         ` Arnd Bergmann
2009-12-02 16:29         ` Lee Schermerhorn
2009-12-02 16:29           ` Lee Schermerhorn
2009-11-13 21:17 ` [PATCH/RFC 2/6] numa: x86_64: use generic percpu var numa_node_id() implementation Lee Schermerhorn
2009-11-13 21:17   ` Lee Schermerhorn
2009-11-20 15:48   ` Christoph Lameter
2009-11-20 15:48     ` Christoph Lameter
2009-11-13 21:18 ` [PATCH/RFC 3/6] numa: ia64: " Lee Schermerhorn
2009-11-13 21:18   ` Lee Schermerhorn
2009-11-20 15:50   ` Christoph Lameter
2009-11-20 15:50     ` Christoph Lameter
2009-11-13 21:18 ` [PATCH/RFC 4/6] numa: Introduce numa_mem_id()- effective local memory node id Lee Schermerhorn
2009-11-13 21:18   ` Lee Schermerhorn
2009-11-20 15:53   ` Christoph Lameter
2009-11-20 15:53     ` Christoph Lameter
2009-11-13 21:18 ` [PATCH/RFC 5/6] numa: ia64: support numa_mem_id() for memoryless nodes Lee Schermerhorn
2009-11-13 21:18   ` Lee Schermerhorn
2009-11-13 21:18 ` Lee Schermerhorn [this message]
2009-11-13 21:18   ` [PATCH/RFC 6/6] numa: slab: use numa_mem_id() for slab local memory node Lee Schermerhorn
2009-11-20 15:56   ` Christoph Lameter
2009-11-20 15:56     ` Christoph Lameter
2009-11-20 15:43 ` [PATCH/RFC 0/6] Numa: Use Generic Per-cpu Variables for numa_*_id() Christoph Lameter
2009-11-20 15:43   ` Christoph Lameter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20091113211823.15074.1305.sendpatchset@localhost.localdomain \
    --to=lee.schermerhorn@hp.com \
    --cc=akpm@linux-foundation.org \
    --cc=clameter@sgi.com \
    --cc=eric.whitney@hp.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mel@csn.ul.ie \
    --cc=npiggin@suse.de \
    --cc=rientjes@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.