From: clameter@sgi.com
To: akpm@linux-foundation.org
Cc: linux-kernel@vger.kernel.org
Subject: [patch 5/7] SLUB: Add MIN_PARTIAL
Date: Wed, 25 Apr 2007 22:07:48 -0700
Message-ID: <20070426050934.483389047@sgi.com>
In-Reply-To: <20070426050743.867613938@sgi.com>

We leave a minimum of partial slabs on a node when we search for
partial slabs on other nodes. Define a constant for that value.

Then modify SLUB to keep MIN_PARTIAL partial slabs around.

This avoids bad situations where a function frees the last object
in a slab (which results in the page being returned to the page
allocator) only to then allocate one again (which requires getting
a page back from the page allocator if the partial list was empty).
Keeping a couple of slabs on the partial list reduces overhead.
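
To illustrate the pattern being avoided (a hypothetical caller, not
part of this patch; thrash_example() and "my_cache" are assumed names,
and the cache's only slab is assumed to hold the single object):

	/*
	 * Hypothetical worst case: without a minimum partial list,
	 * each free of the last object returns the slab page to the
	 * page allocator and each alloc has to get a page back.
	 * Needs <linux/slab.h>.
	 */
	static void thrash_example(struct kmem_cache *my_cache)
	{
		void *obj;
		int i;

		for (i = 0; i < 1000; i++) {
			obj = kmem_cache_alloc(my_cache, GFP_KERNEL);
			kmem_cache_free(my_cache, obj);
		}
	}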

Empty slabs are added to the end of the partial list to ensure that
partially allocated slabs are consumed first (defragmentation).
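
The ordering is carried by the standard kernel list API: partial list
scans start at the head, so list_add() vs. list_add_tail() decides the
reuse order (a minimal sketch, using add_partial()/add_partial_tail()
from the patch below):

	/* Partially allocated slab: hand out its objects first. */
	list_add(&page->lru, &n->partial);

	/*
	 * Fully empty slab: fall back to it only when no partially
	 * allocated slab remains.
	 */
	list_add_tail(&page->lru, &n->partial);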

Signed-off-by: Christoph Lameter <clameter@sgi.com>

Index: linux-2.6.21-rc7-mm1/mm/slub.c
===================================================================
--- linux-2.6.21-rc7-mm1.orig/mm/slub.c	2007-04-25 21:23:59.000000000 -0700
+++ linux-2.6.21-rc7-mm1/mm/slub.c	2007-04-25 21:25:48.000000000 -0700
@@ -109,6 +109,9 @@
 /* Enable to test recovery from slab corruption on boot */
 #undef SLUB_RESILIENCY_TEST
 
+/* Minimum number of partial slabs */
+#define MIN_PARTIAL 2
+
 #define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
 				SLAB_POISON | SLAB_STORE_USER)
 /*
@@ -635,16 +638,8 @@ static int on_freelist(struct kmem_cache
 /*
  * Tracking of fully allocated slabs for debugging
  */
-static void add_full(struct kmem_cache *s, struct page *page)
+static void add_full(struct kmem_cache_node *n, struct page *page)
 {
-	struct kmem_cache_node *n;
-
-	VM_BUG_ON(!irqs_disabled());
-
-	if (!(s->flags & SLAB_STORE_USER))
-		return;
-
-	n = get_node(s, page_to_nid(page));
 	spin_lock(&n->list_lock);
 	list_add(&page->lru, &n->full);
 	spin_unlock(&n->list_lock);
@@ -923,10 +918,16 @@ static __always_inline int slab_trylock(
 /*
  * Management of partially allocated slabs
  */
-static void add_partial(struct kmem_cache *s, struct page *page)
+static void add_partial_tail(struct kmem_cache_node *n, struct page *page)
 {
-	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+	spin_lock(&n->list_lock);
+	n->nr_partial++;
+	list_add_tail(&page->lru, &n->partial);
+	spin_unlock(&n->list_lock);
+}
 
+static void add_partial(struct kmem_cache_node *n, struct page *page)
+{
 	spin_lock(&n->list_lock);
 	n->nr_partial++;
 	list_add(&page->lru, &n->partial);
@@ -1026,7 +1027,7 @@ static struct page *get_any_partial(stru
 		n = get_node(s, zone_to_nid(*z));
 
 		if (n && cpuset_zone_allowed_hardwall(*z, flags) &&
-				n->nr_partial > 2) {
+				n->nr_partial > MIN_PARTIAL) {
 			page = get_partial_node(n);
 			if (page)
 				return page;
@@ -1060,15 +1061,31 @@ static struct page *get_partial(struct k
  */
 static void putback_slab(struct kmem_cache *s, struct page *page)
 {
+	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+
 	if (page->inuse) {
+
 		if (page->freelist)
-			add_partial(s, page);
-		else if (PageError(page))
-			add_full(s, page);
+			add_partial(n, page);
+		else if (PageError(page) && (s->flags & SLAB_STORE_USER))
+			add_full(n, page);
 		slab_unlock(page);
+
 	} else {
-		slab_unlock(page);
-		discard_slab(s, page);
+		if (n->nr_partial < MIN_PARTIAL) {
+			/*
+			 * Add an empty page to the partial list in
+			 * order to avoid page allocator overhead. This
+			 * page needs to come after all the other pages
+			 * that are not fully empty in order to get
+			 * maximum defragmentation.
+			 */
+			add_partial_tail(n, page);
+			slab_unlock(page);
+		} else {
+			slab_unlock(page);
+			discard_slab(s, page);
+		}
 	}
 }
 
@@ -1325,7 +1342,7 @@ checks_ok:
 	 * then add it.
 	 */
 	if (unlikely(!prior))
-		add_partial(s, page);
+		add_partial(get_node(s, page_to_nid(page)), page);
 
 out_unlock:
 	slab_unlock(page);
@@ -1541,7 +1558,7 @@ static struct kmem_cache_node * __init e
 	kmalloc_caches->node[node] = n;
 	init_kmem_cache_node(n);
 	atomic_long_inc(&n->nr_slabs);
-	add_partial(kmalloc_caches, page);
+	add_partial(n, page);
 	return n;
 }
 

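For reference, the deallocation policy in putback_slab() after this
patch reduces to the following (a condensed sketch of the hunk above,
with the slab_unlock() ordering omitted):

	struct kmem_cache_node *n = get_node(s, page_to_nid(page));

	if (page->inuse) {
		if (page->freelist)
			add_partial(n, page);	/* head: consumed first */
		else if (PageError(page) && (s->flags & SLAB_STORE_USER))
			add_full(n, page);	/* debug full-slab tracking */
	} else if (n->nr_partial < MIN_PARTIAL) {
		add_partial_tail(n, page);	/* keep the empty slab */
	} else {
		discard_slab(s, page);		/* back to page allocator */
	}
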
--
