[PATCH 03/28] mm: slb: add knowledge of reserve pages

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Linus Torvalds <torvalds@linux-foundation.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	netdev@vger.kernel.org, trond.myklebust@fys.uio.no
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [PATCH 03/28] mm: slb: add knowledge of reserve pages
Date: Wed, 20 Feb 2008 15:46:13 +0100	[thread overview]
Message-ID: <20080220150305.520016000@chello.nl> (raw)
In-Reply-To: 20080220144610.548202000@chello.nl

[-- Attachment #1: reserve-slub.patch --]
[-- Type: text/plain, Size: 11005 bytes --]

Restrict objects from reserve slabs (ALLOC_NO_WATERMARKS) to allocation
contexts that are entitled to it. This is done to ensure reserve pages don't
leak out and get consumed.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/slub_def.h |    1 
 mm/slab.c                |   60 +++++++++++++++++++++++++++++++++++++++--------
 mm/slub.c                |   42 +++++++++++++++++++++-----------
 3 files changed, 80 insertions(+), 23 deletions(-)

Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c
+++ linux-2.6/mm/slub.c
@@ -21,11 +21,12 @@
 #include <linux/ctype.h>
 #include <linux/kallsyms.h>
 #include <linux/memory.h>
+#include "internal.h"
 
 /*
  * Lock order:
  *   1. slab_lock(page)
- *   2. slab->list_lock
+ *   2. node->list_lock
  *
  *   The slab_lock protects operations on the object of a particular
  *   slab and its metadata in the page struct. If the slab lock
@@ -1098,15 +1099,15 @@ static struct page *allocate_slab(struct
 	return page;
 }
 
-static void setup_object(struct kmem_cache *s, struct page *page,
-				void *object)
+static void setup_object(struct kmem_cache *s, struct page *page, void *object)
 {
 	setup_object_debug(s, page, object);
 	if (unlikely(s->ctor))
 		s->ctor(s, object);
 }
 
-static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
+static
+struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node, int *reserve)
 {
 	struct page *page;
 	struct kmem_cache_node *n;
@@ -1121,6 +1122,7 @@ static struct page *new_slab(struct kmem
 	if (!page)
 		goto out;
 
+	*reserve = page->reserve;
 	n = get_node(s, page_to_nid(page));
 	if (n)
 		atomic_long_inc(&n->nr_slabs);
@@ -1228,8 +1230,7 @@ static __always_inline int slab_trylock(
 /*
  * Management of partially allocated slabs
  */
-static void add_partial(struct kmem_cache_node *n,
-				struct page *page, int tail)
+static void add_partial(struct kmem_cache_node *n, struct page *page, int tail)
 {
 	spin_lock(&n->list_lock);
 	n->nr_partial++;
@@ -1240,8 +1241,7 @@ static void add_partial(struct kmem_cach
 	spin_unlock(&n->list_lock);
 }
 
-static void remove_partial(struct kmem_cache *s,
-						struct page *page)
+static void remove_partial(struct kmem_cache *s, struct page *page)
 {
 	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
 
@@ -1256,7 +1256,8 @@ static void remove_partial(struct kmem_c
  *
  * Must hold list_lock.
  */
-static inline int lock_and_freeze_slab(struct kmem_cache_node *n, struct page *page)
+static inline
+int lock_and_freeze_slab(struct kmem_cache_node *n, struct page *page)
 {
 	if (slab_trylock(page)) {
 		list_del(&page->lru);
@@ -1514,11 +1515,21 @@ static void *__slab_alloc(struct kmem_ca
 {
 	void **object;
 	struct page *new;
+	int reserve;
 #ifdef SLUB_FASTPATH
 	unsigned long flags;
 
 	local_irq_save(flags);
 #endif
+	if (unlikely(c->reserve)) {
+		/*
+		 * If the current slab is a reserve slab and the current
+		 * allocation context does not allow access to the reserves we
+		 * must force an allocation to test the current levels.
+		 */
+		if (!(gfp_to_alloc_flags(gfpflags) & ALLOC_NO_WATERMARKS))
+			goto grow_slab;
+	}
 	if (!c->page)
 		goto new_slab;
 
@@ -1530,7 +1541,7 @@ load_freelist:
 	object = c->page->freelist;
 	if (unlikely(object == c->page->end))
 		goto another_slab;
-	if (unlikely(SlabDebug(c->page)))
+	if (unlikely(SlabDebug(c->page) || c->reserve))
 		goto debug;
 
 	object = c->page->freelist;
@@ -1557,16 +1568,18 @@ new_slab:
 		goto load_freelist;
 	}
 
+grow_slab:
 	if (gfpflags & __GFP_WAIT)
 		local_irq_enable();
 
-	new = new_slab(s, gfpflags, node);
+	new = new_slab(s, gfpflags, node, &reserve);
 
 	if (gfpflags & __GFP_WAIT)
 		local_irq_disable();
 
 	if (new) {
 		c = get_cpu_slab(s, smp_processor_id());
+		c->reserve = reserve;
 		stat(c, ALLOC_SLAB);
 		if (c->page)
 			flush_slab(s, c);
@@ -1594,8 +1607,8 @@ new_slab:
 
 	return NULL;
 debug:
-	object = c->page->freelist;
-	if (!alloc_debug_processing(s, c->page, object, addr))
+	if (SlabDebug(c->page) &&
+			!alloc_debug_processing(s, c->page, object, addr))
 		goto another_slab;
 
 	c->page->inuse++;
@@ -2153,10 +2166,11 @@ static struct kmem_cache_node *early_kme
 	struct page *page;
 	struct kmem_cache_node *n;
 	unsigned long flags;
+	int reserve;
 
 	BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));
 
-	page = new_slab(kmalloc_caches, gfpflags, node);
+	page = new_slab(kmalloc_caches, gfpflags, node, &reserve);
 
 	BUG_ON(!page);
 	if (page_to_nid(page) != node) {
Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h
+++ linux-2.6/include/linux/slub_def.h
@@ -37,6 +37,7 @@ struct kmem_cache_cpu {
 	int node;		/* The node of the page (or -1 for debug) */
 	unsigned int offset;	/* Freepointer offset (in word units) */
 	unsigned int objsize;	/* Size of an object (from kmem_cache) */
+	int reserve;		/* Did the current page come from the reserve */
 #ifdef CONFIG_SLUB_STATS
 	unsigned stat[NR_SLUB_STAT_ITEMS];
 #endif
Index: linux-2.6/mm/slab.c
===================================================================
--- linux-2.6.orig/mm/slab.c
+++ linux-2.6/mm/slab.c
@@ -115,6 +115,8 @@
 #include	<asm/tlbflush.h>
 #include	<asm/page.h>
 
+#include 	"internal.h"
+
 /*
  * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
  *		  0 for faster, smaller code (especially in the critical paths).
@@ -265,7 +267,8 @@ struct array_cache {
 	unsigned int avail;
 	unsigned int limit;
 	unsigned int batchcount;
-	unsigned int touched;
+	unsigned int touched:1,
+		     reserve:1;
 	spinlock_t lock;
 	void *entry[];	/*
 			 * Must have this definition in here for the proper
@@ -761,6 +764,27 @@ static inline struct array_cache *cpu_ca
 	return cachep->array[smp_processor_id()];
 }
 
+/*
+ * If the last page came from the reserves, and the current allocation context
+ * does not have access to them, force an allocation to test the watermarks.
+ */
+static inline int slab_force_alloc(struct kmem_cache *cachep, gfp_t flags)
+{
+	if (unlikely(cpu_cache_get(cachep)->reserve) &&
+			!(gfp_to_alloc_flags(flags) & ALLOC_NO_WATERMARKS))
+		return 1;
+
+	return 0;
+}
+
+static inline void slab_set_reserve(struct kmem_cache *cachep, int reserve)
+{
+	struct array_cache *ac = cpu_cache_get(cachep);
+
+	if (unlikely(ac->reserve != reserve))
+		ac->reserve = reserve;
+}
+
 static inline struct kmem_cache *__find_general_cachep(size_t size,
 							gfp_t gfpflags)
 {
@@ -960,6 +984,7 @@ static struct array_cache *alloc_arrayca
 		nc->limit = entries;
 		nc->batchcount = batchcount;
 		nc->touched = 0;
+		nc->reserve = 0;
 		spin_lock_init(&nc->lock);
 	}
 	return nc;
@@ -1663,7 +1688,8 @@ __initcall(cpucache_init);
  * did not request dmaable memory, we might get it, but that
  * would be relatively rare and ignorable.
  */
-static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
+static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid,
+		int *reserve)
 {
 	struct page *page;
 	int nr_pages;
@@ -1685,6 +1711,7 @@ static void *kmem_getpages(struct kmem_c
 	if (!page)
 		return NULL;
 
+	*reserve = page->reserve;
 	nr_pages = (1 << cachep->gfporder);
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
 		add_zone_page_state(page_zone(page),
@@ -2113,6 +2140,7 @@ static int __init_refok setup_cpu_cache(
 	cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
 	cpu_cache_get(cachep)->batchcount = 1;
 	cpu_cache_get(cachep)->touched = 0;
+	cpu_cache_get(cachep)->reserve = 0;
 	cachep->batchcount = 1;
 	cachep->limit = BOOT_CPUCACHE_ENTRIES;
 	return 0;
@@ -2768,6 +2796,7 @@ static int cache_grow(struct kmem_cache 
 	size_t offset;
 	gfp_t local_flags;
 	struct kmem_list3 *l3;
+	int reserve;
 
 	/*
 	 * Be lazy and only check for valid flags here,  keeping it out of the
@@ -2806,7 +2835,7 @@ static int cache_grow(struct kmem_cache 
 	 * 'nodeid'.
 	 */
 	if (!objp)
-		objp = kmem_getpages(cachep, local_flags, nodeid);
+		objp = kmem_getpages(cachep, local_flags, nodeid, &reserve);
 	if (!objp)
 		goto failed;
 
@@ -2823,6 +2852,7 @@ static int cache_grow(struct kmem_cache 
 	if (local_flags & __GFP_WAIT)
 		local_irq_disable();
 	check_irq_off();
+	slab_set_reserve(cachep, reserve);
 	spin_lock(&l3->list_lock);
 
 	/* Make slab active. */
@@ -2957,7 +2987,8 @@ bad:
 #define check_slabp(x,y) do { } while(0)
 #endif
 
-static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
+static void *cache_alloc_refill(struct kmem_cache *cachep,
+		gfp_t flags, int must_refill)
 {
 	int batchcount;
 	struct kmem_list3 *l3;
@@ -2967,6 +2998,8 @@ static void *cache_alloc_refill(struct k
 	node = numa_node_id();
 
 	check_irq_off();
+	if (unlikely(must_refill))
+		goto force_grow;
 	ac = cpu_cache_get(cachep);
 retry:
 	batchcount = ac->batchcount;
@@ -3035,11 +3068,14 @@ alloc_done:
 
 	if (unlikely(!ac->avail)) {
 		int x;
+force_grow:
 		x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
 
 		/* cache_grow can reenable interrupts, then ac could change. */
 		ac = cpu_cache_get(cachep);
-		if (!x && ac->avail == 0)	/* no objects in sight? abort */
+
+		/* no objects in sight? abort */
+		if (!x && (ac->avail == 0 || must_refill))
 			return NULL;
 
 		if (!ac->avail)		/* objects refilled by interrupt? */
@@ -3194,17 +3230,18 @@ static inline void *____cache_alloc(stru
 {
 	void *objp;
 	struct array_cache *ac;
+	int must_refill = slab_force_alloc(cachep, flags);
 
 	check_irq_off();
 
 	ac = cpu_cache_get(cachep);
-	if (likely(ac->avail)) {
+	if (likely(ac->avail && !must_refill)) {
 		STATS_INC_ALLOCHIT(cachep);
 		ac->touched = 1;
 		objp = ac->entry[--ac->avail];
 	} else {
 		STATS_INC_ALLOCMISS(cachep);
-		objp = cache_alloc_refill(cachep, flags);
+		objp = cache_alloc_refill(cachep, flags, must_refill);
 	}
 	return objp;
 }
@@ -3246,7 +3283,7 @@ static void *fallback_alloc(struct kmem_
 	gfp_t local_flags;
 	struct zone **z;
 	void *obj = NULL;
-	int nid;
+	int nid, reserve;
 
 	if (flags & __GFP_THISNODE)
 		return NULL;
@@ -3280,10 +3317,11 @@ retry:
 		if (local_flags & __GFP_WAIT)
 			local_irq_enable();
 		kmem_flagcheck(cache, flags);
-		obj = kmem_getpages(cache, flags, -1);
+		obj = kmem_getpages(cache, flags, -1, &reserve);
 		if (local_flags & __GFP_WAIT)
 			local_irq_disable();
 		if (obj) {
+			slab_set_reserve(cache, reserve);
 			/*
 			 * Insert into the appropriate per node queues
 			 */
@@ -3322,6 +3360,9 @@ static void *____cache_alloc_node(struct
 	l3 = cachep->nodelists[nodeid];
 	BUG_ON(!l3);
 
+	if (unlikely(slab_force_alloc(cachep, flags)))
+		goto force_grow;
+
 retry:
 	check_irq_off();
 	spin_lock(&l3->list_lock);
@@ -3359,6 +3400,7 @@ retry:
 
 must_grow:
 	spin_unlock(&l3->list_lock);
+force_grow:
 	x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
 	if (x)
 		goto retry;

--

WARNING: multiple messages have this Message-ID (diff)

From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Linus Torvalds <torvalds@linux-foundation.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	netdev@vger.kernel.org, trond.myklebust@fys.uio.no
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [PATCH 03/28] mm: slb: add knowledge of reserve pages
Date: Wed, 20 Feb 2008 15:46:13 +0100	[thread overview]
Message-ID: <20080220150305.520016000@chello.nl> (raw)
In-Reply-To: 20080220144610.548202000@chello.nl

[-- Attachment #1: reserve-slub.patch --]
[-- Type: text/plain, Size: 11230 bytes --]

Restrict objects from reserve slabs (ALLOC_NO_WATERMARKS) to allocation
contexts that are entitled to it. This is done to ensure reserve pages don't
leak out and get consumed.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/slub_def.h |    1 
 mm/slab.c                |   60 +++++++++++++++++++++++++++++++++++++++--------
 mm/slub.c                |   42 +++++++++++++++++++++-----------
 3 files changed, 80 insertions(+), 23 deletions(-)

Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c
+++ linux-2.6/mm/slub.c
@@ -21,11 +21,12 @@
 #include <linux/ctype.h>
 #include <linux/kallsyms.h>
 #include <linux/memory.h>
+#include "internal.h"
 
 /*
  * Lock order:
  *   1. slab_lock(page)
- *   2. slab->list_lock
+ *   2. node->list_lock
  *
  *   The slab_lock protects operations on the object of a particular
  *   slab and its metadata in the page struct. If the slab lock
@@ -1098,15 +1099,15 @@ static struct page *allocate_slab(struct
 	return page;
 }
 
-static void setup_object(struct kmem_cache *s, struct page *page,
-				void *object)
+static void setup_object(struct kmem_cache *s, struct page *page, void *object)
 {
 	setup_object_debug(s, page, object);
 	if (unlikely(s->ctor))
 		s->ctor(s, object);
 }
 
-static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
+static
+struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node, int *reserve)
 {
 	struct page *page;
 	struct kmem_cache_node *n;
@@ -1121,6 +1122,7 @@ static struct page *new_slab(struct kmem
 	if (!page)
 		goto out;
 
+	*reserve = page->reserve;
 	n = get_node(s, page_to_nid(page));
 	if (n)
 		atomic_long_inc(&n->nr_slabs);
@@ -1228,8 +1230,7 @@ static __always_inline int slab_trylock(
 /*
  * Management of partially allocated slabs
  */
-static void add_partial(struct kmem_cache_node *n,
-				struct page *page, int tail)
+static void add_partial(struct kmem_cache_node *n, struct page *page, int tail)
 {
 	spin_lock(&n->list_lock);
 	n->nr_partial++;
@@ -1240,8 +1241,7 @@ static void add_partial(struct kmem_cach
 	spin_unlock(&n->list_lock);
 }
 
-static void remove_partial(struct kmem_cache *s,
-						struct page *page)
+static void remove_partial(struct kmem_cache *s, struct page *page)
 {
 	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
 
@@ -1256,7 +1256,8 @@ static void remove_partial(struct kmem_c
  *
  * Must hold list_lock.
  */
-static inline int lock_and_freeze_slab(struct kmem_cache_node *n, struct page *page)
+static inline
+int lock_and_freeze_slab(struct kmem_cache_node *n, struct page *page)
 {
 	if (slab_trylock(page)) {
 		list_del(&page->lru);
@@ -1514,11 +1515,21 @@ static void *__slab_alloc(struct kmem_ca
 {
 	void **object;
 	struct page *new;
+	int reserve;
 #ifdef SLUB_FASTPATH
 	unsigned long flags;
 
 	local_irq_save(flags);
 #endif
+	if (unlikely(c->reserve)) {
+		/*
+		 * If the current slab is a reserve slab and the current
+		 * allocation context does not allow access to the reserves we
+		 * must force an allocation to test the current levels.
+		 */
+		if (!(gfp_to_alloc_flags(gfpflags) & ALLOC_NO_WATERMARKS))
+			goto grow_slab;
+	}
 	if (!c->page)
 		goto new_slab;
 
@@ -1530,7 +1541,7 @@ load_freelist:
 	object = c->page->freelist;
 	if (unlikely(object == c->page->end))
 		goto another_slab;
-	if (unlikely(SlabDebug(c->page)))
+	if (unlikely(SlabDebug(c->page) || c->reserve))
 		goto debug;
 
 	object = c->page->freelist;
@@ -1557,16 +1568,18 @@ new_slab:
 		goto load_freelist;
 	}
 
+grow_slab:
 	if (gfpflags & __GFP_WAIT)
 		local_irq_enable();
 
-	new = new_slab(s, gfpflags, node);
+	new = new_slab(s, gfpflags, node, &reserve);
 
 	if (gfpflags & __GFP_WAIT)
 		local_irq_disable();
 
 	if (new) {
 		c = get_cpu_slab(s, smp_processor_id());
+		c->reserve = reserve;
 		stat(c, ALLOC_SLAB);
 		if (c->page)
 			flush_slab(s, c);
@@ -1594,8 +1607,8 @@ new_slab:
 
 	return NULL;
 debug:
-	object = c->page->freelist;
-	if (!alloc_debug_processing(s, c->page, object, addr))
+	if (SlabDebug(c->page) &&
+			!alloc_debug_processing(s, c->page, object, addr))
 		goto another_slab;
 
 	c->page->inuse++;
@@ -2153,10 +2166,11 @@ static struct kmem_cache_node *early_kme
 	struct page *page;
 	struct kmem_cache_node *n;
 	unsigned long flags;
+	int reserve;
 
 	BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));
 
-	page = new_slab(kmalloc_caches, gfpflags, node);
+	page = new_slab(kmalloc_caches, gfpflags, node, &reserve);
 
 	BUG_ON(!page);
 	if (page_to_nid(page) != node) {
Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h
+++ linux-2.6/include/linux/slub_def.h
@@ -37,6 +37,7 @@ struct kmem_cache_cpu {
 	int node;		/* The node of the page (or -1 for debug) */
 	unsigned int offset;	/* Freepointer offset (in word units) */
 	unsigned int objsize;	/* Size of an object (from kmem_cache) */
+	int reserve;		/* Did the current page come from the reserve */
 #ifdef CONFIG_SLUB_STATS
 	unsigned stat[NR_SLUB_STAT_ITEMS];
 #endif
Index: linux-2.6/mm/slab.c
===================================================================
--- linux-2.6.orig/mm/slab.c
+++ linux-2.6/mm/slab.c
@@ -115,6 +115,8 @@
 #include	<asm/tlbflush.h>
 #include	<asm/page.h>
 
+#include 	"internal.h"
+
 /*
  * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
  *		  0 for faster, smaller code (especially in the critical paths).
@@ -265,7 +267,8 @@ struct array_cache {
 	unsigned int avail;
 	unsigned int limit;
 	unsigned int batchcount;
-	unsigned int touched;
+	unsigned int touched:1,
+		     reserve:1;
 	spinlock_t lock;
 	void *entry[];	/*
 			 * Must have this definition in here for the proper
@@ -761,6 +764,27 @@ static inline struct array_cache *cpu_ca
 	return cachep->array[smp_processor_id()];
 }
 
+/*
+ * If the last page came from the reserves, and the current allocation context
+ * does not have access to them, force an allocation to test the watermarks.
+ */
+static inline int slab_force_alloc(struct kmem_cache *cachep, gfp_t flags)
+{
+	if (unlikely(cpu_cache_get(cachep)->reserve) &&
+			!(gfp_to_alloc_flags(flags) & ALLOC_NO_WATERMARKS))
+		return 1;
+
+	return 0;
+}
+
+static inline void slab_set_reserve(struct kmem_cache *cachep, int reserve)
+{
+	struct array_cache *ac = cpu_cache_get(cachep);
+
+	if (unlikely(ac->reserve != reserve))
+		ac->reserve = reserve;
+}
+
 static inline struct kmem_cache *__find_general_cachep(size_t size,
 							gfp_t gfpflags)
 {
@@ -960,6 +984,7 @@ static struct array_cache *alloc_arrayca
 		nc->limit = entries;
 		nc->batchcount = batchcount;
 		nc->touched = 0;
+		nc->reserve = 0;
 		spin_lock_init(&nc->lock);
 	}
 	return nc;
@@ -1663,7 +1688,8 @@ __initcall(cpucache_init);
  * did not request dmaable memory, we might get it, but that
  * would be relatively rare and ignorable.
  */
-static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
+static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid,
+		int *reserve)
 {
 	struct page *page;
 	int nr_pages;
@@ -1685,6 +1711,7 @@ static void *kmem_getpages(struct kmem_c
 	if (!page)
 		return NULL;
 
+	*reserve = page->reserve;
 	nr_pages = (1 << cachep->gfporder);
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
 		add_zone_page_state(page_zone(page),
@@ -2113,6 +2140,7 @@ static int __init_refok setup_cpu_cache(
 	cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
 	cpu_cache_get(cachep)->batchcount = 1;
 	cpu_cache_get(cachep)->touched = 0;
+	cpu_cache_get(cachep)->reserve = 0;
 	cachep->batchcount = 1;
 	cachep->limit = BOOT_CPUCACHE_ENTRIES;
 	return 0;
@@ -2768,6 +2796,7 @@ static int cache_grow(struct kmem_cache 
 	size_t offset;
 	gfp_t local_flags;
 	struct kmem_list3 *l3;
+	int reserve;
 
 	/*
 	 * Be lazy and only check for valid flags here,  keeping it out of the
@@ -2806,7 +2835,7 @@ static int cache_grow(struct kmem_cache 
 	 * 'nodeid'.
 	 */
 	if (!objp)
-		objp = kmem_getpages(cachep, local_flags, nodeid);
+		objp = kmem_getpages(cachep, local_flags, nodeid, &reserve);
 	if (!objp)
 		goto failed;
 
@@ -2823,6 +2852,7 @@ static int cache_grow(struct kmem_cache 
 	if (local_flags & __GFP_WAIT)
 		local_irq_disable();
 	check_irq_off();
+	slab_set_reserve(cachep, reserve);
 	spin_lock(&l3->list_lock);
 
 	/* Make slab active. */
@@ -2957,7 +2987,8 @@ bad:
 #define check_slabp(x,y) do { } while(0)
 #endif
 
-static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
+static void *cache_alloc_refill(struct kmem_cache *cachep,
+		gfp_t flags, int must_refill)
 {
 	int batchcount;
 	struct kmem_list3 *l3;
@@ -2967,6 +2998,8 @@ static void *cache_alloc_refill(struct k
 	node = numa_node_id();
 
 	check_irq_off();
+	if (unlikely(must_refill))
+		goto force_grow;
 	ac = cpu_cache_get(cachep);
 retry:
 	batchcount = ac->batchcount;
@@ -3035,11 +3068,14 @@ alloc_done:
 
 	if (unlikely(!ac->avail)) {
 		int x;
+force_grow:
 		x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
 
 		/* cache_grow can reenable interrupts, then ac could change. */
 		ac = cpu_cache_get(cachep);
-		if (!x && ac->avail == 0)	/* no objects in sight? abort */
+
+		/* no objects in sight? abort */
+		if (!x && (ac->avail == 0 || must_refill))
 			return NULL;
 
 		if (!ac->avail)		/* objects refilled by interrupt? */
@@ -3194,17 +3230,18 @@ static inline void *____cache_alloc(stru
 {
 	void *objp;
 	struct array_cache *ac;
+	int must_refill = slab_force_alloc(cachep, flags);
 
 	check_irq_off();
 
 	ac = cpu_cache_get(cachep);
-	if (likely(ac->avail)) {
+	if (likely(ac->avail && !must_refill)) {
 		STATS_INC_ALLOCHIT(cachep);
 		ac->touched = 1;
 		objp = ac->entry[--ac->avail];
 	} else {
 		STATS_INC_ALLOCMISS(cachep);
-		objp = cache_alloc_refill(cachep, flags);
+		objp = cache_alloc_refill(cachep, flags, must_refill);
 	}
 	return objp;
 }
@@ -3246,7 +3283,7 @@ static void *fallback_alloc(struct kmem_
 	gfp_t local_flags;
 	struct zone **z;
 	void *obj = NULL;
-	int nid;
+	int nid, reserve;
 
 	if (flags & __GFP_THISNODE)
 		return NULL;
@@ -3280,10 +3317,11 @@ retry:
 		if (local_flags & __GFP_WAIT)
 			local_irq_enable();
 		kmem_flagcheck(cache, flags);
-		obj = kmem_getpages(cache, flags, -1);
+		obj = kmem_getpages(cache, flags, -1, &reserve);
 		if (local_flags & __GFP_WAIT)
 			local_irq_disable();
 		if (obj) {
+			slab_set_reserve(cache, reserve);
 			/*
 			 * Insert into the appropriate per node queues
 			 */
@@ -3322,6 +3360,9 @@ static void *____cache_alloc_node(struct
 	l3 = cachep->nodelists[nodeid];
 	BUG_ON(!l3);
 
+	if (unlikely(slab_force_alloc(cachep, flags)))
+		goto force_grow;
+
 retry:
 	check_irq_off();
 	spin_lock(&l3->list_lock);
@@ -3359,6 +3400,7 @@ retry:
 
 must_grow:
 	spin_unlock(&l3->list_lock);
+force_grow:
 	x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
 	if (x)
 		goto retry;

--

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

next prev parent reply	other threads:[~2008-02-20 15:18 UTC|newest]

Thread overview: 146+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-02-20 14:46 [PATCH 00/28] Swap over NFS -v16 Peter Zijlstra
2008-02-20 14:46 ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 01/28] mm: gfp_to_alloc_flags() Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 02/28] mm: tag reseve pages Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` Peter Zijlstra [this message]
2008-02-20 14:46   ` [PATCH 03/28] mm: slb: add knowledge of reserve pages Peter Zijlstra
2008-02-20 14:46 ` [PATCH 04/28] mm: kmem_estimate_pages() Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-23  8:05   ` Andrew Morton
2008-02-23  8:05     ` Andrew Morton
2008-02-20 14:46 ` [PATCH 05/28] mm: allow PF_MEMALLOC from softirq context Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-23  8:05   ` Andrew Morton
2008-02-23  8:05     ` Andrew Morton
2008-02-20 14:46 ` [PATCH 06/28] mm: serialize access to min_free_kbytes Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 07/28] mm: emergency pool Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-23  8:05   ` Andrew Morton
2008-02-23  8:05     ` Andrew Morton
2008-02-20 14:46 ` [PATCH 08/28] mm: system wide ALLOC_NO_WATERMARK Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-23  8:05   ` Andrew Morton
2008-02-23  8:05     ` Andrew Morton
2008-02-20 14:46 ` [PATCH 09/28] mm: __GFP_MEMALLOC Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-23  8:06   ` Andrew Morton
2008-02-23  8:06     ` Andrew Morton
2008-02-20 14:46 ` [PATCH 10/28] mm: memory reserve management Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-23  8:06   ` Andrew Morton
2008-02-23  8:06     ` Andrew Morton
2008-02-20 14:46 ` [PATCH 11/28] selinux: tag avc cache alloc as non-critical Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 12/28] net: wrap sk->sk_backlog_rcv() Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 13/28] net: packet split receive api Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 14/28] net: sk_allocation() - concentrate socket related allocations Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 15/28] netvm: network reserve infrastructure Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-23  8:06   ` Andrew Morton
2008-02-23  8:06     ` Andrew Morton
2008-02-24  6:52   ` Mike Snitzer
2008-02-24  6:52     ` Mike Snitzer
2008-02-20 14:46 ` [PATCH 16/28] netvm: INET reserves Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 17/28] netvm: hook skb allocation to reserves Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-23  8:06   ` Andrew Morton
2008-02-23  8:06     ` Andrew Morton
2008-02-20 14:46 ` [PATCH 18/28] netvm: filter emergency skbs Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 19/28] netvm: prevent a stream specific deadlock Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 20/28] netfilter: NF_QUEUE vs emergency skbs Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 21/28] netvm: skb processing Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 22/28] mm: add support for non block device backed swap files Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 16:30   ` Randy Dunlap
2008-02-20 16:30     ` Randy Dunlap
2008-02-20 16:46     ` Peter Zijlstra
2008-02-20 16:46       ` Peter Zijlstra
2008-02-26 12:45   ` Miklos Szeredi
2008-02-26 12:45     ` Miklos Szeredi
2008-02-26 12:58     ` Peter Zijlstra
2008-02-26 12:58       ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 23/28] mm: methods for teaching filesystems about PG_swapcache pages Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 24/28] nfs: remove mempools Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 25/28] nfs: teach the NFS client how to treat PG_swapcache pages Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 26/28] nfs: disable data cache revalidation for swapfiles Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 27/28] nfs: enable swap on NFS Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 28/28] nfs: fix various memory recursions possible with swap over NFS Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-23  8:06 ` [PATCH 00/28] Swap over NFS -v16 Andrew Morton
2008-02-23  8:06   ` Andrew Morton
2008-02-26  6:03   ` Neil Brown
2008-02-26  6:03     ` Neil Brown
2008-02-26 10:50     ` Peter Zijlstra
2008-02-26 10:50       ` Peter Zijlstra
2008-02-26 12:00       ` Peter Zijlstra
2008-02-26 12:00         ` Peter Zijlstra
2008-02-26 15:29       ` Miklos Szeredi
2008-02-26 15:29         ` Miklos Szeredi
2008-02-26 15:41         ` Peter Zijlstra
2008-02-26 15:41           ` Peter Zijlstra
2008-02-26 15:43         ` Peter Zijlstra
2008-02-26 15:43           ` Peter Zijlstra
2008-02-26 15:47           ` Miklos Szeredi
2008-02-26 15:47             ` Miklos Szeredi
2008-02-26 17:56       ` Andrew Morton
2008-02-26 17:56         ` Andrew Morton
2008-02-27  5:51       ` Neil Brown
2008-02-27  5:51         ` Neil Brown
2008-02-27  7:58         ` Peter Zijlstra
2008-02-27  7:58           ` Peter Zijlstra
2008-02-27  8:05           ` Pekka Enberg
2008-02-27  8:05             ` Pekka Enberg
2008-02-27  8:14             ` Peter Zijlstra
2008-02-27  8:14               ` Peter Zijlstra
2008-02-27  8:33               ` Peter Zijlstra
2008-02-27  8:33                 ` Peter Zijlstra
2008-02-27  8:43                 ` Pekka J Enberg
2008-02-27  8:43                   ` Pekka J Enberg
2008-02-29 11:51             ` Peter Zijlstra
2008-02-29 11:51               ` Peter Zijlstra
2008-02-29 11:58               ` Pekka Enberg
2008-02-29 11:58                 ` Pekka Enberg
2008-02-29 12:18                 ` Peter Zijlstra
2008-02-29 12:18                   ` Peter Zijlstra
2008-02-29 12:29                   ` Pekka Enberg
2008-02-29 12:29                     ` Pekka Enberg
2008-02-29  1:29           ` Neil Brown
2008-02-29  1:29             ` Neil Brown
2008-02-29 10:21             ` Peter Zijlstra
2008-02-29 10:21               ` Peter Zijlstra
2008-03-02 22:18               ` Neil Brown
2008-03-02 22:18                 ` Neil Brown
2008-03-02 23:33                 ` Peter Zijlstra
2008-03-02 23:33                   ` Peter Zijlstra
2008-03-03 23:41                   ` Neil Brown
2008-03-03 23:41                     ` Neil Brown
2008-03-04 10:28                     ` Peter Zijlstra
2008-03-04 10:28                       ` Peter Zijlstra
     [not found]           ` <1837 <1204626509.6241.39.camel@lappy>
2008-03-07  3:33             ` Neil Brown
2008-03-07  3:33               ` Neil Brown
2008-03-07 11:17               ` Peter Zijlstra
2008-03-07 11:17                 ` Peter Zijlstra
2008-03-07 11:55                 ` Peter Zijlstra
2008-03-07 11:55                   ` Peter Zijlstra
2008-03-10  5:15                 ` Neil Brown
2008-03-10  5:15                   ` Neil Brown
2008-03-10  9:17                   ` Peter Zijlstra
2008-03-10  9:17                     ` Peter Zijlstra
2008-03-14  5:22                     ` Neil Brown
2008-03-14  5:22                       ` Neil Brown

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080220150305.520016000@chello.nl \
    --to=a.p.zijlstra@chello.nl \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=netdev@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=trond.myklebust@fys.uio.no \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.