[this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

All of lore.kernel.org
 help / color / mirror / Atom feed

From: cl@linux-foundation.org
To: akpm@linux-foundation.org
Cc: linux-kernel@vger.kernel.org,
	Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>,
	Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Tejun Heo <tj@kernel.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Subject: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable
Date: Tue, 06 Oct 2009 19:37:13 -0400	[thread overview]
Message-ID: <20091006233733.153341605@gentwo.org> (raw)
In-Reply-To: 20091006233654.815079668@gentwo.org

[-- Attachment #1: this_cpu_slub_irqless --]
[-- Type: text/plain, Size: 7487 bytes --]

This is a bit of a different tack on things than the last version provided
by Mathieu.

Instead of using a cmpxchg we keep a state variable in the per cpu structure
that is incremented when we enter the hot path. We can then detect that
a thread is in the fastpath and fall back to an alternate allocation / free
technique that bypasses fastpath caching.

A disadvantage is that we have to disable preempt. But if preempt is disabled
(like on most kernels that I run) then the hotpath becomes very efficient.

Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>


---
 include/linux/slub_def.h |    1 
 mm/slub.c                |   91 +++++++++++++++++++++++++++++++++++++----------
 2 files changed, 74 insertions(+), 18 deletions(-)

Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h	2009-10-01 15:53:15.000000000 -0500
+++ linux-2.6/include/linux/slub_def.h	2009-10-01 15:53:15.000000000 -0500
@@ -38,6 +38,7 @@ struct kmem_cache_cpu {
 	void **freelist;	/* Pointer to first free per cpu object */
 	struct page *page;	/* The slab from which we are allocating */
 	int node;		/* The node of the page (or -1 for debug) */
+	int active;		/* Active fastpaths */
 #ifdef CONFIG_SLUB_STATS
 	unsigned stat[NR_SLUB_STAT_ITEMS];
 #endif
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2009-10-01 15:53:15.000000000 -0500
+++ linux-2.6/mm/slub.c	2009-10-01 15:53:15.000000000 -0500
@@ -1606,7 +1606,14 @@ static void *__slab_alloc(struct kmem_ca
 			  unsigned long addr)
 {
 	void **object;
-	struct page *page = __this_cpu_read(s->cpu_slab->page);
+	struct page *page;
+	unsigned long flags;
+	int hotpath;
+
+	local_irq_save(flags);
+	preempt_enable();	/* Get rid of count */
+	hotpath = __this_cpu_read(s->cpu_slab->active) != 0;
+	page = __this_cpu_read(s->cpu_slab->page);
 
 	/* We handle __GFP_ZERO in the caller */
 	gfpflags &= ~__GFP_ZERO;
@@ -1626,13 +1633,21 @@ load_freelist:
 		goto another_slab;
 	if (unlikely(SLABDEBUG && PageSlubDebug(page)))
 		goto debug;
-
-	__this_cpu_write(s->cpu_slab->freelist, get_freepointer(s, object));
-	page->inuse = page->objects;
-	page->freelist = NULL;
-	__this_cpu_write(s->cpu_slab->node, page_to_nid(page));
+	if (unlikely(hotpath)) {
+		/* Object on second free list available and hotpath busy */
+		page->inuse++;
+		page->freelist = get_freepointer(s, object);
+	} else {
+		/* Prepare new list of objects for hotpath */
+		__this_cpu_write(s->cpu_slab->freelist, get_freepointer(s, object));
+		page->inuse = page->objects;
+		page->freelist = NULL;
+		__this_cpu_write(s->cpu_slab->node, page_to_nid(page));
+	}
 unlock_out:
+	__this_cpu_dec(s->cpu_slab->active);
 	slab_unlock(page);
+	local_irq_restore(flags);
 	stat(s, ALLOC_SLOWPATH);
 	return object;
 
@@ -1642,8 +1657,12 @@ another_slab:
 new_slab:
 	page = get_partial(s, gfpflags, node);
 	if (page) {
-		__this_cpu_write(s->cpu_slab->page, page);
 		stat(s, ALLOC_FROM_PARTIAL);
+
+		if (hotpath)
+			goto hot_lock;
+
+		__this_cpu_write(s->cpu_slab->page, page);
 		goto load_freelist;
 	}
 
@@ -1657,6 +1676,10 @@ new_slab:
 
 	if (page) {
 		stat(s, ALLOC_SLAB);
+
+		if (hotpath)
+			 goto hot_no_lock;
+
 		if (__this_cpu_read(s->cpu_slab->page))
 			flush_slab(s, __this_cpu_ptr(s->cpu_slab));
 		slab_lock(page);
@@ -1664,6 +1687,10 @@ new_slab:
 		__this_cpu_write(s->cpu_slab->page, page);
 		goto load_freelist;
 	}
+
+	__this_cpu_dec(s->cpu_slab->active);
+	local_irq_restore(flags);
+
 	if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
 		slab_out_of_memory(s, gfpflags, node);
 	return NULL;
@@ -1675,6 +1702,19 @@ debug:
 	page->freelist = get_freepointer(s, object);
 	__this_cpu_write(s->cpu_slab->node, -1);
 	goto unlock_out;
+
+	/*
+	 * Hotpath is busy and we need to avoid touching
+	 * hotpath variables
+	 */
+hot_no_lock:
+	slab_lock(page);
+hot_lock:
+	__ClearPageSlubFrozen(page);
+	if (get_freepointer(s, page->freelist))
+		/* Cannot put page into the hotpath. Instead back to partial */
+		add_partial(get_node(s, page_to_nid(page)), page, 0);
+	goto load_freelist;
 }
 
 /*
@@ -1691,7 +1731,6 @@ static __always_inline void *slab_alloc(
 		gfp_t gfpflags, int node, unsigned long addr)
 {
 	void **object;
-	unsigned long flags;
 
 	gfpflags &= gfp_allowed_mask;
 
@@ -1701,19 +1740,21 @@ static __always_inline void *slab_alloc(
 	if (should_failslab(s->objsize, gfpflags))
 		return NULL;
 
-	local_irq_save(flags);
+	preempt_disable();
+	irqsafe_cpu_inc(s->cpu_slab->active);
 	object = __this_cpu_read(s->cpu_slab->freelist);
-	if (unlikely(!object || !node_match(s, node)))
+	if (unlikely(!object || !node_match(s, node) ||
+			__this_cpu_read(s->cpu_slab->active)))
 
 		object = __slab_alloc(s, gfpflags, node, addr);
 
 	else {
 		__this_cpu_write(s->cpu_slab->freelist,
 			get_freepointer(s, object));
+		irqsafe_cpu_dec(s->cpu_slab->active);
+		preempt_enable();
 		stat(s, ALLOC_FASTPATH);
 	}
-	local_irq_restore(flags);
-
 	if (unlikely((gfpflags & __GFP_ZERO) && object))
 		memset(object, 0, s->objsize);
 
@@ -1777,6 +1818,11 @@ static void __slab_free(struct kmem_cach
 {
 	void *prior;
 	void **object = (void *)x;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	preempt_enable();	/* Fix up count */
+	__this_cpu_dec(s->cpu_slab->active);
 
 	stat(s, FREE_SLOWPATH);
 	slab_lock(page);
@@ -1809,6 +1855,7 @@ checks_ok:
 
 out_unlock:
 	slab_unlock(page);
+	local_irq_restore(flags);
 	return;
 
 slab_empty:
@@ -1820,6 +1867,7 @@ slab_empty:
 		stat(s, FREE_REMOVE_PARTIAL);
 	}
 	slab_unlock(page);
+	local_irq_restore(flags);
 	stat(s, FREE_SLAB);
 	discard_slab(s, page);
 	return;
@@ -1845,24 +1893,26 @@ static __always_inline void slab_free(st
 			struct page *page, void *x, unsigned long addr)
 {
 	void **object = (void *)x;
-	unsigned long flags;
 
 	kmemleak_free_recursive(x, s->flags);
-	local_irq_save(flags);
 	kmemcheck_slab_free(s, object, s->objsize);
 	debug_check_no_locks_freed(object, s->objsize);
 	if (!(s->flags & SLAB_DEBUG_OBJECTS))
 		debug_check_no_obj_freed(object, s->objsize);
 
+	preempt_disable();
+	irqsafe_cpu_inc(s->cpu_slab->active);
 	if (likely(page == __this_cpu_read(s->cpu_slab->page) &&
-			__this_cpu_read(s->cpu_slab->node) >= 0)) {
-		set_freepointer(s, object, __this_cpu_read(s->cpu_slab->freelist));
+			__this_cpu_read(s->cpu_slab->node) >= 0) &&
+			!__this_cpu_read(s->cpu_slab->active)) {
+		set_freepointer(s, object,
+			__this_cpu_read(s->cpu_slab->freelist));
 		__this_cpu_write(s->cpu_slab->freelist, object);
+		irqsafe_cpu_dec(s->cpu_slab->active);
+		preempt_enable();
 		stat(s, FREE_FASTPATH);
 	} else
 		__slab_free(s, page, x, addr);
-
-	local_irq_restore(flags);
 }
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
@@ -2064,6 +2114,8 @@ static DEFINE_PER_CPU(struct kmem_cache_
 
 static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
 {
+	int cpu;
+
 	if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches)
 		/*
 		 * Boot time creation of the kmalloc array. Use static per cpu data
@@ -2073,6 +2125,9 @@ static inline int alloc_kmem_cache_cpus(
 	else
 		s->cpu_slab =  alloc_percpu(struct kmem_cache_cpu);
 
+	for_each_possible_cpu(cpu)
+		per_cpu_ptr(s->cpu_slab, cpu)->active = -1;
+
 	if (!s->cpu_slab)
 		return 0;
 

--

next prev parent reply	other threads:[~2009-10-06 23:44 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-10-06 23:36 [this_cpu_xx V5 00/19] Introduce per cpu atomic operations and avoid per cpu address arithmetic cl
2009-10-06 23:36 ` [this_cpu_xx V5 01/19] Introduce this_cpu_ptr() and generic this_cpu_* operations cl
2009-10-06 23:52   ` Tejun Heo
2009-10-07 14:23     ` Christoph Lameter
2009-10-07 15:29       ` Tejun Heo
2009-10-06 23:36 ` [this_cpu_xx V5 02/19] this_cpu: X86 optimized this_cpu operations cl
2009-10-06 23:36 ` [this_cpu_xx V5 03/19] Use this_cpu operations for SNMP statistics cl
2009-10-06 23:36 ` [this_cpu_xx V5 04/19] Use this_cpu operations for NFS statistics cl
2009-10-06 23:36 ` [this_cpu_xx V5 05/19] use this_cpu ops for network statistics cl
2009-10-06 23:37 ` [this_cpu_xx V5 06/19] this_cpu_ptr: Straight transformations cl
2009-10-06 23:37 ` [this_cpu_xx V5 07/19] this_cpu_ptr: Eliminate get/put_cpu cl
2009-10-06 23:37 ` [this_cpu_xx V5 09/19] Use this_cpu_ptr in crypto subsystem cl
2009-10-06 23:37 ` [this_cpu_xx V5 10/19] Use this_cpu ops for VM statistics cl
2009-10-06 23:37 ` [this_cpu_xx V5 11/19] RCU: Use this_cpu operations cl
2009-10-06 23:37 ` [this_cpu_xx V5 12/19] this_cpu_ops: page allocator conversion cl
2009-10-06 23:37 ` [this_cpu_xx V5 13/19] this_cpu ops: Remove pageset_notifier cl
2009-10-06 23:37 ` [this_cpu_xx V5 14/19] Use this_cpu operations in slub cl
2009-10-06 23:37 ` [this_cpu_xx V5 15/19] SLUB: Get rid of dynamic DMA kmalloc cache allocation cl
2009-10-06 23:37 ` [this_cpu_xx V5 16/19] this_cpu: Remove slub kmem_cache fields cl
2009-10-06 23:37 ` [this_cpu_xx V5 17/19] Make slub statistics use this_cpu_inc cl
2009-10-06 23:37 ` [this_cpu_xx V5 18/19] this_cpu: slub aggressive use of this_cpu operations in the hotpaths cl
2009-10-06 23:37 ` cl [this message]
2009-10-07  2:54   ` [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable Mathieu Desnoyers
2009-10-07  9:11     ` Peter Zijlstra
2009-10-07 12:46       ` Mathieu Desnoyers
2009-10-07 13:01         ` Peter Zijlstra
2009-10-07 13:31           ` Mathieu Desnoyers
2009-10-07 14:37             ` Peter Zijlstra
2009-10-07 14:21           ` Christoph Lameter
2009-10-07 14:42         ` Christoph Lameter
2009-10-07 15:02           ` Mathieu Desnoyers
2009-10-07 15:05             ` Christoph Lameter
2009-10-07 15:19               ` Mathieu Desnoyers
2009-10-07 15:21                 ` Christoph Lameter
2009-10-07 15:41                   ` Mathieu Desnoyers
2009-10-07 16:42                     ` Christoph Lameter
2009-10-07 17:12                       ` Mathieu Desnoyers
2009-10-08  7:52                   ` Peter Zijlstra
2009-10-08 12:44                     ` Mathieu Desnoyers
2009-10-08 12:53                       ` Peter Zijlstra
2009-10-08 16:17                         ` Christoph Lameter
2009-10-08 17:22                         ` Mathieu Desnoyers
2009-10-08 16:11                     ` Christoph Lameter
2009-10-08 17:17                       ` Mathieu Desnoyers
2009-10-08 17:44                         ` Christoph Lameter
2009-10-08 19:17                           ` Mathieu Desnoyers
2009-10-08 19:21                             ` Christoph Lameter
2009-10-08 20:37                               ` Mathieu Desnoyers
2009-10-08 21:08                                 ` Christoph Lameter
2009-10-12 13:56                                   ` Mathieu Desnoyers
2009-10-12 14:52                                     ` Christoph Lameter
2009-10-12 15:26                                       ` Mathieu Desnoyers
2009-10-12 15:23                                         ` Christoph Lameter
2009-10-12 15:38                                           ` Mathieu Desnoyers
2009-10-12 15:38                                             ` Christoph Lameter
2009-10-12 16:05                                               ` Mathieu Desnoyers
2009-10-07 15:25     ` Christoph Lameter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20091006233733.153341605@gentwo.org \
    --to=cl@linux-foundation.org \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mathieu.desnoyers@polymtl.ca \
    --cc=penberg@cs.helsinki.fi \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.