All of lore.kernel.org
 help / color / mirror / Atom feed
From: Christoph Lameter <cl@linux.com>
To: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: linux-mm@kvack.org
Subject: [RFC V2 SLEB 12/14] SLEB: Make the size of the shared cache configurable
Date: Fri, 21 May 2010 16:15:04 -0500	[thread overview]
Message-ID: <20100521211544.174575855@quilx.com> (raw)
In-Reply-To: 20100521211452.659982351@quilx.com

[-- Attachment #1: sled_shared_dynamic --]
[-- Type: text/plain, Size: 6393 bytes --]

This makes the size of the shared array configurable. Note that this is a bit
problematic and there are likely unresolved race conditions. The kmem_cache->node[x]
pointers become unstable if interrupts are allowed.

Signed-off-by: Christoph Lameter <cl@linux-foundation.org>

---
 include/linux/slub_def.h |    3 +
 mm/slub.c                |  133 +++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 116 insertions(+), 20 deletions(-)

Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h	2010-05-21 13:17:14.000000000 -0500
+++ linux-2.6/include/linux/slub_def.h	2010-05-21 13:47:41.000000000 -0500
@@ -81,11 +81,14 @@ struct kmem_cache {
 	struct kmem_cache_order_objects oo;
 	int queue;		/* per cpu queue size */
 	int batch;		/* batch size */
+	int shared;		/* Shared queue size */
+#ifndef CONFIG_NUMA
 	/*
 	 * Avoid an extra cache line for UP, SMP and for the node local to
 	 * struct kmem_cache.
 	 */
 	struct kmem_cache_node local_node;
+#endif
 
 	/* Allocation and freeing of slabs */
 	struct kmem_cache_order_objects max;
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2010-05-21 13:17:14.000000000 -0500
+++ linux-2.6/mm/slub.c	2010-05-21 13:48:01.000000000 -0500
@@ -1754,7 +1754,7 @@ redo:
 				int d;
 
 				spin_lock(&n->shared_lock);
-				d = min(min(s->batch, BOOT_QUEUE_SIZE), n->objects);
+				d = min(min(s->batch, s->shared), n->objects);
 				if (d > 0) {
 					memcpy(c->object + c->objects,
 						n->object + n->objects - d,
@@ -1864,6 +1864,7 @@ void *kmem_cache_alloc_node(struct kmem_
 	return ret;
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
+
 #endif
 
 #ifdef CONFIG_TRACING
@@ -2176,10 +2177,7 @@ static void free_kmem_cache_nodes(struct
 	int node;
 
 	for_each_node_state(node, N_NORMAL_MEMORY) {
-		struct kmem_cache_node *n = s->node[node];
-
-		if (n && n != &s->local_node)
-			kfree(n);
+		kfree(s->node[node]);
 		s->node[node] = NULL;
 	}
 }
@@ -2197,27 +2195,96 @@ static int init_kmem_cache_nodes(struct 
 	for_each_node_state(node, N_NORMAL_MEMORY) {
 		struct kmem_cache_node *n;
 
-		if (local_node == node)
-			n = &s->local_node;
-		else {
-			if (slab_state == DOWN) {
-				early_kmem_cache_node_alloc(gfpflags, node);
-				continue;
-			}
-			n = kmalloc_node(sizeof(struct kmem_cache_node), gfpflags,
-				node);
-
-			if (!n) {
-				free_kmem_cache_nodes(s);
-				return 0;
-			}
+		if (slab_state == DOWN) {
+			early_kmem_cache_node_alloc(gfpflags, node);
+			continue;
+		}
+		n = kmalloc_node(sizeof(struct kmem_cache_node), gfpflags,
+			node);
 
+		if (!n) {
+			free_kmem_cache_nodes(s);
+			return 0;
 		}
 		s->node[node] = n;
 		init_kmem_cache_node(n, s);
 	}
 	return 1;
 }
+
+static void resize_shared_queue(struct kmem_cache *s, int shared)
+{
+
+	if (is_kmalloc_cache(s)) {
+		if (shared < BOOT_QUEUE_SIZE) {
+			s->shared = shared;
+		} else {
+			/* More than max. Go to max allowed */
+			s->queue = BOOT_QUEUE_SIZE;
+			s->batch = BOOT_BATCH_SIZE;
+		}
+	} else {
+		int node;
+
+		/* Create the new cpu queue and then free the old one */
+		down_write(&slub_lock);
+
+		/* We can only shrink the queue here since the new
+		 * queue size may be smaller and there may be concurrent
+		 * slab operations. The update of the queue must be seen
+		 * before the change of the location of the percpu queue.
+		 *
+		 * Note that the queue may contain more objects than the
+		 * queue size after this operation.
+		 */
+		if (shared < s->shared) {
+			s->shared = shared;
+			barrier();
+		}
+
+
+		/* Serialization has not been worked out yet */
+		for_each_online_node(node) {
+			struct kmem_cache_node *n = get_node(s, node);
+			struct kmem_cache_node *nn =
+				kmalloc_node(sizeof(struct kmem_cache_node),
+					GFP_KERNEL, node);
+
+			init_kmem_cache_node(nn, s);
+			s->node[node] = nn;
+
+			spin_lock(&nn->list_lock);
+			list_move(&n->partial, &nn->partial);
+#ifdef CONFIG_SLUB_DEBUG
+			list_move(&n->full, &nn->full);
+#endif
+			spin_unlock(&nn->list_lock);
+
+			nn->nr_partial = n->nr_partial;
+#ifdef CONFIG_SLUB_DEBUG
+			nn->nr_slabs = n->nr_slabs;
+			nn->total_objects = n->total_objects;
+#endif
+
+			spin_lock(&nn->shared_lock);
+			nn->objects = n->objects;
+			memcpy(&nn->object, n->object, nn->objects * sizeof(void *));
+			spin_unlock(&nn->shared_lock);
+
+			kfree(n);
+		}
+		/*
+		 * If the queue needs to be extended then we deferred
+		 * the update until now when the larger sized queue
+		 * has been allocated and is working.
+		 */
+		if (shared > s->shared)
+			s->shared = shared;
+
+		up_write(&slub_lock);
+	}
+}
+
 #else
 static void free_kmem_cache_nodes(struct kmem_cache *s)
 {
@@ -3989,6 +4056,31 @@ static ssize_t cpu_queue_size_store(stru
 }
 SLAB_ATTR(cpu_queue_size);
 
+#ifdef CONFIG_NUMA
+static ssize_t shared_queue_size_show(struct kmem_cache *s, char *buf)
+{
+	return sprintf(buf, "%u\n", s->shared);
+}
+
+static ssize_t shared_queue_size_store(struct kmem_cache *s,
+			 const char *buf, size_t length)
+{
+	unsigned long queue;
+	int err;
+
+	err = strict_strtoul(buf, 10, &queue);
+	if (err)
+		return err;
+
+	if (queue > 10000 || queue < s->batch)
+		return -EINVAL;
+
+	resize_shared_queue(s, queue);
+	return length;
+}
+SLAB_ATTR(shared_queue_size);
+#endif
+
 static ssize_t cpu_batch_size_show(struct kmem_cache *s, char *buf)
 {
 	return sprintf(buf, "%u\n", s->batch);
@@ -4388,6 +4480,7 @@ static struct attribute *slab_attrs[] = 
 	&cache_dma_attr.attr,
 #endif
 #ifdef CONFIG_NUMA
+	&shared_queue_size_attr.attr,
 	&remote_node_defrag_ratio_attr.attr,
 #endif
 #ifdef CONFIG_SLUB_STATS
@@ -4720,7 +4813,7 @@ static int s_show(struct seq_file *m, vo
 	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
 		   nr_objs, s->size, oo_objects(s->oo),
 		   (1 << oo_order(s->oo)));
-	seq_printf(m, " : tunables %4u %4u %4u", s->queue, s->batch, 0);
+	seq_printf(m, " : tunables %4u %4u %4u", s->queue, s->batch, s->shared);
 	seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,
 		   0UL);
 	seq_putc(m, '\n');

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2010-05-21 21:19 UTC|newest]

Thread overview: 89+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-05-21 21:14 [RFC V2 SLEB 00/14] The Enhanced(hopefully) Slab Allocator Christoph Lameter
2010-05-21 21:14 ` [RFC V2 SLEB 01/14] slab: Introduce a constant for a unspecified node Christoph Lameter
2010-06-07 21:44   ` David Rientjes
2010-06-07 22:30     ` Christoph Lameter
2010-06-08  5:41       ` Pekka Enberg
2010-06-08  6:20         ` David Rientjes
2010-06-08  6:34           ` Pekka Enberg
2010-06-08 23:35             ` David Rientjes
2010-06-09  5:55               ` Pekka Enberg
2010-06-09  5:55                 ` Pekka Enberg
2010-06-09  6:20                 ` David Rientjes
2010-06-09  6:20                   ` David Rientjes
2010-05-21 21:14 ` [RFC V2 SLEB 02/14] SLUB: Constants need UL Christoph Lameter
2010-05-21 21:14 ` [RFC V2 SLEB 03/14] SLUB: Use kmem_cache flags to detect if Slab is in debugging mode Christoph Lameter
2010-06-08  3:57   ` David Rientjes
2010-05-21 21:14 ` [RFC V2 SLEB 04/14] SLUB: discard_slab_unlock Christoph Lameter
2010-05-21 21:14 ` [RFC V2 SLEB 05/14] SLUB: is_kmalloc_cache Christoph Lameter
2010-06-08  8:54   ` David Rientjes
2010-05-21 21:14 ` [RFC V2 SLEB 06/14] SLUB: Get rid of the kmalloc_node slab Christoph Lameter
2010-06-09  6:14   ` David Rientjes
2010-06-09 16:14     ` Christoph Lameter
2010-06-09 16:26       ` Pekka Enberg
2010-06-10  6:07         ` Pekka Enberg
2010-05-21 21:14 ` [RFC V2 SLEB 07/14] SLEB: The Enhanced Slab Allocator Christoph Lameter
2010-05-21 21:15 ` [RFC V2 SLEB 08/14] SLEB: Resize cpu queue Christoph Lameter
2010-05-21 21:15 ` [RFC V2 SLEB 09/14] SLED: Get rid of useless function Christoph Lameter
2010-05-21 21:15 ` [RFC V2 SLEB 10/14] SLEB: Remove MAX_OBJS limitation Christoph Lameter
2010-05-21 21:15 ` [RFC V2 SLEB 11/14] SLEB: Add per node cache (with a fixed size for now) Christoph Lameter
2010-05-21 21:15 ` Christoph Lameter [this message]
2010-05-21 21:15 ` [RFC V2 SLEB 13/14] SLEB: Enhanced NUMA support Christoph Lameter
2010-05-21 21:15 ` [RFC V2 SLEB 14/14] SLEB: Allocate off node objects from remote shared caches Christoph Lameter
2010-05-22  8:37 ` [RFC V2 SLEB 00/14] The Enhanced(hopefully) Slab Allocator Pekka Enberg
2010-05-24  7:03 ` Nick Piggin
2010-05-24 15:06   ` Christoph Lameter
2010-05-25  2:06     ` Nick Piggin
2010-05-25  6:55       ` Pekka Enberg
2010-05-25  7:07         ` Nick Piggin
2010-05-25  8:03           ` Pekka Enberg
2010-05-25  8:03             ` Pekka Enberg
2010-05-25  8:16             ` Nick Piggin
2010-05-25  8:16               ` Nick Piggin
2010-05-25  9:19               ` Pekka Enberg
2010-05-25  9:19                 ` Pekka Enberg
2010-05-25  9:34                 ` Nick Piggin
2010-05-25  9:34                   ` Nick Piggin
2010-05-25  9:53                   ` Pekka Enberg
2010-05-25  9:53                     ` Pekka Enberg
2010-05-25 10:19                     ` Nick Piggin
2010-05-25 10:19                       ` Nick Piggin
2010-05-25 10:45                       ` Pekka Enberg
2010-05-25 10:45                         ` Pekka Enberg
2010-05-25 11:06                         ` Nick Piggin
2010-05-25 11:06                           ` Nick Piggin
2010-05-25 15:13                         ` Linus Torvalds
2010-05-25 15:13                           ` Linus Torvalds
2010-05-25 15:43                           ` Nick Piggin
2010-05-25 15:43                             ` Nick Piggin
2010-05-25 17:02                             ` Pekka Enberg
2010-05-25 17:02                               ` Pekka Enberg
2010-05-25 17:19                               ` Nick Piggin
2010-05-25 17:19                                 ` Nick Piggin
2010-05-25 17:35                                 ` Pekka Enberg
2010-05-25 17:35                                   ` Pekka Enberg
2010-05-25 17:40                                   ` Nick Piggin
2010-05-25 17:40                                     ` Nick Piggin
2010-05-25 10:07               ` David Rientjes
2010-05-25 10:07                 ` David Rientjes
2010-05-25 10:02             ` David Rientjes
2010-05-25 10:02               ` David Rientjes
2010-05-25 10:47               ` Pekka Enberg
2010-05-25 10:47                 ` Pekka Enberg
2010-05-25 19:57                 ` David Rientjes
2010-05-25 19:57                   ` David Rientjes
2010-05-25 14:13       ` Christoph Lameter
2010-05-25 14:34         ` Nick Piggin
2010-05-25 14:43           ` Nick Piggin
2010-05-25 14:48           ` Christoph Lameter
2010-05-25 15:11             ` Nick Piggin
2010-05-25 15:28               ` Christoph Lameter
2010-05-25 15:37                 ` Nick Piggin
2010-05-27 14:24                   ` Christoph Lameter
2010-05-27 14:37                     ` Nick Piggin
2010-05-27 15:52                       ` Christoph Lameter
2010-05-27 16:07                         ` Nick Piggin
2010-05-27 16:57                           ` Christoph Lameter
2010-05-28  8:39                             ` Nick Piggin
2010-05-25 14:40         ` Nick Piggin
2010-05-25 14:48           ` Christoph Lameter
2010-05-25 15:12             ` Nick Piggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100521211544.174575855@quilx.com \
    --to=cl@linux.com \
    --cc=linux-mm@kvack.org \
    --cc=penberg@cs.helsinki.fi \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.