* [patch 0/7] SLUB updates
@ 2007-04-26 5:07 clameter
2007-04-26 5:07 ` [patch 1/7] SLUB: Remove duplicate VM_BUG_ON clameter
` (6 more replies)
0 siblings, 7 replies; 8+ messages in thread
From: clameter @ 2007-04-26 5:07 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel
A series of updates to SLUB to make error reporting and recovery
more consistent. Rework the sysfs behavior, make kmem_cache_shrink
perform fragmentation avoidance, and update the slabinfo tool.
--
* [patch 1/7] SLUB: Remove duplicate VM_BUG_ON
2007-04-26 5:07 [patch 0/7] SLUB updates clameter
@ 2007-04-26 5:07 ` clameter
2007-04-26 5:07 ` [patch 2/7] SLUB: Fix sysfs directory handling clameter
` (5 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: clameter @ 2007-04-26 5:07 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel
[-- Attachment #1: slub_duplicate --]
[-- Type: text/plain, Size: 541 bytes --]
Somehow this artifact got in during the merge with -mm.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Index: linux-2.6.21-rc7-mm1/mm/slub.c
===================================================================
--- linux-2.6.21-rc7-mm1.orig/mm/slub.c 2007-04-25 09:48:40.000000000 -0700
+++ linux-2.6.21-rc7-mm1/mm/slub.c 2007-04-25 09:48:47.000000000 -0700
@@ -633,8 +633,6 @@ static void add_full(struct kmem_cache *
VM_BUG_ON(!irqs_disabled());
- VM_BUG_ON(!irqs_disabled());
-
if (!(s->flags & SLAB_STORE_USER))
return;
--
* [patch 2/7] SLUB: Fix sysfs directory handling
2007-04-26 5:07 [patch 0/7] SLUB updates clameter
2007-04-26 5:07 ` [patch 1/7] SLUB: Remove duplicate VM_BUG_ON clameter
@ 2007-04-26 5:07 ` clameter
2007-04-26 5:07 ` [patch 3/7] SLUB: debug printk cleanup clameter
` (4 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: clameter @ 2007-04-26 5:07 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel
[-- Attachment #1: slub_sysfs_dir_fix --]
[-- Type: text/plain, Size: 4086 bytes --]
This fixes the problem that SLUB does not track the names of aliased
slabs, by changing the way that SLUB manages the files in /sys/slab.

If the slab being operated on is not mergeable (usually the case if we
are debugging) then do not create any aliases. If a conflicting alias
exists then remove it before creating the directory for the unmergeable
slab. If a true slab cache (and not an alias) is already there then we
fail, since that is a genuine duplication of slab cache names. So
debugging still allows the detection of slab name duplication as usual.

If the slab is mergeable then we create a directory with a unique name
built from the slab size, the slab options and the pointer to the
kmem_cache structure (for disambiguation). All names referring to the
slab are then created as symlinks to that unique name. These symlinks
are not removed on kmem_cache_destroy() since we only carry a counter
for the number of aliases; a newly created symlink may simply replace an
existing one. This means that one can create a gazillion slabs with the
same name (if they all refer to mergeable caches) and it will only
increase the alias count. So we have the potential of not detecting
duplicate slab names (there is actually no harm done by that). The
duplications will be detected as soon as debugging is enabled, because
we then no longer generate symlinks and special unique names.
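As an illustration (the alias names and the kmem_cache address below
are invented), a mergeable 192-byte cache with SLAB_RECLAIM_ACCOUNT set
would produce a layout roughly like:

  /sys/slab/:a-0000192:0xffff810037d0f1c0          (unique directory)
  /sys/slab/example_cache_a -> :a-0000192:0xffff810037d0f1c0
  /sys/slab/example_cache_b -> :a-0000192:0xffff810037d0f1c0

Both symlinks point at the same underlying directory; merging further
caches into it only bumps the alias count.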
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Index: linux-2.6.21-rc7-mm1/mm/slub.c
===================================================================
--- linux-2.6.21-rc7-mm1.orig/mm/slub.c 2007-04-25 19:41:23.000000000 -0700
+++ linux-2.6.21-rc7-mm1/mm/slub.c 2007-04-25 19:41:23.000000000 -0700
@@ -3297,16 +3297,68 @@ static struct kset_uevent_ops slab_ueven
decl_subsys(slab, &slab_ktype, &slab_uevent_ops);
+#define ID_STR_LENGTH 64
+
+/*
+ * Create a unique string id for a slab cache:
+ *
+ * format: :[flags-]size:[memory address of the kmem_cache]
+ */
+static char *create_unique_id(struct kmem_cache *s)
+{
+ char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
+ char *p = name;
+
+ BUG_ON(!name);
+
+ *p++ = ':';
+ /* First flags affecting slabcache operations */
+ if (s->flags & SLAB_CACHE_DMA)
+ *p++ = 'd';
+ if (s->flags & SLAB_RECLAIM_ACCOUNT)
+ *p++ = 'a';
+ if (s->flags & SLAB_DESTROY_BY_RCU)
+ *p++ = 'r';
+ /* Debug flags */
+ if (s->flags & SLAB_RED_ZONE)
+ *p++ = 'Z';
+ if (s->flags & SLAB_POISON)
+ *p++ = 'P';
+ if (s->flags & SLAB_STORE_USER)
+ *p++ = 'U';
+ if (p != name + 1)
+ *p++ = '-';
+ p += sprintf(p, "%07d:0x%p", s->size, s);
+ BUG_ON(p > name + ID_STR_LENGTH - 1);
+ return name;
+}
+
static int sysfs_slab_add(struct kmem_cache *s)
{
int err;
+ const char *name;
if (slab_state < SYSFS)
/* Defer until later */
return 0;
+ if (s->flags & SLUB_NEVER_MERGE) {
+ /*
+ * Slabcache can never be merged so we can use the name proper.
+ * This is typically the case for debug situations. In that
+ * case we can catch duplicate names easily.
+ */
+ sysfs_remove_link(&slab_subsys.kset.kobj, s->name);
+ name = s->name;
+ } else
+ /*
+ * Create a unique name for the slab as a target
+ * for the symlinks.
+ */
+ name = create_unique_id(s);
+
kobj_set_kset_s(s, slab_subsys);
- kobject_set_name(&s->kobj, s->name);
+ kobject_set_name(&s->kobj, name);
kobject_init(&s->kobj);
err = kobject_add(&s->kobj);
if (err)
@@ -3316,6 +3368,10 @@ static int sysfs_slab_add(struct kmem_ca
if (err)
return err;
kobject_uevent(&s->kobj, KOBJ_ADD);
+ if (!(s->flags & SLUB_NEVER_MERGE)) {
+ sysfs_slab_alias(s, s->name);
+ kfree(name);
+ }
return 0;
}
@@ -3341,9 +3397,14 @@ static int sysfs_slab_alias(struct kmem_
{
struct saved_alias *al;
- if (slab_state == SYSFS)
+ if (slab_state == SYSFS) {
+ /*
+ * If we have a leftover link then remove it.
+ */
+ sysfs_remove_link(&slab_subsys.kset.kobj, name);
return sysfs_create_link(&slab_subsys.kset.kobj,
&s->kobj, name);
+ }
al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
if (!al)
--
* [patch 3/7] SLUB: debug printk cleanup
2007-04-26 5:07 [patch 0/7] SLUB updates clameter
2007-04-26 5:07 ` [patch 1/7] SLUB: Remove duplicate VM_BUG_ON clameter
2007-04-26 5:07 ` [patch 2/7] SLUB: Fix sysfs directory handling clameter
@ 2007-04-26 5:07 ` clameter
2007-04-26 5:07 ` [patch 4/7] SLUB: Conform more to SLAB's SLAB_HWCACHE_ALIGN behavior clameter
` (3 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: clameter @ 2007-04-26 5:07 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel
[-- Attachment #1: slub_at_cleanup --]
[-- Type: text/plain, Size: 7998 bytes --]
Set up a new function, slab_err(), to report errors consistently.
Corrective actions taken by SLUB are now consistently reported with a
printk starting with @@@.
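As an example of the resulting output, a corrupted free pointer would
now be reported along these lines (cache name and addresses invented
for illustration):

  *** SLUB kmalloc-64: Freepointer 0xffff810023c59040 corrupt in slab @0xffff810023c59000
  @@@ SLUB kmalloc-64: Freelist cleared. Slab 0xffff810023c59000

"***" consistently marks a detected problem, "@@@" the corrective
action that was taken.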
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Index: linux-2.6.21-rc7-mm1/mm/slub.c
===================================================================
--- linux-2.6.21-rc7-mm1.orig/mm/slub.c 2007-04-25 21:20:36.000000000 -0700
+++ linux-2.6.21-rc7-mm1/mm/slub.c 2007-04-25 21:22:50.000000000 -0700
@@ -324,8 +324,8 @@ static void object_err(struct kmem_cache
{
u8 *addr = page_address(page);
- printk(KERN_ERR "*** SLUB: %s in %s@0x%p slab 0x%p\n",
- reason, s->name, object, page);
+ printk(KERN_ERR "*** SLUB %s: %s@0x%p slab 0x%p\n",
+ s->name, reason, object, page);
printk(KERN_ERR " offset=%tu flags=0x%04lx inuse=%u freelist=0x%p\n",
object - addr, page->flags, page->inuse, page->freelist);
if (object > addr + 16)
@@ -335,6 +335,19 @@ static void object_err(struct kmem_cache
dump_stack();
}
+static void slab_err(struct kmem_cache *s, struct page *page, char *reason, ...)
+{
+ va_list args;
+ char buf[100];
+
+ va_start(args, reason);
+ vsnprintf(buf, sizeof(buf), reason, args);
+ va_end(args);
+ printk(KERN_ERR "*** SLUB %s: %s in slab @0x%p\n", s->name, buf,
+ page);
+ dump_stack();
+}
+
static void init_object(struct kmem_cache *s, void *object, int active)
{
u8 *p = object;
@@ -412,7 +425,7 @@ static int check_valid_pointer(struct km
static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
void *from, void *to)
{
- printk(KERN_ERR "@@@ SLUB: %s Restoring %s (0x%x) from 0x%p-0x%p\n",
+ printk(KERN_ERR "@@@ SLUB %s: Restoring %s (0x%x) from 0x%p-0x%p\n",
s->name, message, data, from, to - 1);
memset(from, data, to - from);
}
@@ -459,9 +472,7 @@ static int slab_pad_check(struct kmem_ca
return 1;
if (!check_bytes(p + length, POISON_INUSE, remainder)) {
- printk(KERN_ERR "SLUB: %s slab 0x%p: Padding fails check\n",
- s->name, p);
- dump_stack();
+ slab_err(s, page, "Padding check failed");
restore_bytes(s, "slab padding", POISON_INUSE, p + length,
p + length + remainder);
return 0;
@@ -547,30 +558,25 @@ static int check_slab(struct kmem_cache
VM_BUG_ON(!irqs_disabled());
if (!PageSlab(page)) {
- printk(KERN_ERR "SLUB: %s Not a valid slab page @0x%p "
- "flags=%lx mapping=0x%p count=%d \n",
- s->name, page, page->flags, page->mapping,
+ slab_err(s, page, "Not a valid slab page flags=%lx "
+ "mapping=0x%p count=%d", page->flags, page->mapping,
page_count(page));
return 0;
}
if (page->offset * sizeof(void *) != s->offset) {
- printk(KERN_ERR "SLUB: %s Corrupted offset %lu in slab @0x%p"
- " flags=0x%lx mapping=0x%p count=%d\n",
- s->name,
+ slab_err(s, page, "Corrupted offset %lu flags=0x%lx "
+ "mapping=0x%p count=%d",
(unsigned long)(page->offset * sizeof(void *)),
- page,
page->flags,
page->mapping,
page_count(page));
- dump_stack();
return 0;
}
if (page->inuse > s->objects) {
- printk(KERN_ERR "SLUB: %s inuse %u > max %u in slab "
- "page @0x%p flags=%lx mapping=0x%p count=%d\n",
- s->name, page->inuse, s->objects, page, page->flags,
+ slab_err(s, page, "inuse %u > max %u flags=%lx "
+ "mapping=0x%p count=%d",
+ page->inuse, s->objects, page->flags,
page->mapping, page_count(page));
- dump_stack();
return 0;
}
/* Slab_pad_check fixes things up after itself */
@@ -599,12 +605,13 @@ static int on_freelist(struct kmem_cache
set_freepointer(s, object, NULL);
break;
} else {
- printk(KERN_ERR "SLUB: %s slab 0x%p "
- "freepointer 0x%p corrupted.\n",
- s->name, page, fp);
- dump_stack();
+ slab_err(s, page, "Freepointer 0x%p corrupt",
+ fp);
page->freelist = NULL;
page->inuse = s->objects;
+ printk(KERN_ERR "@@@ SLUB %s: Freelist "
+ "cleared. Slab 0x%p\n",
+ s->name, page);
return 0;
}
break;
@@ -615,11 +622,12 @@ static int on_freelist(struct kmem_cache
}
if (page->inuse != s->objects - nr) {
- printk(KERN_ERR "slab %s: page 0x%p wrong object count."
- " counter is %d but counted were %d\n",
- s->name, page, page->inuse,
- s->objects - nr);
+ slab_err(s, page, "Wrong object count. Counter is %d but "
+ "%d were counted", page->inuse,
+ s->objects - nr);
page->inuse = s->objects - nr;
+ printk(KERN_ERR "@@@ SLUB %s: Object count adjusted. "
+ "Slab @0x%p\n", s->name, page);
}
return search == NULL;
}
@@ -663,10 +671,7 @@ static int alloc_object_checks(struct km
goto bad;
if (object && !on_freelist(s, page, object)) {
- printk(KERN_ERR "SLUB: %s Object 0x%p@0x%p "
- "already allocated.\n",
- s->name, object, page);
- dump_stack();
+ slab_err(s, page, "Object 0x%p already allocated", object);
goto bad;
}
@@ -706,15 +711,12 @@ static int free_object_checks(struct kme
goto fail;
if (!check_valid_pointer(s, page, object)) {
- printk(KERN_ERR "SLUB: %s slab 0x%p invalid "
- "object pointer 0x%p\n",
- s->name, page, object);
+ slab_err(s, page, "Invalid object pointer 0x%p", object);
goto fail;
}
if (on_freelist(s, page, object)) {
- printk(KERN_ERR "SLUB: %s slab 0x%p object "
- "0x%p already free.\n", s->name, page, object);
+ slab_err(s, page, "Object 0x%p already free", object);
goto fail;
}
@@ -723,24 +725,22 @@ static int free_object_checks(struct kme
if (unlikely(s != page->slab)) {
if (!PageSlab(page))
- printk(KERN_ERR "slab_free %s size %d: attempt to"
- "free object(0x%p) outside of slab.\n",
- s->name, s->size, object);
+ slab_err(s, page, "Attempt to free object(0x%p) "
+ "outside of slab", object);
else
- if (!page->slab)
+ if (!page->slab) {
printk(KERN_ERR
- "slab_free : no slab(NULL) for object 0x%p.\n",
+ "SLUB <none>: no slab for object 0x%p.\n",
object);
+ dump_stack();
+ }
else
- printk(KERN_ERR "slab_free %s(%d): object at 0x%p"
- " belongs to slab %s(%d)\n",
- s->name, s->size, object,
- page->slab->name, page->slab->size);
+ slab_err(s, page, "object at 0x%p belongs "
+ "to slab %s", object, page->slab->name);
goto fail;
}
return 1;
fail:
- dump_stack();
printk(KERN_ERR "@@@ SLUB: %s slab 0x%p object at 0x%p not freed.\n",
s->name, page, object);
return 0;
@@ -2478,6 +2478,8 @@ __initcall(cpucache_init);
#endif
#ifdef SLUB_RESILIENCY_TEST
+static unsigned long validate_slab_cache(struct kmem_cache *s);
+
static void resiliency_test(void)
{
u8 *p;
@@ -2592,16 +2597,16 @@ static void validate_slab_slab(struct km
validate_slab(s, page);
slab_unlock(page);
} else
- printk(KERN_INFO "SLUB: %s Skipped busy slab %p\n",
+ printk(KERN_INFO "SLUB %s: Skipped busy slab %p\n",
s->name, page);
if (s->flags & DEBUG_DEFAULT_FLAGS) {
if (!PageError(page))
- printk(KERN_ERR "SLUB: %s PageError not set "
+ printk(KERN_ERR "SLUB %s: PageError not set "
"on slab %p\n", s->name, page);
} else {
if (PageError(page))
- printk(KERN_ERR "SLUB: %s PageError set on "
+ printk(KERN_ERR "SLUB %s: PageError set on "
"slab %p\n", s->name, page);
}
}
@@ -2619,8 +2624,8 @@ static int validate_slab_node(struct kme
count++;
}
if (count != n->nr_partial)
- printk("SLUB: %s %ld partial slabs counted but counter=%ld\n",
- s->name, count, n->nr_partial);
+ printk(KERN_ERR "SLUB %s: %ld partial slabs counted but "
+ "counter=%ld\n", s->name, count, n->nr_partial);
if (!(s->flags & SLAB_STORE_USER))
goto out;
@@ -2630,8 +2635,9 @@ static int validate_slab_node(struct kme
count++;
}
if (count != atomic_long_read(&n->nr_slabs))
- printk("SLUB: %s %ld slabs counted but counter=%ld\n",
- s->name, count, atomic_long_read(&n->nr_slabs));
+ printk(KERN_ERR "SLUB %s: %ld slabs counted but "
+ "counter=%ld\n", s->name, count,
+ atomic_long_read(&n->nr_slabs));
out:
spin_unlock_irqrestore(&n->list_lock, flags);
--
* [patch 4/7] SLUB: Conform more to SLAB's SLAB_HWCACHE_ALIGN behavior
2007-04-26 5:07 [patch 0/7] SLUB updates clameter
` (2 preceding siblings ...)
2007-04-26 5:07 ` [patch 3/7] SLUB: debug printk cleanup clameter
@ 2007-04-26 5:07 ` clameter
2007-04-26 5:07 ` [patch 5/7] SLUB: Add MIN_PARTIAL clameter
` (2 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: clameter @ 2007-04-26 5:07 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel
[-- Attachment #1: slub_hwalign --]
[-- Type: text/plain, Size: 2641 bytes --]
Currently SLUB uses a strict L1_CACHE_BYTES alignment if
SLAB_HWCACHE_ALIGN is specified. SLAB does not align to a cacheline if
the object is smaller than half of a cacheline; small objects are
instead aligned by SLAB to a fraction of a cacheline.

Make SLUB simply forget about the alignment requirement if the object
size is at most half of L1_CACHE_BYTES. It seems that fractional
alignments are no good because they grow the object and needlessly
reduce the object density in a cache line, causing additional cache
line fetches.

If we are already throwing the user's suggestion of a cache line
alignment away then let's do the best we can. Maybe SLAB_HWCACHE_ALIGN
also needs to be tossed given its wishy-washy handling, but doing so
would require an audit of all kmem_cache_create() calls throughout the
kernel source.

In any case one needs to explicitly specify an alignment during
kmem_cache_create() to either slab allocator in order to ensure that
the objects are cacheline aligned.
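As a worked example with a 64-byte cacheline: a 24-byte object created
with SLAB_HWCACHE_ALIGN is now packed at the default minimum alignment
(SLAB would fractionally align it to 32 bytes, old SLUB to a full 64),
while a 40-byte object still gets full cacheline alignment. A minimal
userspace sketch of the new rule (the function name and constants here
are assumptions for illustration, not the kernel code):

#include <stdio.h>

#define L1_CACHE_BYTES 64
#define ARCH_SLAB_MINALIGN sizeof(void *)

/* Mirrors the patched calculate_alignment(): honor SLAB_HWCACHE_ALIGN
 * only for objects larger than half a cacheline. */
static unsigned long calc_align(int hwcache_align, unsigned long align,
                                unsigned long size)
{
        if (hwcache_align && size > L1_CACHE_BYTES / 2)
                return align > L1_CACHE_BYTES ? align : L1_CACHE_BYTES;
        if (align < ARCH_SLAB_MINALIGN)
                align = ARCH_SLAB_MINALIGN;
        return align;
}

int main(void)
{
        printf("%lu\n", calc_align(1, 0, 24));  /* 8: request dropped */
        printf("%lu\n", calc_align(1, 0, 40));  /* 64: full cacheline */
        return 0;
}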
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Index: linux-2.6.21-rc7-mm1/mm/slub.c
===================================================================
--- linux-2.6.21-rc7-mm1.orig/mm/slub.c 2007-04-25 21:23:56.000000000 -0700
+++ linux-2.6.21-rc7-mm1/mm/slub.c 2007-04-25 21:23:59.000000000 -0700
@@ -1482,9 +1482,19 @@ static int calculate_order(int size)
* various ways of specifying it.
*/
static unsigned long calculate_alignment(unsigned long flags,
- unsigned long align)
+ unsigned long align, unsigned long size)
{
- if (flags & SLAB_HWCACHE_ALIGN)
+ /*
+ * If the user wants hardware cache aligned objects then
+ * follow that suggestion if the object is sufficiently
+ * large.
+ *
+ * The hardware cache alignment cannot override the
+ * specified alignment though. If that is greater,
+ * then use it.
+ */
+ if ((flags & SLAB_HWCACHE_ALIGN) &&
+ size > L1_CACHE_BYTES / 2)
return max_t(unsigned long, align, L1_CACHE_BYTES);
if (align < ARCH_SLAB_MINALIGN)
@@ -1673,7 +1683,7 @@ static int calculate_sizes(struct kmem_c
* user specified (this is unnecessarily complex due to the attempt
* to be compatible with SLAB. Should be cleaned up some day).
*/
- align = calculate_alignment(flags, align);
+ align = calculate_alignment(flags, align, s->objsize);
/*
* SLUB stores one object immediately after another beginning from
@@ -2250,7 +2260,7 @@ static struct kmem_cache *find_mergeable
return NULL;
size = ALIGN(size, sizeof(void *));
- align = calculate_alignment(flags, align);
+ align = calculate_alignment(flags, align, size);
size = ALIGN(size, align);
list_for_each(h, &slab_caches) {
--
* [patch 5/7] SLUB: Add MIN_PARTIAL
2007-04-26 5:07 [patch 0/7] SLUB updates clameter
` (3 preceding siblings ...)
2007-04-26 5:07 ` [patch 4/7] SLUB: Conform more to SLAB's SLAB_HWCACHE_ALIGN behavior clameter
@ 2007-04-26 5:07 ` clameter
2007-04-26 5:07 ` [patch 6/7] SLUB: Free slabs and sort partial slab lists in kmem_cache_shrink clameter
2007-04-26 5:07 ` [patch 7/7] SLUB: Major slabinfo update clameter
6 siblings, 0 replies; 8+ messages in thread
From: clameter @ 2007-04-26 5:07 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel
[-- Attachment #1: slab_partial --]
[-- Type: text/plain, Size: 4006 bytes --]
We leave a minimum number of partial slabs on a node when we search for
partial slabs on other nodes. Define a constant, MIN_PARTIAL, for that
value. Then modify SLUB to keep MIN_PARTIAL slabs around.
This avoids bad situations where a function frees the last object
in a slab (which results in the page being returned to the page
allocator) only to then allocate one again (which requires getting
a page back from the page allocator if the partial list was empty).
Keeping a couple of slabs on the partial list reduces overhead.
Empty slabs are added to the end of the partial list to ensure that
partially allocated slabs are consumed first (defragmentation).
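Schematically, the free-side decision now looks like this (a sketch of
the logic only, not the literal kernel code; see putback_slab in the
diff below):

        if (page->inuse) {                      /* slab still has live objects */
                if (page->freelist)
                        add_partial(n, page);   /* and free slots: a partial slab */
        } else if (n->nr_partial < MIN_PARTIAL) {
                add_partial_tail(n, page);      /* keep the empty slab, at the tail */
        } else {
                discard_slab(s, page);          /* enough partials: return the page */
        }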
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Index: linux-2.6.21-rc7-mm1/mm/slub.c
===================================================================
--- linux-2.6.21-rc7-mm1.orig/mm/slub.c 2007-04-25 21:23:59.000000000 -0700
+++ linux-2.6.21-rc7-mm1/mm/slub.c 2007-04-25 21:25:48.000000000 -0700
@@ -109,6 +109,9 @@
/* Enable to test recovery from slab corruption on boot */
#undef SLUB_RESILIENCY_TEST
+/* Minimum number of partial slabs */
+#define MIN_PARTIAL 2
+
#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
SLAB_POISON | SLAB_STORE_USER)
/*
@@ -635,16 +638,8 @@ static int on_freelist(struct kmem_cache
/*
* Tracking of fully allocated slabs for debugging
*/
-static void add_full(struct kmem_cache *s, struct page *page)
+static void add_full(struct kmem_cache_node *n, struct page *page)
{
- struct kmem_cache_node *n;
-
- VM_BUG_ON(!irqs_disabled());
-
- if (!(s->flags & SLAB_STORE_USER))
- return;
-
- n = get_node(s, page_to_nid(page));
spin_lock(&n->list_lock);
list_add(&page->lru, &n->full);
spin_unlock(&n->list_lock);
@@ -923,10 +918,16 @@ static __always_inline int slab_trylock(
/*
* Management of partially allocated slabs
*/
-static void add_partial(struct kmem_cache *s, struct page *page)
+static void add_partial_tail(struct kmem_cache_node *n, struct page *page)
{
- struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+ spin_lock(&n->list_lock);
+ n->nr_partial++;
+ list_add_tail(&page->lru, &n->partial);
+ spin_unlock(&n->list_lock);
+}
+static void add_partial(struct kmem_cache_node *n, struct page *page)
+{
spin_lock(&n->list_lock);
n->nr_partial++;
list_add(&page->lru, &n->partial);
@@ -1026,7 +1027,7 @@ static struct page *get_any_partial(stru
n = get_node(s, zone_to_nid(*z));
if (n && cpuset_zone_allowed_hardwall(*z, flags) &&
- n->nr_partial > 2) {
+ n->nr_partial > MIN_PARTIAL) {
page = get_partial_node(n);
if (page)
return page;
@@ -1060,15 +1061,31 @@ static struct page *get_partial(struct k
*/
static void putback_slab(struct kmem_cache *s, struct page *page)
{
+ struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+
if (page->inuse) {
+
if (page->freelist)
- add_partial(s, page);
- else if (PageError(page))
- add_full(s, page);
+ add_partial(n, page);
+ else if (PageError(page) && (s->flags & SLAB_STORE_USER))
+ add_full(n, page);
slab_unlock(page);
+
} else {
- slab_unlock(page);
- discard_slab(s, page);
+ if (n->nr_partial < MIN_PARTIAL) {
+ /*
+ * Adding an empty page to the partial slabs in order
+ * to avoid page allocator overhead. This page needs to
+ * come after all the others that are not fully empty
+ * in order to make sure that we do maximum
+ * defragmentation.
+ */
+ add_partial_tail(n, page);
+ slab_unlock(page);
+ } else {
+ slab_unlock(page);
+ discard_slab(s, page);
+ }
}
}
@@ -1325,7 +1342,7 @@ checks_ok:
* then add it.
*/
if (unlikely(!prior))
- add_partial(s, page);
+ add_partial(get_node(s, page_to_nid(page)), page);
out_unlock:
slab_unlock(page);
@@ -1541,7 +1558,7 @@ static struct kmem_cache_node * __init e
kmalloc_caches->node[node] = n;
init_kmem_cache_node(n);
atomic_long_inc(&n->nr_slabs);
- add_partial(kmalloc_caches, page);
+ add_partial(n, page);
return n;
}
--
* [patch 6/7] SLUB: Free slabs and sort partial slab lists in kmem_cache_shrink
2007-04-26 5:07 [patch 0/7] SLUB updates clameter
` (4 preceding siblings ...)
2007-04-26 5:07 ` [patch 5/7] SLUB: Add MIN_PARTIAL clameter
@ 2007-04-26 5:07 ` clameter
2007-04-26 5:07 ` [patch 7/7] SLUB: Major slabinfo update clameter
6 siblings, 0 replies; 8+ messages in thread
From: clameter @ 2007-04-26 5:07 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel
[-- Attachment #1: slab_shrink_cache --]
[-- Type: text/plain, Size: 5237 bytes --]
At kmem_cache_shrink, check if we have any empty slabs on the partial
list and if so remove them.

Also, as an anti-fragmentation measure, sort the partial slabs so that
the most fully allocated ones come first and the least allocated last.
The next allocations may fill up the nearly full slabs. Having the
least allocated slabs last gives them the maximum chance that their
remaining objects may be freed. Thus we can hopefully minimize the
number of partial slabs.
I think this is the best one can do in terms of anti-fragmentation
measures. Real defragmentation (meaning moving objects out of the least
allocated slabs into those that are almost full) could be implemented
by reverse scanning through the list produced here, but that would mean
providing a callback at slab cache creation that allows the deletion or
moving of an object. This will involve slab API changes, so defer it
for now.
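The sort itself is a simple bucket sort: slabs_by_inuse holds one list
head per possible object count, each partial slab is moved into the
bucket matching its page->inuse, and the buckets are spliced back in
descending order. With the shrink sysfs attribute added below, the pass
can also be triggered from userspace, e.g. (the cache name is only an
example):

        echo 1 > /sys/slab/dentry/shrink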
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
mm/slub.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 104 insertions(+), 14 deletions(-)
Index: linux-2.6.21-rc7-mm1/mm/slub.c
===================================================================
--- linux-2.6.21-rc7-mm1.orig/mm/slub.c 2007-04-25 21:25:48.000000000 -0700
+++ linux-2.6.21-rc7-mm1/mm/slub.c 2007-04-25 21:27:07.000000000 -0700
@@ -109,9 +109,19 @@
/* Enable to test recovery from slab corruption on boot */
#undef SLUB_RESILIENCY_TEST
-/* Minimum number of partial slabs */
+/*
+ * Minimum number of partial slabs. These will be left on the partial
+ * lists even if they are empty. kmem_cache_shrink may reclaim them.
+ */
#define MIN_PARTIAL 2
+/*
+ * Maximum number of desirable partial slabs.
+ * The existence of more partial slabs makes kmem_cache_shrink
+ * sort the partial list by the number of objects in use.
+ */
+#define MAX_PARTIAL 10
+
#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
SLAB_POISON | SLAB_STORE_USER)
/*
@@ -2163,6 +2173,78 @@ void kfree(const void *x)
}
EXPORT_SYMBOL(kfree);
+/*
+ * kmem_cache_shrink removes empty slabs from the partial lists
+ * and then sorts the partially allocated slabs by the number
+ * of items in use. The slabs with the most items in use
+ * come first. New allocations will remove these from the
+ * partial list because they are full. The slabs with the
+ * least items are placed last. If it happens that the objects
+ * are freed then the page can be returned to the page allocator.
+ */
+int kmem_cache_shrink(struct kmem_cache *s)
+{
+ int node;
+ int i;
+ struct kmem_cache_node *n;
+ struct page *page;
+ struct page *t;
+ struct list_head *slabs_by_inuse =
+ kmalloc(sizeof(struct list_head) * s->objects, GFP_KERNEL);
+ unsigned long flags;
+
+ if (!slabs_by_inuse)
+ return -ENOMEM;
+
+ flush_all(s);
+ for_each_online_node(node) {
+ n = get_node(s, node);
+
+ if (n->nr_partial <= MIN_PARTIAL)
+ continue;
+
+ for (i = 0; i < s->objects; i++)
+ INIT_LIST_HEAD(slabs_by_inuse + i);
+
+ spin_lock_irqsave(&n->list_lock, flags);
+
+ /*
+ * Build lists indexed by the number of items in use in
+ * each slab, or free the slab if it is empty.
+ *
+ * Note that concurrent frees may occur while
+ * we hold the list_lock. page->inuse here is
+ * the upper limit.
+ */
+ list_for_each_entry_safe(page, t, &n->partial, lru) {
+ if (!page->inuse) {
+ list_del(&page->lru);
+ discard_slab(s, page);
+ } else
+ if (n->nr_partial > MAX_PARTIAL)
+ list_move(&page->lru,
+ slabs_by_inuse + page->inuse);
+ }
+
+ if (n->nr_partial <= MAX_PARTIAL)
+ goto out;
+
+ /*
+ * Rebuild the partial list with the slabs filled up
+ * most first and the least used slabs at the end.
+ */
+ for (i = s->objects - 1; i > 0; i--)
+ list_splice(slabs_by_inuse + i, n->partial.prev);
+
+ out:
+ spin_unlock_irqrestore(&n->list_lock, flags);
+ }
+
+ kfree(slabs_by_inuse);
+ return 0;
+}
+EXPORT_SYMBOL(kmem_cache_shrink);
+
/**
* krealloc - reallocate memory. The contents will remain unchanged.
*
@@ -2408,17 +2490,6 @@ static struct notifier_block __cpuinitda
#endif
-/***************************************************************
- * Compatiblility definitions
- **************************************************************/
-
-int kmem_cache_shrink(struct kmem_cache *s)
-{
- flush_all(s);
- return 0;
-}
-EXPORT_SYMBOL(kmem_cache_shrink);
-
#ifdef CONFIG_NUMA
/*****************************************************************
@@ -3194,6 +3265,25 @@ static ssize_t validate_store(struct kme
}
SLAB_ATTR(validate);
+static ssize_t shrink_show(struct kmem_cache *s, char *buf)
+{
+ return 0;
+}
+
+static ssize_t shrink_store(struct kmem_cache *s,
+ const char *buf, size_t length)
+{
+ if (buf[0] == '1') {
+ int rc = kmem_cache_shrink(s);
+
+ if (rc)
+ return rc;
+ } else
+ return -EINVAL;
+ return length;
+}
+SLAB_ATTR(shrink);
+
static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
{
if (!(s->flags & SLAB_STORE_USER))
@@ -3250,6 +3340,7 @@ static struct attribute * slab_attrs[] =
&poison_attr.attr,
&store_user_attr.attr,
&validate_attr.attr,
+ &shrink_attr.attr,
&alloc_calls_attr.attr,
&free_calls_attr.attr,
#ifdef CONFIG_ZONE_DMA
--
* [patch 7/7] SLUB: Major slabinfo update
2007-04-26 5:07 [patch 0/7] SLUB updates clameter
` (5 preceding siblings ...)
2007-04-26 5:07 ` [patch 6/7] SLUB: Free slabs and sort partial slab lists in kmem_cache_shrink clameter
@ 2007-04-26 5:07 ` clameter
6 siblings, 0 replies; 8+ messages in thread
From: clameter @ 2007-04-26 5:07 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel
[-- Attachment #1: slub_slabinfo_update --]
[-- Type: text/plain, Size: 23258 bytes --]
Enhancements to slabinfo:
- Support for slab shrinking (-r option)
- Slab summary showing system totals
- Sync with the new form of alias handling
- Sorting by size, reverse sorting etc.
- Alias lookups
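Typical invocations after this update (the tool chdirs to /sys/slab
itself; an optional trailing argument is a regex limiting which caches
are touched):

        slabinfo                # list all non-empty slab caches
        slabinfo -T             # system-wide totals
        slabinfo -S             # sort the listing by size
        slabinfo -a             # show alias -> cache mappings
        slabinfo -r             # shrink slabs
        slabinfo -v dentry      # validate caches matching "dentry"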
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Index: linux-2.6.21-rc7-mm1/Documentation/vm/slabinfo.c
===================================================================
--- linux-2.6.21-rc7-mm1.orig/Documentation/vm/slabinfo.c 2007-04-25 21:20:24.000000000 -0700
+++ linux-2.6.21-rc7-mm1/Documentation/vm/slabinfo.c 2007-04-25 21:46:40.000000000 -0700
@@ -3,7 +3,7 @@
*
* (C) 2007 sgi, Christoph Lameter <clameter@sgi.com>
*
- * Compile by doing:
+ * Compile by:
*
* gcc -o slabinfo slabinfo.c
*/
@@ -17,15 +17,47 @@
#include <getopt.h>
#include <regex.h>
+#define MAX_SLABS 500
+#define MAX_ALIASES 500
+#define MAX_NODES 1024
+
+struct slabinfo {
+ char *name;
+ int alias;
+ int refs;
+ int aliases, align, cache_dma, cpu_slabs, destroy_by_rcu;
+ int hwcache_align, object_size, objs_per_slab;
+ int sanity_checks, slab_size, store_user, trace;
+ int order, poison, reclaim_account, red_zone;
+ unsigned long partial, objects, slabs;
+ int numa[MAX_NODES];
+ int numa_partial[MAX_NODES];
+} slabinfo[MAX_SLABS];
+
+struct aliasinfo {
+ char *name;
+ char *ref;
+ struct slabinfo *slab;
+} aliasinfo[MAX_ALIASES];
+
+int slabs = 0;
+int aliases = 0;
+int highest_node = 0;
+
char buffer[4096];
int show_alias = 0;
int show_slab = 0;
-int show_parameters = 0;
int skip_zero = 1;
int show_numa = 0;
int show_track = 0;
+int show_first_alias = 0;
int validate = 0;
+int shrink = 0;
+int show_inverted = 0;
+int show_single_ref = 0;
+int show_totals = 0;
+int sort_size = 0;
int page_size;
@@ -47,11 +79,16 @@ void usage(void)
"-a|--aliases Show aliases\n"
"-h|--help Show usage information\n"
"-n|--numa Show NUMA information\n"
- "-p|--parameters Show global parameters\n"
+ "-r|--reduce Shrink slabs\n"
"-v|--validate Validate slabs\n"
"-t|--tracking Show alloc/free information\n"
+ "-T|--Totals Show summary information\n"
"-s|--slabs Show slabs\n"
+ "-S|--Size Sort by size\n"
"-z|--zero Include empty slabs\n"
+ "-f|--first-alias Show first alias\n"
+ "-i|--inverted Inverted list\n"
+ "-1|--1ref Single reference\n"
);
}
@@ -86,23 +123,32 @@ unsigned long get_obj(char *name)
unsigned long get_obj_and_str(char *name, char **x)
{
unsigned long result = 0;
+ char *p;
+
+ *x = NULL;
if (!read_obj(name)) {
x = NULL;
return 0;
}
- result = strtoul(buffer, x, 10);
- while (**x == ' ')
- (*x)++;
+ result = strtoul(buffer, &p, 10);
+ while (*p == ' ')
+ p++;
+ if (*p)
+ *x = strdup(p);
return result;
}
-void set_obj(char *name, int n)
+void set_obj(struct slabinfo *s, char *name, int n)
{
- FILE *f = fopen(name, "w");
+ char x[100];
+
+ sprintf(x, "%s/%s", s->name, name);
+
+ FILE *f = fopen(x, "w");
if (!f)
- fatal("Cannot write to %s\n", name);
+ fatal("Cannot write to %s\n", x);
fprintf(f, "%d\n", n);
fclose(f);
@@ -143,167 +189,613 @@ int store_size(char *buffer, unsigned lo
return n;
}
-void alias(const char *name)
+void decode_numa_list(int *numa, char *t)
{
- int count;
- char *p;
-
- if (!show_alias)
- return;
+ int node;
+ int nr;
- count = readlink(name, buffer, sizeof(buffer));
+ memset(numa, 0, MAX_NODES * sizeof(int));
- if (count < 0)
- return;
+ while (*t == 'N') {
+ t++;
+ node = strtoul(t, &t, 10);
+ if (*t == '=') {
+ t++;
+ nr = strtoul(t, &t, 10);
+ numa[node] = nr;
+ if (node > highest_node)
+ highest_node = node;
+ }
+ while (*t == ' ')
+ t++;
+ }
+}
- buffer[count] = 0;
+char *hackname(struct slabinfo *s)
+{
+ char *n = s->name;
- p = buffer + count;
+ if (n[0] == ':') {
+ char *nn = malloc(20);
+ char *p;
+
+ strncpy(nn, n, 20);
+ n = nn;
+ p = n + 4;
+ while (*p && *p !=':')
+ p++;
+ *p = 0;
+ }
+ return n;
+}
- while (p > buffer && p[-1] != '/')
- p--;
- printf("%-20s -> %s\n", name, p);
+void slab_validate(struct slabinfo *s)
+{
+ set_obj(s, "validate", 1);
}
-void slab_validate(char *name)
+void slab_shrink(struct slabinfo *s)
{
- set_obj("validate", 1);
+ set_obj(s, "shrink", 1);
}
int line = 0;
void first_line(void)
{
- printf("Name Objects Objsize Space "
- "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n");
+ printf("Name Objects Objsize Space "
+ "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n");
+}
+
+/*
+ * Find the shortest alias of a slab
+ */
+struct aliasinfo *find_one_alias(struct slabinfo *find)
+{
+ struct aliasinfo *a;
+ struct aliasinfo *best = NULL;
+
+ for (a = aliasinfo; a < aliasinfo + aliases; a++) {
+ if (a->slab == find &&
+ (!best || strlen(a->name) < strlen(best->name))) {
+ best = a;
+ if (strncmp(a->name, "kmall", 5) == 0)
+ return best;
+ }
+ }
+ if (best)
+ return best;
+ fatal("Cannot find alias for %s\n", find->name);
+ return NULL;
}
-void slab(const char *name)
+unsigned long slab_size(struct slabinfo *s)
+{
+ return s->slabs * (page_size << s->order);
+}
+
+
+void slabcache(struct slabinfo *s)
{
- unsigned long aliases, align, cache_dma, cpu_slabs, destroy_by_rcu;
- unsigned long hwcache_align, object_size, objects, objs_per_slab;
- unsigned long order, partial, poison, reclaim_account, red_zone;
- unsigned long sanity_checks, slab_size, slabs, store_user, trace;
char size_str[20];
char dist_str[40];
char flags[20];
char *p = flags;
+ char *n;
- if (!show_slab)
+ if (skip_zero && !s->slabs)
return;
- aliases = get_obj("aliases");
- align = get_obj("align");
- cache_dma = get_obj("cache_dma");
- cpu_slabs = get_obj("cpu_slabs");
- destroy_by_rcu = get_obj("destroy_by_rcu");
- hwcache_align = get_obj("hwcache_align");
- object_size = get_obj("object_size");
- objects = get_obj("objects");
- objs_per_slab = get_obj("objs_per_slab");
- order = get_obj("order");
- partial = get_obj("partial");
- poison = get_obj("poison");
- reclaim_account = get_obj("reclaim_account");
- red_zone = get_obj("red_zone");
- sanity_checks = get_obj("sanity_checks");
- slab_size = get_obj("slab_size");
- slabs = get_obj("slabs");
- store_user = get_obj("store_user");
- trace = get_obj("trace");
-
- if (skip_zero && !slabs)
- return;
-
- store_size(size_str, slabs * page_size);
- sprintf(dist_str,"%lu/%lu/%lu", slabs, partial, cpu_slabs);
+ store_size(size_str, slab_size(s));
+ sprintf(dist_str,"%lu/%lu/%d", s->slabs, s->partial, s->cpu_slabs);
if (!line++)
first_line();
- if (aliases)
+ if (s->aliases)
*p++ = '*';
- if (cache_dma)
+ if (s->cache_dma)
*p++ = 'd';
- if (hwcache_align)
+ if (s->hwcache_align)
*p++ = 'A';
- if (poison)
+ if (s->poison)
*p++ = 'P';
- if (reclaim_account)
+ if (s->reclaim_account)
*p++ = 'a';
- if (red_zone)
+ if (s->red_zone)
*p++ = 'Z';
- if (sanity_checks)
+ if (s->sanity_checks)
*p++ = 'F';
- if (store_user)
+ if (s->store_user)
*p++ = 'U';
- if (trace)
+ if (s->trace)
*p++ = 'T';
*p = 0;
- printf("%-20s %8ld %7ld %8s %14s %3ld %1ld %3ld %3ld %s\n",
- name, objects, object_size, size_str, dist_str,
- objs_per_slab, order,
- slabs ? (partial * 100) / slabs : 100,
- slabs ? (objects * object_size * 100) /
- (slabs * (page_size << order)) : 100,
+ n = hackname(s);
+ printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n",
+ n, s->objects, s->object_size, size_str, dist_str,
+ s->objs_per_slab, s->order,
+ s->slabs ? (s->partial * 100) / s->slabs : 100,
+ s->slabs ? (s->objects * s->object_size * 100) /
+ (s->slabs * (page_size << s->order)) : 100,
flags);
}
-void slab_numa(const char *name)
+void slab_numa(struct slabinfo *s)
{
- unsigned long slabs;
- char *numainfo;
-
- slabs = get_obj_and_str("slabs", &numainfo);
+ char *n;
+ int node;
- if (skip_zero && !slabs)
+ if (skip_zero && !s->slabs)
return;
+ n = hackname(s);
- printf("%-20s %s", name, numainfo);
-}
+ if (!line) {
+ printf("\nSlab Node ");
+ for(node = 0; node <= highest_node; node++)
+ printf(" %4d", node);
+ printf("\n----------------------");
+ for(node = 0; node <= highest_node; node++)
+ printf("-----");
+ printf("\n");
+ }
+ printf("%-21s ", n);
+ for(node = 0; node <= highest_node; node++) {
+ char b[20];
-void parameter(const char *name)
-{
- if (!show_parameters)
- return;
+ store_size(b, s->numa[node]);
+ printf(" %4s", b);
+ }
+ printf("\n");
+ line++;
}
-void show_tracking(const char *name)
+void show_tracking(struct slabinfo *s)
{
- printf("\n%s: Calls to allocate a slab object\n", name);
+ printf("\n%s: Calls to allocate a slab object\n", s->name);
printf("---------------------------------------------------\n");
if (read_obj("alloc_calls"))
printf(buffer);
- printf("%s: Calls to free a slab object\n", name);
+ printf("%s: Calls to free a slab object\n", s->name);
printf("-----------------------------------------------\n");
if (read_obj("free_calls"))
printf(buffer);
}
+void totals(void)
+{
+ struct slabinfo *s;
+
+ int used_slabs = 0;
+ char b1[20], b2[20], b3[20], b4[20];
+ unsigned long long min_objsize = 0, max_objsize = 0, avg_objsize;
+ unsigned long long min_partial = 0, max_partial = 0, avg_partial, total_partial = 0;
+ unsigned long long min_slabs = 0, max_slabs = 0, avg_slabs, total_slabs = 0;
+ unsigned long long min_size = 0, max_size = 0, avg_size, total_size = 0;
+ unsigned long long min_waste = 0, max_waste = 0, avg_waste, total_waste = 0;
+ unsigned long long min_objects = 0, max_objects = 0, avg_objects, total_objects = 0;
+ unsigned long long min_objwaste = 0, max_objwaste = 0, avg_objwaste;
+ unsigned long long min_used = 0, max_used = 0, avg_used, total_used = 0;
+ unsigned long min_ppart = 0, max_ppart = 0, avg_ppart, total_ppart = 0;
+ unsigned long min_partobj = 0, max_partobj = 0, avg_partobj;
+ unsigned long total_objects_in_partial = 0;
+
+ for (s = slabinfo; s < slabinfo + slabs; s++) {
+ unsigned long long size;
+ unsigned long partial;
+ unsigned long slabs;
+ unsigned long used;
+ unsigned long long wasted;
+ unsigned long long objwaste;
+ long long objects_in_partial;
+ unsigned long percentage_partial;
+
+ if (!s->slabs || !s->objects)
+ continue;
+
+ used_slabs++;
+
+ size = slab_size(s);
+ partial = s->partial << s->order;
+ slabs = s->slabs << s->order;
+ used = s->objects * s->object_size;
+ wasted = size - used;
+ objwaste = wasted / s->objects;
+
+ objects_in_partial = s->objects - (s->slabs - s->partial - s->cpu_slabs)
+ * s->objs_per_slab;
+
+ if (objects_in_partial < 0)
+ objects_in_partial = 0;
+
+ percentage_partial = objects_in_partial * 100 / s->objects;
+ if (percentage_partial > 100)
+ percentage_partial = 100;
+
+ if (s->object_size < min_objsize || !min_objsize)
+ min_objsize = s->object_size;
+ if (partial && (partial < min_partial || !min_partial))
+ min_partial = partial;
+ if (slabs < min_slabs || !min_slabs)
+ min_slabs = slabs;
+ if (size < min_size || !min_size)
+ min_size = size;
+ if (wasted < min_waste || !min_waste)
+ min_waste = wasted;
+ if (objwaste < min_objwaste || !min_objwaste)
+ min_objwaste = objwaste;
+ if (s->objects < min_objects || !min_objects)
+ min_objects = s->objects;
+ if (used < min_used || !min_used)
+ min_used = used;
+ if (objects_in_partial < min_partobj || !min_partobj)
+ min_partobj = objects_in_partial;
+ if (percentage_partial < min_ppart || !min_ppart)
+ min_ppart = percentage_partial;
+
+ if (s->object_size > max_objsize)
+ max_objsize = s->object_size;
+ if (partial > max_partial)
+ max_partial = partial;
+ if (slabs > max_slabs)
+ max_slabs = slabs;
+ if (size > max_size)
+ max_size = size;
+ if (wasted > max_waste)
+ max_waste = wasted;
+ if (objwaste > max_objwaste)
+ max_objwaste = objwaste;
+ if (s->objects > max_objects)
+ max_objects = s->objects;
+ if (used > max_used)
+ max_used = used;
+ if (objects_in_partial > max_partobj)
+ max_partobj = objects_in_partial;
+ if (percentage_partial > max_ppart)
+ max_ppart = percentage_partial;
+
+ total_objects += s->objects;
+ total_partial += partial;
+ total_slabs += slabs;
+ total_used += used;
+ total_waste += wasted;
+ total_size += size;
+ total_ppart += percentage_partial;
+ total_objects_in_partial += objects_in_partial;
+ }
+
+ if (!total_objects) {
+ printf("No objects\n");
+ return;
+ }
+ if (!used_slabs) {
+ printf("No slabs\n");
+ return;
+ }
+ avg_partial = total_partial / used_slabs;
+ avg_slabs = total_slabs / used_slabs;
+ avg_waste = total_waste / used_slabs;
+ avg_size = total_size / used_slabs;
+ avg_objects = total_objects / used_slabs;
+ avg_used = total_used / used_slabs;
+ avg_ppart = total_ppart / used_slabs;
+ avg_partobj = total_objects_in_partial / used_slabs;
+
+ avg_objsize = total_used / total_objects;
+ avg_objwaste = total_waste / total_objects;
+
+ printf("Slabcache Totals\n");
+ printf("----------------\n");
+ printf("Slabcaches : %3d Aliases : %3d Active: %3d\n",
+ slabs, aliases, used_slabs);
+
+ store_size(b1, total_used);store_size(b2, total_waste);
+ store_size(b3, total_waste * 100 / total_used);
+ printf("Memory used: %5s # Loss : %5s MRatio: %3s%%\n", b1, b2, b3);
+
+ store_size(b1, total_objects);store_size(b2, total_objects_in_partial);
+ store_size(b3, total_objects_in_partial * 100 / total_objects);
+ printf("# Objects : %5s # PartObj: %5s ORatio: %3s%%\n", b1, b2, b3);
+
+ printf("\n");
+ printf("Per Cache Average Min Max Total\n");
+ printf("---------------------------------------------------------\n");
+
+ store_size(b1, avg_objects);store_size(b2, min_objects);
+ store_size(b3, max_objects);store_size(b4, total_objects);
+ printf("# Objects %10s %10s %10s %10s\n",
+ b1, b2, b3, b4);
+
+ store_size(b1, avg_slabs);store_size(b2, min_slabs);
+ store_size(b3, max_slabs);store_size(b4, total_slabs);
+ printf("# Slabs %10s %10s %10s %10s\n",
+ b1, b2, b3, b4);
+
+ store_size(b1, avg_partial);store_size(b2, min_partial);
+ store_size(b3, max_partial);store_size(b4, total_partial);
+ printf("# Partial %10s %10s %10s %10s\n",
+ b1, b2, b3, b4);
+ store_size(b1, avg_ppart);store_size(b2, min_ppart);
+ store_size(b3, max_ppart);
+ printf("Partial %10s%% %10s%% %10s%%\n",
+ b1, b2, b3);
+
+ store_size(b1, avg_size);store_size(b2, min_size);
+ store_size(b3, max_size);store_size(b4, total_size);
+ printf("Memory %10s %10s %10s %10s\n",
+ b1, b2, b3, b4);
+
+ store_size(b1, avg_used);store_size(b2, min_used);
+ store_size(b3, max_used);store_size(b4, total_used);
+ printf("Used %10s %10s %10s %10s\n",
+ b1, b2, b3, b4);
+
+ store_size(b1, avg_waste);store_size(b2, min_waste);
+ store_size(b3, max_waste);store_size(b4, total_waste);
+ printf("Waste %10s %10s %10s %10s\n",
+ b1, b2, b3, b4);
+
+ printf("\n");
+ printf("Per Object Average Min Max\n");
+ printf("---------------------------------------------\n");
+
+ store_size(b1, avg_objsize);store_size(b2, min_objsize);
+ store_size(b3, max_objsize);
+ printf("Size %10s %10s %10s\n",
+ b1, b2, b3);
+
+ store_size(b1, avg_objwaste);store_size(b2, min_objwaste);
+ store_size(b3, max_objwaste);
+ printf("Loss %10s %10s %10s\n",
+ b1, b2, b3);
+}
+
+void sort_slabs(void)
+{
+ struct slabinfo *s1,*s2;
+
+ for (s1 = slabinfo; s1 < slabinfo + slabs; s1++) {
+ for (s2 = s1 + 1; s2 < slabinfo + slabs; s2++) {
+ int result;
+
+ if (sort_size)
+ result = slab_size(s1) < slab_size(s2);
+ else
+ result = strcasecmp(s1->name, s2->name);
+
+ if (show_inverted)
+ result = -result;
+
+ if (result > 0) {
+ struct slabinfo t;
+
+ memcpy(&t, s1, sizeof(struct slabinfo));
+ memcpy(s1, s2, sizeof(struct slabinfo));
+ memcpy(s2, &t, sizeof(struct slabinfo));
+ }
+ }
+ }
+}
+
+void sort_aliases(void)
+{
+ struct aliasinfo *a1,*a2;
+
+ for (a1 = aliasinfo; a1 < aliasinfo + aliases; a1++) {
+ for (a2 = a1 + 1; a2 < aliasinfo + aliases; a2++) {
+ char *n1, *n2;
+
+ n1 = a1->name;
+ n2 = a2->name;
+ if (show_alias && !show_inverted) {
+ n1 = a1->ref;
+ n2 = a2->ref;
+ }
+ if (strcasecmp(n1, n2) > 0) {
+ struct aliasinfo t;
+
+ memcpy(&t, a1, sizeof(struct aliasinfo));
+ memcpy(a1, a2, sizeof(struct aliasinfo));
+ memcpy(a2, &t, sizeof(struct aliasinfo));
+ }
+ }
+ }
+}
+
+void link_slabs(void)
+{
+ struct aliasinfo *a;
+ struct slabinfo *s;
+
+ for (a = aliasinfo; a < aliasinfo + aliases; a++) {
+
+ for(s = slabinfo; s < slabinfo + slabs; s++)
+ if (strcmp(a->ref, s->name) == 0) {
+ a->slab = s;
+ s->refs++;
+ break;
+ }
+ if (s == slabinfo + slabs)
+ fatal("Unresolved alias %s\n", a->ref);
+ }
+}
+
+void alias(void)
+{
+ struct aliasinfo *a;
+ char *active = NULL;
+
+ sort_aliases();
+ link_slabs();
+
+ for(a = aliasinfo; a < aliasinfo + aliases; a++) {
+
+ if (!show_single_ref && a->slab->refs == 1)
+ continue;
+
+ if (!show_inverted) {
+ if (active) {
+ if (strcmp(a->slab->name, active) == 0) {
+ printf(" %s", a->name);
+ continue;
+ }
+ }
+ printf("\n%-20s <- %s", a->slab->name, a->name);
+ active = a->slab->name;
+ }
+ else
+ printf("%-20s -> %s\n", a->name, a->slab->name);
+ }
+ if (active)
+ printf("\n");
+}
+
+
+void rename_slabs(void)
+{
+ struct slabinfo *s;
+ struct aliasinfo *a;
+
+ for (s = slabinfo; s < slabinfo + slabs; s++) {
+ if (*s->name != ':')
+ continue;
+
+ if (s->refs > 1 && !show_first_alias)
+ continue;
+
+ a = find_one_alias(s);
+
+ s->name = a->name;
+ }
+}
+
int slab_mismatch(char *slab)
{
return regexec(&pattern, slab, 0, NULL, 0);
}
+void read_slab_dir(void)
+{
+ DIR *dir;
+ struct dirent *de;
+ struct slabinfo *slab = slabinfo;
+ struct aliasinfo *alias = aliasinfo;
+ char *p;
+ char *t;
+ int count;
+
+ dir = opendir(".");
+ while ((de = readdir(dir))) {
+ if (de->d_name[0] == '.' ||
+ slab_mismatch(de->d_name))
+ continue;
+ switch (de->d_type) {
+ case DT_LNK:
+ alias->name = strdup(de->d_name);
+ count = readlink(de->d_name, buffer, sizeof(buffer));
+
+ if (count < 0)
+ fatal("Cannot read symlink %s\n", de->d_name);
+
+ buffer[count] = 0;
+ p = buffer + count;
+ while (p > buffer && p[-1] != '/')
+ p--;
+ alias->ref = strdup(p);
+ alias++;
+ break;
+ case DT_DIR:
+ if (chdir(de->d_name))
+ fatal("Unable to access slab %s\n", de->d_name);
+ slab->name = strdup(de->d_name);
+ slab->alias = 0;
+ slab->refs = 0;
+ slab->aliases = get_obj("aliases");
+ slab->align = get_obj("align");
+ slab->cache_dma = get_obj("cache_dma");
+ slab->cpu_slabs = get_obj("cpu_slabs");
+ slab->destroy_by_rcu = get_obj("destroy_by_rcu");
+ slab->hwcache_align = get_obj("hwcache_align");
+ slab->object_size = get_obj("object_size");
+ slab->objects = get_obj("objects");
+ slab->objs_per_slab = get_obj("objs_per_slab");
+ slab->order = get_obj("order");
+ slab->partial = get_obj_and_str("partial", &t);
+ decode_numa_list(slab->numa_partial, t);
+ slab->poison = get_obj("poison");
+ slab->reclaim_account = get_obj("reclaim_account");
+ slab->red_zone = get_obj("red_zone");
+ slab->sanity_checks = get_obj("sanity_checks");
+ slab->slab_size = get_obj("slab_size");
+ slab->slabs = get_obj_and_str("slabs", &t);
+ decode_numa_list(slab->numa, t);
+ slab->store_user = get_obj("store_user");
+ slab->trace = get_obj("trace");
+ chdir("..");
+ slab++;
+ break;
+ default :
+ fatal("Unknown file type %lx\n", de->d_type);
+ }
+ }
+ closedir(dir);
+ slabs = slab - slabinfo;
+ aliases = alias - aliasinfo;
+ if (slabs > MAX_SLABS)
+ fatal("Too many slabs\n");
+ if (aliases > MAX_ALIASES)
+ fatal("Too many aliases\n");
+}
+
+void output_slabs(void)
+{
+ struct slabinfo *slab;
+
+ for (slab = slabinfo; slab < slabinfo + slabs; slab++) {
+
+ if (slab->alias)
+ continue;
+
+
+ if (show_numa)
+ slab_numa(slab);
+ else
+ if (show_track)
+ show_tracking(slab);
+ else
+ if (validate)
+ slab_validate(slab);
+ else
+ if (shrink)
+ slab_shrink(slab);
+ else {
+ if (show_slab)
+ slabcache(slab);
+ }
+ }
+}
+
struct option opts[] = {
{ "aliases", 0, NULL, 'a' },
{ "slabs", 0, NULL, 's' },
{ "numa", 0, NULL, 'n' },
- { "parameters", 0, NULL, 'p' },
{ "zero", 0, NULL, 'z' },
{ "help", 0, NULL, 'h' },
{ "validate", 0, NULL, 'v' },
+ { "first-alias", 0, NULL, 'f' },
+ { "reduce", 0, NULL, 'r' },
{ "track", 0, NULL, 't'},
+ { "inverted", 0, NULL, 'i'},
+ { "1ref", 0, NULL, '1'},
{ NULL, 0, NULL, 0 }
};
int main(int argc, char *argv[])
{
- DIR *dir;
- struct dirent *de;
int c;
int err;
char *pattern_source;
@@ -312,22 +804,31 @@ int main(int argc, char *argv[])
if (chdir("/sys/slab"))
fatal("This kernel does not have SLUB support.\n");
- while ((c = getopt_long(argc, argv, "ahtvnpsz", opts, NULL)) != -1)
+ while ((c = getopt_long(argc, argv, "afhi1nprstvzTS", opts, NULL)) != -1)
switch(c) {
- case 's':
- show_slab = 1;
+ case '1':
+ show_single_ref = 1;
break;
case 'a':
show_alias = 1;
break;
+ case 'f':
+ show_first_alias = 1;
+ break;
+ case 'h':
+ usage();
+ return 0;
+ case 'i':
+ show_inverted = 1;
+ break;
case 'n':
show_numa = 1;
break;
- case 'p':
- show_parameters = 1;
+ case 'r':
+ shrink = 1;
break;
- case 'z':
- skip_zero = 0;
+ case 's':
+ show_slab = 1;
break;
case 't':
show_track = 1;
@@ -335,17 +836,23 @@ int main(int argc, char *argv[])
case 'v':
validate = 1;
break;
- case 'h':
- usage();
- return 0;
+ case 'z':
+ skip_zero = 0;
+ break;
+ case 'T':
+ show_totals = 1;
+ break;
+ case 'S':
+ sort_size = 1;
+ break;
default:
fatal("%s: Invalid option '%c'\n", argv[0], optopt);
}
- if (!show_slab && !show_alias && !show_parameters && !show_track
- && !validate)
+ if (!show_slab && !show_alias && !show_track
+ && !validate && !shrink)
show_slab = 1;
if (argc > optind)
@@ -357,39 +864,17 @@ int main(int argc, char *argv[])
if (err)
fatal("%s: Invalid pattern '%s' code %d\n",
argv[0], pattern_source, err);
-
- dir = opendir(".");
- while ((de = readdir(dir))) {
- if (de->d_name[0] == '.' ||
- slab_mismatch(de->d_name))
- continue;
- switch (de->d_type) {
- case DT_LNK:
- alias(de->d_name);
- break;
- case DT_DIR:
- if (chdir(de->d_name))
- fatal("Unable to access slab %s\n", de->d_name);
-
- if (show_numa)
- slab_numa(de->d_name);
- else
- if (show_track)
- show_tracking(de->d_name);
- else
- if (validate)
- slab_validate(de->d_name);
- else
- slab(de->d_name);
- chdir("..");
- break;
- case DT_REG:
- parameter(de->d_name);
- break;
- default :
- fatal("Unknown file type %lx\n", de->d_type);
- }
+ read_slab_dir();
+ if (show_alias)
+ alias();
+ else
+ if (show_totals)
+ totals();
+ else {
+ link_slabs();
+ rename_slabs();
+ sort_slabs();
+ output_slabs();
}
- closedir(dir);
return 0;
}
--