* [patch 0/3] Slab Defrag / Slab Targeted Reclaim
@ 2007-05-17 5:16 clameter
2007-05-17 5:17 ` [patch 1/3] SLUB: add support for kmem_cache_ops clameter
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: clameter @ 2007-05-17 5:16 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, linux-mm, Mel Gorman, dgc
Initial support for Slab defragmentation and targeted reclaim. The
functionality here is minimal. We establish a slab API to allow removal
or moving of objects between slabs.
The only user provided here is a dentry cache reclaim capability. This is
limited to the removal of unused dentries for now. It is planned to later
add a similar inode reclaim capability and then extend the move/reclaim
to support moving of dentries and inodes.
Slab defragmentation is performed during kmem_cache_shrink. This is usually
triggered through the slab shrinkers but can also be manually triggered
through the slabinfo command.
Support is provided for antifrag/defrag to evict a specific slab page
through the kmem_cache_vacate function call. Since we can only reclaim
unused dentries for now that functionality is pretty limited (we need to
put some work into making dentries and inodes more reclaimable or movable)
but we can increase the capabilities over time which will allow us to move
slabs from the reclaimable area into the movable area. This will shrink
the reclaimable area significantly. Since we can target the vacating of
pages this may allow the antifrag code to remove a page that hinders the
freeing of a higher order page.
--
^ permalink raw reply [flat|nested] 4+ messages in thread
* [patch 1/3] SLUB: add support for kmem_cache_ops
2007-05-17 5:16 [patch 0/3] Slab Defrag / Slab Targeted Reclaim clameter
@ 2007-05-17 5:17 ` clameter
2007-05-17 5:17 ` [patch 2/3] SLUB: Implement targeted reclaim and partial list defragmentation clameter
2007-05-17 5:17 ` [patch 3/3] Support removal of unused dentry entries via SLUB defrag interface clameter
2 siblings, 0 replies; 4+ messages in thread
From: clameter @ 2007-05-17 5:17 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, linux-mm, Mel Gorman, dgc
[-- Attachment #1: kmem_cache_ops --]
[-- Type: text/plain, Size: 8349 bytes --]
We use the parameter formerly used by the destructor to pass an optional
pointer to a kmem_cache_ops structure to kmem_cache_create.
kmem_cache_ops is created as empty. Later patches populate kmem_cache_ops.
Create a KMEM_CACHE_OPS macro that allows the specification of the
kmem_cache_ops.
Code to handle kmem_cache_ops is added to SLUB. SLAB and SLOB are updated
to be able to take a kmem_cache_ops structure but will ignore it.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
include/linux/slab.h | 13 +++++++++----
include/linux/slub_def.h | 1 +
mm/slab.c | 6 +++---
mm/slob.c | 2 +-
mm/slub.c | 44 ++++++++++++++++++++++++++++++--------------
5 files changed, 44 insertions(+), 22 deletions(-)
Index: slub/include/linux/slab.h
===================================================================
--- slub.orig/include/linux/slab.h 2007-05-15 21:19:51.000000000 -0700
+++ slub/include/linux/slab.h 2007-05-15 21:27:07.000000000 -0700
@@ -38,10 +38,13 @@ typedef struct kmem_cache kmem_cache_t _
void __init kmem_cache_init(void);
int slab_is_available(void);
+struct kmem_cache_ops {
+};
+
struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
unsigned long,
void (*)(void *, struct kmem_cache *, unsigned long),
- void (*)(void *, struct kmem_cache *, unsigned long));
+ const struct kmem_cache_ops *s);
void kmem_cache_destroy(struct kmem_cache *);
int kmem_cache_shrink(struct kmem_cache *);
void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
@@ -59,9 +62,11 @@ int kmem_ptr_validate(struct kmem_cache
* f.e. add ____cacheline_aligned_in_smp to the struct declaration
* then the objects will be properly aligned in SMP configurations.
*/
-#define KMEM_CACHE(__struct, __flags) kmem_cache_create(#__struct,\
- sizeof(struct __struct), __alignof__(struct __struct),\
- (__flags), NULL, NULL)
+#define KMEM_CACHE_OPS(__struct, __flags, __ops) \
+ kmem_cache_create(#__struct, sizeof(struct __struct), \
+ __alignof__(struct __struct), (__flags), NULL, (__ops))
+
+#define KMEM_CACHE(__struct, __flags) KMEM_CACHE_OPS(__struct, __flags, NULL)
#ifdef CONFIG_NUMA
extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
Index: slub/mm/slub.c
===================================================================
--- slub.orig/mm/slub.c 2007-05-15 21:25:46.000000000 -0700
+++ slub/mm/slub.c 2007-05-15 21:29:36.000000000 -0700
@@ -294,6 +294,9 @@ static inline int check_valid_pointer(st
return 1;
}
+struct kmem_cache_ops slub_default_ops = {
+};
+
/*
* Slow version of get and set free pointer.
*
@@ -2003,11 +2006,13 @@ static int calculate_sizes(struct kmem_c
static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
const char *name, size_t size,
size_t align, unsigned long flags,
- void (*ctor)(void *, struct kmem_cache *, unsigned long))
+ void (*ctor)(void *, struct kmem_cache *, unsigned long),
+ const struct kmem_cache_ops *ops)
{
memset(s, 0, kmem_size);
s->name = name;
s->ctor = ctor;
+ s->ops = ops;
s->objsize = size;
s->flags = flags;
s->align = align;
@@ -2191,7 +2196,7 @@ static struct kmem_cache *create_kmalloc
down_write(&slub_lock);
if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN,
- flags, NULL))
+ flags, NULL, &slub_default_ops))
goto panic;
list_add(&s->list, &slab_caches);
@@ -2505,12 +2510,16 @@ static int slab_unmergeable(struct kmem_
if (s->ctor)
return 1;
+ if (s->ops != &slub_default_ops)
+ return 1;
+
return 0;
}
static struct kmem_cache *find_mergeable(size_t size,
size_t align, unsigned long flags,
- void (*ctor)(void *, struct kmem_cache *, unsigned long))
+ void (*ctor)(void *, struct kmem_cache *, unsigned long),
+ const struct kmem_cache_ops *ops)
{
struct list_head *h;
@@ -2520,6 +2529,9 @@ static struct kmem_cache *find_mergeable
if (ctor)
return NULL;
+ if (ops != &slub_default_ops)
+ return NULL;
+
size = ALIGN(size, sizeof(void *));
align = calculate_alignment(flags, align, size);
size = ALIGN(size, align);
@@ -2555,13 +2567,15 @@ static struct kmem_cache *find_mergeable
struct kmem_cache *kmem_cache_create(const char *name, size_t size,
size_t align, unsigned long flags,
void (*ctor)(void *, struct kmem_cache *, unsigned long),
- void (*dtor)(void *, struct kmem_cache *, unsigned long))
+ const struct kmem_cache_ops *ops)
{
struct kmem_cache *s;
- BUG_ON(dtor);
+ if (!ops)
+ ops = &slub_default_ops;
+
down_write(&slub_lock);
- s = find_mergeable(size, align, flags, ctor);
+ s = find_mergeable(size, align, flags, ctor, ops);
if (s) {
s->refcount++;
/*
@@ -2575,7 +2589,7 @@ struct kmem_cache *kmem_cache_create(con
} else {
s = kmalloc(kmem_size, GFP_KERNEL);
if (s && kmem_cache_open(s, GFP_KERNEL, name,
- size, align, flags, ctor)) {
+ size, align, flags, ctor, ops)) {
if (sysfs_slab_add(s)) {
kfree(s);
goto err;
@@ -3206,16 +3220,18 @@ static ssize_t order_show(struct kmem_ca
}
SLAB_ATTR_RO(order);
-static ssize_t ctor_show(struct kmem_cache *s, char *buf)
+static ssize_t ops_show(struct kmem_cache *s, char *buf)
{
- if (s->ctor) {
- int n = sprint_symbol(buf, (unsigned long)s->ctor);
+ int x = 0;
- return n + sprintf(buf + n, "\n");
+ if (s->ctor) {
+ x += sprintf(buf + x, "ctor : ");
+ x += sprint_symbol(buf + x, (unsigned long)s->ctor);
+ x += sprintf(buf + x, "\n");
}
- return 0;
+ return x;
}
-SLAB_ATTR_RO(ctor);
+SLAB_ATTR_RO(ops);
static ssize_t aliases_show(struct kmem_cache *s, char *buf)
{
@@ -3447,7 +3463,7 @@ static struct attribute * slab_attrs[] =
&slabs_attr.attr,
&partial_attr.attr,
&cpu_slabs_attr.attr,
- &ctor_attr.attr,
+ &ops_attr.attr,
&aliases_attr.attr,
&align_attr.attr,
&sanity_checks_attr.attr,
Index: slub/include/linux/slub_def.h
===================================================================
--- slub.orig/include/linux/slub_def.h 2007-05-15 21:21:27.000000000 -0700
+++ slub/include/linux/slub_def.h 2007-05-15 21:26:13.000000000 -0700
@@ -40,6 +40,7 @@ struct kmem_cache {
int objects; /* Number of objects in slab */
int refcount; /* Refcount for slab cache destroy */
void (*ctor)(void *, struct kmem_cache *, unsigned long);
+ const struct kmem_cache_ops *ops;
int inuse; /* Offset to metadata */
int align; /* Alignment */
const char *name; /* Name (only for display!) */
Index: slub/mm/slab.c
===================================================================
--- slub.orig/mm/slab.c 2007-05-15 21:19:51.000000000 -0700
+++ slub/mm/slab.c 2007-05-15 21:26:13.000000000 -0700
@@ -2100,7 +2100,7 @@ static int setup_cpu_cache(struct kmem_c
* @align: The required alignment for the objects.
* @flags: SLAB flags
* @ctor: A constructor for the objects.
- * @dtor: A destructor for the objects (not implemented anymore).
+ * @ops: A kmem_cache_ops structure (ignored).
*
* Returns a ptr to the cache on success, NULL on failure.
* Cannot be called within a int, but can be interrupted.
@@ -2126,7 +2126,7 @@ struct kmem_cache *
kmem_cache_create (const char *name, size_t size, size_t align,
unsigned long flags,
void (*ctor)(void*, struct kmem_cache *, unsigned long),
- void (*dtor)(void*, struct kmem_cache *, unsigned long))
+ const struct kmem_cache_ops *ops)
{
size_t left_over, slab_size, ralign;
struct kmem_cache *cachep = NULL, *pc;
@@ -2135,7 +2135,7 @@ kmem_cache_create (const char *name, siz
* Sanity checks... these are all serious usage bugs.
*/
if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
- size > KMALLOC_MAX_SIZE || dtor) {
+ size > KMALLOC_MAX_SIZE) {
printk(KERN_ERR "%s: Early error in slab %s\n", __FUNCTION__,
name);
BUG();
Index: slub/mm/slob.c
===================================================================
--- slub.orig/mm/slob.c 2007-05-15 21:17:15.000000000 -0700
+++ slub/mm/slob.c 2007-05-15 21:28:06.000000000 -0700
@@ -285,7 +285,7 @@ struct kmem_cache {
struct kmem_cache *kmem_cache_create(const char *name, size_t size,
size_t align, unsigned long flags,
void (*ctor)(void*, struct kmem_cache *, unsigned long),
- void (*dtor)(void*, struct kmem_cache *, unsigned long))
+ const struct kmem_cache_ops *o)
{
struct kmem_cache *c;
--
^ permalink raw reply [flat|nested] 4+ messages in thread
* [patch 2/3] SLUB: Implement targeted reclaim and partial list defragmentation
2007-05-17 5:16 [patch 0/3] Slab Defrag / Slab Targeted Reclaim clameter
2007-05-17 5:17 ` [patch 1/3] SLUB: add support for kmem_cache_ops clameter
@ 2007-05-17 5:17 ` clameter
2007-05-17 5:17 ` [patch 3/3] Support removal of unused dentry entries via SLUB defrag interface clameter
2 siblings, 0 replies; 4+ messages in thread
From: clameter @ 2007-05-17 5:17 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, linux-mm, Mel Gorman, dgc
[-- Attachment #1: get_ref_kick --]
[-- Type: text/plain, Size: 15502 bytes --]
Targeted reclaim allows targeting a single slab for reclaim. This is done by
calling
kmem_cache_vacate(page);
It will return 1 on success, 0 if the operation failed.
The vacate functionality is also used for slab shrinking. During the shrink
operation SLUB will generate a list sorted by the number of objects in use.
We extract pages off that list that are no more than a quarter full. These
pages are then processed using kmem_cache_vacate.
In order for a slabcache to support this functionality a couple of functions
must be defined via kmem_cache_ops. These are
int get(struct kmem_cache *s, void *)
Must obtain a reference to the indicated object. SLUB guarantees that
the object is still allocated. However, another thread may be blocked
in slab_free attempting to free the same object. It may succeed as
soon as get() returns to the slab allocator. The function must
detect this situation and return 1 if that is the case.
If the object cannot be freed then a negative -Exx code must be
returned indicating the reason for the failure.
get() returns 0 on success.
No slab operations may be performed in get(). Interrupts
are disabled. What can be done is very limited. The slab lock
for the page with the object is taken. Any attempt to perform a slab
operation may lead to a deadlock.
void put(struct kmem_cache *, void *)
Used to restore the reference count obtained by get() if the reclaim
logic decides to abandon the attempt to vacate all objects in a slab.
This is usually the case if get() indicates that an object is not
freeable.
put() is optional. If it is not defined then it is assumed that we
can simply abandon get()s on slab objects.
int kick(struct kmem_cache *, void *)
After SLUB has established references to the remaining objects in a
slab it will drop all locks and then use kick() on each of the
objects. The existence of the object is guaranteed by virtue of the
earlier obtained reference. The callback may perform any slab operation
since no locks are held at the time of call.
Function must return 0 if the object was successfully freed.
Return -Exxx to indicate that the object is not freeable and to stop
further attempt to free objects in this slab.
The callback should remove the object from the slab in some way. This
may be accomplished by reclaiming the object and then running
kmem_cache_free() or reallocating it and then running
kmem_cache_free(). Reallocation is advantageous because the partial
slabs were just sorted to have the partial slabs with the most objects
first. Allocation is likely to result in filling up a slab so that
it can be removed from the partial list.
void sync(void)
After all objects have been removed by kick()s this function will be
called to ensure that all free operations have completed. Typically
the function called here is synchronize_rcu() if the slab cache uses
RCU to free objects. The function is optional. If it is not specified
then no synchronization is done before removing the slab.
If a kmem_cache_vacate on a page fails then the slab usually has a pretty
low usage ratio. Go through the slab and resequence the freelist so that
object addresses increase as we allocate objects. This will trigger the
cacheline prefetcher and increase allocation speed.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
include/linux/slab.h | 34 +++++
mm/slab.c | 9 +
mm/slob.c | 9 +
mm/slub.c | 304 +++++++++++++++++++++++++++++++++++++++++++++++++--
4 files changed, 346 insertions(+), 10 deletions(-)
Index: slub/include/linux/slab.h
===================================================================
--- slub.orig/include/linux/slab.h 2007-05-16 22:12:43.000000000 -0700
+++ slub/include/linux/slab.h 2007-05-16 22:12:44.000000000 -0700
@@ -39,6 +39,39 @@ void __init kmem_cache_init(void);
int slab_is_available(void);
struct kmem_cache_ops {
+ /*
+ * Called with slab lock held and interrupts disabled.
+ * No slab operation may be performed.
+ *
+ * Return 0 if reference was successfully obtained
+ * Return 1 if a concurrent kmem_cache_free is waiting to free object
+ * Return -errcode if it is not possible to free the object.
+ * No reference was obtained.
+ */
+ int (*get)(struct kmem_cache *, void *);
+
+ /*
+ * Use to restore the reference count if we abandon the
+ * attempt to vacate a slab page due to an unmovable
+ * object.
+ */
+ void (*put)(struct kmem_cache *, void *);
+
+ /*
+ * Called with no locks held and interrupts enabled.
+ * Any operation may be performed in kick_object.
+ *
+ * Return 0 for success
+ * Return -errcode aborts further kicks to objects in the slab
+ */
+ int (*kick)(struct kmem_cache *, void *);
+
+ /*
+ * Callback to make sure that all object freeing is complete.
+ * If the slab destroys objects by RCU then this needs to be
+ * set to synchronize_rcu().
+ */
+ void (*sync)(void);
};
struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
@@ -53,6 +86,7 @@ void kmem_cache_free(struct kmem_cache *
unsigned int kmem_cache_size(struct kmem_cache *);
const char *kmem_cache_name(struct kmem_cache *);
int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr);
+int kmem_cache_vacate(struct page *);
/*
* Please use this macro to create slab caches. Simply specify the
Index: slub/mm/slub.c
===================================================================
--- slub.orig/mm/slub.c 2007-05-16 22:12:43.000000000 -0700
+++ slub/mm/slub.c 2007-05-16 22:12:44.000000000 -0700
@@ -1043,12 +1043,11 @@ static struct page *new_slab(struct kmem
n = get_node(s, page_to_nid(page));
if (n)
atomic_long_inc(&n->nr_slabs);
+
+ page->inuse = 0;
+ page->lockless_freelist = NULL;
page->offset = s->offset / sizeof(void *);
page->slab = s;
- page->flags |= 1 << PG_slab;
- if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
- SLAB_STORE_USER | SLAB_TRACE))
- SetSlabDebug(page);
start = page_address(page);
end = start + s->objects * s->size;
@@ -1066,11 +1065,20 @@ static struct page *new_slab(struct kmem
set_freepointer(s, last, NULL);
page->freelist = start;
- page->lockless_freelist = NULL;
- page->inuse = 0;
-out:
- if (flags & __GFP_WAIT)
- local_irq_disable();
+
+ /*
+ * page->inuse must be visible when PageSlab(page) becomes
+ * true for targeted reclaim
+ */
+ smp_wmb();
+ __SetPageSlab(page);
+ if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
+ SLAB_STORE_USER | SLAB_TRACE))
+ SetSlabDebug(page);
+
+ out:
+ if (flags & __GFP_WAIT)
+ local_irq_disable();
return page;
}
@@ -2323,6 +2331,218 @@ void kfree(const void *x)
EXPORT_SYMBOL(kfree);
/*
+ * Order the freelist so that addresses increase as object are allocated.
+ * This is useful to trigger the cpu cacheline prefetching logic.
+ */
+void resequence_freelist(struct kmem_cache *s, struct page *page)
+{
+ void *p;
+ void *last;
+ void *addr = page_address(page);
+ DECLARE_BITMAP(map, s->objects);
+
+ bitmap_zero(map, s->objects);
+
+ /* Figure out which objects are on the freelist */
+ for_each_free_object(p, s, page->freelist)
+ set_bit(slab_index(p, s, addr), map);
+
+ last = NULL;
+ for_each_object(p, s, addr)
+ if (test_bit(slab_index(p, s, addr), map)) {
+ if (last)
+ set_freepointer(s, last, p);
+ else
+ page->freelist = p;
+ last = p;
+ }
+
+ if (last)
+ set_freepointer(s, last, NULL);
+ else
+ page->freelist = NULL;
+}
+
+/*
+ * Vacate all objects in the given slab.
+ *
+ * Slab must be locked and frozen. Interrupts are disabled (flags must
+ * be passed).
+ *
+ * Will drop and regain and drop the slab lock. At the end the slab will
+ * either be freed or returned to the partial lists.
+ *
+ * Returns the number of remaining objects
+ */
+static int __kmem_cache_vacate(struct kmem_cache *s,
+ struct page *page, unsigned long flags)
+{
+ void *p;
+ void *addr = page_address(page);
+ DECLARE_BITMAP(map, s->objects);
+ int leftover;
+
+ if (!page->inuse)
+ return 0;
+
+ /* Determine free objects */
+ bitmap_fill(map, s->objects);
+ for_each_free_object(p, s, page->freelist)
+ __clear_bit(slab_index(p, s, addr), map);
+
+ /*
+ * Get a refcount for all used objects. If that fails then
+ * no KICK callback can be performed.
+ */
+ for_each_object(p, s, addr) {
+ int i = slab_index(p, s, addr);
+
+ if (test_bit(i, map)) {
+ int x = s->ops->get(s, p);
+
+ if (x > 0)
+ /*
+ * Concurrent free in progress, there is no
+ * need to do the kick call for this
+ * object
+ */
+ __clear_bit(i, map);
+
+ if (x >= 0)
+ continue;
+
+ /*
+ * Unfreeable object encountered. We have no chance
+ * to free up all objects. So free none.
+ * Drop refcounts.
+ */
+ if (s->ops->put) {
+ while (p > addr) {
+ p -= s->size;
+ if (test_bit(slab_index(p, s, addr),
+ map))
+ s->ops->put(s, p);
+ }
+ }
+ goto out;
+ }
+ }
+
+ /*
+ * Got references. Now we can drop the slab lock. The slab
+ * is frozen so it cannot vanish from under us nor will
+ * allocations be performed on the slab. However, unlocking the
+ * slab will allow concurrent slab_frees to proceed.
+ */
+ slab_unlock(page);
+ local_irq_restore(flags);
+
+ /*
+ * Perform the KICK callbacks to remove the objects. This is
+ * expected to remove objects in the slab.
+ */
+ for_each_object(p, s, addr)
+ if (test_bit(slab_index(p, s, addr), map)) {
+ int x = s->ops->kick(s, p);
+
+ if (x < 0)
+ /* Unfreeable object. Abort kicks */
+ break;
+ }
+
+ /*
+ * Insure deletion operations have completed.
+ */
+ if (s->ops->sync)
+ s->ops->sync();
+
+ /*
+ * Check the result and unfreeze the slab
+ */
+ local_irq_save(flags);
+ slab_lock(page);
+out:
+ leftover = page->inuse;
+ if (leftover > 0)
+ /*
+ * Cannot free. Lets at least optimize the freelist. We have
+ * likely touched all the cachelines with the free pointers
+ * already so it is cheap to do here.
+ */
+ resequence_freelist(s, page);
+ unfreeze_slab(s, page);
+ local_irq_restore(flags);
+ return leftover;
+}
+
+/*
+ * Get a page off a list and freeze it. Must be holding slab lock.
+ */
+static void freeze_from_list(struct kmem_cache *s, struct page *page)
+{
+ if (page->inuse < s->objects)
+ remove_partial(s, page);
+ else if (s->flags & SLAB_STORE_USER)
+ remove_full(s, page);
+ SetSlabFrozen(page);
+}
+
+/*
+ * Attempt to free objects in a page. Return 1 if successful.
+ */
+int kmem_cache_vacate(struct page *page)
+{
+ unsigned long flags;
+ struct kmem_cache *s;
+ int vacated = 0;
+
+ /*
+ * Get a reference to the page. Return if its freed or being freed.
+ * This is necessary to make sure that the page does not vanish
+ * from under us before we are able to check the result.
+ */
+ if (!get_page_unless_zero(page))
+ return 0;
+
+ if (!PageSlab(page))
+ goto out;
+
+ local_irq_save(flags);
+ slab_lock(page);
+
+ /*
+ * We may now have locked a page that may be in various stages of
+ * being freed. If the PageSlab bit is off then we have already
+ * reached the page allocator. If page->inuse is zero then we are
+ * in SLUB but freeing or allocating the page.
+ * page->inuse is never modified without the slab lock held.
+ *
+ * Also abort if the page happens to be already frozen. If its
+ * frozen then a concurrent vacate may be in progress.
+ */
+ if (!PageSlab(page) || SlabFrozen(page) || !page->inuse)
+ goto out_locked;
+
+ /*
+ * We are holding a lock on a slab page and all operations on the
+ * slab are blocking.
+ */
+ s = page->slab;
+ if (!s->ops->get || !s->ops->kick)
+ goto out_locked;
+ freeze_from_list(s, page);
+ vacated = __kmem_cache_vacate(s, page, flags) == 0;
+out:
+ put_page(page);
+ return vacated;
+out_locked:
+ slab_unlock(page);
+ local_irq_restore(flags);
+ goto out;
+
+}
+
+/*
* kmem_cache_shrink removes empty slabs from the partial lists and sorts
* the remaining slabs by the number of items in use. The slabs with the
* most items in use come first. New allocations will then fill those up
@@ -2337,11 +2557,12 @@ int kmem_cache_shrink(struct kmem_cache
int node;
int i;
struct kmem_cache_node *n;
- struct page *page;
+ struct page *page, *page2;
struct page *t;
struct list_head *slabs_by_inuse =
kmalloc(sizeof(struct list_head) * s->objects, GFP_KERNEL);
unsigned long flags;
+ LIST_HEAD(zaplist);
if (!slabs_by_inuse)
return -ENOMEM;
@@ -2392,8 +2613,43 @@ int kmem_cache_shrink(struct kmem_cache
for (i = s->objects - 1; i >= 0; i--)
list_splice(slabs_by_inuse + i, n->partial.prev);
+ /*
+ * If we have no functions available to defragment the slabs
+ * then we are done.
+ */
+ if (!s->ops->get || !s->ops->kick)
+ goto out;
+
+ /* Take objects with just a few objects off the tail */
+ while (n->nr_partial > MAX_PARTIAL) {
+ page = container_of(n->partial.prev, struct page, lru);
+
+ /*
+ * We are holding the list_lock so we can only
+ * trylock the slab
+ */
+ if (page->inuse > s->objects / 4)
+ break;
+
+ if (!slab_trylock(page))
+ break;
+
+ list_move(&page->lru, &zaplist);
+ n->nr_partial--;
+ SetSlabFrozen(page);
+ slab_unlock(page);
+ }
out:
spin_unlock_irqrestore(&n->list_lock, flags);
+
+ /* Now we can free objects in the slabs on the zaplist */
+ list_for_each_entry_safe(page, page2, &zaplist, lru) {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ slab_lock(page);
+ __kmem_cache_vacate(s, page, flags);
+ }
}
kfree(slabs_by_inuse);
@@ -3229,6 +3485,34 @@ static ssize_t ops_show(struct kmem_cach
x += sprint_symbol(buf + x, (unsigned long)s->ctor);
x += sprintf(buf + x, "\n");
}
+
+ if (s->ops->get) {
+ x += sprintf(buf + x, "get : ");
+ x += sprint_symbol(buf + x,
+ (unsigned long)s->ops->get);
+ x += sprintf(buf + x, "\n");
+ }
+
+ if (s->ops->put) {
+ x += sprintf(buf + x, "put : ");
+ x += sprint_symbol(buf + x,
+ (unsigned long)s->ops->put);
+ x += sprintf(buf + x, "\n");
+ }
+
+ if (s->ops->kick) {
+ x += sprintf(buf + x, "kick : ");
+ x += sprint_symbol(buf + x,
+ (unsigned long)s->ops->kick);
+ x += sprintf(buf + x, "\n");
+ }
+
+ if (s->ops->sync) {
+ x += sprintf(buf + x, "sync : ");
+ x += sprint_symbol(buf + x,
+ (unsigned long)s->ops->sync);
+ x += sprintf(buf + x, "\n");
+ }
return x;
}
SLAB_ATTR_RO(ops);
Index: slub/mm/slab.c
===================================================================
--- slub.orig/mm/slab.c 2007-05-16 22:14:34.000000000 -0700
+++ slub/mm/slab.c 2007-05-16 22:14:47.000000000 -0700
@@ -2516,6 +2516,15 @@ int kmem_cache_shrink(struct kmem_cache
}
EXPORT_SYMBOL(kmem_cache_shrink);
+/*
+ * SLAB does not support slab defragmentation
+ */
+int kmem_cache_vacate(struct page *page)
+{
+ return 0;
+}
+EXPORT_SYMBOL(kmem_cache_vacate);
+
/**
* kmem_cache_destroy - delete a cache
* @cachep: the cache to destroy
Index: slub/mm/slob.c
===================================================================
--- slub.orig/mm/slob.c 2007-05-16 22:13:41.000000000 -0700
+++ slub/mm/slob.c 2007-05-16 22:14:19.000000000 -0700
@@ -394,6 +394,15 @@ int kmem_cache_shrink(struct kmem_cache
}
EXPORT_SYMBOL(kmem_cache_shrink);
+/*
+ * SLOB does not support slab defragmentation
+ */
+int kmem_cache_vacate(struct page *page)
+{
+ return 0;
+}
+EXPORT_SYMBOL(kmem_cache_vacate);
+
int kmem_ptr_validate(struct kmem_cache *a, const void *b)
{
return 0;
--
^ permalink raw reply [flat|nested] 4+ messages in thread
* [patch 3/3] Support removal of unused dentry entries via SLUB defrag interface
2007-05-17 5:16 [patch 0/3] Slab Defrag / Slab Targeted Reclaim clameter
2007-05-17 5:17 ` [patch 1/3] SLUB: add support for kmem_cache_ops clameter
2007-05-17 5:17 ` [patch 2/3] SLUB: Implement targeted reclaim and partial list defragmentation clameter
@ 2007-05-17 5:17 ` clameter
2 siblings, 0 replies; 4+ messages in thread
From: clameter @ 2007-05-17 5:17 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, linux-mm, Mel Gorman, dgc
[-- Attachment #1: dentry_targeted_reclaim --]
[-- Type: text/plain, Size: 3979 bytes --]
This patch allows the removal of unused dentry entries in a partially
populated slab page. It is still very limited in what it can do for reclaim
but this catches bad cases in which we have a long list of partial
slabs with a few entries in each of them. We can free up the slabs
that have only unused dentry entries in them.
get_dentry() uses the dcache lock and then works with dget_locked
to obtain a reference to the dentry. An additional complication is that
the dentry may be in the process of being freed or it may just have been
allocated. In that case d_inode is NULL. If we discover this then we
simply stay away from the object and return 1 to indicate to the
defrag logic that this object will be free. Otherwise we increment
the refcount and return success.
kick_dentry() is called after get_dentry() has
been used and after the slab has dropped all of its own locks. The dentry
pruning for unused entries works in a straightforward way.
Note: The code here could be significantly improved. If we could
get to a point where all used dentries could be moved then full
dentry slab defragmentation and vacating of dentry slab pages would
become possible.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
fs/dcache.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 81 insertions(+), 8 deletions(-)
Index: slub/fs/dcache.c
===================================================================
--- slub.orig/fs/dcache.c 2007-05-16 20:58:02.000000000 -0700
+++ slub/fs/dcache.c 2007-05-16 20:59:27.000000000 -0700
@@ -2114,18 +2114,91 @@ static void __init dcache_init_early(voi
INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}
+/*
+ * The slab is holding off frees. Thus we can safely examine
+ * the object without the danger of it vanishing from under us.
+ */
+static int get_dentry(struct kmem_cache *s, void *private)
+{
+ struct dentry *dentry = private;
+ int result = 0;
+
+ spin_lock(&dcache_lock);
+ /*
+ * dentry->d_inode is set to NULL when the dentry
+ * is freed. Use that as an indicator that we should
+ * not interfere with the freeing process.
+ */
+ if (dentry->d_inode) {
+ dget_locked(dentry);
+ if (atomic_read(&dentry->d_count) > 2)
+ /*
+ * Moving of dentries in use not
+ * implemented yet.
+ */
+ result = -EINVAL;
+ } else
+ result = 1;
+ spin_unlock(&dcache_lock);
+ return result;
+}
+
+static void put_dentry(struct kmem_cache *s, void *private)
+{
+ struct dentry *dentry = private;
+
+ dput(dentry);
+}
+
+/*
+ * Slab has dropped all the locks. Get rid of the
+ * refcount we obtained earlier and also rid of the
+ * object.
+ */
+static int kick_dentry(struct kmem_cache *s, void *private)
+{
+ struct dentry *dentry = private;
+
+ spin_lock(&dcache_lock);
+ spin_lock(&dentry->d_lock);
+ if (atomic_read(&dentry->d_count) > 1) {
+ /*
+ * Reference count was increased.
+ * We need to abandon the freeing of
+ * objects.
+ */
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
+ dput(dentry);
+ return -EBUSY;
+ }
+
+ /* Remove from LRU */
+ if (!list_empty(&dentry->d_lru)) {
+ dentry_stat.nr_unused--;
+ list_del_init(&dentry->d_lru);
+ }
+ /* Drop the entry */
+ prune_one_dentry(dentry, 1);
+ spin_unlock(&dcache_lock);
+ return 0;
+}
+
+static struct kmem_cache_ops dentry_kmem_cache_ops = {
+ .get = get_dentry,
+ .put = put_dentry,
+ .kick = kick_dentry,
+ .sync = synchronize_rcu
+};
+
static void __init dcache_init(unsigned long mempages)
{
int loop;
- /*
- * A constructor could be added for stable state like the lists,
- * but it is probably not worth it because of the cache nature
- * of the dcache.
- */
- dentry_cache = KMEM_CACHE(dentry,
- SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
-
+ dentry_cache = KMEM_CACHE_OPS(dentry,
+ SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD,
+ &dentry_kmem_cache_ops);
+
register_shrinker(&dcache_shrinker);
/* Hash may have been set up in dcache_init_early */
--
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2007-05-17 5:20 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-05-17 5:16 [patch 0/3] Slab Defrag / Slab Targeted Reclaim clameter
2007-05-17 5:17 ` [patch 1/3] SLUB: add support for kmem_cache_ops clameter
2007-05-17 5:17 ` [patch 2/3] SLUB: Implement targeted reclaim and partial list defragmentation clameter
2007-05-17 5:17 ` [patch 3/3] Support removal of unused dentry entries via SLUB defrag interface clameter
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.