From: Nitin Gupta <ngupta@vflare.org>
To: Pekka Enberg <penberg@cs.helsinki.fi>,
Hugh Dickins <hugh.dickins@tiscali.co.uk>,
Andrew Morton <akpm@linux-foundation.org>,
Greg KH <greg@kroah.com>,
Dan Magenheimer <dan.magenheimer@oracle.com>,
Rik van Riel <riel@redhat.com>, Avi Kivity <avi@redhat.com>,
Christoph Hellwig <hch@infradead.org>,
Minchan Kim <minchan.kim@gmail.com>,
Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: linux-mm <linux-mm@kvack.org>,
linux-kernel <linux-kernel@vger.kernel.org>
Subject: [PATCH 4/8] Shrink zcache based on memlimit
Date: Fri, 16 Jul 2010 18:07:46 +0530 [thread overview]
Message-ID: <1279283870-18549-5-git-send-email-ngupta@vflare.org> (raw)
In-Reply-To: <1279283870-18549-1-git-send-email-ngupta@vflare.org>
User can change (per-pool) memlimit using sysfs node:
/sys/kernel/mm/zcache/pool<id>/memlimit
When memlimit is set to a value smaller than the current
number of pages allocated for that pool, excess pages
are now freed immediately instead of waiting for get/
flush for these pages.
Currently, victim page selection is essentially random.
Automatic cache resizing and better page replacement
policies will be implemented later.
Signed-off-by: Nitin Gupta <ngupta@vflare.org>
---
drivers/staging/zram/zcache_drv.c | 115 ++++++++++++++++++++++++++++++++++---
1 files changed, 106 insertions(+), 9 deletions(-)
diff --git a/drivers/staging/zram/zcache_drv.c b/drivers/staging/zram/zcache_drv.c
index f680f19..c5de65d 100644
--- a/drivers/staging/zram/zcache_drv.c
+++ b/drivers/staging/zram/zcache_drv.c
@@ -41,6 +41,7 @@
#include <linux/kernel.h>
#include <linux/cleancache.h>
#include <linux/highmem.h>
+#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/u64_stats_sync.h>
@@ -416,7 +417,8 @@ out:
* Called under zcache_inode_rb->tree_lock
*/
#define FREE_BATCH 16
-static void zcache_free_inode_pages(struct zcache_inode_rb *znode)
+static void zcache_free_inode_pages(struct zcache_inode_rb *znode,
+ u32 pages_to_free)
{
int count;
unsigned long index = 0;
@@ -428,6 +430,8 @@ static void zcache_free_inode_pages(struct zcache_inode_rb *znode)
count = radix_tree_gang_lookup(&znode->page_tree,
(void **)pages, index, FREE_BATCH);
+ if (count > pages_to_free)
+ count = pages_to_free;
for (i = 0; i < count; i++) {
index = pages[i]->index;
@@ -437,7 +441,98 @@ static void zcache_free_inode_pages(struct zcache_inode_rb *znode)
}
index++;
- } while (count == FREE_BATCH);
+ pages_to_free -= count;
+ } while (pages_to_free && (count == FREE_BATCH));
+}
+
+/*
+ * Returns number of pages stored in excess of currently
+ * set memlimit for the given pool.
+ */
+static u32 zcache_count_excess_pages(struct zcache_pool *zpool)
+{
+ u32 excess_pages, memlimit_pages, pages_stored;
+
+ memlimit_pages = zcache_get_memlimit(zpool) >> PAGE_SHIFT;
+ pages_stored = zcache_get_stat(zpool, ZPOOL_STAT_PAGES_STORED);
+ excess_pages = pages_stored > memlimit_pages ?
+ pages_stored - memlimit_pages : 0;
+
+ return excess_pages;
+}
+
+/*
+ * Free pages from this pool till we come within its memlimit.
+ *
+ * Currently, it is called only when the user sets memlimit lower than the
+ * number of pages currently stored in that pool. We select nodes in
+ * order of increasing inode number. This, in general, has no correlation
+ * with the order in which these are added. So, it is essentially random
+ * selection of nodes. Pages within a victim node are freed in order
+ * of increasing index number.
+ *
+ * Automatic cache resizing and better page replacement policies will
+ * be implemented later.
+ */
+static void zcache_shrink_pool(struct zcache_pool *zpool)
+{
+ struct rb_node *node;
+ struct zcache_inode_rb *znode;
+
+ read_lock(&zpool->tree_lock);
+ node = rb_first(&zpool->inode_tree);
+ if (unlikely(!node)) {
+ read_unlock(&zpool->tree_lock);
+ return;
+ }
+ znode = rb_entry(node, struct zcache_inode_rb, rb_node);
+ kref_get(&znode->refcount);
+ read_unlock(&zpool->tree_lock);
+
+ do {
+ u32 pages_to_free;
+ struct rb_node *next_node;
+ struct zcache_inode_rb *next_znode;
+
+ pages_to_free = zcache_count_excess_pages(zpool);
+ if (!pages_to_free) {
+ spin_lock(&znode->tree_lock);
+ if (zcache_inode_is_empty(znode))
+ zcache_inode_isolate(znode);
+ spin_unlock(&znode->tree_lock);
+
+ kref_put(&znode->refcount, zcache_inode_release);
+ break;
+ }
+
+ /*
+ * Get the next victim node before we (possibly) isolate
+ * the current node.
+ */
+ read_lock(&zpool->tree_lock);
+ next_node = rb_next(node);
+ next_znode = NULL;
+ if (next_node) {
+ next_znode = rb_entry(next_node,
+ struct zcache_inode_rb, rb_node);
+ kref_get(&next_znode->refcount);
+ }
+ read_unlock(&zpool->tree_lock);
+
+ spin_lock(&znode->tree_lock);
+ zcache_free_inode_pages(znode, pages_to_free);
+ if (zcache_inode_is_empty(znode))
+ zcache_inode_isolate(znode);
+ spin_unlock(&znode->tree_lock);
+
+ kref_put(&znode->refcount, zcache_inode_release);
+
+ /* Avoid busy-looping */
+ cond_resched();
+
+ node = next_node;
+ znode = next_znode;
+ } while (znode);
}
#ifdef CONFIG_SYSFS
@@ -476,10 +571,13 @@ static void memlimit_sysfs_common(struct kobject *kobj, u64 *value, int store)
{
struct zcache_pool *zpool = zcache_kobj_to_pool(kobj);
- if (store)
+ if (store) {
zcache_set_memlimit(zpool, *value);
- else
+ if (zcache_count_excess_pages(zpool))
+ zcache_shrink_pool(zpool);
+ } else {
*value = zcache_get_memlimit(zpool);
+ }
}
static ssize_t memlimit_store(struct kobject *kobj,
@@ -687,9 +785,8 @@ static void zcache_put_page(int pool_id, ino_t inode_no,
/*
* memlimit can be changed any time by user using sysfs. If
* it is set to a value smaller than current number of pages
- * stored, then excess pages are not freed immediately but
- * further puts are blocked till sufficient number of pages
- * are flushed/freed.
+ * stored, then excess pages are freed synchronously when this
+ * sysfs event occurs.
*/
if (zcache_get_stat(zpool, ZPOOL_STAT_PAGES_STORED) >
zcache_get_memlimit(zpool) >> PAGE_SHIFT) {
@@ -781,7 +878,7 @@ static void zcache_flush_inode(int pool_id, ino_t inode_no)
return;
spin_lock_irqsave(&znode->tree_lock, flags);
- zcache_free_inode_pages(znode);
+ zcache_free_inode_pages(znode, UINT_MAX);
if (zcache_inode_is_empty(znode))
zcache_inode_isolate(znode);
spin_unlock_irqrestore(&znode->tree_lock, flags);
@@ -815,7 +912,7 @@ static void zcache_flush_fs(int pool_id)
while (node) {
znode = rb_entry(node, struct zcache_inode_rb, rb_node);
node = rb_next(node);
- zcache_free_inode_pages(znode);
+ zcache_free_inode_pages(znode, UINT_MAX);
rb_erase(&znode->rb_node, &zpool->inode_tree);
kfree(znode);
}
--
1.7.1.1
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2010-07-16 12:37 UTC|newest]
Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-07-16 12:37 [PATCH 0/8] zcache: page cache compression support Nitin Gupta
2010-07-16 12:37 ` [PATCH 1/8] Allow sharing xvmalloc for zram and zcache Nitin Gupta
2010-07-17 18:10 ` Rik van Riel
2010-07-16 12:37 ` [PATCH 2/8] Basic zcache functionality Nitin Gupta
2010-07-18 8:14 ` Pekka Enberg
2010-07-18 9:45 ` Nitin Gupta
2010-07-18 8:27 ` Pekka Enberg
2010-07-18 8:44 ` Eric Dumazet
2010-07-18 9:51 ` Nitin Gupta
2010-07-16 12:37 ` [PATCH 3/8] Create sysfs nodes and export basic statistics Nitin Gupta
2010-07-16 12:37 ` Nitin Gupta [this message]
2010-07-20 23:03 ` [PATCH 4/8] Shrink zcache based on memlimit Minchan Kim
2010-07-21 4:52 ` Nitin Gupta
2010-07-21 11:32 ` Ed Tomlinson
2010-07-23 19:23 ` Nitin Gupta
2010-07-16 12:37 ` [PATCH 5/8] Eliminate zero-filled pages Nitin Gupta
2010-07-16 12:37 ` [PATCH 6/8] Compress pages using LZO Nitin Gupta
2010-07-16 12:37 ` [PATCH 7/8] Use xvmalloc to store compressed chunks Nitin Gupta
2010-07-18 7:53 ` Pekka Enberg
2010-07-18 8:21 ` Nitin Gupta
2010-07-19 4:36 ` Minchan Kim
2010-07-19 6:48 ` Nitin Gupta
2010-07-16 12:37 ` [PATCH 8/8] Document sysfs entries Nitin Gupta
2010-07-17 21:13 ` [PATCH 0/8] zcache: page cache compression support Ed Tomlinson
2010-07-18 2:23 ` Nitin Gupta
2010-07-18 7:50 ` Pekka Enberg
2010-07-18 8:12 ` Nitin Gupta
2010-07-19 19:57 ` Dan Magenheimer
2010-07-20 13:50 ` Nitin Gupta
2010-07-20 14:28 ` Dan Magenheimer
2010-07-21 4:27 ` Nitin Gupta
2010-07-21 17:37 ` Dan Magenheimer
2010-07-22 19:14 ` Greg KH
2010-07-22 19:54 ` Dan Magenheimer
2010-07-22 21:00 ` Greg KH
2011-01-10 13:16 ` Kirill A. Shutemov
2011-01-18 17:53 ` Dan Magenheimer
2011-01-20 12:33 ` Nitin Gupta
2011-01-20 12:47 ` Christoph Hellwig
2011-01-20 13:16 ` Pekka Enberg
2011-01-20 13:58 ` Nitin Gupta
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1279283870-18549-5-git-send-email-ngupta@vflare.org \
--to=ngupta@vflare.org \
--cc=akpm@linux-foundation.org \
--cc=avi@redhat.com \
--cc=dan.magenheimer@oracle.com \
--cc=greg@kroah.com \
--cc=hch@infradead.org \
--cc=hugh.dickins@tiscali.co.uk \
--cc=konrad.wilk@oracle.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=minchan.kim@gmail.com \
--cc=penberg@cs.helsinki.fi \
--cc=riel@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).