linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Nitin Gupta <ngupta@vflare.org>
To: Pekka Enberg <penberg@cs.helsinki.fi>,
	Hugh Dickins <hugh.dickins@tiscali.co.uk>,
	Andrew Morton <akpm@linux-foundation.org>,
	Greg KH <greg@kroah.com>,
	Dan Magenheimer <dan.magenheimer@oracle.com>,
	Rik van Riel <riel@redhat.com>, Avi Kivity <avi@redhat.com>,
	Christoph Hellwig <hch@infradead.org>,
	Minchan Kim <minchan.kim@gmail.com>,
	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: linux-mm <linux-mm@kvack.org>,
	linux-kernel <linux-kernel@vger.kernel.org>
Subject: [PATCH 4/8] Shrink zcache based on memlimit
Date: Fri, 16 Jul 2010 18:07:46 +0530	[thread overview]
Message-ID: <1279283870-18549-5-git-send-email-ngupta@vflare.org> (raw)
In-Reply-To: <1279283870-18549-1-git-send-email-ngupta@vflare.org>

The user can change the (per-pool) memlimit using the sysfs node:
/sys/kernel/mm/zcache/pool<id>/memlimit

When memlimit is set to a value smaller than the current
number of pages allocated for that pool, excess pages
are now freed immediately instead of waiting for a get
or flush of these pages.

Currently, victim page selection is essentially random.
Automatic cache resizing and better page replacement
policies will be implemented later.

Signed-off-by: Nitin Gupta <ngupta@vflare.org>
---
 drivers/staging/zram/zcache_drv.c |  115 ++++++++++++++++++++++++++++++++++---
 1 files changed, 106 insertions(+), 9 deletions(-)

diff --git a/drivers/staging/zram/zcache_drv.c b/drivers/staging/zram/zcache_drv.c
index f680f19..c5de65d 100644
--- a/drivers/staging/zram/zcache_drv.c
+++ b/drivers/staging/zram/zcache_drv.c
@@ -41,6 +41,7 @@
 #include <linux/kernel.h>
 #include <linux/cleancache.h>
 #include <linux/highmem.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/u64_stats_sync.h>
 
@@ -416,7 +417,8 @@ out:
  * Called under zcache_inode_rb->tree_lock
  */
 #define FREE_BATCH 16
-static void zcache_free_inode_pages(struct zcache_inode_rb *znode)
+static void zcache_free_inode_pages(struct zcache_inode_rb *znode,
+				u32 pages_to_free)
 {
 	int count;
 	unsigned long index = 0;
@@ -428,6 +430,8 @@ static void zcache_free_inode_pages(struct zcache_inode_rb *znode)
 
 		count = radix_tree_gang_lookup(&znode->page_tree,
 					(void **)pages, index, FREE_BATCH);
+		if (count > pages_to_free)
+			count = pages_to_free;
 
 		for (i = 0; i < count; i++) {
 			index = pages[i]->index;
@@ -437,7 +441,98 @@ static void zcache_free_inode_pages(struct zcache_inode_rb *znode)
 		}
 
 		index++;
-	} while (count == FREE_BATCH);
+		pages_to_free -= count;
+	} while (pages_to_free && (count == FREE_BATCH));
+}
+
+/*
+ * Returns number of pages stored in excess of currently
+ * set memlimit for the given pool.
+ */
+static u32 zcache_count_excess_pages(struct zcache_pool *zpool)
+{
+	u32 excess_pages, memlimit_pages, pages_stored;
+
+	memlimit_pages = zcache_get_memlimit(zpool) >> PAGE_SHIFT;
+	pages_stored = zcache_get_stat(zpool, ZPOOL_STAT_PAGES_STORED);
+	excess_pages = pages_stored > memlimit_pages ?
+			pages_stored - memlimit_pages : 0;
+
+	return excess_pages;
+}
+
+/*
+ * Free pages from this pool till we come within its memlimit.
+ *
+ * Currently, it is called only when the user sets memlimit lower than the
+ * number of pages currently stored in that pool. We select nodes in
+ * order of increasing inode number. This, in general, has no correlation
+ * with the order in which these are added. So, it is essentially random
+ * selection of nodes. Pages within a victim node are freed in order
+ * of increasing index number.
+ *
+ * Automatic cache resizing and better page replacement policies will
+ * be implemented later.
+ */
+static void zcache_shrink_pool(struct zcache_pool *zpool)
+{
+	struct rb_node *node;
+	struct zcache_inode_rb *znode;
+
+	read_lock(&zpool->tree_lock);
+	node = rb_first(&zpool->inode_tree);
+	if (unlikely(!node)) {
+		read_unlock(&zpool->tree_lock);
+		return;
+	}
+	znode = rb_entry(node, struct zcache_inode_rb, rb_node);
+	kref_get(&znode->refcount);
+	read_unlock(&zpool->tree_lock);
+
+	do {
+		u32 pages_to_free;
+		struct rb_node *next_node;
+		struct zcache_inode_rb *next_znode;
+
+		pages_to_free = zcache_count_excess_pages(zpool);
+		if (!pages_to_free) {
+			spin_lock(&znode->tree_lock);
+			if (zcache_inode_is_empty(znode))
+				zcache_inode_isolate(znode);
+			spin_unlock(&znode->tree_lock);
+
+			kref_put(&znode->refcount, zcache_inode_release);
+			break;
+		}
+
+		/*
+		 * Get the next victim node before we (possibly) isolate
+		 * the current node.
+		 */
+		read_lock(&zpool->tree_lock);
+		next_node = rb_next(node);
+		next_znode = NULL;
+		if (next_node) {
+			next_znode = rb_entry(next_node,
+				struct zcache_inode_rb, rb_node);
+			kref_get(&next_znode->refcount);
+		}
+		read_unlock(&zpool->tree_lock);
+
+		spin_lock(&znode->tree_lock);
+		zcache_free_inode_pages(znode, pages_to_free);
+		if (zcache_inode_is_empty(znode))
+			zcache_inode_isolate(znode);
+		spin_unlock(&znode->tree_lock);
+
+		kref_put(&znode->refcount, zcache_inode_release);
+
+		/* Avoid busy-looping */
+		cond_resched();
+
+		node = next_node;
+		znode = next_znode;
+	} while (znode);
 }
 
 #ifdef CONFIG_SYSFS
@@ -476,10 +571,13 @@ static void memlimit_sysfs_common(struct kobject *kobj, u64 *value, int store)
 {
 	struct zcache_pool *zpool = zcache_kobj_to_pool(kobj);
 
-	if (store)
+	if (store) {
 		zcache_set_memlimit(zpool, *value);
-	else
+		if (zcache_count_excess_pages(zpool))
+			zcache_shrink_pool(zpool);
+	} else {
 		*value = zcache_get_memlimit(zpool);
+	}
 }
 
 static ssize_t memlimit_store(struct kobject *kobj,
@@ -687,9 +785,8 @@ static void zcache_put_page(int pool_id, ino_t inode_no,
 	/*
 	 * memlimit can be changed any time by user using sysfs. If
 	 * it is set to a value smaller than current number of pages
-	 * stored, then excess pages are not freed immediately but
-	 * further puts are blocked till sufficient number of pages
-	 * are flushed/freed.
+	 * stored, then excess pages are freed synchronously when this
+	 * sysfs event occurs.
 	 */
 	if (zcache_get_stat(zpool, ZPOOL_STAT_PAGES_STORED) >
 			zcache_get_memlimit(zpool) >> PAGE_SHIFT) {
@@ -781,7 +878,7 @@ static void zcache_flush_inode(int pool_id, ino_t inode_no)
 		return;
 
 	spin_lock_irqsave(&znode->tree_lock, flags);
-	zcache_free_inode_pages(znode);
+	zcache_free_inode_pages(znode, UINT_MAX);
 	if (zcache_inode_is_empty(znode))
 		zcache_inode_isolate(znode);
 	spin_unlock_irqrestore(&znode->tree_lock, flags);
@@ -815,7 +912,7 @@ static void zcache_flush_fs(int pool_id)
 	while (node) {
 		znode = rb_entry(node, struct zcache_inode_rb, rb_node);
 		node = rb_next(node);
-		zcache_free_inode_pages(znode);
+		zcache_free_inode_pages(znode, UINT_MAX);
 		rb_erase(&znode->rb_node, &zpool->inode_tree);
 		kfree(znode);
 	}
-- 
1.7.1.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2010-07-16 12:37 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-07-16 12:37 [PATCH 0/8] zcache: page cache compression support Nitin Gupta
2010-07-16 12:37 ` [PATCH 1/8] Allow sharing xvmalloc for zram and zcache Nitin Gupta
2010-07-17 18:10   ` Rik van Riel
2010-07-16 12:37 ` [PATCH 2/8] Basic zcache functionality Nitin Gupta
2010-07-18  8:14   ` Pekka Enberg
2010-07-18  9:45     ` Nitin Gupta
2010-07-18  8:27   ` Pekka Enberg
2010-07-18  8:44   ` Eric Dumazet
2010-07-18  9:51     ` Nitin Gupta
2010-07-16 12:37 ` [PATCH 3/8] Create sysfs nodes and export basic statistics Nitin Gupta
2010-07-16 12:37 ` Nitin Gupta [this message]
2010-07-20 23:03   ` [PATCH 4/8] Shrink zcache based on memlimit Minchan Kim
2010-07-21  4:52     ` Nitin Gupta
2010-07-21 11:32       ` Ed Tomlinson
2010-07-23 19:23         ` Nitin Gupta
2010-07-16 12:37 ` [PATCH 5/8] Eliminate zero-filled pages Nitin Gupta
2010-07-16 12:37 ` [PATCH 6/8] Compress pages using LZO Nitin Gupta
2010-07-16 12:37 ` [PATCH 7/8] Use xvmalloc to store compressed chunks Nitin Gupta
2010-07-18  7:53   ` Pekka Enberg
2010-07-18  8:21     ` Nitin Gupta
2010-07-19  4:36       ` Minchan Kim
2010-07-19  6:48         ` Nitin Gupta
2010-07-16 12:37 ` [PATCH 8/8] Document sysfs entries Nitin Gupta
2010-07-17 21:13 ` [PATCH 0/8] zcache: page cache compression support Ed Tomlinson
2010-07-18  2:23   ` Nitin Gupta
2010-07-18  7:50 ` Pekka Enberg
2010-07-18  8:12   ` Nitin Gupta
2010-07-19 19:57 ` Dan Magenheimer
2010-07-20 13:50   ` Nitin Gupta
2010-07-20 14:28     ` Dan Magenheimer
2010-07-21  4:27       ` Nitin Gupta
2010-07-21 17:37         ` Dan Magenheimer
2010-07-22 19:14 ` Greg KH
2010-07-22 19:54   ` Dan Magenheimer
2010-07-22 21:00     ` Greg KH
2011-01-10 13:16 ` Kirill A. Shutemov
2011-01-18 17:53   ` Dan Magenheimer
2011-01-20 12:33     ` Nitin Gupta
2011-01-20 12:47       ` Christoph Hellwig
2011-01-20 13:16         ` Pekka Enberg
2011-01-20 13:58           ` Nitin Gupta

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1279283870-18549-5-git-send-email-ngupta@vflare.org \
    --to=ngupta@vflare.org \
    --cc=akpm@linux-foundation.org \
    --cc=avi@redhat.com \
    --cc=dan.magenheimer@oracle.com \
    --cc=greg@kroah.com \
    --cc=hch@infradead.org \
    --cc=hugh.dickins@tiscali.co.uk \
    --cc=konrad.wilk@oracle.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=minchan.kim@gmail.com \
    --cc=penberg@cs.helsinki.fi \
    --cc=riel@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).