All of lore.kernel.org
 help / color / mirror / Atom feed
From: Seth Jennings <sjenning@linux.vnet.ibm.com>
To: Dan Magenheimer <dan.magenheimer@oracle.com>,
	minchan@kernel.org, Nitin Gupta <nitingupta910@gmail.com>,
	Konrad Wilk <konrad.wilk@oracle.com>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	Bob Liu <lliubbo@gmail.com>,
	Luigi Semenzato <semenzato@google.com>,
	Mel Gorman <mgorman@suse.de>
Subject: Re: zsmalloc limitations and related topics
Date: Wed, 13 Mar 2013 10:33:57 -0500	[thread overview]
Message-ID: <51409C65.1040207@linux.vnet.ibm.com> (raw)
In-Reply-To: <20130313151359.GA3130@linux.vnet.ibm.com>

The periodic writeback that Rob mentions would go something like this
for zswap:

---
 mm/filemap.c |    3 +--
 mm/zswap.c   |   63 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 59 insertions(+), 7 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 83efee7..fe63e95 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -735,12 +735,11 @@ repeat:
 	if (page && !radix_tree_exception(page)) {
 		lock_page(page);
 		/* Has the page been truncated? */
-		if (unlikely(page->mapping != mapping)) {
+		if (unlikely(page_mapping(page) != mapping)) {
 			unlock_page(page);
 			page_cache_release(page);
 			goto repeat;
 		}
-		VM_BUG_ON(page->index != offset);
 	}
 	return page;
 }
diff --git a/mm/zswap.c b/mm/zswap.c
index 82b8d59..0b2351e 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -42,6 +42,9 @@
 #include <linux/writeback.h>
 #include <linux/pagemap.h>
 
+#include <linux/workqueue.h>
+#include <linux/time.h>
+
 /*********************************
 * statistics
 **********************************/
@@ -102,6 +105,23 @@ module_param_named(max_compression_ratio,
 */
 #define ZSWAP_MAX_OUTSTANDING_FLUSHES 64
 
+/*
+ * The amount of time in seconds after which zswap is considered "idle" and
+ * periodic writeback begins
+ */
+static int zswap_pwb_idle_secs = 30;
+
+/*
+ * The delay in seconds between iterations of periodic writeback
+ */
+static unsigned long zswap_pwb_delay_secs = 1;
+
+/*
+ * The number of pages to attempt to writeback on each iteration of the periodic
+ * writeback thread
+ */
+static int zswap_pwb_writeback_pages = 32;
+
 /*********************************
 * compression functions
 **********************************/
@@ -199,6 +219,7 @@ struct zswap_entry {
  * The tree lock in the zswap_tree struct protects a few things:
  * - the rbtree
  * - the lru list
+ * - starting/modifying the pwb_work timer
  * - the refcount field of each entry in the tree
  */
 struct zswap_tree {
@@ -207,6 +228,7 @@ struct zswap_tree {
 	spinlock_t lock;
 	struct zs_pool *pool;
 	unsigned type;
+	struct delayed_work pwb_work;
 };
 
 static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
@@ -492,7 +514,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
 		 * called after lookup_swap_cache() failed, re-calling
 		 * that would confuse statistics.
 		 */
-		found_page = find_get_page(&swapper_space, entry.val);
+		found_page = find_lock_page(&swapper_space, entry.val);
 		if (found_page)
 			break;
 
@@ -588,9 +610,8 @@ static int zswap_writeback_entry(struct zswap_tree *tree, struct zswap_entry *en
 		break; /* not reached */
 
 	case ZSWAP_SWAPCACHE_EXIST: /* page is unlocked */
-		/* page is already in the swap cache, ignore for now */
-		return -EEXIST;
-		break; /* not reached */
+		/* page is already in the swap cache, no need to decompress */
+		break;
 
 	case ZSWAP_SWAPCACHE_NEW: /* page is locked */
 		/* decompress */
@@ -698,6 +719,26 @@ static int zswap_writeback_entries(struct zswap_tree *tree, int nr)
 	return freed_nr++;
 }
 
+/*********************************
+* periodic writeback (pwb)
+**********************************/
+void zswap_pwb_work(struct work_struct *work)
+{
+	struct delayed_work *dwork;
+	struct zswap_tree *tree;
+
+	dwork  = to_delayed_work(work);
+	tree = container_of(dwork, struct zswap_tree, pwb_work);
+
+	zswap_writeback_entries(tree, zswap_pwb_writeback_pages);
+
+	spin_lock(&tree->lock);
+	if (!list_empty(&tree->lru))
+		schedule_delayed_work(&tree->pwb_work,
+			msecs_to_jiffies(MSEC_PER_SEC * zswap_pwb_delay_secs));
+	spin_unlock(&tree->lock);
+}
+
 /*******************************************
 * page pool for temporary compression result
 ********************************************/
@@ -854,8 +895,18 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 	entry->handle = handle;
 	entry->length = dlen;
 
-	/* map */
 	spin_lock(&tree->lock);
+
+	if (RB_EMPTY_ROOT(&tree->rbroot))
+		/* schedule delayed periodic writeback work */
+		schedule_delayed_work(&tree->pwb_work,
+			msecs_to_jiffies(MSEC_PER_SEC * zswap_pwb_idle_secs));
+	else
+		/* update delay on already scheduled delayed work */
+		mod_delayed_work(system_wq, &tree->pwb_work,
+			msecs_to_jiffies(MSEC_PER_SEC * zswap_pwb_idle_secs));
+
+	/* map */
 	do {
 		ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
 		if (ret == -EEXIST) {
@@ -1001,6 +1052,7 @@ static void zswap_frontswap_invalidate_area(unsigned type)
 	 * If post-order traversal code is ever added to the rbtree
 	 * implementation, it should be used here.
 	 */
+	cancel_delayed_work_sync(&tree->pwb_work);
 	while ((node = rb_first(&tree->rbroot))) {
 		entry = rb_entry(node, struct zswap_entry, rbnode);
 		rb_erase(&entry->rbnode, &tree->rbroot);
@@ -1027,6 +1079,7 @@ static void zswap_frontswap_init(unsigned type)
 	INIT_LIST_HEAD(&tree->lru);
 	spin_lock_init(&tree->lock);
 	tree->type = type;
+	INIT_DELAYED_WORK(&tree->pwb_work, zswap_pwb_work);
 	zswap_trees[type] = tree;
 	return;
 
-- 
1.7.9.5

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

WARNING: multiple messages have this Message-ID (diff)
From: Seth Jennings <sjenning@linux.vnet.ibm.com>
To: Dan Magenheimer <dan.magenheimer@oracle.com>,
	minchan@kernel.org, Nitin Gupta <nitingupta910@gmail.com>,
	Konrad Wilk <konrad.wilk@oracle.com>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	Bob Liu <lliubbo@gmail.com>,
	Luigi Semenzato <semenzato@google.com>,
	Mel Gorman <mgorman@suse.de>
Subject: Re: zsmalloc limitations and related topics
Date: Wed, 13 Mar 2013 10:33:57 -0500	[thread overview]
Message-ID: <51409C65.1040207@linux.vnet.ibm.com> (raw)
In-Reply-To: <20130313151359.GA3130@linux.vnet.ibm.com>

The periodic writeback that Rob mentions would go something like this
for zswap:

---
 mm/filemap.c |    3 +--
 mm/zswap.c   |   63 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 59 insertions(+), 7 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 83efee7..fe63e95 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -735,12 +735,11 @@ repeat:
 	if (page && !radix_tree_exception(page)) {
 		lock_page(page);
 		/* Has the page been truncated? */
-		if (unlikely(page->mapping != mapping)) {
+		if (unlikely(page_mapping(page) != mapping)) {
 			unlock_page(page);
 			page_cache_release(page);
 			goto repeat;
 		}
-		VM_BUG_ON(page->index != offset);
 	}
 	return page;
 }
diff --git a/mm/zswap.c b/mm/zswap.c
index 82b8d59..0b2351e 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -42,6 +42,9 @@
 #include <linux/writeback.h>
 #include <linux/pagemap.h>
 
+#include <linux/workqueue.h>
+#include <linux/time.h>
+
 /*********************************
 * statistics
 **********************************/
@@ -102,6 +105,23 @@ module_param_named(max_compression_ratio,
 */
 #define ZSWAP_MAX_OUTSTANDING_FLUSHES 64
 
+/*
+ * The amount of time in seconds after which zswap is considered "idle" and
+ * periodic writeback begins
+ */
+static int zswap_pwb_idle_secs = 30;
+
+/*
+ * The delay in seconds between iterations of periodic writeback
+ */
+static unsigned long zswap_pwb_delay_secs = 1;
+
+/*
+ * The number of pages to attempt to writeback on each iteration of the periodic
+ * writeback thread
+ */
+static int zswap_pwb_writeback_pages = 32;
+
 /*********************************
 * compression functions
 **********************************/
@@ -199,6 +219,7 @@ struct zswap_entry {
  * The tree lock in the zswap_tree struct protects a few things:
  * - the rbtree
  * - the lru list
+ * - starting/modifying the pwb_work timer
  * - the refcount field of each entry in the tree
  */
 struct zswap_tree {
@@ -207,6 +228,7 @@ struct zswap_tree {
 	spinlock_t lock;
 	struct zs_pool *pool;
 	unsigned type;
+	struct delayed_work pwb_work;
 };
 
 static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
@@ -492,7 +514,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
 		 * called after lookup_swap_cache() failed, re-calling
 		 * that would confuse statistics.
 		 */
-		found_page = find_get_page(&swapper_space, entry.val);
+		found_page = find_lock_page(&swapper_space, entry.val);
 		if (found_page)
 			break;
 
@@ -588,9 +610,8 @@ static int zswap_writeback_entry(struct zswap_tree *tree, struct zswap_entry *en
 		break; /* not reached */
 
 	case ZSWAP_SWAPCACHE_EXIST: /* page is unlocked */
-		/* page is already in the swap cache, ignore for now */
-		return -EEXIST;
-		break; /* not reached */
+		/* page is already in the swap cache, no need to decompress */
+		break;
 
 	case ZSWAP_SWAPCACHE_NEW: /* page is locked */
 		/* decompress */
@@ -698,6 +719,26 @@ static int zswap_writeback_entries(struct zswap_tree *tree, int nr)
 	return freed_nr++;
 }
 
+/*********************************
+* periodic writeback (pwb)
+**********************************/
+void zswap_pwb_work(struct work_struct *work)
+{
+	struct delayed_work *dwork;
+	struct zswap_tree *tree;
+
+	dwork  = to_delayed_work(work);
+	tree = container_of(dwork, struct zswap_tree, pwb_work);
+
+	zswap_writeback_entries(tree, zswap_pwb_writeback_pages);
+
+	spin_lock(&tree->lock);
+	if (!list_empty(&tree->lru))
+		schedule_delayed_work(&tree->pwb_work,
+			msecs_to_jiffies(MSEC_PER_SEC * zswap_pwb_delay_secs));
+	spin_unlock(&tree->lock);
+}
+
 /*******************************************
 * page pool for temporary compression result
 ********************************************/
@@ -854,8 +895,18 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 	entry->handle = handle;
 	entry->length = dlen;
 
-	/* map */
 	spin_lock(&tree->lock);
+
+	if (RB_EMPTY_ROOT(&tree->rbroot))
+		/* schedule delayed periodic writeback work */
+		schedule_delayed_work(&tree->pwb_work,
+			msecs_to_jiffies(MSEC_PER_SEC * zswap_pwb_idle_secs));
+	else
+		/* update delay on already scheduled delayed work */
+		mod_delayed_work(system_wq, &tree->pwb_work,
+			msecs_to_jiffies(MSEC_PER_SEC * zswap_pwb_idle_secs));
+
+	/* map */
 	do {
 		ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
 		if (ret == -EEXIST) {
@@ -1001,6 +1052,7 @@ static void zswap_frontswap_invalidate_area(unsigned type)
 	 * If post-order traversal code is ever added to the rbtree
 	 * implementation, it should be used here.
 	 */
+	cancel_delayed_work_sync(&tree->pwb_work);
 	while ((node = rb_first(&tree->rbroot))) {
 		entry = rb_entry(node, struct zswap_entry, rbnode);
 		rb_erase(&entry->rbnode, &tree->rbroot);
@@ -1027,6 +1079,7 @@ static void zswap_frontswap_init(unsigned type)
 	INIT_LIST_HEAD(&tree->lru);
 	spin_lock_init(&tree->lock);
 	tree->type = type;
+	INIT_DELAYED_WORK(&tree->pwb_work, zswap_pwb_work);
 	zswap_trees[type] = tree;
 	return;
 
-- 
1.7.9.5


  reply	other threads:[~2013-03-13 15:34 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-02-27 23:24 zsmalloc limitations and related topics Dan Magenheimer
2013-02-27 23:24 ` Dan Magenheimer
2013-02-28 22:00 ` Dan Magenheimer
2013-02-28 22:00   ` Dan Magenheimer
2013-03-01  1:40 ` Ric Mason
2013-03-01  1:40   ` Ric Mason
2013-03-04 18:29   ` Dan Magenheimer
2013-03-04 18:29     ` Dan Magenheimer
2013-03-13 15:14 ` Robert Jennings
2013-03-13 15:14   ` Robert Jennings
2013-03-13 15:33   ` Seth Jennings [this message]
2013-03-13 15:33     ` Seth Jennings
2013-03-13 15:56     ` Seth Jennings
2013-03-13 15:56       ` Seth Jennings
2013-03-13 20:02   ` Dan Magenheimer
2013-03-13 20:02     ` Dan Magenheimer
2013-03-13 22:59     ` Seth Jennings
2013-03-13 22:59       ` Seth Jennings
2013-03-14 12:02       ` Bob
2013-03-14 12:02         ` Bob
2013-03-14 13:20         ` Robert Jennings
2013-03-14 13:20           ` Robert Jennings
2013-03-14 18:54           ` Dan Magenheimer
2013-03-14 18:54             ` Dan Magenheimer
2013-03-15 16:14             ` Seth Jennings
2013-03-15 16:14               ` Seth Jennings
2013-03-15 16:54               ` Dan Magenheimer
2013-03-15 16:54                 ` Dan Magenheimer
2013-03-15 16:18             ` Seth Jennings
2013-03-15 16:18               ` Seth Jennings
2013-03-14 17:39       ` Dan Magenheimer
2013-03-14 17:39         ` Dan Magenheimer
2013-03-14 19:16     ` Dan Magenheimer
2013-03-14 19:16       ` Dan Magenheimer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=51409C65.1040207@linux.vnet.ibm.com \
    --to=sjenning@linux.vnet.ibm.com \
    --cc=dan.magenheimer@oracle.com \
    --cc=konrad.wilk@oracle.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lliubbo@gmail.com \
    --cc=mgorman@suse.de \
    --cc=minchan@kernel.org \
    --cc=nitingupta910@gmail.com \
    --cc=semenzato@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.