All of lore.kernel.org
 help / color / mirror / Atom feed
diff for duplicates of <20140123030242.GA28732@bbox>

diff --git a/a/1.txt b/N1/1.txt
index 3c5f43d..8c71760 100644
--- a/a/1.txt
+++ b/N1/1.txt
@@ -205,3 +205,230 @@ and like the Weijie's suggestion.
 So, how about this?
 I didn't look at code in detail and want to show the concept.
 That's why I added RFC tag.
+
+>From 67c64746e977a091ee30ca37bbc034990adf5ca5 Mon Sep 17 00:00:00 2001
+From: Minchan Kim <minchan@kernel.org>
+Date: Thu, 23 Jan 2014 11:41:44 +0900
+Subject: [RFC] zswap: support multiple swap
+
+Cai Liu reporeted that now zbud pool pages counting has a problem
+when multiple swap is used because it just counts one of swap
+among mutliple swap intead of all of swap so zswap cannot control
+writeback properly. The result is unnecessary writeback or
+no writeback when we should really writeback. IOW, it made zswap
+crazy.
+
+Another problem in zswap is following as.
+For example, let's assume we use two swap A and B with different
+priority and A already has charged 19% long time ago and let's assume
+that A swap is full now so VM start to use B so that B has charged 1%
+recently. It menas zswap charged (19% + 1%) is full by default.
+Then, if VM want to swap out more pages into B, zbud_reclaim_page
+would be evict one of pages in B's pool and it would be repeated
+continuously. It's totally LRU reverse problem and swap thrashing
+in B would happen.
+
+This patch makes zswap consider mutliple swap by creating *a* zbud
+pool which will be shared by multiple swap so all of zswap pages
+in multiple swap keep order by LRU so it can prevent above two
+problems.
+
+Reported-by: Cai Liu <cai.liu@samsung.com>
+Suggested-by: Weijie Yang <weijie.yang.kh@gmail.com>
+Signed-off-by: Minchan Kim <minchan@kernel.org>
+---
+ mm/zswap.c | 56 +++++++++++++++++++++++++++++---------------------------
+ 1 file changed, 29 insertions(+), 27 deletions(-)
+
+diff --git a/mm/zswap.c b/mm/zswap.c
+index 5a63f78a5601..96039e86db79 100644
+--- a/mm/zswap.c
++++ b/mm/zswap.c
+@@ -89,6 +89,8 @@ static unsigned int zswap_max_pool_percent = 20;
+ module_param_named(max_pool_percent,
+ 			zswap_max_pool_percent, uint, 0644);
+ 
++static struct zbud_pool *mem_pool;
++
+ /*********************************
+ * compression functions
+ **********************************/
+@@ -189,7 +191,6 @@ struct zswap_header {
+ struct zswap_tree {
+ 	struct rb_root rbroot;
+ 	spinlock_t lock;
+-	struct zbud_pool *pool;
+ };
+ 
+ static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
+@@ -288,10 +289,10 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
+ static void zswap_free_entry(struct zswap_tree *tree,
+ 			struct zswap_entry *entry)
+ {
+-	zbud_free(tree->pool, entry->handle);
++	zbud_free(mem_pool, entry->handle);
+ 	zswap_entry_cache_free(entry);
+ 	atomic_dec(&zswap_stored_pages);
+-	zswap_pool_pages = zbud_get_pool_size(tree->pool);
++	zswap_pool_pages = zbud_get_pool_size(mem_pool);
+ }
+ 
+ /* caller must hold the tree lock */
+@@ -545,7 +546,7 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
+ 	zbud_unmap(pool, handle);
+ 	tree = zswap_trees[swp_type(swpentry)];
+ 	offset = swp_offset(swpentry);
+-	BUG_ON(pool != tree->pool);
++	BUG_ON(pool != mem_pool);
+ 
+ 	/* find and ref zswap entry */
+ 	spin_lock(&tree->lock);
+@@ -573,13 +574,13 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
+ 	case ZSWAP_SWAPCACHE_NEW: /* page is locked */
+ 		/* decompress */
+ 		dlen = PAGE_SIZE;
+-		src = (u8 *)zbud_map(tree->pool, entry->handle) +
++		src = (u8 *)zbud_map(mem_pool, entry->handle) +
+ 			sizeof(struct zswap_header);
+ 		dst = kmap_atomic(page);
+ 		ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
+ 				entry->length, dst, &dlen);
+ 		kunmap_atomic(dst);
+-		zbud_unmap(tree->pool, entry->handle);
++		zbud_unmap(mem_pool, entry->handle);
+ 		BUG_ON(ret);
+ 		BUG_ON(dlen != PAGE_SIZE);
+ 
+@@ -652,7 +653,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
+ 	/* reclaim space if needed */
+ 	if (zswap_is_full()) {
+ 		zswap_pool_limit_hit++;
+-		if (zbud_reclaim_page(tree->pool, 8)) {
++		if (zbud_reclaim_page(mem_pool, 8)) {
+ 			zswap_reject_reclaim_fail++;
+ 			ret = -ENOMEM;
+ 			goto reject;
+@@ -679,7 +680,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
+ 
+ 	/* store */
+ 	len = dlen + sizeof(struct zswap_header);
+-	ret = zbud_alloc(tree->pool, len, __GFP_NORETRY | __GFP_NOWARN,
++	ret = zbud_alloc(mem_pool, len, __GFP_NORETRY | __GFP_NOWARN,
+ 		&handle);
+ 	if (ret == -ENOSPC) {
+ 		zswap_reject_compress_poor++;
+@@ -689,11 +690,11 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
+ 		zswap_reject_alloc_fail++;
+ 		goto freepage;
+ 	}
+-	zhdr = zbud_map(tree->pool, handle);
++	zhdr = zbud_map(mem_pool, handle);
+ 	zhdr->swpentry = swp_entry(type, offset);
+ 	buf = (u8 *)(zhdr + 1);
+ 	memcpy(buf, dst, dlen);
+-	zbud_unmap(tree->pool, handle);
++	zbud_unmap(mem_pool, handle);
+ 	put_cpu_var(zswap_dstmem);
+ 
+ 	/* populate entry */
+@@ -716,7 +717,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
+ 
+ 	/* update stats */
+ 	atomic_inc(&zswap_stored_pages);
+-	zswap_pool_pages = zbud_get_pool_size(tree->pool);
++	zswap_pool_pages = zbud_get_pool_size(mem_pool);
+ 
+ 	return 0;
+ 
+@@ -752,13 +753,13 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
+ 
+ 	/* decompress */
+ 	dlen = PAGE_SIZE;
+-	src = (u8 *)zbud_map(tree->pool, entry->handle) +
++	src = (u8 *)zbud_map(mem_pool, entry->handle) +
+ 			sizeof(struct zswap_header);
+ 	dst = kmap_atomic(page);
+ 	ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
+ 		dst, &dlen);
+ 	kunmap_atomic(dst);
+-	zbud_unmap(tree->pool, entry->handle);
++	zbud_unmap(mem_pool, entry->handle);
+ 	BUG_ON(ret);
+ 
+ 	spin_lock(&tree->lock);
+@@ -807,8 +808,6 @@ static void zswap_frontswap_invalidate_area(unsigned type)
+ 		zswap_free_entry(tree, entry);
+ 	tree->rbroot = RB_ROOT;
+ 	spin_unlock(&tree->lock);
+-
+-	zbud_destroy_pool(tree->pool);
+ 	kfree(tree);
+ 	zswap_trees[type] = NULL;
+ }
+@@ -822,20 +821,14 @@ static void zswap_frontswap_init(unsigned type)
+ 	struct zswap_tree *tree;
+ 
+ 	tree = kzalloc(sizeof(struct zswap_tree), GFP_KERNEL);
+-	if (!tree)
+-		goto err;
+-	tree->pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);
+-	if (!tree->pool)
+-		goto freetree;
++	if (!tree) {
++		pr_err("alloc failed, zswap disabled for swap type %d\n", type);
++		return;
++	}
++
+ 	tree->rbroot = RB_ROOT;
+ 	spin_lock_init(&tree->lock);
+ 	zswap_trees[type] = tree;
+-	return;
+-
+-freetree:
+-	kfree(tree);
+-err:
+-	pr_err("alloc failed, zswap disabled for swap type %d\n", type);
+ }
+ 
+ static struct frontswap_ops zswap_frontswap_ops = {
+@@ -907,9 +900,14 @@ static int __init init_zswap(void)
+ 		return 0;
+ 
+ 	pr_info("loading zswap\n");
++
++	mem_pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);
++	if (!mem_pool)
++		goto error;
++
+ 	if (zswap_entry_cache_create()) {
+ 		pr_err("entry cache creation failed\n");
+-		goto error;
++		goto cachefail;
+ 	}
+ 	if (zswap_comp_init()) {
+ 		pr_err("compressor initialization failed\n");
+@@ -919,6 +917,8 @@ static int __init init_zswap(void)
+ 		pr_err("per-cpu initialization failed\n");
+ 		goto pcpufail;
+ 	}
++
++
+ 	frontswap_register_ops(&zswap_frontswap_ops);
+ 	if (zswap_debugfs_init())
+ 		pr_warn("debugfs initialization failed\n");
+@@ -927,6 +927,8 @@ pcpufail:
+ 	zswap_comp_exit();
+ compfail:
+ 	zswap_entry_cache_destory();
++cachefail:
++	zbud_destroy_pool(mem_pool);
+ error:
+ 	return -ENOMEM;
+ }
+-- 
+1.8.5.2
+
+
+-- 
+Kind regards,
+Minchan Kim
diff --git a/a/content_digest b/N1/content_digest
index cf48532..0268da4 100644
--- a/a/content_digest
+++ b/N1/content_digest
@@ -227,6 +227,233 @@
  "\n"
  "So, how about this?\n"
  "I didn't look at code in detail and want to show the concept.\n"
- That's why I added RFC tag.
+ "That's why I added RFC tag.\n"
+ "\n"
+ ">From 67c64746e977a091ee30ca37bbc034990adf5ca5 Mon Sep 17 00:00:00 2001\n"
+ "From: Minchan Kim <minchan@kernel.org>\n"
+ "Date: Thu, 23 Jan 2014 11:41:44 +0900\n"
+ "Subject: [RFC] zswap: support multiple swap\n"
+ "\n"
+ "Cai Liu reporeted that now zbud pool pages counting has a problem\n"
+ "when multiple swap is used because it just counts one of swap\n"
+ "among mutliple swap intead of all of swap so zswap cannot control\n"
+ "writeback properly. The result is unnecessary writeback or\n"
+ "no writeback when we should really writeback. IOW, it made zswap\n"
+ "crazy.\n"
+ "\n"
+ "Another problem in zswap is following as.\n"
+ "For example, let's assume we use two swap A and B with different\n"
+ "priority and A already has charged 19% long time ago and let's assume\n"
+ "that A swap is full now so VM start to use B so that B has charged 1%\n"
+ "recently. It menas zswap charged (19% + 1%) is full by default.\n"
+ "Then, if VM want to swap out more pages into B, zbud_reclaim_page\n"
+ "would be evict one of pages in B's pool and it would be repeated\n"
+ "continuously. It's totally LRU reverse problem and swap thrashing\n"
+ "in B would happen.\n"
+ "\n"
+ "This patch makes zswap consider mutliple swap by creating *a* zbud\n"
+ "pool which will be shared by multiple swap so all of zswap pages\n"
+ "in multiple swap keep order by LRU so it can prevent above two\n"
+ "problems.\n"
+ "\n"
+ "Reported-by: Cai Liu <cai.liu@samsung.com>\n"
+ "Suggested-by: Weijie Yang <weijie.yang.kh@gmail.com>\n"
+ "Signed-off-by: Minchan Kim <minchan@kernel.org>\n"
+ "---\n"
+ " mm/zswap.c | 56 +++++++++++++++++++++++++++++---------------------------\n"
+ " 1 file changed, 29 insertions(+), 27 deletions(-)\n"
+ "\n"
+ "diff --git a/mm/zswap.c b/mm/zswap.c\n"
+ "index 5a63f78a5601..96039e86db79 100644\n"
+ "--- a/mm/zswap.c\n"
+ "+++ b/mm/zswap.c\n"
+ "@@ -89,6 +89,8 @@ static unsigned int zswap_max_pool_percent = 20;\n"
+ " module_param_named(max_pool_percent,\n"
+ " \t\t\tzswap_max_pool_percent, uint, 0644);\n"
+ " \n"
+ "+static struct zbud_pool *mem_pool;\n"
+ "+\n"
+ " /*********************************\n"
+ " * compression functions\n"
+ " **********************************/\n"
+ "@@ -189,7 +191,6 @@ struct zswap_header {\n"
+ " struct zswap_tree {\n"
+ " \tstruct rb_root rbroot;\n"
+ " \tspinlock_t lock;\n"
+ "-\tstruct zbud_pool *pool;\n"
+ " };\n"
+ " \n"
+ " static struct zswap_tree *zswap_trees[MAX_SWAPFILES];\n"
+ "@@ -288,10 +289,10 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)\n"
+ " static void zswap_free_entry(struct zswap_tree *tree,\n"
+ " \t\t\tstruct zswap_entry *entry)\n"
+ " {\n"
+ "-\tzbud_free(tree->pool, entry->handle);\n"
+ "+\tzbud_free(mem_pool, entry->handle);\n"
+ " \tzswap_entry_cache_free(entry);\n"
+ " \tatomic_dec(&zswap_stored_pages);\n"
+ "-\tzswap_pool_pages = zbud_get_pool_size(tree->pool);\n"
+ "+\tzswap_pool_pages = zbud_get_pool_size(mem_pool);\n"
+ " }\n"
+ " \n"
+ " /* caller must hold the tree lock */\n"
+ "@@ -545,7 +546,7 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)\n"
+ " \tzbud_unmap(pool, handle);\n"
+ " \ttree = zswap_trees[swp_type(swpentry)];\n"
+ " \toffset = swp_offset(swpentry);\n"
+ "-\tBUG_ON(pool != tree->pool);\n"
+ "+\tBUG_ON(pool != mem_pool);\n"
+ " \n"
+ " \t/* find and ref zswap entry */\n"
+ " \tspin_lock(&tree->lock);\n"
+ "@@ -573,13 +574,13 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)\n"
+ " \tcase ZSWAP_SWAPCACHE_NEW: /* page is locked */\n"
+ " \t\t/* decompress */\n"
+ " \t\tdlen = PAGE_SIZE;\n"
+ "-\t\tsrc = (u8 *)zbud_map(tree->pool, entry->handle) +\n"
+ "+\t\tsrc = (u8 *)zbud_map(mem_pool, entry->handle) +\n"
+ " \t\t\tsizeof(struct zswap_header);\n"
+ " \t\tdst = kmap_atomic(page);\n"
+ " \t\tret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,\n"
+ " \t\t\t\tentry->length, dst, &dlen);\n"
+ " \t\tkunmap_atomic(dst);\n"
+ "-\t\tzbud_unmap(tree->pool, entry->handle);\n"
+ "+\t\tzbud_unmap(mem_pool, entry->handle);\n"
+ " \t\tBUG_ON(ret);\n"
+ " \t\tBUG_ON(dlen != PAGE_SIZE);\n"
+ " \n"
+ "@@ -652,7 +653,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,\n"
+ " \t/* reclaim space if needed */\n"
+ " \tif (zswap_is_full()) {\n"
+ " \t\tzswap_pool_limit_hit++;\n"
+ "-\t\tif (zbud_reclaim_page(tree->pool, 8)) {\n"
+ "+\t\tif (zbud_reclaim_page(mem_pool, 8)) {\n"
+ " \t\t\tzswap_reject_reclaim_fail++;\n"
+ " \t\t\tret = -ENOMEM;\n"
+ " \t\t\tgoto reject;\n"
+ "@@ -679,7 +680,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,\n"
+ " \n"
+ " \t/* store */\n"
+ " \tlen = dlen + sizeof(struct zswap_header);\n"
+ "-\tret = zbud_alloc(tree->pool, len, __GFP_NORETRY | __GFP_NOWARN,\n"
+ "+\tret = zbud_alloc(mem_pool, len, __GFP_NORETRY | __GFP_NOWARN,\n"
+ " \t\t&handle);\n"
+ " \tif (ret == -ENOSPC) {\n"
+ " \t\tzswap_reject_compress_poor++;\n"
+ "@@ -689,11 +690,11 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,\n"
+ " \t\tzswap_reject_alloc_fail++;\n"
+ " \t\tgoto freepage;\n"
+ " \t}\n"
+ "-\tzhdr = zbud_map(tree->pool, handle);\n"
+ "+\tzhdr = zbud_map(mem_pool, handle);\n"
+ " \tzhdr->swpentry = swp_entry(type, offset);\n"
+ " \tbuf = (u8 *)(zhdr + 1);\n"
+ " \tmemcpy(buf, dst, dlen);\n"
+ "-\tzbud_unmap(tree->pool, handle);\n"
+ "+\tzbud_unmap(mem_pool, handle);\n"
+ " \tput_cpu_var(zswap_dstmem);\n"
+ " \n"
+ " \t/* populate entry */\n"
+ "@@ -716,7 +717,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,\n"
+ " \n"
+ " \t/* update stats */\n"
+ " \tatomic_inc(&zswap_stored_pages);\n"
+ "-\tzswap_pool_pages = zbud_get_pool_size(tree->pool);\n"
+ "+\tzswap_pool_pages = zbud_get_pool_size(mem_pool);\n"
+ " \n"
+ " \treturn 0;\n"
+ " \n"
+ "@@ -752,13 +753,13 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,\n"
+ " \n"
+ " \t/* decompress */\n"
+ " \tdlen = PAGE_SIZE;\n"
+ "-\tsrc = (u8 *)zbud_map(tree->pool, entry->handle) +\n"
+ "+\tsrc = (u8 *)zbud_map(mem_pool, entry->handle) +\n"
+ " \t\t\tsizeof(struct zswap_header);\n"
+ " \tdst = kmap_atomic(page);\n"
+ " \tret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,\n"
+ " \t\tdst, &dlen);\n"
+ " \tkunmap_atomic(dst);\n"
+ "-\tzbud_unmap(tree->pool, entry->handle);\n"
+ "+\tzbud_unmap(mem_pool, entry->handle);\n"
+ " \tBUG_ON(ret);\n"
+ " \n"
+ " \tspin_lock(&tree->lock);\n"
+ "@@ -807,8 +808,6 @@ static void zswap_frontswap_invalidate_area(unsigned type)\n"
+ " \t\tzswap_free_entry(tree, entry);\n"
+ " \ttree->rbroot = RB_ROOT;\n"
+ " \tspin_unlock(&tree->lock);\n"
+ "-\n"
+ "-\tzbud_destroy_pool(tree->pool);\n"
+ " \tkfree(tree);\n"
+ " \tzswap_trees[type] = NULL;\n"
+ " }\n"
+ "@@ -822,20 +821,14 @@ static void zswap_frontswap_init(unsigned type)\n"
+ " \tstruct zswap_tree *tree;\n"
+ " \n"
+ " \ttree = kzalloc(sizeof(struct zswap_tree), GFP_KERNEL);\n"
+ "-\tif (!tree)\n"
+ "-\t\tgoto err;\n"
+ "-\ttree->pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);\n"
+ "-\tif (!tree->pool)\n"
+ "-\t\tgoto freetree;\n"
+ "+\tif (!tree) {\n"
+ "+\t\tpr_err(\"alloc failed, zswap disabled for swap type %d\\n\", type);\n"
+ "+\t\treturn;\n"
+ "+\t}\n"
+ "+\n"
+ " \ttree->rbroot = RB_ROOT;\n"
+ " \tspin_lock_init(&tree->lock);\n"
+ " \tzswap_trees[type] = tree;\n"
+ "-\treturn;\n"
+ "-\n"
+ "-freetree:\n"
+ "-\tkfree(tree);\n"
+ "-err:\n"
+ "-\tpr_err(\"alloc failed, zswap disabled for swap type %d\\n\", type);\n"
+ " }\n"
+ " \n"
+ " static struct frontswap_ops zswap_frontswap_ops = {\n"
+ "@@ -907,9 +900,14 @@ static int __init init_zswap(void)\n"
+ " \t\treturn 0;\n"
+ " \n"
+ " \tpr_info(\"loading zswap\\n\");\n"
+ "+\n"
+ "+\tmem_pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);\n"
+ "+\tif (!mem_pool)\n"
+ "+\t\tgoto error;\n"
+ "+\n"
+ " \tif (zswap_entry_cache_create()) {\n"
+ " \t\tpr_err(\"entry cache creation failed\\n\");\n"
+ "-\t\tgoto error;\n"
+ "+\t\tgoto cachefail;\n"
+ " \t}\n"
+ " \tif (zswap_comp_init()) {\n"
+ " \t\tpr_err(\"compressor initialization failed\\n\");\n"
+ "@@ -919,6 +917,8 @@ static int __init init_zswap(void)\n"
+ " \t\tpr_err(\"per-cpu initialization failed\\n\");\n"
+ " \t\tgoto pcpufail;\n"
+ " \t}\n"
+ "+\n"
+ "+\n"
+ " \tfrontswap_register_ops(&zswap_frontswap_ops);\n"
+ " \tif (zswap_debugfs_init())\n"
+ " \t\tpr_warn(\"debugfs initialization failed\\n\");\n"
+ "@@ -927,6 +927,8 @@ pcpufail:\n"
+ " \tzswap_comp_exit();\n"
+ " compfail:\n"
+ " \tzswap_entry_cache_destory();\n"
+ "+cachefail:\n"
+ "+\tzbud_destroy_pool(mem_pool);\n"
+ " error:\n"
+ " \treturn -ENOMEM;\n"
+ " }\n"
+ "-- \n"
+ "1.8.5.2\n"
+ "\n"
+ "\n"
+ "-- \n"
+ "Kind regards,\n"
+ Minchan Kim
 
-b1dde008946c3718c5836ea6bf83ca425004c9699097bad6f44cfcd581816f7a
+5016db950fd03258134ce1ba1c5610f806b2aa6234588d01e8a3d4b645814daa

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.