diff for duplicates of <20140123030242.GA28732@bbox> diff --git a/a/1.txt b/N1/1.txt index 3c5f43d..8c71760 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -205,3 +205,230 @@ and like the Weijie's suggestion. So, how about this? I didn't look at code in detail and want to show the concept. That's why I added RFC tag. + +>From 67c64746e977a091ee30ca37bbc034990adf5ca5 Mon Sep 17 00:00:00 2001 +From: Minchan Kim <minchan@kernel.org> +Date: Thu, 23 Jan 2014 11:41:44 +0900 +Subject: [RFC] zswap: support multiple swap + +Cai Liu reporeted that now zbud pool pages counting has a problem +when multiple swap is used because it just counts one of swap +among mutliple swap intead of all of swap so zswap cannot control +writeback properly. The result is unnecessary writeback or +no writeback when we should really writeback. IOW, it made zswap +crazy. + +Another problem in zswap is following as. +For example, let's assume we use two swap A and B with different +priority and A already has charged 19% long time ago and let's assume +that A swap is full now so VM start to use B so that B has charged 1% +recently. It menas zswap charged (19% + 1%) is full by default. +Then, if VM want to swap out more pages into B, zbud_reclaim_page +would be evict one of pages in B's pool and it would be repeated +continuously. It's totally LRU reverse problem and swap thrashing +in B would happen. + +This patch makes zswap consider mutliple swap by creating *a* zbud +pool which will be shared by multiple swap so all of zswap pages +in multiple swap keep order by LRU so it can prevent above two +problems. + +Reported-by: Cai Liu <cai.liu@samsung.com> +Suggested-by: Weijie Yang <weijie.yang.kh@gmail.com> +Signed-off-by: Minchan Kim <minchan@kernel.org> +--- + mm/zswap.c | 56 +++++++++++++++++++++++++++++--------------------------- + 1 file changed, 29 insertions(+), 27 deletions(-) + +diff --git a/mm/zswap.c b/mm/zswap.c +index 5a63f78a5601..96039e86db79 100644 +--- a/mm/zswap.c ++++ b/mm/zswap.c +@@ -89,6 +89,8 @@ static unsigned int zswap_max_pool_percent = 20; + module_param_named(max_pool_percent, + zswap_max_pool_percent, uint, 0644); + ++static struct zbud_pool *mem_pool; ++ + /********************************* + * compression functions + **********************************/ +@@ -189,7 +191,6 @@ struct zswap_header { + struct zswap_tree { + struct rb_root rbroot; + spinlock_t lock; +- struct zbud_pool *pool; + }; + + static struct zswap_tree *zswap_trees[MAX_SWAPFILES]; +@@ -288,10 +289,10 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry) + static void zswap_free_entry(struct zswap_tree *tree, + struct zswap_entry *entry) + { +- zbud_free(tree->pool, entry->handle); ++ zbud_free(mem_pool, entry->handle); + zswap_entry_cache_free(entry); + atomic_dec(&zswap_stored_pages); +- zswap_pool_pages = zbud_get_pool_size(tree->pool); ++ zswap_pool_pages = zbud_get_pool_size(mem_pool); + } + + /* caller must hold the tree lock */ +@@ -545,7 +546,7 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle) + zbud_unmap(pool, handle); + tree = zswap_trees[swp_type(swpentry)]; + offset = swp_offset(swpentry); +- BUG_ON(pool != tree->pool); ++ BUG_ON(pool != mem_pool); + + /* find and ref zswap entry */ + spin_lock(&tree->lock); +@@ -573,13 +574,13 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle) + case ZSWAP_SWAPCACHE_NEW: /* page is locked */ + /* decompress */ + dlen = PAGE_SIZE; +- src = (u8 *)zbud_map(tree->pool, entry->handle) + ++ src = (u8 *)zbud_map(mem_pool, entry->handle) + + sizeof(struct zswap_header); + dst = kmap_atomic(page); + ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, + entry->length, dst, &dlen); + kunmap_atomic(dst); +- zbud_unmap(tree->pool, entry->handle); ++ zbud_unmap(mem_pool, entry->handle); + BUG_ON(ret); + BUG_ON(dlen != PAGE_SIZE); + +@@ -652,7 +653,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, + /* reclaim space if needed */ + if (zswap_is_full()) { + zswap_pool_limit_hit++; +- if (zbud_reclaim_page(tree->pool, 8)) { ++ if (zbud_reclaim_page(mem_pool, 8)) { + zswap_reject_reclaim_fail++; + ret = -ENOMEM; + goto reject; +@@ -679,7 +680,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, + + /* store */ + len = dlen + sizeof(struct zswap_header); +- ret = zbud_alloc(tree->pool, len, __GFP_NORETRY | __GFP_NOWARN, ++ ret = zbud_alloc(mem_pool, len, __GFP_NORETRY | __GFP_NOWARN, + &handle); + if (ret == -ENOSPC) { + zswap_reject_compress_poor++; +@@ -689,11 +690,11 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, + zswap_reject_alloc_fail++; + goto freepage; + } +- zhdr = zbud_map(tree->pool, handle); ++ zhdr = zbud_map(mem_pool, handle); + zhdr->swpentry = swp_entry(type, offset); + buf = (u8 *)(zhdr + 1); + memcpy(buf, dst, dlen); +- zbud_unmap(tree->pool, handle); ++ zbud_unmap(mem_pool, handle); + put_cpu_var(zswap_dstmem); + + /* populate entry */ +@@ -716,7 +717,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, + + /* update stats */ + atomic_inc(&zswap_stored_pages); +- zswap_pool_pages = zbud_get_pool_size(tree->pool); ++ zswap_pool_pages = zbud_get_pool_size(mem_pool); + + return 0; + +@@ -752,13 +753,13 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, + + /* decompress */ + dlen = PAGE_SIZE; +- src = (u8 *)zbud_map(tree->pool, entry->handle) + ++ src = (u8 *)zbud_map(mem_pool, entry->handle) + + sizeof(struct zswap_header); + dst = kmap_atomic(page); + ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length, + dst, &dlen); + kunmap_atomic(dst); +- zbud_unmap(tree->pool, entry->handle); ++ zbud_unmap(mem_pool, entry->handle); + BUG_ON(ret); + + spin_lock(&tree->lock); +@@ -807,8 +808,6 @@ static void zswap_frontswap_invalidate_area(unsigned type) + zswap_free_entry(tree, entry); + tree->rbroot = RB_ROOT; + spin_unlock(&tree->lock); +- +- zbud_destroy_pool(tree->pool); + kfree(tree); + zswap_trees[type] = NULL; + } +@@ -822,20 +821,14 @@ static void zswap_frontswap_init(unsigned type) + struct zswap_tree *tree; + + tree = kzalloc(sizeof(struct zswap_tree), GFP_KERNEL); +- if (!tree) +- goto err; +- tree->pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops); +- if (!tree->pool) +- goto freetree; ++ if (!tree) { ++ pr_err("alloc failed, zswap disabled for swap type %d\n", type); ++ return; ++ } ++ + tree->rbroot = RB_ROOT; + spin_lock_init(&tree->lock); + zswap_trees[type] = tree; +- return; +- +-freetree: +- kfree(tree); +-err: +- pr_err("alloc failed, zswap disabled for swap type %d\n", type); + } + + static struct frontswap_ops zswap_frontswap_ops = { +@@ -907,9 +900,14 @@ static int __init init_zswap(void) + return 0; + + pr_info("loading zswap\n"); ++ ++ mem_pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops); ++ if (!mem_pool) ++ goto error; ++ + if (zswap_entry_cache_create()) { + pr_err("entry cache creation failed\n"); +- goto error; ++ goto cachefail; + } + if (zswap_comp_init()) { + pr_err("compressor initialization failed\n"); +@@ -919,6 +917,8 @@ static int __init init_zswap(void) + pr_err("per-cpu initialization failed\n"); + goto pcpufail; + } ++ ++ + frontswap_register_ops(&zswap_frontswap_ops); + if (zswap_debugfs_init()) + pr_warn("debugfs initialization failed\n"); +@@ -927,6 +927,8 @@ pcpufail: + zswap_comp_exit(); + compfail: + zswap_entry_cache_destory(); ++cachefail: ++ zbud_destroy_pool(mem_pool); + error: + return -ENOMEM; + } +-- +1.8.5.2 + + +-- +Kind regards, +Minchan Kim diff --git a/a/content_digest b/N1/content_digest index cf48532..0268da4 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -227,6 +227,233 @@ "\n" "So, how about this?\n" "I didn't look at code in detail and want to show the concept.\n" - That's why I added RFC tag. + "That's why I added RFC tag.\n" + "\n" + ">From 67c64746e977a091ee30ca37bbc034990adf5ca5 Mon Sep 17 00:00:00 2001\n" + "From: Minchan Kim <minchan@kernel.org>\n" + "Date: Thu, 23 Jan 2014 11:41:44 +0900\n" + "Subject: [RFC] zswap: support multiple swap\n" + "\n" + "Cai Liu reporeted that now zbud pool pages counting has a problem\n" + "when multiple swap is used because it just counts one of swap\n" + "among mutliple swap intead of all of swap so zswap cannot control\n" + "writeback properly. The result is unnecessary writeback or\n" + "no writeback when we should really writeback. IOW, it made zswap\n" + "crazy.\n" + "\n" + "Another problem in zswap is following as.\n" + "For example, let's assume we use two swap A and B with different\n" + "priority and A already has charged 19% long time ago and let's assume\n" + "that A swap is full now so VM start to use B so that B has charged 1%\n" + "recently. It menas zswap charged (19% + 1%) is full by default.\n" + "Then, if VM want to swap out more pages into B, zbud_reclaim_page\n" + "would be evict one of pages in B's pool and it would be repeated\n" + "continuously. It's totally LRU reverse problem and swap thrashing\n" + "in B would happen.\n" + "\n" + "This patch makes zswap consider mutliple swap by creating *a* zbud\n" + "pool which will be shared by multiple swap so all of zswap pages\n" + "in multiple swap keep order by LRU so it can prevent above two\n" + "problems.\n" + "\n" + "Reported-by: Cai Liu <cai.liu@samsung.com>\n" + "Suggested-by: Weijie Yang <weijie.yang.kh@gmail.com>\n" + "Signed-off-by: Minchan Kim <minchan@kernel.org>\n" + "---\n" + " mm/zswap.c | 56 +++++++++++++++++++++++++++++---------------------------\n" + " 1 file changed, 29 insertions(+), 27 deletions(-)\n" + "\n" + "diff --git a/mm/zswap.c b/mm/zswap.c\n" + "index 5a63f78a5601..96039e86db79 100644\n" + "--- a/mm/zswap.c\n" + "+++ b/mm/zswap.c\n" + "@@ -89,6 +89,8 @@ static unsigned int zswap_max_pool_percent = 20;\n" + " module_param_named(max_pool_percent,\n" + " \t\t\tzswap_max_pool_percent, uint, 0644);\n" + " \n" + "+static struct zbud_pool *mem_pool;\n" + "+\n" + " /*********************************\n" + " * compression functions\n" + " **********************************/\n" + "@@ -189,7 +191,6 @@ struct zswap_header {\n" + " struct zswap_tree {\n" + " \tstruct rb_root rbroot;\n" + " \tspinlock_t lock;\n" + "-\tstruct zbud_pool *pool;\n" + " };\n" + " \n" + " static struct zswap_tree *zswap_trees[MAX_SWAPFILES];\n" + "@@ -288,10 +289,10 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)\n" + " static void zswap_free_entry(struct zswap_tree *tree,\n" + " \t\t\tstruct zswap_entry *entry)\n" + " {\n" + "-\tzbud_free(tree->pool, entry->handle);\n" + "+\tzbud_free(mem_pool, entry->handle);\n" + " \tzswap_entry_cache_free(entry);\n" + " \tatomic_dec(&zswap_stored_pages);\n" + "-\tzswap_pool_pages = zbud_get_pool_size(tree->pool);\n" + "+\tzswap_pool_pages = zbud_get_pool_size(mem_pool);\n" + " }\n" + " \n" + " /* caller must hold the tree lock */\n" + "@@ -545,7 +546,7 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)\n" + " \tzbud_unmap(pool, handle);\n" + " \ttree = zswap_trees[swp_type(swpentry)];\n" + " \toffset = swp_offset(swpentry);\n" + "-\tBUG_ON(pool != tree->pool);\n" + "+\tBUG_ON(pool != mem_pool);\n" + " \n" + " \t/* find and ref zswap entry */\n" + " \tspin_lock(&tree->lock);\n" + "@@ -573,13 +574,13 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)\n" + " \tcase ZSWAP_SWAPCACHE_NEW: /* page is locked */\n" + " \t\t/* decompress */\n" + " \t\tdlen = PAGE_SIZE;\n" + "-\t\tsrc = (u8 *)zbud_map(tree->pool, entry->handle) +\n" + "+\t\tsrc = (u8 *)zbud_map(mem_pool, entry->handle) +\n" + " \t\t\tsizeof(struct zswap_header);\n" + " \t\tdst = kmap_atomic(page);\n" + " \t\tret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,\n" + " \t\t\t\tentry->length, dst, &dlen);\n" + " \t\tkunmap_atomic(dst);\n" + "-\t\tzbud_unmap(tree->pool, entry->handle);\n" + "+\t\tzbud_unmap(mem_pool, entry->handle);\n" + " \t\tBUG_ON(ret);\n" + " \t\tBUG_ON(dlen != PAGE_SIZE);\n" + " \n" + "@@ -652,7 +653,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,\n" + " \t/* reclaim space if needed */\n" + " \tif (zswap_is_full()) {\n" + " \t\tzswap_pool_limit_hit++;\n" + "-\t\tif (zbud_reclaim_page(tree->pool, 8)) {\n" + "+\t\tif (zbud_reclaim_page(mem_pool, 8)) {\n" + " \t\t\tzswap_reject_reclaim_fail++;\n" + " \t\t\tret = -ENOMEM;\n" + " \t\t\tgoto reject;\n" + "@@ -679,7 +680,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,\n" + " \n" + " \t/* store */\n" + " \tlen = dlen + sizeof(struct zswap_header);\n" + "-\tret = zbud_alloc(tree->pool, len, __GFP_NORETRY | __GFP_NOWARN,\n" + "+\tret = zbud_alloc(mem_pool, len, __GFP_NORETRY | __GFP_NOWARN,\n" + " \t\t&handle);\n" + " \tif (ret == -ENOSPC) {\n" + " \t\tzswap_reject_compress_poor++;\n" + "@@ -689,11 +690,11 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,\n" + " \t\tzswap_reject_alloc_fail++;\n" + " \t\tgoto freepage;\n" + " \t}\n" + "-\tzhdr = zbud_map(tree->pool, handle);\n" + "+\tzhdr = zbud_map(mem_pool, handle);\n" + " \tzhdr->swpentry = swp_entry(type, offset);\n" + " \tbuf = (u8 *)(zhdr + 1);\n" + " \tmemcpy(buf, dst, dlen);\n" + "-\tzbud_unmap(tree->pool, handle);\n" + "+\tzbud_unmap(mem_pool, handle);\n" + " \tput_cpu_var(zswap_dstmem);\n" + " \n" + " \t/* populate entry */\n" + "@@ -716,7 +717,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,\n" + " \n" + " \t/* update stats */\n" + " \tatomic_inc(&zswap_stored_pages);\n" + "-\tzswap_pool_pages = zbud_get_pool_size(tree->pool);\n" + "+\tzswap_pool_pages = zbud_get_pool_size(mem_pool);\n" + " \n" + " \treturn 0;\n" + " \n" + "@@ -752,13 +753,13 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,\n" + " \n" + " \t/* decompress */\n" + " \tdlen = PAGE_SIZE;\n" + "-\tsrc = (u8 *)zbud_map(tree->pool, entry->handle) +\n" + "+\tsrc = (u8 *)zbud_map(mem_pool, entry->handle) +\n" + " \t\t\tsizeof(struct zswap_header);\n" + " \tdst = kmap_atomic(page);\n" + " \tret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,\n" + " \t\tdst, &dlen);\n" + " \tkunmap_atomic(dst);\n" + "-\tzbud_unmap(tree->pool, entry->handle);\n" + "+\tzbud_unmap(mem_pool, entry->handle);\n" + " \tBUG_ON(ret);\n" + " \n" + " \tspin_lock(&tree->lock);\n" + "@@ -807,8 +808,6 @@ static void zswap_frontswap_invalidate_area(unsigned type)\n" + " \t\tzswap_free_entry(tree, entry);\n" + " \ttree->rbroot = RB_ROOT;\n" + " \tspin_unlock(&tree->lock);\n" + "-\n" + "-\tzbud_destroy_pool(tree->pool);\n" + " \tkfree(tree);\n" + " \tzswap_trees[type] = NULL;\n" + " }\n" + "@@ -822,20 +821,14 @@ static void zswap_frontswap_init(unsigned type)\n" + " \tstruct zswap_tree *tree;\n" + " \n" + " \ttree = kzalloc(sizeof(struct zswap_tree), GFP_KERNEL);\n" + "-\tif (!tree)\n" + "-\t\tgoto err;\n" + "-\ttree->pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);\n" + "-\tif (!tree->pool)\n" + "-\t\tgoto freetree;\n" + "+\tif (!tree) {\n" + "+\t\tpr_err(\"alloc failed, zswap disabled for swap type %d\\n\", type);\n" + "+\t\treturn;\n" + "+\t}\n" + "+\n" + " \ttree->rbroot = RB_ROOT;\n" + " \tspin_lock_init(&tree->lock);\n" + " \tzswap_trees[type] = tree;\n" + "-\treturn;\n" + "-\n" + "-freetree:\n" + "-\tkfree(tree);\n" + "-err:\n" + "-\tpr_err(\"alloc failed, zswap disabled for swap type %d\\n\", type);\n" + " }\n" + " \n" + " static struct frontswap_ops zswap_frontswap_ops = {\n" + "@@ -907,9 +900,14 @@ static int __init init_zswap(void)\n" + " \t\treturn 0;\n" + " \n" + " \tpr_info(\"loading zswap\\n\");\n" + "+\n" + "+\tmem_pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);\n" + "+\tif (!mem_pool)\n" + "+\t\tgoto error;\n" + "+\n" + " \tif (zswap_entry_cache_create()) {\n" + " \t\tpr_err(\"entry cache creation failed\\n\");\n" + "-\t\tgoto error;\n" + "+\t\tgoto cachefail;\n" + " \t}\n" + " \tif (zswap_comp_init()) {\n" + " \t\tpr_err(\"compressor initialization failed\\n\");\n" + "@@ -919,6 +917,8 @@ static int __init init_zswap(void)\n" + " \t\tpr_err(\"per-cpu initialization failed\\n\");\n" + " \t\tgoto pcpufail;\n" + " \t}\n" + "+\n" + "+\n" + " \tfrontswap_register_ops(&zswap_frontswap_ops);\n" + " \tif (zswap_debugfs_init())\n" + " \t\tpr_warn(\"debugfs initialization failed\\n\");\n" + "@@ -927,6 +927,8 @@ pcpufail:\n" + " \tzswap_comp_exit();\n" + " compfail:\n" + " \tzswap_entry_cache_destory();\n" + "+cachefail:\n" + "+\tzbud_destroy_pool(mem_pool);\n" + " error:\n" + " \treturn -ENOMEM;\n" + " }\n" + "-- \n" + "1.8.5.2\n" + "\n" + "\n" + "-- \n" + "Kind regards,\n" + Minchan Kim -b1dde008946c3718c5836ea6bf83ca425004c9699097bad6f44cfcd581816f7a +5016db950fd03258134ce1ba1c5610f806b2aa6234588d01e8a3d4b645814daa
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.