linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Minchan Kim <minchan@kernel.org>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	jlayton@poochiereds.net, bfields@fieldses.org,
	Vlastimil Babka <vbabka@suse.cz>,
	Joonsoo Kim <iamjoonsoo.kim@lge.com>,
	koct9i@gmail.com, aquini@redhat.com,
	virtualization@lists.linux-foundation.org,
	Mel Gorman <mgorman@suse.de>, Hugh Dickins <hughd@google.com>,
	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>,
	rknize@motorola.com, Rik van Riel <riel@redhat.com>,
	Gioh Kim <gurugio@hanmail.net>, Minchan Kim <minchan@kernel.org>
Subject: [PATCH v1 18/19] zsmalloc: migrate tail pages in zspage
Date: Fri, 11 Mar 2016 16:30:22 +0900	[thread overview]
Message-ID: <1457681423-26664-19-git-send-email-minchan@kernel.org> (raw)
In-Reply-To: <1457681423-26664-1-git-send-email-minchan@kernel.org>

This patch enables tail page migration of zspage.

In this point, I tested zsmalloc regression with micro-benchmark
which does zs_malloc/map/unmap/zs_free for all size class
in every CPU(my system is 12) during 20 sec.

It shows 1% regression which is really small when we consider
the benefit of this feature and realworkload overhead(i.e.,
most overhead comes from compression).

Signed-off-by: Minchan Kim <minchan@kernel.org>
---
 mm/zsmalloc.c | 131 +++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 115 insertions(+), 16 deletions(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 24d8dd1fc749..b9ff698115a1 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -550,6 +550,19 @@ static void set_zspage_mapping(struct page *first_page,
 	m->class = class_idx;
 }
 
+static bool check_isolated_page(struct page *first_page)
+{
+	struct page *cursor;
+
+	for (cursor = first_page; cursor != NULL; cursor =
+					get_next_page(cursor)) {
+		if (PageIsolated(cursor))
+			return true;
+	}
+
+	return false;
+}
+
 /*
  * zsmalloc divides the pool into various size classes where each
  * class maintains a list of zspages where each zspage is divided
@@ -1045,6 +1058,44 @@ void lock_zspage(struct page *first_page)
 	} while ((cursor = get_next_page(cursor)) != NULL);
 }
 
+int trylock_zspage(struct page *first_page, struct page *locked_page)
+{
+	struct page *cursor, *fail;
+
+	VM_BUG_ON_PAGE(!is_first_page(first_page), first_page);
+
+	for (cursor = first_page; cursor != NULL; cursor =
+			get_next_page(cursor)) {
+		if (cursor != locked_page) {
+			if (!trylock_page(cursor)) {
+				fail = cursor;
+				goto unlock;
+			}
+		}
+	}
+
+	return 1;
+unlock:
+	for (cursor = first_page; cursor != fail; cursor =
+			get_next_page(cursor)) {
+		if (cursor != locked_page)
+			unlock_page(cursor);
+	}
+
+	return 0;
+}
+
+void unlock_zspage(struct page *first_page, struct page *locked_page)
+{
+	struct page *cursor = first_page;
+
+	for (; cursor != NULL; cursor = get_next_page(cursor)) {
+		VM_BUG_ON_PAGE(!PageLocked(cursor), cursor);
+		if (cursor != locked_page)
+			unlock_page(cursor);
+	};
+}
+
 static void free_zspage(struct zs_pool *pool, struct page *first_page)
 {
 	struct page *nextp, *tmp;
@@ -1083,16 +1134,17 @@ static void init_zspage(struct size_class *class, struct page *first_page,
 	first_page->freelist = NULL;
 	INIT_LIST_HEAD(&first_page->lru);
 	set_zspage_inuse(first_page, 0);
-	BUG_ON(!trylock_page(first_page));
-	first_page->mapping = mapping;
-	__SetPageMovable(first_page);
-	unlock_page(first_page);
 
 	while (page) {
 		struct page *next_page;
 		struct link_free *link;
 		void *vaddr;
 
+		BUG_ON(!trylock_page(page));
+		page->mapping = mapping;
+		__SetPageMovable(page);
+		unlock_page(page);
+
 		vaddr = kmap_atomic(page);
 		link = (struct link_free *)vaddr + off / sizeof(*link);
 
@@ -1845,6 +1897,7 @@ static enum fullness_group putback_zspage(struct size_class *class,
 
 	VM_BUG_ON_PAGE(!list_empty(&first_page->lru), first_page);
 	VM_BUG_ON_PAGE(ZsPageIsolate(first_page), first_page);
+	VM_BUG_ON_PAGE(check_isolated_page(first_page), first_page);
 
 	fullness = get_fullness_group(class, first_page);
 	insert_zspage(class, fullness, first_page);
@@ -1951,6 +2004,12 @@ static struct page *isolate_source_page(struct size_class *class)
 		if (!page)
 			continue;
 
+		/* To prevent race between object and page migration */
+		if (!trylock_zspage(page, NULL)) {
+			page = NULL;
+			continue;
+		}
+
 		remove_zspage(class, i, page);
 
 		inuse = get_zspage_inuse(page);
@@ -1959,6 +2018,7 @@ static struct page *isolate_source_page(struct size_class *class)
 		if (inuse != freezed) {
 			unfreeze_zspage(class, page, freezed);
 			putback_zspage(class, page);
+			unlock_zspage(page, NULL);
 			page = NULL;
 			continue;
 		}
@@ -1990,6 +2050,12 @@ static struct page *isolate_target_page(struct size_class *class)
 		if (!page)
 			continue;
 
+		/* To prevent race between object and page migration */
+		if (!trylock_zspage(page, NULL)) {
+			page = NULL;
+			continue;
+		}
+
 		remove_zspage(class, i, page);
 
 		inuse = get_zspage_inuse(page);
@@ -1998,6 +2064,7 @@ static struct page *isolate_target_page(struct size_class *class)
 		if (inuse != freezed) {
 			unfreeze_zspage(class, page, freezed);
 			putback_zspage(class, page);
+			unlock_zspage(page, NULL);
 			page = NULL;
 			continue;
 		}
@@ -2071,11 +2138,13 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class)
 			putback_zspage(class, dst_page);
 			unfreeze_zspage(class, dst_page,
 				class->objs_per_zspage);
+			unlock_zspage(dst_page, NULL);
 			spin_unlock(&class->lock);
 			dst_page = NULL;
 		}
 
 		if (zspage_empty(class, src_page)) {
+			unlock_zspage(src_page, NULL);
 			free_zspage(pool, src_page);
 			spin_lock(&class->lock);
 			zs_stat_dec(class, OBJ_ALLOCATED,
@@ -2098,12 +2167,14 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class)
 		putback_zspage(class, src_page);
 		unfreeze_zspage(class, src_page,
 				class->objs_per_zspage);
+		unlock_zspage(src_page, NULL);
 	}
 
 	if (dst_page) {
 		putback_zspage(class, dst_page);
 		unfreeze_zspage(class, dst_page,
 				class->objs_per_zspage);
+		unlock_zspage(dst_page, NULL);
 	}
 
 	spin_unlock(&class->lock);
@@ -2206,10 +2277,11 @@ bool zs_page_isolate(struct page *page, isolate_mode_t mode)
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(PageIsolated(page), page);
 	/*
-	 * In this implementation, it allows only first page migration.
+	 * first_page will not be destroyed by PG_lock of @page but it could
+	 * be migrated out. For prohibiting it, zs_page_migrate calls
+	 * trylock_zspage so it closes the race.
 	 */
-	VM_BUG_ON_PAGE(!is_first_page(page), page);
-	first_page = page;
+	first_page = get_first_page(page);
 
 	/*
 	 * Without class lock, fullness is meaningless while constant
@@ -2223,9 +2295,18 @@ bool zs_page_isolate(struct page *page, isolate_mode_t mode)
 	if (!spin_trylock(&class->lock))
 		return false;
 
+	if (check_isolated_page(first_page))
+		goto skip_isolate;
+
+	/*
+	 * If this is first time isolation for zspage, isolate zspage from
+	 * size_class to prevent further allocations from the zspage.
+	 */
 	get_zspage_mapping(first_page, &class_idx, &fullness);
 	remove_zspage(class, fullness, first_page);
 	SetZsPageIsolate(first_page);
+
+skip_isolate:
 	SetPageIsolated(page);
 	spin_unlock(&class->lock);
 
@@ -2248,7 +2329,7 @@ int zs_page_migrate(struct address_space *mapping, struct page *newpage,
 	VM_BUG_ON_PAGE(!PageMovable(page), page);
 	VM_BUG_ON_PAGE(!PageIsolated(page), page);
 
-	first_page = page;
+	first_page = get_first_page(page);
 	get_zspage_mapping(first_page, &class_idx, &fullness);
 	pool = page->mapping->private_data;
 	class = pool->size_class[class_idx];
@@ -2263,6 +2344,13 @@ int zs_page_migrate(struct address_space *mapping, struct page *newpage,
 	if (get_zspage_inuse(first_page) == 0)
 		goto out_class_unlock;
 
+	/*
+	 * It prevents first_page migration during tail page opeartion for
+	 * get_first_page's stability.
+	 */
+	if (!trylock_zspage(first_page, page))
+		goto out_class_unlock;
+
 	freezed = freeze_zspage(class, first_page);
 	if (freezed != get_zspage_inuse(first_page))
 		goto out_unfreeze;
@@ -2301,21 +2389,26 @@ int zs_page_migrate(struct address_space *mapping, struct page *newpage,
 	kunmap_atomic(addr);
 
 	replace_sub_page(class, first_page, newpage, page);
-	first_page = newpage;
+	first_page = get_first_page(newpage);
 	get_page(newpage);
 	VM_BUG_ON_PAGE(get_fullness_group(class, first_page) ==
 			ZS_EMPTY, first_page);
-	ClearZsPageIsolate(first_page);
-	putback_zspage(class, first_page);
+	if (!check_isolated_page(first_page)) {
+		INIT_LIST_HEAD(&first_page->lru);
+		ClearZsPageIsolate(first_page);
+		putback_zspage(class, first_page);
+	}
+
 
 	/* Migration complete. Free old page */
 	reset_page(page);
 	ClearPageIsolated(page);
 	put_page(page);
 	ret = MIGRATEPAGE_SUCCESS;
-
+	page = newpage;
 out_unfreeze:
 	unfreeze_zspage(class, first_page, freezed);
+	unlock_zspage(first_page, page);
 out_class_unlock:
 	spin_unlock(&class->lock);
 
@@ -2333,7 +2426,7 @@ void zs_page_putback(struct page *page)
 	VM_BUG_ON_PAGE(!PageMovable(page), page);
 	VM_BUG_ON_PAGE(!PageIsolated(page), page);
 
-	first_page = page;
+	first_page = get_first_page(page);
 	get_zspage_mapping(first_page, &class_idx, &fullness);
 	pool = page->mapping->private_data;
 	class = pool->size_class[class_idx];
@@ -2343,11 +2436,17 @@ void zs_page_putback(struct page *page)
 	 * in zs_free will wait the page lock of @page without
 	 * destroying of zspage.
 	 */
-	INIT_LIST_HEAD(&first_page->lru);
 	spin_lock(&class->lock);
 	ClearPageIsolated(page);
-	ClearZsPageIsolate(first_page);
-	putback_zspage(class, first_page);
+	/*
+	 * putback zspage to right list if this is last isolated page
+	 * putback in the zspage.
+	 */
+	if (!check_isolated_page(first_page)) {
+		INIT_LIST_HEAD(&first_page->lru);
+		ClearZsPageIsolate(first_page);
+		putback_zspage(class, first_page);
+	}
 	spin_unlock(&class->lock);
 }
 
-- 
1.9.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2016-03-11  7:37 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-11  7:30 [PATCH v1 00/19] Support non-lru page migration Minchan Kim
2016-03-11  7:30 ` [PATCH v1 01/19] mm: use put_page to free page instead of putback_lru_page Minchan Kim
2016-03-14  8:48   ` Vlastimil Babka
2016-03-15  1:16     ` Minchan Kim
2016-03-15 19:06       ` Vlastimil Babka
2016-03-11  7:30 ` [PATCH v1 02/19] mm/compaction: support non-lru movable page migration Minchan Kim
2016-03-11  8:11   ` kbuild test robot
2016-03-11  8:35     ` Minchan Kim
2016-03-11  7:30 ` [PATCH v1 03/19] fs/anon_inodes: new interface to create new inode Minchan Kim
2016-03-11  8:05   ` Al Viro
2016-03-11 14:24     ` Gioh Kim
2016-03-11  7:30 ` [PATCH v1 04/19] mm/balloon: use general movable page feature into balloon Minchan Kim
2016-03-11  7:30 ` [PATCH v1 05/19] zsmalloc: use first_page rather than page Minchan Kim
2016-03-15  6:19   ` Sergey Senozhatsky
2016-03-11  7:30 ` [PATCH v1 06/19] zsmalloc: clean up many BUG_ON Minchan Kim
2016-03-15  6:19   ` Sergey Senozhatsky
2016-03-11  7:30 ` [PATCH v1 07/19] zsmalloc: reordering function parameter Minchan Kim
2016-03-15  6:20   ` Sergey Senozhatsky
2016-03-11  7:30 ` [PATCH v1 08/19] zsmalloc: remove unused pool param in obj_free Minchan Kim
2016-03-15  6:21   ` Sergey Senozhatsky
2016-03-11  7:30 ` [PATCH v1 09/19] zsmalloc: keep max_object in size_class Minchan Kim
2016-03-12  1:44   ` xuyiping
2016-03-14  4:55     ` Minchan Kim
2016-03-15  6:28   ` Sergey Senozhatsky
2016-03-15  6:41     ` Minchan Kim
2016-03-11  7:30 ` [PATCH v1 10/19] zsmalloc: squeeze inuse into page->mapping Minchan Kim
2016-03-11  7:30 ` [PATCH v1 11/19] zsmalloc: squeeze freelist " Minchan Kim
2016-03-15  6:40   ` Sergey Senozhatsky
2016-03-15  6:51     ` Minchan Kim
2016-03-17 12:09       ` YiPing Xu
2016-03-17 22:17         ` Minchan Kim
2016-03-11  7:30 ` [PATCH v1 12/19] zsmalloc: move struct zs_meta from mapping to freelist Minchan Kim
2016-03-11  7:30 ` [PATCH v1 13/19] zsmalloc: factor page chain functionality out Minchan Kim
2016-03-12  3:09   ` xuyiping
2016-03-14  4:58     ` Minchan Kim
2016-03-11  7:30 ` [PATCH v1 14/19] zsmalloc: separate free_zspage from putback_zspage Minchan Kim
2016-03-11  7:30 ` [PATCH v1 15/19] zsmalloc: zs_compact refactoring Minchan Kim
2016-03-11  7:30 ` [PATCH v1 16/19] zsmalloc: migrate head page of zspage Minchan Kim
2016-03-11  7:30 ` [PATCH v1 17/19] zsmalloc: use single linked list for page chain Minchan Kim
2016-03-11  7:30 ` Minchan Kim [this message]
2016-03-11  7:30 ` [PATCH v1 19/19] zram: use __GFP_MOVABLE for memory allocation Minchan Kim
2016-03-15  6:56   ` Sergey Senozhatsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1457681423-26664-19-git-send-email-minchan@kernel.org \
    --to=minchan@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=aquini@redhat.com \
    --cc=bfields@fieldses.org \
    --cc=gurugio@hanmail.net \
    --cc=hughd@google.com \
    --cc=iamjoonsoo.kim@lge.com \
    --cc=jlayton@poochiereds.net \
    --cc=koct9i@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@suse.de \
    --cc=riel@redhat.com \
    --cc=rknize@motorola.com \
    --cc=sergey.senozhatsky@gmail.com \
    --cc=vbabka@suse.cz \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).