All of lore.kernel.org
 help / color / mirror / Atom feed
diff for duplicates of <20160526215022.GA2322@bbox>

diff --git a/a/1.txt b/N1/1.txt
index 87a1ab2..79528a4 100644
--- a/a/1.txt
+++ b/N1/1.txt
@@ -1 +1,1373 @@
 Follow up Sergey's review
+
+>From 2deede28c91910a9d3493feae30bed507e72f213 Mon Sep 17 00:00:00 2001
+From: Minchan Kim <minchan@kernel.org>
+Date: Thu, 5 May 2016 00:01:03 +0900
+Subject: [PATCH v6r2] zsmalloc: page migration support
+
+This patch introduces run-time migration feature for zspage.
+
+For migration, the VM uses the page.lru field, so it is better not to use
+the page.next field, which shares storage with page.lru, for our own purpose.
+To that end, we first compute the first object offset of a page at
+runtime instead of storing it in page.index, so we can use
+page.index as the link for page chaining instead of page.next.
+
+In case of a huge object, it stores the handle in page.index instead of
+the next link of page chaining because a huge object doesn't need a next
+link for page chaining. So get_next_page needs to identify a huge
+object to return NULL. For that, this patch uses the PG_owner_priv_1
+page flag.
+
+For migration, it supports three functions
+
+* zs_page_isolate
+
+It isolates the zspage containing the subpage the VM wants to migrate
+from its class so nobody can allocate a new object from the zspage.
+
+The VM may try to isolate a zspage once per subpage, so a
+subsequent isolation attempt on another subpage of the zspage shouldn't
+fail. For that, we introduce the zspage.isolated count. With that,
+zs_page_isolate can know whether the zspage is already isolated
+for migration; if it is, a subsequent isolation attempt
+succeeds without isolating the zspage again.
+
+* zs_page_migrate
+
+First of all, it holds the write side of zspage->lock to prevent migration
+of other subpages in the zspage. Then it locks all objects in the page the
+VM wants to migrate. We must lock all objects in the page because of a race
+between zs_map_object and zs_page_migrate.
+
+zs_map_object				zs_page_migrate
+
+pin_tag(handle)
+obj = handle_to_obj(handle)
+obj_to_location(obj, &page, &obj_idx);
+
+					write_lock(&zspage->lock)
+					if (!trypin_tag(handle))
+						goto unpin_object
+
+zspage = get_zspage(page);
+read_lock(&zspage->lock);
+
+If zs_page_migrate doesn't do trypin_tag, zs_map_object's page can
+become stale due to migration, which leads to a crash.
+
+If it locks all of the objects successfully, it copies the content from
+the old page to the new one and finally creates a new zspage chain with
+the new page. If it's the last isolated subpage in the zspage, it puts
+the zspage back to the class.
+
+* zs_page_putback
+
+It returns the isolated zspage to the right fullness_group list if it fails
+to migrate a page. If it finds the zspage is ZS_EMPTY, it queues zspage
+freeing to a workqueue. See below about async zspage freeing.
+
+This patch introduces asynchronous zspage freeing. It is needed because
+we need page_lock to clear PG_movable but, unfortunately, the
+zs_free path must be atomic, so the approach is to try to grab page_lock.
+If it gets the page_lock of all pages successfully, it can free the zspage
+immediately. Otherwise, it queues a free request and frees the zspage via
+a workqueue in process context.
+
+If zs_free finds the zspage is isolated when it tries to free the zspage,
+it delays the freeing until zs_page_putback finds it, which will then
+free the zspage.
+
+In this patch, we expand fullness_list to cover ZS_EMPTY through ZS_FULL.
+First of all, it uses the ZS_EMPTY list for delayed freeing.
+And adding the ZS_FULL list makes it possible to identify whether a zspage
+is isolated or not via a list_empty(&zspage->list) test.
+
+Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Signed-off-by: Minchan Kim <minchan@kernel.org>
+---
+ include/uapi/linux/magic.h |   1 +
+ mm/zsmalloc.c              | 793 ++++++++++++++++++++++++++++++++++++++-------
+ 2 files changed, 672 insertions(+), 122 deletions(-)
+
+diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
+index d829ce63529d..e398beac67b8 100644
+--- a/include/uapi/linux/magic.h
++++ b/include/uapi/linux/magic.h
+@@ -81,5 +81,6 @@
+ /* Since UDF 2.01 is ISO 13346 based... */
+ #define UDF_SUPER_MAGIC		0x15013346
+ #define BALLOON_KVM_MAGIC	0x13661366
++#define ZSMALLOC_MAGIC		0x58295829
+ 
+ #endif /* __LINUX_MAGIC_H__ */
+diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
+index c6fb543cfb98..a80100db16d6 100644
+--- a/mm/zsmalloc.c
++++ b/mm/zsmalloc.c
+@@ -17,14 +17,14 @@
+  *
+  * Usage of struct page fields:
+  *	page->private: points to zspage
+- *	page->index: offset of the first object starting in this page.
+- *		For the first page, this is always 0, so we use this field
+- *		to store handle for huge object.
+- *	page->next: links together all component pages of a zspage
++ *	page->freelist(index): links together all component pages of a zspage
++ *		For the huge page, this is always 0, so we use this field
++ *		to store handle.
+  *
+  * Usage of struct page flags:
+  *	PG_private: identifies the first component page
+  *	PG_private2: identifies the last component page
++ *	PG_owner_priv_1: identifies the huge component page
+  *
+  */
+ 
+@@ -49,6 +49,11 @@
+ #include <linux/debugfs.h>
+ #include <linux/zsmalloc.h>
+ #include <linux/zpool.h>
++#include <linux/mount.h>
++#include <linux/compaction.h>
++#include <linux/pagemap.h>
++
++#define ZSPAGE_MAGIC	0x58
+ 
+ /*
+  * This must be power of 2 and greater than of equal to sizeof(link_free).
+@@ -136,25 +141,23 @@
+  * We do not maintain any list for completely empty or full pages
+  */
+ enum fullness_group {
+-	ZS_ALMOST_FULL,
+-	ZS_ALMOST_EMPTY,
+ 	ZS_EMPTY,
+-	ZS_FULL
++	ZS_ALMOST_EMPTY,
++	ZS_ALMOST_FULL,
++	ZS_FULL,
++	NR_ZS_FULLNESS,
+ };
+ 
+ enum zs_stat_type {
++	CLASS_EMPTY,
++	CLASS_ALMOST_EMPTY,
++	CLASS_ALMOST_FULL,
++	CLASS_FULL,
+ 	OBJ_ALLOCATED,
+ 	OBJ_USED,
+-	CLASS_ALMOST_FULL,
+-	CLASS_ALMOST_EMPTY,
++	NR_ZS_STAT_TYPE,
+ };
+ 
+-#ifdef CONFIG_ZSMALLOC_STAT
+-#define NR_ZS_STAT_TYPE	(CLASS_ALMOST_EMPTY + 1)
+-#else
+-#define NR_ZS_STAT_TYPE	(OBJ_USED + 1)
+-#endif
+-
+ struct zs_size_stat {
+ 	unsigned long objs[NR_ZS_STAT_TYPE];
+ };
+@@ -163,6 +166,10 @@ struct zs_size_stat {
+ static struct dentry *zs_stat_root;
+ #endif
+ 
++#ifdef CONFIG_COMPACTION
++static struct vfsmount *zsmalloc_mnt;
++#endif
++
+ /*
+  * number of size_classes
+  */
+@@ -186,23 +193,36 @@ static const int fullness_threshold_frac = 4;
+ 
+ struct size_class {
+ 	spinlock_t lock;
+-	struct list_head fullness_list[2];
++	struct list_head fullness_list[NR_ZS_FULLNESS];
+ 	/*
+ 	 * Size of objects stored in this class. Must be multiple
+ 	 * of ZS_ALIGN.
+ 	 */
+ 	int size;
+ 	int objs_per_zspage;
+-	unsigned int index;
+-
+-	struct zs_size_stat stats;
+-
+ 	/* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */
+ 	int pages_per_zspage;
+-	/* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */
+-	bool huge;
++
++	unsigned int index;
++	struct zs_size_stat stats;
+ };
+ 
++/* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */
++static void SetPageHugeObject(struct page *page)
++{
++	SetPageOwnerPriv1(page);
++}
++
++static void ClearPageHugeObject(struct page *page)
++{
++	ClearPageOwnerPriv1(page);
++}
++
++static int PageHugeObject(struct page *page)
++{
++	return PageOwnerPriv1(page);
++}
++
+ /*
+  * Placed within free objects to form a singly linked list.
+  * For every zspage, zspage->freeobj gives head of this list.
+@@ -244,6 +264,10 @@ struct zs_pool {
+ #ifdef CONFIG_ZSMALLOC_STAT
+ 	struct dentry *stat_dentry;
+ #endif
++#ifdef CONFIG_COMPACTION
++	struct inode *inode;
++	struct work_struct free_work;
++#endif
+ };
+ 
+ /*
+@@ -252,16 +276,23 @@ struct zs_pool {
+  */
+ #define FULLNESS_BITS	2
+ #define CLASS_BITS	8
++#define ISOLATED_BITS	3
++#define MAGIC_VAL_BITS	8
+ 
+ struct zspage {
+ 	struct {
+ 		unsigned int fullness:FULLNESS_BITS;
+ 		unsigned int class:CLASS_BITS;
++		unsigned int isolated:ISOLATED_BITS;
++		unsigned int magic:MAGIC_VAL_BITS;
+ 	};
+ 	unsigned int inuse;
+ 	unsigned int freeobj;
+ 	struct page *first_page;
+ 	struct list_head list; /* fullness list */
++#ifdef CONFIG_COMPACTION
++	rwlock_t lock;
++#endif
+ };
+ 
+ struct mapping_area {
+@@ -274,6 +305,28 @@ struct mapping_area {
+ 	enum zs_mapmode vm_mm; /* mapping mode */
+ };
+ 
++#ifdef CONFIG_COMPACTION
++static int zs_register_migration(struct zs_pool *pool);
++static void zs_unregister_migration(struct zs_pool *pool);
++static void migrate_lock_init(struct zspage *zspage);
++static void migrate_read_lock(struct zspage *zspage);
++static void migrate_read_unlock(struct zspage *zspage);
++static void kick_deferred_free(struct zs_pool *pool);
++static void init_deferred_free(struct zs_pool *pool);
++static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage);
++#else
++static int zsmalloc_mount(void) { return 0; }
++static void zsmalloc_unmount(void) {}
++static int zs_register_migration(struct zs_pool *pool) { return 0; }
++static void zs_unregister_migration(struct zs_pool *pool) {}
++static void migrate_lock_init(struct zspage *zspage) {}
++static void migrate_read_lock(struct zspage *zspage) {}
++static void migrate_read_unlock(struct zspage *zspage) {}
++static void kick_deferred_free(struct zs_pool *pool) {}
++static void init_deferred_free(struct zs_pool *pool) {}
++static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}
++#endif
++
+ static int create_cache(struct zs_pool *pool)
+ {
+ 	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
+@@ -301,7 +354,7 @@ static void destroy_cache(struct zs_pool *pool)
+ static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
+ {
+ 	return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
+-			gfp & ~__GFP_HIGHMEM);
++			gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+ }
+ 
+ static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
+@@ -311,7 +364,8 @@ static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
+ 
+ static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags)
+ {
+-	return kmem_cache_alloc(pool->zspage_cachep, flags & ~__GFP_HIGHMEM);
++	return kmem_cache_alloc(pool->zspage_cachep,
++			flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+ };
+ 
+ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
+@@ -421,11 +475,17 @@ static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage)
+ /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
+ static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
+ 
++static bool is_zspage_isolated(struct zspage *zspage)
++{
++	return zspage->isolated;
++}
++
+ static int is_first_page(struct page *page)
+ {
+ 	return PagePrivate(page);
+ }
+ 
++/* Protected by class->lock */
+ static inline int get_zspage_inuse(struct zspage *zspage)
+ {
+ 	return zspage->inuse;
+@@ -441,20 +501,12 @@ static inline void mod_zspage_inuse(struct zspage *zspage, int val)
+ 	zspage->inuse += val;
+ }
+ 
+-static inline int get_first_obj_offset(struct page *page)
++static inline struct page *get_first_page(struct zspage *zspage)
+ {
+-	if (is_first_page(page))
+-		return 0;
++	struct page *first_page = zspage->first_page;
+ 
+-	return page->index;
+-}
+-
+-static inline void set_first_obj_offset(struct page *page, int offset)
+-{
+-	if (is_first_page(page))
+-		return;
+-
+-	page->index = offset;
++	VM_BUG_ON_PAGE(!is_first_page(first_page), first_page);
++	return first_page;
+ }
+ 
+ static inline unsigned int get_freeobj(struct zspage *zspage)
+@@ -471,6 +523,8 @@ static void get_zspage_mapping(struct zspage *zspage,
+ 				unsigned int *class_idx,
+ 				enum fullness_group *fullness)
+ {
++	VM_BUG_ON(zspage->magic != ZSPAGE_MAGIC);
++
+ 	*fullness = zspage->fullness;
+ 	*class_idx = zspage->class;
+ }
+@@ -504,23 +558,19 @@ static int get_size_class_index(int size)
+ static inline void zs_stat_inc(struct size_class *class,
+ 				enum zs_stat_type type, unsigned long cnt)
+ {
+-	if (type < NR_ZS_STAT_TYPE)
+-		class->stats.objs[type] += cnt;
++	class->stats.objs[type] += cnt;
+ }
+ 
+ static inline void zs_stat_dec(struct size_class *class,
+ 				enum zs_stat_type type, unsigned long cnt)
+ {
+-	if (type < NR_ZS_STAT_TYPE)
+-		class->stats.objs[type] -= cnt;
++	class->stats.objs[type] -= cnt;
+ }
+ 
+ static inline unsigned long zs_stat_get(struct size_class *class,
+ 				enum zs_stat_type type)
+ {
+-	if (type < NR_ZS_STAT_TYPE)
+-		return class->stats.objs[type];
+-	return 0;
++	return class->stats.objs[type];
+ }
+ 
+ #ifdef CONFIG_ZSMALLOC_STAT
+@@ -664,6 +714,7 @@ static inline void zs_pool_stat_destroy(struct zs_pool *pool)
+ }
+ #endif
+ 
++
+ /*
+  * For each size class, zspages are divided into different groups
+  * depending on how "full" they are. This was done so that we could
+@@ -704,15 +755,9 @@ static void insert_zspage(struct size_class *class,
+ {
+ 	struct zspage *head;
+ 
+-	if (fullness >= ZS_EMPTY)
+-		return;
+-
++	zs_stat_inc(class, fullness, 1);
+ 	head = list_first_entry_or_null(&class->fullness_list[fullness],
+ 					struct zspage, list);
+-
+-	zs_stat_inc(class, fullness == ZS_ALMOST_EMPTY ?
+-			CLASS_ALMOST_EMPTY : CLASS_ALMOST_FULL, 1);
+-
+ 	/*
+ 	 * We want to see more ZS_FULL pages and less almost empty/full.
+ 	 * Put pages with higher ->inuse first.
+@@ -734,14 +779,11 @@ static void remove_zspage(struct size_class *class,
+ 				struct zspage *zspage,
+ 				enum fullness_group fullness)
+ {
+-	if (fullness >= ZS_EMPTY)
+-		return;
+-
+ 	VM_BUG_ON(list_empty(&class->fullness_list[fullness]));
++	VM_BUG_ON(is_zspage_isolated(zspage));
+ 
+ 	list_del_init(&zspage->list);
+-	zs_stat_dec(class, fullness == ZS_ALMOST_EMPTY ?
+-			CLASS_ALMOST_EMPTY : CLASS_ALMOST_FULL, 1);
++	zs_stat_dec(class, fullness, 1);
+ }
+ 
+ /*
+@@ -764,8 +806,11 @@ static enum fullness_group fix_fullness_group(struct size_class *class,
+ 	if (newfg == currfg)
+ 		goto out;
+ 
+-	remove_zspage(class, zspage, currfg);
+-	insert_zspage(class, zspage, newfg);
++	if (!is_zspage_isolated(zspage)) {
++		remove_zspage(class, zspage, currfg);
++		insert_zspage(class, zspage, newfg);
++	}
++
+ 	set_zspage_mapping(zspage, class_idx, newfg);
+ 
+ out:
+@@ -808,19 +853,45 @@ static int get_pages_per_zspage(int class_size)
+ 	return max_usedpc_order;
+ }
+ 
+-static struct page *get_first_page(struct zspage *zspage)
++static struct zspage *get_zspage(struct page *page)
+ {
+-	return zspage->first_page;
++	struct zspage *zspage = (struct zspage *)page->private;
++
++	VM_BUG_ON(zspage->magic != ZSPAGE_MAGIC);
++	return zspage;
+ }
+ 
+-static struct zspage *get_zspage(struct page *page)
++static struct page *get_next_page(struct page *page)
+ {
+-	return (struct zspage *)page->private;
++	if (unlikely(PageHugeObject(page)))
++		return NULL;
++
++	return page->freelist;
+ }
+ 
+-static struct page *get_next_page(struct page *page)
++/* Get byte offset of first object in the @page */
++static int get_first_obj_offset(struct size_class *class,
++				struct page *first_page, struct page *page)
+ {
+-	return page->next;
++	int pos;
++	int page_idx = 0;
++	int ofs = 0;
++	struct page *cursor = first_page;
++
++	if (first_page == page)
++		goto out;
++
++	while (page != cursor) {
++		page_idx++;
++		cursor = get_next_page(cursor);
++	}
++
++	pos = class->objs_per_zspage * class->size *
++		page_idx / class->pages_per_zspage;
++
++	ofs = (pos + class->size) % PAGE_SIZE;
++out:
++	return ofs;
+ }
+ 
+ /**
+@@ -857,16 +928,20 @@ static unsigned long handle_to_obj(unsigned long handle)
+ 	return *(unsigned long *)handle;
+ }
+ 
+-static unsigned long obj_to_head(struct size_class *class, struct page *page,
+-			void *obj)
++static unsigned long obj_to_head(struct page *page, void *obj)
+ {
+-	if (class->huge) {
++	if (unlikely(PageHugeObject(page))) {
+ 		VM_BUG_ON_PAGE(!is_first_page(page), page);
+ 		return page->index;
+ 	} else
+ 		return *(unsigned long *)obj;
+ }
+ 
++static inline int testpin_tag(unsigned long handle)
++{
++	return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
++}
++
+ static inline int trypin_tag(unsigned long handle)
+ {
+ 	return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
+@@ -884,27 +959,93 @@ static void unpin_tag(unsigned long handle)
+ 
+ static void reset_page(struct page *page)
+ {
++	__ClearPageMovable(page);
+ 	clear_bit(PG_private, &page->flags);
+ 	clear_bit(PG_private_2, &page->flags);
+ 	set_page_private(page, 0);
+-	page->index = 0;
++	ClearPageHugeObject(page);
++	page->freelist = NULL;
+ }
+ 
+-static void free_zspage(struct zs_pool *pool, struct zspage *zspage)
++/*
++ * To prevent zspage destroy during migration, zspage freeing should
++ * hold locks of all pages in the zspage.
++ */
++void lock_zspage(struct zspage *zspage)
++{
++	struct page *page = get_first_page(zspage);
++
++	do {
++		lock_page(page);
++	} while ((page = get_next_page(page)) != NULL);
++}
++
++int trylock_zspage(struct zspage *zspage)
++{
++	struct page *cursor, *fail;
++
++	for (cursor = get_first_page(zspage); cursor != NULL; cursor =
++					get_next_page(cursor)) {
++		if (!trylock_page(cursor)) {
++			fail = cursor;
++			goto unlock;
++		}
++	}
++
++	return 1;
++unlock:
++	for (cursor = get_first_page(zspage); cursor != fail; cursor =
++					get_next_page(cursor))
++		unlock_page(cursor);
++
++	return 0;
++}
++
++static void __free_zspage(struct zs_pool *pool, struct size_class *class,
++				struct zspage *zspage)
+ {
+ 	struct page *page, *next;
++	enum fullness_group fg;
++	unsigned int class_idx;
++
++	get_zspage_mapping(zspage, &class_idx, &fg);
++
++	assert_spin_locked(&class->lock);
+ 
+ 	VM_BUG_ON(get_zspage_inuse(zspage));
++	VM_BUG_ON(fg != ZS_EMPTY);
+ 
+-	next = page = zspage->first_page;
++	next = page = get_first_page(zspage);
+ 	do {
+-		next = page->next;
++		VM_BUG_ON_PAGE(!PageLocked(page), page);
++		next = get_next_page(page);
+ 		reset_page(page);
++		unlock_page(page);
+ 		put_page(page);
+ 		page = next;
+ 	} while (page != NULL);
+ 
+ 	cache_free_zspage(pool, zspage);
++
++	zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage(
++			class->size, class->pages_per_zspage));
++	atomic_long_sub(class->pages_per_zspage,
++					&pool->pages_allocated);
++}
++
++static void free_zspage(struct zs_pool *pool, struct size_class *class,
++				struct zspage *zspage)
++{
++	VM_BUG_ON(get_zspage_inuse(zspage));
++	VM_BUG_ON(list_empty(&zspage->list));
++
++	if (!trylock_zspage(zspage)) {
++		kick_deferred_free(pool);
++		return;
++	}
++
++	remove_zspage(class, zspage, ZS_EMPTY);
++	__free_zspage(pool, class, zspage);
+ }
+ 
+ /* Initialize a newly allocated zspage */
+@@ -912,15 +1053,13 @@ static void init_zspage(struct size_class *class, struct zspage *zspage)
+ {
+ 	unsigned int freeobj = 1;
+ 	unsigned long off = 0;
+-	struct page *page = zspage->first_page;
++	struct page *page = get_first_page(zspage);
+ 
+ 	while (page) {
+ 		struct page *next_page;
+ 		struct link_free *link;
+ 		void *vaddr;
+ 
+-		set_first_obj_offset(page, off);
+-
+ 		vaddr = kmap_atomic(page);
+ 		link = (struct link_free *)vaddr + off / sizeof(*link);
+ 
+@@ -952,16 +1091,17 @@ static void init_zspage(struct size_class *class, struct zspage *zspage)
+ 	set_freeobj(zspage, 0);
+ }
+ 
+-static void create_page_chain(struct zspage *zspage, struct page *pages[],
+-				int nr_pages)
++static void create_page_chain(struct size_class *class, struct zspage *zspage,
++				struct page *pages[])
+ {
+ 	int i;
+ 	struct page *page;
+ 	struct page *prev_page = NULL;
++	int nr_pages = class->pages_per_zspage;
+ 
+ 	/*
+ 	 * Allocate individual pages and link them together as:
+-	 * 1. all pages are linked together using page->next
++	 * 1. all pages are linked together using page->freelist
+ 	 * 2. each sub-page point to zspage using page->private
+ 	 *
+ 	 * we set PG_private to identify the first page (i.e. no other sub-page
+@@ -970,16 +1110,18 @@ static void create_page_chain(struct zspage *zspage, struct page *pages[],
+ 	for (i = 0; i < nr_pages; i++) {
+ 		page = pages[i];
+ 		set_page_private(page, (unsigned long)zspage);
++		page->freelist = NULL;
+ 		if (i == 0) {
+ 			zspage->first_page = page;
+ 			SetPagePrivate(page);
++			if (unlikely(class->objs_per_zspage == 1 &&
++					class->pages_per_zspage == 1))
++				SetPageHugeObject(page);
+ 		} else {
+-			prev_page->next = page;
++			prev_page->freelist = page;
+ 		}
+-		if (i == nr_pages - 1) {
++		if (i == nr_pages - 1)
+ 			SetPagePrivate2(page);
+-			page->next = NULL;
+-		}
+ 		prev_page = page;
+ 	}
+ }
+@@ -999,6 +1141,8 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,
+ 		return NULL;
+ 
+ 	memset(zspage, 0, sizeof(struct zspage));
++	zspage->magic = ZSPAGE_MAGIC;
++	migrate_lock_init(zspage);
+ 
+ 	for (i = 0; i < class->pages_per_zspage; i++) {
+ 		struct page *page;
+@@ -1013,7 +1157,7 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,
+ 		pages[i] = page;
+ 	}
+ 
+-	create_page_chain(zspage, pages, class->pages_per_zspage);
++	create_page_chain(class, zspage, pages);
+ 	init_zspage(class, zspage);
+ 
+ 	return zspage;
+@@ -1024,7 +1168,7 @@ static struct zspage *find_get_zspage(struct size_class *class)
+ 	int i;
+ 	struct zspage *zspage;
+ 
+-	for (i = ZS_ALMOST_FULL; i <= ZS_ALMOST_EMPTY; i++) {
++	for (i = ZS_ALMOST_FULL; i >= ZS_EMPTY; i--) {
+ 		zspage = list_first_entry_or_null(&class->fullness_list[i],
+ 				struct zspage, list);
+ 		if (zspage)
+@@ -1289,6 +1433,10 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
+ 	obj = handle_to_obj(handle);
+ 	obj_to_location(obj, &page, &obj_idx);
+ 	zspage = get_zspage(page);
++
++	/* migration cannot move any subpage in this zspage */
++	migrate_read_lock(zspage);
++
+ 	get_zspage_mapping(zspage, &class_idx, &fg);
+ 	class = pool->size_class[class_idx];
+ 	off = (class->size * obj_idx) & ~PAGE_MASK;
+@@ -1309,7 +1457,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
+ 
+ 	ret = __zs_map_object(area, pages, off, class->size);
+ out:
+-	if (!class->huge)
++	if (likely(!PageHugeObject(page)))
+ 		ret += ZS_HANDLE_SIZE;
+ 
+ 	return ret;
+@@ -1348,6 +1496,8 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
+ 		__zs_unmap_object(area, pages, off, class->size);
+ 	}
+ 	put_cpu_var(zs_map_area);
++
++	migrate_read_unlock(zspage);
+ 	unpin_tag(handle);
+ }
+ EXPORT_SYMBOL_GPL(zs_unmap_object);
+@@ -1377,7 +1527,7 @@ static unsigned long obj_malloc(struct size_class *class,
+ 	vaddr = kmap_atomic(m_page);
+ 	link = (struct link_free *)vaddr + m_offset / sizeof(*link);
+ 	set_freeobj(zspage, link->next >> OBJ_ALLOCATED_TAG);
+-	if (!class->huge)
++	if (likely(!PageHugeObject(m_page)))
+ 		/* record handle in the header of allocated chunk */
+ 		link->handle = handle;
+ 	else
+@@ -1407,6 +1557,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
+ {
+ 	unsigned long handle, obj;
+ 	struct size_class *class;
++	enum fullness_group newfg;
+ 	struct zspage *zspage;
+ 
+ 	if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))
+@@ -1422,28 +1573,37 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
+ 
+ 	spin_lock(&class->lock);
+ 	zspage = find_get_zspage(class);
+-
+-	if (!zspage) {
++	if (likely(zspage)) {
++		obj = obj_malloc(class, zspage, handle);
++		/* Now move the zspage to another fullness group, if required */
++		fix_fullness_group(class, zspage);
++		record_obj(handle, obj);
+ 		spin_unlock(&class->lock);
+-		zspage = alloc_zspage(pool, class, gfp);
+-		if (unlikely(!zspage)) {
+-			cache_free_handle(pool, handle);
+-			return 0;
+-		}
+ 
+-		set_zspage_mapping(zspage, class->index, ZS_EMPTY);
+-		atomic_long_add(class->pages_per_zspage,
+-					&pool->pages_allocated);
++		return handle;
++	}
+ 
+-		spin_lock(&class->lock);
+-		zs_stat_inc(class, OBJ_ALLOCATED, get_maxobj_per_zspage(
+-				class->size, class->pages_per_zspage));
++	spin_unlock(&class->lock);
++
++	zspage = alloc_zspage(pool, class, gfp);
++	if (!zspage) {
++		cache_free_handle(pool, handle);
++		return 0;
+ 	}
+ 
++	spin_lock(&class->lock);
+ 	obj = obj_malloc(class, zspage, handle);
+-	/* Now move the zspage to another fullness group, if required */
+-	fix_fullness_group(class, zspage);
++	newfg = get_fullness_group(class, zspage);
++	insert_zspage(class, zspage, newfg);
++	set_zspage_mapping(zspage, class->index, newfg);
+ 	record_obj(handle, obj);
++	atomic_long_add(class->pages_per_zspage,
++				&pool->pages_allocated);
++	zs_stat_inc(class, OBJ_ALLOCATED, get_maxobj_per_zspage(
++			class->size, class->pages_per_zspage));
++
++	/* We completely set up zspage so mark them as movable */
++	SetZsPageMovable(pool, zspage);
+ 	spin_unlock(&class->lock);
+ 
+ 	return handle;
+@@ -1484,6 +1644,7 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
+ 	int class_idx;
+ 	struct size_class *class;
+ 	enum fullness_group fullness;
++	bool isolated;
+ 
+ 	if (unlikely(!handle))
+ 		return;
+@@ -1493,22 +1654,28 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
+ 	obj_to_location(obj, &f_page, &f_objidx);
+ 	zspage = get_zspage(f_page);
+ 
++	migrate_read_lock(zspage);
++
+ 	get_zspage_mapping(zspage, &class_idx, &fullness);
+ 	class = pool->size_class[class_idx];
+ 
+ 	spin_lock(&class->lock);
+ 	obj_free(class, obj);
+ 	fullness = fix_fullness_group(class, zspage);
+-	if (fullness == ZS_EMPTY) {
+-		zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage(
+-				class->size, class->pages_per_zspage));
+-		atomic_long_sub(class->pages_per_zspage,
+-				&pool->pages_allocated);
+-		free_zspage(pool, zspage);
++	if (fullness != ZS_EMPTY) {
++		migrate_read_unlock(zspage);
++		goto out;
+ 	}
++
++	isolated = is_zspage_isolated(zspage);
++	migrate_read_unlock(zspage);
++	/* If zspage is isolated, zs_page_putback will free the zspage */
++	if (likely(!isolated))
++		free_zspage(pool, class, zspage);
++out:
++
+ 	spin_unlock(&class->lock);
+ 	unpin_tag(handle);
+-
+ 	cache_free_handle(pool, handle);
+ }
+ EXPORT_SYMBOL_GPL(zs_free);
+@@ -1587,12 +1754,13 @@ static unsigned long find_alloced_obj(struct size_class *class,
+ 	int offset = 0;
+ 	unsigned long handle = 0;
+ 	void *addr = kmap_atomic(page);
++	struct zspage *zspage = get_zspage(page);
+ 
+-	offset = get_first_obj_offset(page);
++	offset = get_first_obj_offset(class, get_first_page(zspage), page);
+ 	offset += class->size * index;
+ 
+ 	while (offset < PAGE_SIZE) {
+-		head = obj_to_head(class, page, addr + offset);
++		head = obj_to_head(page, addr + offset);
+ 		if (head & OBJ_ALLOCATED_TAG) {
+ 			handle = head & ~OBJ_ALLOCATED_TAG;
+ 			if (trypin_tag(handle))
+@@ -1684,6 +1852,7 @@ static struct zspage *isolate_zspage(struct size_class *class, bool source)
+ 		zspage = list_first_entry_or_null(&class->fullness_list[fg[i]],
+ 							struct zspage, list);
+ 		if (zspage) {
++			VM_BUG_ON(is_zspage_isolated(zspage));
+ 			remove_zspage(class, zspage, fg[i]);
+ 			return zspage;
+ 		}
+@@ -1704,6 +1873,8 @@ static enum fullness_group putback_zspage(struct size_class *class,
+ {
+ 	enum fullness_group fullness;
+ 
++	VM_BUG_ON(is_zspage_isolated(zspage));
++
+ 	fullness = get_fullness_group(class, zspage);
+ 	insert_zspage(class, zspage, fullness);
+ 	set_zspage_mapping(zspage, class->index, fullness);
+@@ -1711,6 +1882,377 @@ static enum fullness_group putback_zspage(struct size_class *class,
+ 	return fullness;
+ }
+ 
++#ifdef CONFIG_COMPACTION
++static struct dentry *zs_mount(struct file_system_type *fs_type,
++				int flags, const char *dev_name, void *data)
++{
++	static const struct dentry_operations ops = {
++		.d_dname = simple_dname,
++	};
++
++	return mount_pseudo(fs_type, "zsmalloc:", NULL, &ops, ZSMALLOC_MAGIC);
++}
++
++static struct file_system_type zsmalloc_fs = {
++	.name		= "zsmalloc",
++	.mount		= zs_mount,
++	.kill_sb	= kill_anon_super,
++};
++
++static int zsmalloc_mount(void)
++{
++	int ret = 0;
++
++	zsmalloc_mnt = kern_mount(&zsmalloc_fs);
++	if (IS_ERR(zsmalloc_mnt))
++		ret = PTR_ERR(zsmalloc_mnt);
++
++	return ret;
++}
++
++static void zsmalloc_unmount(void)
++{
++	kern_unmount(zsmalloc_mnt);
++}
++
++static void migrate_lock_init(struct zspage *zspage)
++{
++	rwlock_init(&zspage->lock);
++}
++
++static void migrate_read_lock(struct zspage *zspage)
++{
++	read_lock(&zspage->lock);
++}
++
++static void migrate_read_unlock(struct zspage *zspage)
++{
++	read_unlock(&zspage->lock);
++}
++
++static void migrate_write_lock(struct zspage *zspage)
++{
++	write_lock(&zspage->lock);
++}
++
++static void migrate_write_unlock(struct zspage *zspage)
++{
++	write_unlock(&zspage->lock);
++}
++
++/* Number of isolated subpage for *page migration* in this zspage */
++static void inc_zspage_isolation(struct zspage *zspage)
++{
++	zspage->isolated++;
++}
++
++static void dec_zspage_isolation(struct zspage *zspage)
++{
++	zspage->isolated--;
++}
++
++static void replace_sub_page(struct size_class *class, struct zspage *zspage,
++				struct page *newpage, struct page *oldpage)
++{
++	struct page *page;
++	struct page *pages[ZS_MAX_PAGES_PER_ZSPAGE] = {NULL, };
++	int idx = 0;
++
++	page = get_first_page(zspage);
++	do {
++		if (page == oldpage)
++			pages[idx] = newpage;
++		else
++			pages[idx] = page;
++		idx++;
++	} while ((page = get_next_page(page)) != NULL);
++
++	create_page_chain(class, zspage, pages);
++	if (unlikely(PageHugeObject(oldpage)))
++		newpage->index = oldpage->index;
++	__SetPageMovable(newpage, page_mapping(oldpage));
++}
++
++bool zs_page_isolate(struct page *page, isolate_mode_t mode)
++{
++	struct zs_pool *pool;
++	struct size_class *class;
++	int class_idx;
++	enum fullness_group fullness;
++	struct zspage *zspage;
++	struct address_space *mapping;
++
++	/*
++	 * Page is locked so zspage couldn't be destroyed. For detail, look at
++	 * lock_zspage in free_zspage.
++	 */
++	VM_BUG_ON_PAGE(!PageMovable(page), page);
++	VM_BUG_ON_PAGE(PageIsolated(page), page);
++
++	zspage = get_zspage(page);
++
++	/*
++	 * Without class lock, fullness could be stale while class_idx is okay
++	 * because class_idx is constant unless page is freed so we should get
++	 * fullness again under class lock.
++	 */
++	get_zspage_mapping(zspage, &class_idx, &fullness);
++	mapping = page_mapping(page);
++	pool = mapping->private_data;
++	class = pool->size_class[class_idx];
++
++	spin_lock(&class->lock);
++	if (get_zspage_inuse(zspage) == 0) {
++		spin_unlock(&class->lock);
++		return false;
++	}
++
++	/* zspage is isolated for object migration */
++	if (list_empty(&zspage->list) && !is_zspage_isolated(zspage)) {
++		spin_unlock(&class->lock);
++		return false;
++	}
++
++	/*
++	 * If this is first time isolation for the zspage, isolate zspage from
++	 * size_class to prevent further object allocation from the zspage.
++	 */
++	if (!list_empty(&zspage->list) && !is_zspage_isolated(zspage)) {
++		get_zspage_mapping(zspage, &class_idx, &fullness);
++		remove_zspage(class, zspage, fullness);
++	}
++
++	inc_zspage_isolation(zspage);
++	spin_unlock(&class->lock);
++
++	return true;
++}
++
++int zs_page_migrate(struct address_space *mapping, struct page *newpage,
++		struct page *page, enum migrate_mode mode)
++{
++	struct zs_pool *pool;
++	struct size_class *class;
++	int class_idx;
++	enum fullness_group fullness;
++	struct zspage *zspage;
++	struct page *dummy;
++	void *s_addr, *d_addr, *addr;
++	int offset, pos;
++	unsigned long handle, head;
++	unsigned long old_obj, new_obj;
++	unsigned int obj_idx;
++	int ret = -EAGAIN;
++
++	VM_BUG_ON_PAGE(!PageMovable(page), page);
++	VM_BUG_ON_PAGE(!PageIsolated(page), page);
++
++	zspage = get_zspage(page);
++
++	/* Concurrent compactor cannot migrate any subpage in zspage */
++	migrate_write_lock(zspage);
++	get_zspage_mapping(zspage, &class_idx, &fullness);
++	pool = mapping->private_data;
++	class = pool->size_class[class_idx];
++	offset = get_first_obj_offset(class, get_first_page(zspage), page);
++
++	spin_lock(&class->lock);
++	if (!get_zspage_inuse(zspage)) {
++		ret = -EBUSY;
++		goto unlock_class;
++	}
++
++	pos = offset;
++	s_addr = kmap_atomic(page);
++	while (pos < PAGE_SIZE) {
++		head = obj_to_head(page, s_addr + pos);
++		if (head & OBJ_ALLOCATED_TAG) {
++			handle = head & ~OBJ_ALLOCATED_TAG;
++			if (!trypin_tag(handle))
++				goto unpin_objects;
++		}
++		pos += class->size;
++	}
++
++	/*
++	 * Here, no user can access the objects pinned above so let's move.
++	 */
++	d_addr = kmap_atomic(newpage);
++	memcpy(d_addr, s_addr, PAGE_SIZE);
++	kunmap_atomic(d_addr);
++
++	for (addr = s_addr + offset; addr < s_addr + pos;
++					addr += class->size) {
++		head = obj_to_head(page, addr);
++		if (head & OBJ_ALLOCATED_TAG) {
++			handle = head & ~OBJ_ALLOCATED_TAG;
++			if (!testpin_tag(handle))
++				BUG();
++
++			old_obj = handle_to_obj(handle);
++			obj_to_location(old_obj, &dummy, &obj_idx);
++			new_obj = (unsigned long)location_to_obj(newpage,
++								obj_idx);
++			new_obj |= BIT(HANDLE_PIN_BIT);
++			record_obj(handle, new_obj);
++		}
++	}
++
++	replace_sub_page(class, zspage, newpage, page);
++	get_page(newpage);
++
++	dec_zspage_isolation(zspage);
++
++	/*
++	 * Page migration is done, so put the isolated zspage back on
++	 * the list if @page was the final isolated subpage in the zspage.
++	 */
++	if (!is_zspage_isolated(zspage))
++		putback_zspage(class, zspage);
++
++	reset_page(page);
++	put_page(page);
++	page = newpage;
++
++	ret = 0;
++unpin_objects:
++	for (addr = s_addr + offset; addr < s_addr + pos;
++						addr += class->size) {
++		head = obj_to_head(page, addr);
++		if (head & OBJ_ALLOCATED_TAG) {
++			handle = head & ~OBJ_ALLOCATED_TAG;
++			if (!testpin_tag(handle))
++				BUG();
++			unpin_tag(handle);
++		}
++	}
++	kunmap_atomic(s_addr);
++unlock_class:
++	spin_unlock(&class->lock);
++	migrate_write_unlock(zspage);
++
++	return ret;
++}
++
++void zs_page_putback(struct page *page)
++{
++	struct zs_pool *pool;
++	struct size_class *class;
++	int class_idx;
++	enum fullness_group fg;
++	struct address_space *mapping;
++	struct zspage *zspage;
++
++	VM_BUG_ON_PAGE(!PageMovable(page), page);
++	VM_BUG_ON_PAGE(!PageIsolated(page), page);
++
++	zspage = get_zspage(page);
++	get_zspage_mapping(zspage, &class_idx, &fg);
++	mapping = page_mapping(page);
++	pool = mapping->private_data;
++	class = pool->size_class[class_idx];
++
++	spin_lock(&class->lock);
++	dec_zspage_isolation(zspage);
++	if (!is_zspage_isolated(zspage)) {
++		fg = putback_zspage(class, zspage);
++		/*
++		 * Due to page_lock, we cannot free zspage immediately
++		 * so let's defer.
++		 */
++		if (fg == ZS_EMPTY)
++			schedule_work(&pool->free_work);
++	}
++	spin_unlock(&class->lock);
++}
++
++const struct address_space_operations zsmalloc_aops = {
++	.isolate_page = zs_page_isolate,
++	.migratepage = zs_page_migrate,
++	.putback_page = zs_page_putback,
++};
++
++static int zs_register_migration(struct zs_pool *pool)
++{
++	pool->inode = alloc_anon_inode(zsmalloc_mnt->mnt_sb);
++	if (IS_ERR(pool->inode)) {
++		pool->inode = NULL;
++		return 1;
++	}
++
++	pool->inode->i_mapping->private_data = pool;
++	pool->inode->i_mapping->a_ops = &zsmalloc_aops;
++	return 0;
++}
++
++static void zs_unregister_migration(struct zs_pool *pool)
++{
++	flush_work(&pool->free_work);
++	if (pool->inode)
++		iput(pool->inode);
++}
++
++/*
++ * Caller should hold page_lock of all pages in the zspage
++ * In here, we cannot use zspage meta data.
++ */
++static void async_free_zspage(struct work_struct *work)
++{
++	int i;
++	struct size_class *class;
++	unsigned int class_idx;
++	enum fullness_group fullness;
++	struct zspage *zspage, *tmp;
++	LIST_HEAD(free_pages);
++	struct zs_pool *pool = container_of(work, struct zs_pool,
++					free_work);
++
++	for (i = 0; i < zs_size_classes; i++) {
++		class = pool->size_class[i];
++		if (class->index != i)
++			continue;
++
++		spin_lock(&class->lock);
++		list_splice_init(&class->fullness_list[ZS_EMPTY], &free_pages);
++		spin_unlock(&class->lock);
++	}
++
++
++	list_for_each_entry_safe(zspage, tmp, &free_pages, list) {
++		list_del(&zspage->list);
++		lock_zspage(zspage);
++
++		get_zspage_mapping(zspage, &class_idx, &fullness);
++		VM_BUG_ON(fullness != ZS_EMPTY);
++		class = pool->size_class[class_idx];
++		spin_lock(&class->lock);
++		__free_zspage(pool, pool->size_class[class_idx], zspage);
++		spin_unlock(&class->lock);
++	}
++};
++
++static void kick_deferred_free(struct zs_pool *pool)
++{
++	schedule_work(&pool->free_work);
++}
++
++static void init_deferred_free(struct zs_pool *pool)
++{
++	INIT_WORK(&pool->free_work, async_free_zspage);
++}
++
++static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage)
++{
++	struct page *page = get_first_page(zspage);
++
++	do {
++		WARN_ON(!trylock_page(page));
++		__SetPageMovable(page, pool->inode->i_mapping);
++		unlock_page(page);
++	} while ((page = get_next_page(page)) != NULL);
++}
++#endif
++
+ /*
+  *
+  * Based on the number of unused allocated objects calculate
+@@ -1745,10 +2287,10 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class)
+ 			break;
+ 
+ 		cc.index = 0;
+-		cc.s_page = src_zspage->first_page;
++		cc.s_page = get_first_page(src_zspage);
+ 
+ 		while ((dst_zspage = isolate_zspage(class, false))) {
+-			cc.d_page = dst_zspage->first_page;
++			cc.d_page = get_first_page(dst_zspage);
+ 			/*
+ 			 * If there is no more space in dst_page, resched
+ 			 * and see if anyone had allocated another zspage.
+@@ -1765,11 +2307,7 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class)
+ 
+ 		putback_zspage(class, dst_zspage);
+ 		if (putback_zspage(class, src_zspage) == ZS_EMPTY) {
+-			zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage(
+-					class->size, class->pages_per_zspage));
+-			atomic_long_sub(class->pages_per_zspage,
+-					&pool->pages_allocated);
+-			free_zspage(pool, src_zspage);
++			free_zspage(pool, class, src_zspage);
+ 			pool->stats.pages_compacted += class->pages_per_zspage;
+ 		}
+ 		spin_unlock(&class->lock);
+@@ -1885,6 +2423,7 @@ struct zs_pool *zs_create_pool(const char *name)
+ 	if (!pool)
+ 		return NULL;
+ 
++	init_deferred_free(pool);
+ 	pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *),
+ 			GFP_KERNEL);
+ 	if (!pool->size_class) {
+@@ -1939,12 +2478,10 @@ struct zs_pool *zs_create_pool(const char *name)
+ 		class->pages_per_zspage = pages_per_zspage;
+ 		class->objs_per_zspage = class->pages_per_zspage *
+ 						PAGE_SIZE / class->size;
+-		if (pages_per_zspage == 1 && class->objs_per_zspage == 1)
+-			class->huge = true;
+ 		spin_lock_init(&class->lock);
+ 		pool->size_class[i] = class;
+-		for (fullness = ZS_ALMOST_FULL; fullness <= ZS_ALMOST_EMPTY;
+-								fullness++)
++		for (fullness = ZS_EMPTY; fullness < NR_ZS_FULLNESS;
++							fullness++)
+ 			INIT_LIST_HEAD(&class->fullness_list[fullness]);
+ 
+ 		prev_class = class;
+@@ -1953,6 +2490,9 @@ struct zs_pool *zs_create_pool(const char *name)
+ 	/* debug only, don't abort if it fails */
+ 	zs_pool_stat_create(pool, name);
+ 
++	if (zs_register_migration(pool))
++		goto err;
++
+ 	/*
+ 	 * Not critical, we still can use the pool
+ 	 * and user can trigger compaction manually.
+@@ -1972,6 +2512,7 @@ void zs_destroy_pool(struct zs_pool *pool)
+ 	int i;
+ 
+ 	zs_unregister_shrinker(pool);
++	zs_unregister_migration(pool);
+ 	zs_pool_stat_destroy(pool);
+ 
+ 	for (i = 0; i < zs_size_classes; i++) {
+@@ -1984,7 +2525,7 @@ void zs_destroy_pool(struct zs_pool *pool)
+ 		if (class->index != i)
+ 			continue;
+ 
+-		for (fg = ZS_ALMOST_FULL; fg <= ZS_ALMOST_EMPTY; fg++) {
++		for (fg = ZS_EMPTY; fg < NR_ZS_FULLNESS; fg++) {
+ 			if (!list_empty(&class->fullness_list[fg])) {
+ 				pr_info("Freeing non-empty class with size %db, fullness group %d\n",
+ 					class->size, fg);
+@@ -2002,7 +2543,13 @@ EXPORT_SYMBOL_GPL(zs_destroy_pool);
+ 
+ static int __init zs_init(void)
+ {
+-	int ret = zs_register_cpu_notifier();
++	int ret;
++
++	ret = zsmalloc_mount();
++	if (ret)
++		goto out;
++
++	ret = zs_register_cpu_notifier();
+ 
+ 	if (ret)
+ 		goto notifier_fail;
+@@ -2019,7 +2566,8 @@ static int __init zs_init(void)
+ 
+ notifier_fail:
+ 	zs_unregister_cpu_notifier();
+-
++	zsmalloc_unmount();
++out:
+ 	return ret;
+ }
+ 
+@@ -2028,6 +2576,7 @@ static void __exit zs_exit(void)
+ #ifdef CONFIG_ZPOOL
+ 	zpool_unregister_driver(&zs_zpool_driver);
+ #endif
++	zsmalloc_unmount();
+ 	zs_unregister_cpu_notifier();
+ 
+ 	zs_stat_exit();
+-- 
+1.9.1
diff --git a/a/content_digest b/N1/content_digest
index f11822e..9e5d209 100644
--- a/a/content_digest
+++ b/N1/content_digest
@@ -4,11 +4,1383 @@
  "Subject\0[PATCH v6r2 11/12] zsmalloc: page migration support\0"
  "Date\0Fri, 27 May 2016 06:50:22 +0900\0"
  "To\0Andrew Morton <akpm@linux-foundation.org>\0"
- "Cc\0linux-mm@kvack.org"
-  linux-kernel@vger.kernel.org
+ "Cc\0<linux-mm@kvack.org>"
+  <linux-kernel@vger.kernel.org>
  " Sergey Senozhatsky <sergey.senozhatsky@gmail.com>\0"
  "\00:1\0"
  "b\0"
- Follow up Sergey's review
+ "Follow up Sergey's review\n"
+ "\n"
+ ">From 2deede28c91910a9d3493feae30bed507e72f213 Mon Sep 17 00:00:00 2001\n"
+ "From: Minchan Kim <minchan@kernel.org>\n"
+ "Date: Thu, 5 May 2016 00:01:03 +0900\n"
+ "Subject: [PATCH v6r2] zsmalloc: page migration support\n"
+ "\n"
+ "This patch introduces run-time migration feature for zspage.\n"
+ "\n"
+ "For migration, VM uses page.lru field so it would be better to not use\n"
+ "page.next field which is unified with page.lru for own purpose.\n"
+ "For that, firstly, we can get first object offset of the page via\n"
+ "runtime calculation instead of using page.index so we can use\n"
+ "page.index as link for page chaining instead of page.next.\n"
+ "\n"
+ "In case of huge object, it stores handle to page.index instead of\n"
+ "next link of page chaining because huge object doesn't need to next\n"
+ "link for page chaining. So get_next_page need to identify huge\n"
+ "object to return NULL. For it, this patch uses PG_owner_priv_1 flag\n"
+ "of the page flag.\n"
+ "\n"
+ "For migration, it supports three functions\n"
+ "\n"
+ "* zs_page_isolate\n"
+ "\n"
+ "It isolates a zspage which includes a subpage VM want to migrate\n"
+ "from class so anyone cannot allocate new object from the zspage.\n"
+ "\n"
+ "We could try to isolate a zspage by the number of subpage so\n"
+ "subsequent isolation trial of other subpage of the zspage shouldn't\n"
+ "fail. For that, we introduce zspage.isolated count. With that,\n"
+ "zs_page_isolate can know whether zspage is already isolated or not\n"
+ "for migration so if it is isolated for migration, subsequent\n"
+ "isolation trial can be successful without trying further isolation.\n"
+ "\n"
+ "* zs_page_migrate\n"
+ "\n"
+ "First of all, it holds write-side zspage->lock to prevent migrating other\n"
+ "subpages in zspage. Then, lock all objects in the page VM wants to migrate.\n"
+ "The reason we should lock all objects in the page is due to race between\n"
+ "zs_map_object and zs_page_migrate.\n"
+ "\n"
+ "zs_map_object\t\t\t\tzs_page_migrate\n"
+ "\n"
+ "pin_tag(handle)\n"
+ "obj = handle_to_obj(handle)\n"
+ "obj_to_location(obj, &page, &obj_idx);\n"
+ "\n"
+ "\t\t\t\t\twrite_lock(&zspage->lock)\n"
+ "\t\t\t\t\tif (!trypin_tag(handle))\n"
+ "\t\t\t\t\t\tgoto unpin_object\n"
+ "\n"
+ "zspage = get_zspage(page);\n"
+ "read_lock(&zspage->lock);\n"
+ "\n"
+ "If zs_page_migrate doesn't do trypin_tag, zs_map_object's page can\n"
+ "be stale due to migration, so it crashes.\n"
+ "\n"
+ "If it locks all of objects successfully, it copies content from\n"
+ "old page to new one, finally, create new zspage chain with new page.\n"
+ "And if it's last isolated subpage in the zspage, put the zspage back\n"
+ "to class.\n"
+ "\n"
+ "* zs_page_putback\n"
+ "\n"
+ "It returns isolated zspage to right fullness_group list if it fails to\n"
+ "migrate a page. If it finds a zspage is ZS_EMPTY, it queues zspage\n"
+ "freeing to workqueue. See below about async zspage freeing.\n"
+ "\n"
+ "This patch introduces asynchronous zspage free. The reason to need it\n"
+ "is we need page_lock to clear PG_movable but unfortunately,\n"
+ "zs_free path should be atomic so the approach is to try to grab page_lock.\n"
+ "If it got page_lock of all of pages successfully, it can free zspage\n"
+ "immediately. Otherwise, it queues free request and free zspage via\n"
+ "workqueue in process context.\n"
+ "\n"
+ "If zs_free finds the zspage is isolated when it tries to free zspage,\n"
+ "it delays the freeing until zs_page_putback finds it so it will\n"
+ "free the zspage finally.\n"
+ "\n"
+ "In this patch, we expand fullness_list from ZS_EMPTY to ZS_FULL.\n"
+ "First of all, it will use ZS_EMPTY list for delay freeing.\n"
+ "And adding the ZS_FULL list makes it possible to identify whether zspage is\n"
+ "isolated or not via list_empty(&zspage->list) test.\n"
+ "\n"
+ "Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>\n"
+ "Signed-off-by: Minchan Kim <minchan@kernel.org>\n"
+ "---\n"
+ " include/uapi/linux/magic.h |   1 +\n"
+ " mm/zsmalloc.c              | 793 ++++++++++++++++++++++++++++++++++++++-------\n"
+ " 2 files changed, 672 insertions(+), 122 deletions(-)\n"
+ "\n"
+ "diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h\n"
+ "index d829ce63529d..e398beac67b8 100644\n"
+ "--- a/include/uapi/linux/magic.h\n"
+ "+++ b/include/uapi/linux/magic.h\n"
+ "@@ -81,5 +81,6 @@\n"
+ " /* Since UDF 2.01 is ISO 13346 based... */\n"
+ " #define UDF_SUPER_MAGIC\t\t0x15013346\n"
+ " #define BALLOON_KVM_MAGIC\t0x13661366\n"
+ "+#define ZSMALLOC_MAGIC\t\t0x58295829\n"
+ " \n"
+ " #endif /* __LINUX_MAGIC_H__ */\n"
+ "diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c\n"
+ "index c6fb543cfb98..a80100db16d6 100644\n"
+ "--- a/mm/zsmalloc.c\n"
+ "+++ b/mm/zsmalloc.c\n"
+ "@@ -17,14 +17,14 @@\n"
+ "  *\n"
+ "  * Usage of struct page fields:\n"
+ "  *\tpage->private: points to zspage\n"
+ "- *\tpage->index: offset of the first object starting in this page.\n"
+ "- *\t\tFor the first page, this is always 0, so we use this field\n"
+ "- *\t\tto store handle for huge object.\n"
+ "- *\tpage->next: links together all component pages of a zspage\n"
+ "+ *\tpage->freelist(index): links together all component pages of a zspage\n"
+ "+ *\t\tFor the huge page, this is always 0, so we use this field\n"
+ "+ *\t\tto store handle.\n"
+ "  *\n"
+ "  * Usage of struct page flags:\n"
+ "  *\tPG_private: identifies the first component page\n"
+ "  *\tPG_private2: identifies the last component page\n"
+ "+ *\tPG_owner_priv_1: identifies the huge component page\n"
+ "  *\n"
+ "  */\n"
+ " \n"
+ "@@ -49,6 +49,11 @@\n"
+ " #include <linux/debugfs.h>\n"
+ " #include <linux/zsmalloc.h>\n"
+ " #include <linux/zpool.h>\n"
+ "+#include <linux/mount.h>\n"
+ "+#include <linux/compaction.h>\n"
+ "+#include <linux/pagemap.h>\n"
+ "+\n"
+ "+#define ZSPAGE_MAGIC\t0x58\n"
+ " \n"
+ " /*\n"
+ "  * This must be power of 2 and greater than of equal to sizeof(link_free).\n"
+ "@@ -136,25 +141,23 @@\n"
+ "  * We do not maintain any list for completely empty or full pages\n"
+ "  */\n"
+ " enum fullness_group {\n"
+ "-\tZS_ALMOST_FULL,\n"
+ "-\tZS_ALMOST_EMPTY,\n"
+ " \tZS_EMPTY,\n"
+ "-\tZS_FULL\n"
+ "+\tZS_ALMOST_EMPTY,\n"
+ "+\tZS_ALMOST_FULL,\n"
+ "+\tZS_FULL,\n"
+ "+\tNR_ZS_FULLNESS,\n"
+ " };\n"
+ " \n"
+ " enum zs_stat_type {\n"
+ "+\tCLASS_EMPTY,\n"
+ "+\tCLASS_ALMOST_EMPTY,\n"
+ "+\tCLASS_ALMOST_FULL,\n"
+ "+\tCLASS_FULL,\n"
+ " \tOBJ_ALLOCATED,\n"
+ " \tOBJ_USED,\n"
+ "-\tCLASS_ALMOST_FULL,\n"
+ "-\tCLASS_ALMOST_EMPTY,\n"
+ "+\tNR_ZS_STAT_TYPE,\n"
+ " };\n"
+ " \n"
+ "-#ifdef CONFIG_ZSMALLOC_STAT\n"
+ "-#define NR_ZS_STAT_TYPE\t(CLASS_ALMOST_EMPTY + 1)\n"
+ "-#else\n"
+ "-#define NR_ZS_STAT_TYPE\t(OBJ_USED + 1)\n"
+ "-#endif\n"
+ "-\n"
+ " struct zs_size_stat {\n"
+ " \tunsigned long objs[NR_ZS_STAT_TYPE];\n"
+ " };\n"
+ "@@ -163,6 +166,10 @@ struct zs_size_stat {\n"
+ " static struct dentry *zs_stat_root;\n"
+ " #endif\n"
+ " \n"
+ "+#ifdef CONFIG_COMPACTION\n"
+ "+static struct vfsmount *zsmalloc_mnt;\n"
+ "+#endif\n"
+ "+\n"
+ " /*\n"
+ "  * number of size_classes\n"
+ "  */\n"
+ "@@ -186,23 +193,36 @@ static const int fullness_threshold_frac = 4;\n"
+ " \n"
+ " struct size_class {\n"
+ " \tspinlock_t lock;\n"
+ "-\tstruct list_head fullness_list[2];\n"
+ "+\tstruct list_head fullness_list[NR_ZS_FULLNESS];\n"
+ " \t/*\n"
+ " \t * Size of objects stored in this class. Must be multiple\n"
+ " \t * of ZS_ALIGN.\n"
+ " \t */\n"
+ " \tint size;\n"
+ " \tint objs_per_zspage;\n"
+ "-\tunsigned int index;\n"
+ "-\n"
+ "-\tstruct zs_size_stat stats;\n"
+ "-\n"
+ " \t/* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */\n"
+ " \tint pages_per_zspage;\n"
+ "-\t/* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */\n"
+ "-\tbool huge;\n"
+ "+\n"
+ "+\tunsigned int index;\n"
+ "+\tstruct zs_size_stat stats;\n"
+ " };\n"
+ " \n"
+ "+/* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */\n"
+ "+static void SetPageHugeObject(struct page *page)\n"
+ "+{\n"
+ "+\tSetPageOwnerPriv1(page);\n"
+ "+}\n"
+ "+\n"
+ "+static void ClearPageHugeObject(struct page *page)\n"
+ "+{\n"
+ "+\tClearPageOwnerPriv1(page);\n"
+ "+}\n"
+ "+\n"
+ "+static int PageHugeObject(struct page *page)\n"
+ "+{\n"
+ "+\treturn PageOwnerPriv1(page);\n"
+ "+}\n"
+ "+\n"
+ " /*\n"
+ "  * Placed within free objects to form a singly linked list.\n"
+ "  * For every zspage, zspage->freeobj gives head of this list.\n"
+ "@@ -244,6 +264,10 @@ struct zs_pool {\n"
+ " #ifdef CONFIG_ZSMALLOC_STAT\n"
+ " \tstruct dentry *stat_dentry;\n"
+ " #endif\n"
+ "+#ifdef CONFIG_COMPACTION\n"
+ "+\tstruct inode *inode;\n"
+ "+\tstruct work_struct free_work;\n"
+ "+#endif\n"
+ " };\n"
+ " \n"
+ " /*\n"
+ "@@ -252,16 +276,23 @@ struct zs_pool {\n"
+ "  */\n"
+ " #define FULLNESS_BITS\t2\n"
+ " #define CLASS_BITS\t8\n"
+ "+#define ISOLATED_BITS\t3\n"
+ "+#define MAGIC_VAL_BITS\t8\n"
+ " \n"
+ " struct zspage {\n"
+ " \tstruct {\n"
+ " \t\tunsigned int fullness:FULLNESS_BITS;\n"
+ " \t\tunsigned int class:CLASS_BITS;\n"
+ "+\t\tunsigned int isolated:ISOLATED_BITS;\n"
+ "+\t\tunsigned int magic:MAGIC_VAL_BITS;\n"
+ " \t};\n"
+ " \tunsigned int inuse;\n"
+ " \tunsigned int freeobj;\n"
+ " \tstruct page *first_page;\n"
+ " \tstruct list_head list; /* fullness list */\n"
+ "+#ifdef CONFIG_COMPACTION\n"
+ "+\trwlock_t lock;\n"
+ "+#endif\n"
+ " };\n"
+ " \n"
+ " struct mapping_area {\n"
+ "@@ -274,6 +305,28 @@ struct mapping_area {\n"
+ " \tenum zs_mapmode vm_mm; /* mapping mode */\n"
+ " };\n"
+ " \n"
+ "+#ifdef CONFIG_COMPACTION\n"
+ "+static int zs_register_migration(struct zs_pool *pool);\n"
+ "+static void zs_unregister_migration(struct zs_pool *pool);\n"
+ "+static void migrate_lock_init(struct zspage *zspage);\n"
+ "+static void migrate_read_lock(struct zspage *zspage);\n"
+ "+static void migrate_read_unlock(struct zspage *zspage);\n"
+ "+static void kick_deferred_free(struct zs_pool *pool);\n"
+ "+static void init_deferred_free(struct zs_pool *pool);\n"
+ "+static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage);\n"
+ "+#else\n"
+ "+static int zsmalloc_mount(void) { return 0; }\n"
+ "+static void zsmalloc_unmount(void) {}\n"
+ "+static int zs_register_migration(struct zs_pool *pool) { return 0; }\n"
+ "+static void zs_unregister_migration(struct zs_pool *pool) {}\n"
+ "+static void migrate_lock_init(struct zspage *zspage) {}\n"
+ "+static void migrate_read_lock(struct zspage *zspage) {}\n"
+ "+static void migrate_read_unlock(struct zspage *zspage) {}\n"
+ "+static void kick_deferred_free(struct zs_pool *pool) {}\n"
+ "+static void init_deferred_free(struct zs_pool *pool) {}\n"
+ "+static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}\n"
+ "+#endif\n"
+ "+\n"
+ " static int create_cache(struct zs_pool *pool)\n"
+ " {\n"
+ " \tpool->handle_cachep = kmem_cache_create(\"zs_handle\", ZS_HANDLE_SIZE,\n"
+ "@@ -301,7 +354,7 @@ static void destroy_cache(struct zs_pool *pool)\n"
+ " static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)\n"
+ " {\n"
+ " \treturn (unsigned long)kmem_cache_alloc(pool->handle_cachep,\n"
+ "-\t\t\tgfp & ~__GFP_HIGHMEM);\n"
+ "+\t\t\tgfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));\n"
+ " }\n"
+ " \n"
+ " static void cache_free_handle(struct zs_pool *pool, unsigned long handle)\n"
+ "@@ -311,7 +364,8 @@ static void cache_free_handle(struct zs_pool *pool, unsigned long handle)\n"
+ " \n"
+ " static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags)\n"
+ " {\n"
+ "-\treturn kmem_cache_alloc(pool->zspage_cachep, flags & ~__GFP_HIGHMEM);\n"
+ "+\treturn kmem_cache_alloc(pool->zspage_cachep,\n"
+ "+\t\t\tflags & ~(__GFP_HIGHMEM|__GFP_MOVABLE));\n"
+ " };\n"
+ " \n"
+ " static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)\n"
+ "@@ -421,11 +475,17 @@ static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage)\n"
+ " /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */\n"
+ " static DEFINE_PER_CPU(struct mapping_area, zs_map_area);\n"
+ " \n"
+ "+static bool is_zspage_isolated(struct zspage *zspage)\n"
+ "+{\n"
+ "+\treturn zspage->isolated;\n"
+ "+}\n"
+ "+\n"
+ " static int is_first_page(struct page *page)\n"
+ " {\n"
+ " \treturn PagePrivate(page);\n"
+ " }\n"
+ " \n"
+ "+/* Protected by class->lock */\n"
+ " static inline int get_zspage_inuse(struct zspage *zspage)\n"
+ " {\n"
+ " \treturn zspage->inuse;\n"
+ "@@ -441,20 +501,12 @@ static inline void mod_zspage_inuse(struct zspage *zspage, int val)\n"
+ " \tzspage->inuse += val;\n"
+ " }\n"
+ " \n"
+ "-static inline int get_first_obj_offset(struct page *page)\n"
+ "+static inline struct page *get_first_page(struct zspage *zspage)\n"
+ " {\n"
+ "-\tif (is_first_page(page))\n"
+ "-\t\treturn 0;\n"
+ "+\tstruct page *first_page = zspage->first_page;\n"
+ " \n"
+ "-\treturn page->index;\n"
+ "-}\n"
+ "-\n"
+ "-static inline void set_first_obj_offset(struct page *page, int offset)\n"
+ "-{\n"
+ "-\tif (is_first_page(page))\n"
+ "-\t\treturn;\n"
+ "-\n"
+ "-\tpage->index = offset;\n"
+ "+\tVM_BUG_ON_PAGE(!is_first_page(first_page), first_page);\n"
+ "+\treturn first_page;\n"
+ " }\n"
+ " \n"
+ " static inline unsigned int get_freeobj(struct zspage *zspage)\n"
+ "@@ -471,6 +523,8 @@ static void get_zspage_mapping(struct zspage *zspage,\n"
+ " \t\t\t\tunsigned int *class_idx,\n"
+ " \t\t\t\tenum fullness_group *fullness)\n"
+ " {\n"
+ "+\tVM_BUG_ON(zspage->magic != ZSPAGE_MAGIC);\n"
+ "+\n"
+ " \t*fullness = zspage->fullness;\n"
+ " \t*class_idx = zspage->class;\n"
+ " }\n"
+ "@@ -504,23 +558,19 @@ static int get_size_class_index(int size)\n"
+ " static inline void zs_stat_inc(struct size_class *class,\n"
+ " \t\t\t\tenum zs_stat_type type, unsigned long cnt)\n"
+ " {\n"
+ "-\tif (type < NR_ZS_STAT_TYPE)\n"
+ "-\t\tclass->stats.objs[type] += cnt;\n"
+ "+\tclass->stats.objs[type] += cnt;\n"
+ " }\n"
+ " \n"
+ " static inline void zs_stat_dec(struct size_class *class,\n"
+ " \t\t\t\tenum zs_stat_type type, unsigned long cnt)\n"
+ " {\n"
+ "-\tif (type < NR_ZS_STAT_TYPE)\n"
+ "-\t\tclass->stats.objs[type] -= cnt;\n"
+ "+\tclass->stats.objs[type] -= cnt;\n"
+ " }\n"
+ " \n"
+ " static inline unsigned long zs_stat_get(struct size_class *class,\n"
+ " \t\t\t\tenum zs_stat_type type)\n"
+ " {\n"
+ "-\tif (type < NR_ZS_STAT_TYPE)\n"
+ "-\t\treturn class->stats.objs[type];\n"
+ "-\treturn 0;\n"
+ "+\treturn class->stats.objs[type];\n"
+ " }\n"
+ " \n"
+ " #ifdef CONFIG_ZSMALLOC_STAT\n"
+ "@@ -664,6 +714,7 @@ static inline void zs_pool_stat_destroy(struct zs_pool *pool)\n"
+ " }\n"
+ " #endif\n"
+ " \n"
+ "+\n"
+ " /*\n"
+ "  * For each size class, zspages are divided into different groups\n"
+ "  * depending on how \"full\" they are. This was done so that we could\n"
+ "@@ -704,15 +755,9 @@ static void insert_zspage(struct size_class *class,\n"
+ " {\n"
+ " \tstruct zspage *head;\n"
+ " \n"
+ "-\tif (fullness >= ZS_EMPTY)\n"
+ "-\t\treturn;\n"
+ "-\n"
+ "+\tzs_stat_inc(class, fullness, 1);\n"
+ " \thead = list_first_entry_or_null(&class->fullness_list[fullness],\n"
+ " \t\t\t\t\tstruct zspage, list);\n"
+ "-\n"
+ "-\tzs_stat_inc(class, fullness == ZS_ALMOST_EMPTY ?\n"
+ "-\t\t\tCLASS_ALMOST_EMPTY : CLASS_ALMOST_FULL, 1);\n"
+ "-\n"
+ " \t/*\n"
+ " \t * We want to see more ZS_FULL pages and less almost empty/full.\n"
+ " \t * Put pages with higher ->inuse first.\n"
+ "@@ -734,14 +779,11 @@ static void remove_zspage(struct size_class *class,\n"
+ " \t\t\t\tstruct zspage *zspage,\n"
+ " \t\t\t\tenum fullness_group fullness)\n"
+ " {\n"
+ "-\tif (fullness >= ZS_EMPTY)\n"
+ "-\t\treturn;\n"
+ "-\n"
+ " \tVM_BUG_ON(list_empty(&class->fullness_list[fullness]));\n"
+ "+\tVM_BUG_ON(is_zspage_isolated(zspage));\n"
+ " \n"
+ " \tlist_del_init(&zspage->list);\n"
+ "-\tzs_stat_dec(class, fullness == ZS_ALMOST_EMPTY ?\n"
+ "-\t\t\tCLASS_ALMOST_EMPTY : CLASS_ALMOST_FULL, 1);\n"
+ "+\tzs_stat_dec(class, fullness, 1);\n"
+ " }\n"
+ " \n"
+ " /*\n"
+ "@@ -764,8 +806,11 @@ static enum fullness_group fix_fullness_group(struct size_class *class,\n"
+ " \tif (newfg == currfg)\n"
+ " \t\tgoto out;\n"
+ " \n"
+ "-\tremove_zspage(class, zspage, currfg);\n"
+ "-\tinsert_zspage(class, zspage, newfg);\n"
+ "+\tif (!is_zspage_isolated(zspage)) {\n"
+ "+\t\tremove_zspage(class, zspage, currfg);\n"
+ "+\t\tinsert_zspage(class, zspage, newfg);\n"
+ "+\t}\n"
+ "+\n"
+ " \tset_zspage_mapping(zspage, class_idx, newfg);\n"
+ " \n"
+ " out:\n"
+ "@@ -808,19 +853,45 @@ static int get_pages_per_zspage(int class_size)\n"
+ " \treturn max_usedpc_order;\n"
+ " }\n"
+ " \n"
+ "-static struct page *get_first_page(struct zspage *zspage)\n"
+ "+static struct zspage *get_zspage(struct page *page)\n"
+ " {\n"
+ "-\treturn zspage->first_page;\n"
+ "+\tstruct zspage *zspage = (struct zspage *)page->private;\n"
+ "+\n"
+ "+\tVM_BUG_ON(zspage->magic != ZSPAGE_MAGIC);\n"
+ "+\treturn zspage;\n"
+ " }\n"
+ " \n"
+ "-static struct zspage *get_zspage(struct page *page)\n"
+ "+static struct page *get_next_page(struct page *page)\n"
+ " {\n"
+ "-\treturn (struct zspage *)page->private;\n"
+ "+\tif (unlikely(PageHugeObject(page)))\n"
+ "+\t\treturn NULL;\n"
+ "+\n"
+ "+\treturn page->freelist;\n"
+ " }\n"
+ " \n"
+ "-static struct page *get_next_page(struct page *page)\n"
+ "+/* Get byte offset of first object in the @page */\n"
+ "+static int get_first_obj_offset(struct size_class *class,\n"
+ "+\t\t\t\tstruct page *first_page, struct page *page)\n"
+ " {\n"
+ "-\treturn page->next;\n"
+ "+\tint pos;\n"
+ "+\tint page_idx = 0;\n"
+ "+\tint ofs = 0;\n"
+ "+\tstruct page *cursor = first_page;\n"
+ "+\n"
+ "+\tif (first_page == page)\n"
+ "+\t\tgoto out;\n"
+ "+\n"
+ "+\twhile (page != cursor) {\n"
+ "+\t\tpage_idx++;\n"
+ "+\t\tcursor = get_next_page(cursor);\n"
+ "+\t}\n"
+ "+\n"
+ "+\tpos = class->objs_per_zspage * class->size *\n"
+ "+\t\tpage_idx / class->pages_per_zspage;\n"
+ "+\n"
+ "+\tofs = (pos + class->size) % PAGE_SIZE;\n"
+ "+out:\n"
+ "+\treturn ofs;\n"
+ " }\n"
+ " \n"
+ " /**\n"
+ "@@ -857,16 +928,20 @@ static unsigned long handle_to_obj(unsigned long handle)\n"
+ " \treturn *(unsigned long *)handle;\n"
+ " }\n"
+ " \n"
+ "-static unsigned long obj_to_head(struct size_class *class, struct page *page,\n"
+ "-\t\t\tvoid *obj)\n"
+ "+static unsigned long obj_to_head(struct page *page, void *obj)\n"
+ " {\n"
+ "-\tif (class->huge) {\n"
+ "+\tif (unlikely(PageHugeObject(page))) {\n"
+ " \t\tVM_BUG_ON_PAGE(!is_first_page(page), page);\n"
+ " \t\treturn page->index;\n"
+ " \t} else\n"
+ " \t\treturn *(unsigned long *)obj;\n"
+ " }\n"
+ " \n"
+ "+static inline int testpin_tag(unsigned long handle)\n"
+ "+{\n"
+ "+\treturn bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);\n"
+ "+}\n"
+ "+\n"
+ " static inline int trypin_tag(unsigned long handle)\n"
+ " {\n"
+ " \treturn bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);\n"
+ "@@ -884,27 +959,93 @@ static void unpin_tag(unsigned long handle)\n"
+ " \n"
+ " static void reset_page(struct page *page)\n"
+ " {\n"
+ "+\t__ClearPageMovable(page);\n"
+ " \tclear_bit(PG_private, &page->flags);\n"
+ " \tclear_bit(PG_private_2, &page->flags);\n"
+ " \tset_page_private(page, 0);\n"
+ "-\tpage->index = 0;\n"
+ "+\tClearPageHugeObject(page);\n"
+ "+\tpage->freelist = NULL;\n"
+ " }\n"
+ " \n"
+ "-static void free_zspage(struct zs_pool *pool, struct zspage *zspage)\n"
+ "+/*\n"
+ "+ * To prevent zspage destroy during migration, zspage freeing should\n"
+ "+ * hold locks of all pages in the zspage.\n"
+ "+ */\n"
+ "+void lock_zspage(struct zspage *zspage)\n"
+ "+{\n"
+ "+\tstruct page *page = get_first_page(zspage);\n"
+ "+\n"
+ "+\tdo {\n"
+ "+\t\tlock_page(page);\n"
+ "+\t} while ((page = get_next_page(page)) != NULL);\n"
+ "+}\n"
+ "+\n"
+ "+int trylock_zspage(struct zspage *zspage)\n"
+ "+{\n"
+ "+\tstruct page *cursor, *fail;\n"
+ "+\n"
+ "+\tfor (cursor = get_first_page(zspage); cursor != NULL; cursor =\n"
+ "+\t\t\t\t\tget_next_page(cursor)) {\n"
+ "+\t\tif (!trylock_page(cursor)) {\n"
+ "+\t\t\tfail = cursor;\n"
+ "+\t\t\tgoto unlock;\n"
+ "+\t\t}\n"
+ "+\t}\n"
+ "+\n"
+ "+\treturn 1;\n"
+ "+unlock:\n"
+ "+\tfor (cursor = get_first_page(zspage); cursor != fail; cursor =\n"
+ "+\t\t\t\t\tget_next_page(cursor))\n"
+ "+\t\tunlock_page(cursor);\n"
+ "+\n"
+ "+\treturn 0;\n"
+ "+}\n"
+ "+\n"
+ "+static void __free_zspage(struct zs_pool *pool, struct size_class *class,\n"
+ "+\t\t\t\tstruct zspage *zspage)\n"
+ " {\n"
+ " \tstruct page *page, *next;\n"
+ "+\tenum fullness_group fg;\n"
+ "+\tunsigned int class_idx;\n"
+ "+\n"
+ "+\tget_zspage_mapping(zspage, &class_idx, &fg);\n"
+ "+\n"
+ "+\tassert_spin_locked(&class->lock);\n"
+ " \n"
+ " \tVM_BUG_ON(get_zspage_inuse(zspage));\n"
+ "+\tVM_BUG_ON(fg != ZS_EMPTY);\n"
+ " \n"
+ "-\tnext = page = zspage->first_page;\n"
+ "+\tnext = page = get_first_page(zspage);\n"
+ " \tdo {\n"
+ "-\t\tnext = page->next;\n"
+ "+\t\tVM_BUG_ON_PAGE(!PageLocked(page), page);\n"
+ "+\t\tnext = get_next_page(page);\n"
+ " \t\treset_page(page);\n"
+ "+\t\tunlock_page(page);\n"
+ " \t\tput_page(page);\n"
+ " \t\tpage = next;\n"
+ " \t} while (page != NULL);\n"
+ " \n"
+ " \tcache_free_zspage(pool, zspage);\n"
+ "+\n"
+ "+\tzs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage(\n"
+ "+\t\t\tclass->size, class->pages_per_zspage));\n"
+ "+\tatomic_long_sub(class->pages_per_zspage,\n"
+ "+\t\t\t\t\t&pool->pages_allocated);\n"
+ "+}\n"
+ "+\n"
+ "+static void free_zspage(struct zs_pool *pool, struct size_class *class,\n"
+ "+\t\t\t\tstruct zspage *zspage)\n"
+ "+{\n"
+ "+\tVM_BUG_ON(get_zspage_inuse(zspage));\n"
+ "+\tVM_BUG_ON(list_empty(&zspage->list));\n"
+ "+\n"
+ "+\tif (!trylock_zspage(zspage)) {\n"
+ "+\t\tkick_deferred_free(pool);\n"
+ "+\t\treturn;\n"
+ "+\t}\n"
+ "+\n"
+ "+\tremove_zspage(class, zspage, ZS_EMPTY);\n"
+ "+\t__free_zspage(pool, class, zspage);\n"
+ " }\n"
+ " \n"
+ " /* Initialize a newly allocated zspage */\n"
+ "@@ -912,15 +1053,13 @@ static void init_zspage(struct size_class *class, struct zspage *zspage)\n"
+ " {\n"
+ " \tunsigned int freeobj = 1;\n"
+ " \tunsigned long off = 0;\n"
+ "-\tstruct page *page = zspage->first_page;\n"
+ "+\tstruct page *page = get_first_page(zspage);\n"
+ " \n"
+ " \twhile (page) {\n"
+ " \t\tstruct page *next_page;\n"
+ " \t\tstruct link_free *link;\n"
+ " \t\tvoid *vaddr;\n"
+ " \n"
+ "-\t\tset_first_obj_offset(page, off);\n"
+ "-\n"
+ " \t\tvaddr = kmap_atomic(page);\n"
+ " \t\tlink = (struct link_free *)vaddr + off / sizeof(*link);\n"
+ " \n"
+ "@@ -952,16 +1091,17 @@ static void init_zspage(struct size_class *class, struct zspage *zspage)\n"
+ " \tset_freeobj(zspage, 0);\n"
+ " }\n"
+ " \n"
+ "-static void create_page_chain(struct zspage *zspage, struct page *pages[],\n"
+ "-\t\t\t\tint nr_pages)\n"
+ "+static void create_page_chain(struct size_class *class, struct zspage *zspage,\n"
+ "+\t\t\t\tstruct page *pages[])\n"
+ " {\n"
+ " \tint i;\n"
+ " \tstruct page *page;\n"
+ " \tstruct page *prev_page = NULL;\n"
+ "+\tint nr_pages = class->pages_per_zspage;\n"
+ " \n"
+ " \t/*\n"
+ " \t * Allocate individual pages and link them together as:\n"
+ "-\t * 1. all pages are linked together using page->next\n"
+ "+\t * 1. all pages are linked together using page->freelist\n"
+ " \t * 2. each sub-page point to zspage using page->private\n"
+ " \t *\n"
+ " \t * we set PG_private to identify the first page (i.e. no other sub-page\n"
+ "@@ -970,16 +1110,18 @@ static void create_page_chain(struct zspage *zspage, struct page *pages[],\n"
+ " \tfor (i = 0; i < nr_pages; i++) {\n"
+ " \t\tpage = pages[i];\n"
+ " \t\tset_page_private(page, (unsigned long)zspage);\n"
+ "+\t\tpage->freelist = NULL;\n"
+ " \t\tif (i == 0) {\n"
+ " \t\t\tzspage->first_page = page;\n"
+ " \t\t\tSetPagePrivate(page);\n"
+ "+\t\t\tif (unlikely(class->objs_per_zspage == 1 &&\n"
+ "+\t\t\t\t\tclass->pages_per_zspage == 1))\n"
+ "+\t\t\t\tSetPageHugeObject(page);\n"
+ " \t\t} else {\n"
+ "-\t\t\tprev_page->next = page;\n"
+ "+\t\t\tprev_page->freelist = page;\n"
+ " \t\t}\n"
+ "-\t\tif (i == nr_pages - 1) {\n"
+ "+\t\tif (i == nr_pages - 1)\n"
+ " \t\t\tSetPagePrivate2(page);\n"
+ "-\t\t\tpage->next = NULL;\n"
+ "-\t\t}\n"
+ " \t\tprev_page = page;\n"
+ " \t}\n"
+ " }\n"
+ "@@ -999,6 +1141,8 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,\n"
+ " \t\treturn NULL;\n"
+ " \n"
+ " \tmemset(zspage, 0, sizeof(struct zspage));\n"
+ "+\tzspage->magic = ZSPAGE_MAGIC;\n"
+ "+\tmigrate_lock_init(zspage);\n"
+ " \n"
+ " \tfor (i = 0; i < class->pages_per_zspage; i++) {\n"
+ " \t\tstruct page *page;\n"
+ "@@ -1013,7 +1157,7 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,\n"
+ " \t\tpages[i] = page;\n"
+ " \t}\n"
+ " \n"
+ "-\tcreate_page_chain(zspage, pages, class->pages_per_zspage);\n"
+ "+\tcreate_page_chain(class, zspage, pages);\n"
+ " \tinit_zspage(class, zspage);\n"
+ " \n"
+ " \treturn zspage;\n"
+ "@@ -1024,7 +1168,7 @@ static struct zspage *find_get_zspage(struct size_class *class)\n"
+ " \tint i;\n"
+ " \tstruct zspage *zspage;\n"
+ " \n"
+ "-\tfor (i = ZS_ALMOST_FULL; i <= ZS_ALMOST_EMPTY; i++) {\n"
+ "+\tfor (i = ZS_ALMOST_FULL; i >= ZS_EMPTY; i--) {\n"
+ " \t\tzspage = list_first_entry_or_null(&class->fullness_list[i],\n"
+ " \t\t\t\tstruct zspage, list);\n"
+ " \t\tif (zspage)\n"
+ "@@ -1289,6 +1433,10 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,\n"
+ " \tobj = handle_to_obj(handle);\n"
+ " \tobj_to_location(obj, &page, &obj_idx);\n"
+ " \tzspage = get_zspage(page);\n"
+ "+\n"
+ "+\t/* migration cannot move any subpage in this zspage */\n"
+ "+\tmigrate_read_lock(zspage);\n"
+ "+\n"
+ " \tget_zspage_mapping(zspage, &class_idx, &fg);\n"
+ " \tclass = pool->size_class[class_idx];\n"
+ " \toff = (class->size * obj_idx) & ~PAGE_MASK;\n"
+ "@@ -1309,7 +1457,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,\n"
+ " \n"
+ " \tret = __zs_map_object(area, pages, off, class->size);\n"
+ " out:\n"
+ "-\tif (!class->huge)\n"
+ "+\tif (likely(!PageHugeObject(page)))\n"
+ " \t\tret += ZS_HANDLE_SIZE;\n"
+ " \n"
+ " \treturn ret;\n"
+ "@@ -1348,6 +1496,8 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)\n"
+ " \t\t__zs_unmap_object(area, pages, off, class->size);\n"
+ " \t}\n"
+ " \tput_cpu_var(zs_map_area);\n"
+ "+\n"
+ "+\tmigrate_read_unlock(zspage);\n"
+ " \tunpin_tag(handle);\n"
+ " }\n"
+ " EXPORT_SYMBOL_GPL(zs_unmap_object);\n"
+ "@@ -1377,7 +1527,7 @@ static unsigned long obj_malloc(struct size_class *class,\n"
+ " \tvaddr = kmap_atomic(m_page);\n"
+ " \tlink = (struct link_free *)vaddr + m_offset / sizeof(*link);\n"
+ " \tset_freeobj(zspage, link->next >> OBJ_ALLOCATED_TAG);\n"
+ "-\tif (!class->huge)\n"
+ "+\tif (likely(!PageHugeObject(m_page)))\n"
+ " \t\t/* record handle in the header of allocated chunk */\n"
+ " \t\tlink->handle = handle;\n"
+ " \telse\n"
+ "@@ -1407,6 +1557,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)\n"
+ " {\n"
+ " \tunsigned long handle, obj;\n"
+ " \tstruct size_class *class;\n"
+ "+\tenum fullness_group newfg;\n"
+ " \tstruct zspage *zspage;\n"
+ " \n"
+ " \tif (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))\n"
+ "@@ -1422,28 +1573,37 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)\n"
+ " \n"
+ " \tspin_lock(&class->lock);\n"
+ " \tzspage = find_get_zspage(class);\n"
+ "-\n"
+ "-\tif (!zspage) {\n"
+ "+\tif (likely(zspage)) {\n"
+ "+\t\tobj = obj_malloc(class, zspage, handle);\n"
+ "+\t\t/* Now move the zspage to another fullness group, if required */\n"
+ "+\t\tfix_fullness_group(class, zspage);\n"
+ "+\t\trecord_obj(handle, obj);\n"
+ " \t\tspin_unlock(&class->lock);\n"
+ "-\t\tzspage = alloc_zspage(pool, class, gfp);\n"
+ "-\t\tif (unlikely(!zspage)) {\n"
+ "-\t\t\tcache_free_handle(pool, handle);\n"
+ "-\t\t\treturn 0;\n"
+ "-\t\t}\n"
+ " \n"
+ "-\t\tset_zspage_mapping(zspage, class->index, ZS_EMPTY);\n"
+ "-\t\tatomic_long_add(class->pages_per_zspage,\n"
+ "-\t\t\t\t\t&pool->pages_allocated);\n"
+ "+\t\treturn handle;\n"
+ "+\t}\n"
+ " \n"
+ "-\t\tspin_lock(&class->lock);\n"
+ "-\t\tzs_stat_inc(class, OBJ_ALLOCATED, get_maxobj_per_zspage(\n"
+ "-\t\t\t\tclass->size, class->pages_per_zspage));\n"
+ "+\tspin_unlock(&class->lock);\n"
+ "+\n"
+ "+\tzspage = alloc_zspage(pool, class, gfp);\n"
+ "+\tif (!zspage) {\n"
+ "+\t\tcache_free_handle(pool, handle);\n"
+ "+\t\treturn 0;\n"
+ " \t}\n"
+ " \n"
+ "+\tspin_lock(&class->lock);\n"
+ " \tobj = obj_malloc(class, zspage, handle);\n"
+ "-\t/* Now move the zspage to another fullness group, if required */\n"
+ "-\tfix_fullness_group(class, zspage);\n"
+ "+\tnewfg = get_fullness_group(class, zspage);\n"
+ "+\tinsert_zspage(class, zspage, newfg);\n"
+ "+\tset_zspage_mapping(zspage, class->index, newfg);\n"
+ " \trecord_obj(handle, obj);\n"
+ "+\tatomic_long_add(class->pages_per_zspage,\n"
+ "+\t\t\t\t&pool->pages_allocated);\n"
+ "+\tzs_stat_inc(class, OBJ_ALLOCATED, get_maxobj_per_zspage(\n"
+ "+\t\t\tclass->size, class->pages_per_zspage));\n"
+ "+\n"
+ "+\t/* The zspage is completely set up, so mark its pages as movable */\n"
+ "+\tSetZsPageMovable(pool, zspage);\n"
+ " \tspin_unlock(&class->lock);\n"
+ " \n"
+ " \treturn handle;\n"
+ "@@ -1484,6 +1644,7 @@ void zs_free(struct zs_pool *pool, unsigned long handle)\n"
+ " \tint class_idx;\n"
+ " \tstruct size_class *class;\n"
+ " \tenum fullness_group fullness;\n"
+ "+\tbool isolated;\n"
+ " \n"
+ " \tif (unlikely(!handle))\n"
+ " \t\treturn;\n"
+ "@@ -1493,22 +1654,28 @@ void zs_free(struct zs_pool *pool, unsigned long handle)\n"
+ " \tobj_to_location(obj, &f_page, &f_objidx);\n"
+ " \tzspage = get_zspage(f_page);\n"
+ " \n"
+ "+\tmigrate_read_lock(zspage);\n"
+ "+\n"
+ " \tget_zspage_mapping(zspage, &class_idx, &fullness);\n"
+ " \tclass = pool->size_class[class_idx];\n"
+ " \n"
+ " \tspin_lock(&class->lock);\n"
+ " \tobj_free(class, obj);\n"
+ " \tfullness = fix_fullness_group(class, zspage);\n"
+ "-\tif (fullness == ZS_EMPTY) {\n"
+ "-\t\tzs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage(\n"
+ "-\t\t\t\tclass->size, class->pages_per_zspage));\n"
+ "-\t\tatomic_long_sub(class->pages_per_zspage,\n"
+ "-\t\t\t\t&pool->pages_allocated);\n"
+ "-\t\tfree_zspage(pool, zspage);\n"
+ "+\tif (fullness != ZS_EMPTY) {\n"
+ "+\t\tmigrate_read_unlock(zspage);\n"
+ "+\t\tgoto out;\n"
+ " \t}\n"
+ "+\n"
+ "+\tisolated = is_zspage_isolated(zspage);\n"
+ "+\tmigrate_read_unlock(zspage);\n"
+ "+\t/* If zspage is isolated, zs_page_putback will free the zspage */\n"
+ "+\tif (likely(!isolated))\n"
+ "+\t\tfree_zspage(pool, class, zspage);\n"
+ "+out:\n"
+ "+\n"
+ " \tspin_unlock(&class->lock);\n"
+ " \tunpin_tag(handle);\n"
+ "-\n"
+ " \tcache_free_handle(pool, handle);\n"
+ " }\n"
+ " EXPORT_SYMBOL_GPL(zs_free);\n"
+ "@@ -1587,12 +1754,13 @@ static unsigned long find_alloced_obj(struct size_class *class,\n"
+ " \tint offset = 0;\n"
+ " \tunsigned long handle = 0;\n"
+ " \tvoid *addr = kmap_atomic(page);\n"
+ "+\tstruct zspage *zspage = get_zspage(page);\n"
+ " \n"
+ "-\toffset = get_first_obj_offset(page);\n"
+ "+\toffset = get_first_obj_offset(class, get_first_page(zspage), page);\n"
+ " \toffset += class->size * index;\n"
+ " \n"
+ " \twhile (offset < PAGE_SIZE) {\n"
+ "-\t\thead = obj_to_head(class, page, addr + offset);\n"
+ "+\t\thead = obj_to_head(page, addr + offset);\n"
+ " \t\tif (head & OBJ_ALLOCATED_TAG) {\n"
+ " \t\t\thandle = head & ~OBJ_ALLOCATED_TAG;\n"
+ " \t\t\tif (trypin_tag(handle))\n"
+ "@@ -1684,6 +1852,7 @@ static struct zspage *isolate_zspage(struct size_class *class, bool source)\n"
+ " \t\tzspage = list_first_entry_or_null(&class->fullness_list[fg[i]],\n"
+ " \t\t\t\t\t\t\tstruct zspage, list);\n"
+ " \t\tif (zspage) {\n"
+ "+\t\t\tVM_BUG_ON(is_zspage_isolated(zspage));\n"
+ " \t\t\tremove_zspage(class, zspage, fg[i]);\n"
+ " \t\t\treturn zspage;\n"
+ " \t\t}\n"
+ "@@ -1704,6 +1873,8 @@ static enum fullness_group putback_zspage(struct size_class *class,\n"
+ " {\n"
+ " \tenum fullness_group fullness;\n"
+ " \n"
+ "+\tVM_BUG_ON(is_zspage_isolated(zspage));\n"
+ "+\n"
+ " \tfullness = get_fullness_group(class, zspage);\n"
+ " \tinsert_zspage(class, zspage, fullness);\n"
+ " \tset_zspage_mapping(zspage, class->index, fullness);\n"
+ "@@ -1711,6 +1882,377 @@ static enum fullness_group putback_zspage(struct size_class *class,\n"
+ " \treturn fullness;\n"
+ " }\n"
+ " \n"
+ "+#ifdef CONFIG_COMPACTION\n"
+ "+static struct dentry *zs_mount(struct file_system_type *fs_type,\n"
+ "+\t\t\t\tint flags, const char *dev_name, void *data)\n"
+ "+{\n"
+ "+\tstatic const struct dentry_operations ops = {\n"
+ "+\t\t.d_dname = simple_dname,\n"
+ "+\t};\n"
+ "+\n"
+ "+\treturn mount_pseudo(fs_type, \"zsmalloc:\", NULL, &ops, ZSMALLOC_MAGIC);\n"
+ "+}\n"
+ "+\n"
+ "+static struct file_system_type zsmalloc_fs = {\n"
+ "+\t.name\t\t= \"zsmalloc\",\n"
+ "+\t.mount\t\t= zs_mount,\n"
+ "+\t.kill_sb\t= kill_anon_super,\n"
+ "+};\n"
+ "+\n"
+ "+static int zsmalloc_mount(void)\n"
+ "+{\n"
+ "+\tint ret = 0;\n"
+ "+\n"
+ "+\tzsmalloc_mnt = kern_mount(&zsmalloc_fs);\n"
+ "+\tif (IS_ERR(zsmalloc_mnt))\n"
+ "+\t\tret = PTR_ERR(zsmalloc_mnt);\n"
+ "+\n"
+ "+\treturn ret;\n"
+ "+}\n"
+ "+\n"
+ "+static void zsmalloc_unmount(void)\n"
+ "+{\n"
+ "+\tkern_unmount(zsmalloc_mnt);\n"
+ "+}\n"
+ "+\n"
+ "+static void migrate_lock_init(struct zspage *zspage)\n"
+ "+{\n"
+ "+\trwlock_init(&zspage->lock);\n"
+ "+}\n"
+ "+\n"
+ "+static void migrate_read_lock(struct zspage *zspage)\n"
+ "+{\n"
+ "+\tread_lock(&zspage->lock);\n"
+ "+}\n"
+ "+\n"
+ "+static void migrate_read_unlock(struct zspage *zspage)\n"
+ "+{\n"
+ "+\tread_unlock(&zspage->lock);\n"
+ "+}\n"
+ "+\n"
+ "+static void migrate_write_lock(struct zspage *zspage)\n"
+ "+{\n"
+ "+\twrite_lock(&zspage->lock);\n"
+ "+}\n"
+ "+\n"
+ "+static void migrate_write_unlock(struct zspage *zspage)\n"
+ "+{\n"
+ "+\twrite_unlock(&zspage->lock);\n"
+ "+}\n"
+ "+\n"
+ "+/* Number of isolated subpages for *page migration* in this zspage */\n"
+ "+static void inc_zspage_isolation(struct zspage *zspage)\n"
+ "+{\n"
+ "+\tzspage->isolated++;\n"
+ "+}\n"
+ "+\n"
+ "+static void dec_zspage_isolation(struct zspage *zspage)\n"
+ "+{\n"
+ "+\tzspage->isolated--;\n"
+ "+}\n"
+ "+\n"
+ "+static void replace_sub_page(struct size_class *class, struct zspage *zspage,\n"
+ "+\t\t\t\tstruct page *newpage, struct page *oldpage)\n"
+ "+{\n"
+ "+\tstruct page *page;\n"
+ "+\tstruct page *pages[ZS_MAX_PAGES_PER_ZSPAGE] = {NULL, };\n"
+ "+\tint idx = 0;\n"
+ "+\n"
+ "+\tpage = get_first_page(zspage);\n"
+ "+\tdo {\n"
+ "+\t\tif (page == oldpage)\n"
+ "+\t\t\tpages[idx] = newpage;\n"
+ "+\t\telse\n"
+ "+\t\t\tpages[idx] = page;\n"
+ "+\t\tidx++;\n"
+ "+\t} while ((page = get_next_page(page)) != NULL);\n"
+ "+\n"
+ "+\tcreate_page_chain(class, zspage, pages);\n"
+ "+\tif (unlikely(PageHugeObject(oldpage)))\n"
+ "+\t\tnewpage->index = oldpage->index;\n"
+ "+\t__SetPageMovable(newpage, page_mapping(oldpage));\n"
+ "+}\n"
+ "+\n"
+ "+bool zs_page_isolate(struct page *page, isolate_mode_t mode)\n"
+ "+{\n"
+ "+\tstruct zs_pool *pool;\n"
+ "+\tstruct size_class *class;\n"
+ "+\tint class_idx;\n"
+ "+\tenum fullness_group fullness;\n"
+ "+\tstruct zspage *zspage;\n"
+ "+\tstruct address_space *mapping;\n"
+ "+\n"
+ "+\t/*\n"
+ "+\t * Page is locked so zspage couldn't be destroyed. For detail, look at\n"
+ "+\t * trylock_zspage in free_zspage.\n"
+ "+\t */\n"
+ "+\tVM_BUG_ON_PAGE(!PageMovable(page), page);\n"
+ "+\tVM_BUG_ON_PAGE(PageIsolated(page), page);\n"
+ "+\n"
+ "+\tzspage = get_zspage(page);\n"
+ "+\n"
+ "+\t/*\n"
+ "+\t * Without class lock, fullness could be stale while class_idx is okay\n"
+ "+\t * because class_idx is constant unless page is freed so we should get\n"
+ "+\t * fullness again under class lock.\n"
+ "+\t */\n"
+ "+\tget_zspage_mapping(zspage, &class_idx, &fullness);\n"
+ "+\tmapping = page_mapping(page);\n"
+ "+\tpool = mapping->private_data;\n"
+ "+\tclass = pool->size_class[class_idx];\n"
+ "+\n"
+ "+\tspin_lock(&class->lock);\n"
+ "+\tif (get_zspage_inuse(zspage) == 0) {\n"
+ "+\t\tspin_unlock(&class->lock);\n"
+ "+\t\treturn false;\n"
+ "+\t}\n"
+ "+\n"
+ "+\t/* zspage is isolated for object migration */\n"
+ "+\tif (list_empty(&zspage->list) && !is_zspage_isolated(zspage)) {\n"
+ "+\t\tspin_unlock(&class->lock);\n"
+ "+\t\treturn false;\n"
+ "+\t}\n"
+ "+\n"
+ "+\t/*\n"
+ "+\t * If this is first time isolation for the zspage, isolate zspage from\n"
+ "+\t * size_class to prevent further object allocation from the zspage.\n"
+ "+\t */\n"
+ "+\tif (!list_empty(&zspage->list) && !is_zspage_isolated(zspage)) {\n"
+ "+\t\tget_zspage_mapping(zspage, &class_idx, &fullness);\n"
+ "+\t\tremove_zspage(class, zspage, fullness);\n"
+ "+\t}\n"
+ "+\n"
+ "+\tinc_zspage_isolation(zspage);\n"
+ "+\tspin_unlock(&class->lock);\n"
+ "+\n"
+ "+\treturn true;\n"
+ "+}\n"
+ "+\n"
+ "+int zs_page_migrate(struct address_space *mapping, struct page *newpage,\n"
+ "+\t\tstruct page *page, enum migrate_mode mode)\n"
+ "+{\n"
+ "+\tstruct zs_pool *pool;\n"
+ "+\tstruct size_class *class;\n"
+ "+\tint class_idx;\n"
+ "+\tenum fullness_group fullness;\n"
+ "+\tstruct zspage *zspage;\n"
+ "+\tstruct page *dummy;\n"
+ "+\tvoid *s_addr, *d_addr, *addr;\n"
+ "+\tint offset, pos;\n"
+ "+\tunsigned long handle, head;\n"
+ "+\tunsigned long old_obj, new_obj;\n"
+ "+\tunsigned int obj_idx;\n"
+ "+\tint ret = -EAGAIN;\n"
+ "+\n"
+ "+\tVM_BUG_ON_PAGE(!PageMovable(page), page);\n"
+ "+\tVM_BUG_ON_PAGE(!PageIsolated(page), page);\n"
+ "+\n"
+ "+\tzspage = get_zspage(page);\n"
+ "+\n"
+ "+\t/* Concurrent compactor cannot migrate any subpage in zspage */\n"
+ "+\tmigrate_write_lock(zspage);\n"
+ "+\tget_zspage_mapping(zspage, &class_idx, &fullness);\n"
+ "+\tpool = mapping->private_data;\n"
+ "+\tclass = pool->size_class[class_idx];\n"
+ "+\toffset = get_first_obj_offset(class, get_first_page(zspage), page);\n"
+ "+\n"
+ "+\tspin_lock(&class->lock);\n"
+ "+\tif (!get_zspage_inuse(zspage)) {\n"
+ "+\t\tret = -EBUSY;\n"
+ "+\t\tgoto unlock_class;\n"
+ "+\t}\n"
+ "+\n"
+ "+\tpos = offset;\n"
+ "+\ts_addr = kmap_atomic(page);\n"
+ "+\twhile (pos < PAGE_SIZE) {\n"
+ "+\t\thead = obj_to_head(page, s_addr + pos);\n"
+ "+\t\tif (head & OBJ_ALLOCATED_TAG) {\n"
+ "+\t\t\thandle = head & ~OBJ_ALLOCATED_TAG;\n"
+ "+\t\t\tif (!trypin_tag(handle))\n"
+ "+\t\t\t\tgoto unpin_objects;\n"
+ "+\t\t}\n"
+ "+\t\tpos += class->size;\n"
+ "+\t}\n"
+ "+\n"
+ "+\t/*\n"
+ "+\t * Here, no user can access any object in the zspage, so let's move.\n"
+ "+\t */\n"
+ "+\td_addr = kmap_atomic(newpage);\n"
+ "+\tmemcpy(d_addr, s_addr, PAGE_SIZE);\n"
+ "+\tkunmap_atomic(d_addr);\n"
+ "+\n"
+ "+\tfor (addr = s_addr + offset; addr < s_addr + pos;\n"
+ "+\t\t\t\t\taddr += class->size) {\n"
+ "+\t\thead = obj_to_head(page, addr);\n"
+ "+\t\tif (head & OBJ_ALLOCATED_TAG) {\n"
+ "+\t\t\thandle = head & ~OBJ_ALLOCATED_TAG;\n"
+ "+\t\t\tif (!testpin_tag(handle))\n"
+ "+\t\t\t\tBUG();\n"
+ "+\n"
+ "+\t\t\told_obj = handle_to_obj(handle);\n"
+ "+\t\t\tobj_to_location(old_obj, &dummy, &obj_idx);\n"
+ "+\t\t\tnew_obj = (unsigned long)location_to_obj(newpage,\n"
+ "+\t\t\t\t\t\t\t\tobj_idx);\n"
+ "+\t\t\tnew_obj |= BIT(HANDLE_PIN_BIT);\n"
+ "+\t\t\trecord_obj(handle, new_obj);\n"
+ "+\t\t}\n"
+ "+\t}\n"
+ "+\n"
+ "+\treplace_sub_page(class, zspage, newpage, page);\n"
+ "+\tget_page(newpage);\n"
+ "+\n"
+ "+\tdec_zspage_isolation(zspage);\n"
+ "+\n"
+ "+\t/*\n"
+ "+\t * Page migration is done, so put the isolated zspage back on\n"
+ "+\t * the list if @page is the final isolated subpage in the zspage.\n"
+ "+\t */\n"
+ "+\tif (!is_zspage_isolated(zspage))\n"
+ "+\t\tputback_zspage(class, zspage);\n"
+ "+\n"
+ "+\treset_page(page);\n"
+ "+\tput_page(page);\n"
+ "+\tpage = newpage;\n"
+ "+\n"
+ "+\tret = 0;\n"
+ "+unpin_objects:\n"
+ "+\tfor (addr = s_addr + offset; addr < s_addr + pos;\n"
+ "+\t\t\t\t\t\taddr += class->size) {\n"
+ "+\t\thead = obj_to_head(page, addr);\n"
+ "+\t\tif (head & OBJ_ALLOCATED_TAG) {\n"
+ "+\t\t\thandle = head & ~OBJ_ALLOCATED_TAG;\n"
+ "+\t\t\tif (!testpin_tag(handle))\n"
+ "+\t\t\t\tBUG();\n"
+ "+\t\t\tunpin_tag(handle);\n"
+ "+\t\t}\n"
+ "+\t}\n"
+ "+\tkunmap_atomic(s_addr);\n"
+ "+unlock_class:\n"
+ "+\tspin_unlock(&class->lock);\n"
+ "+\tmigrate_write_unlock(zspage);\n"
+ "+\n"
+ "+\treturn ret;\n"
+ "+}\n"
+ "+\n"
+ "+void zs_page_putback(struct page *page)\n"
+ "+{\n"
+ "+\tstruct zs_pool *pool;\n"
+ "+\tstruct size_class *class;\n"
+ "+\tint class_idx;\n"
+ "+\tenum fullness_group fg;\n"
+ "+\tstruct address_space *mapping;\n"
+ "+\tstruct zspage *zspage;\n"
+ "+\n"
+ "+\tVM_BUG_ON_PAGE(!PageMovable(page), page);\n"
+ "+\tVM_BUG_ON_PAGE(!PageIsolated(page), page);\n"
+ "+\n"
+ "+\tzspage = get_zspage(page);\n"
+ "+\tget_zspage_mapping(zspage, &class_idx, &fg);\n"
+ "+\tmapping = page_mapping(page);\n"
+ "+\tpool = mapping->private_data;\n"
+ "+\tclass = pool->size_class[class_idx];\n"
+ "+\n"
+ "+\tspin_lock(&class->lock);\n"
+ "+\tdec_zspage_isolation(zspage);\n"
+ "+\tif (!is_zspage_isolated(zspage)) {\n"
+ "+\t\tfg = putback_zspage(class, zspage);\n"
+ "+\t\t/*\n"
+ "+\t\t * Due to page_lock, we cannot free zspage immediately\n"
+ "+\t\t * so let's defer.\n"
+ "+\t\t */\n"
+ "+\t\tif (fg == ZS_EMPTY)\n"
+ "+\t\t\tschedule_work(&pool->free_work);\n"
+ "+\t}\n"
+ "+\tspin_unlock(&class->lock);\n"
+ "+}\n"
+ "+\n"
+ "+const struct address_space_operations zsmalloc_aops = {\n"
+ "+\t.isolate_page = zs_page_isolate,\n"
+ "+\t.migratepage = zs_page_migrate,\n"
+ "+\t.putback_page = zs_page_putback,\n"
+ "+};\n"
+ "+\n"
+ "+static int zs_register_migration(struct zs_pool *pool)\n"
+ "+{\n"
+ "+\tpool->inode = alloc_anon_inode(zsmalloc_mnt->mnt_sb);\n"
+ "+\tif (IS_ERR(pool->inode)) {\n"
+ "+\t\tpool->inode = NULL;\n"
+ "+\t\treturn 1;\n"
+ "+\t}\n"
+ "+\n"
+ "+\tpool->inode->i_mapping->private_data = pool;\n"
+ "+\tpool->inode->i_mapping->a_ops = &zsmalloc_aops;\n"
+ "+\treturn 0;\n"
+ "+}\n"
+ "+\n"
+ "+static void zs_unregister_migration(struct zs_pool *pool)\n"
+ "+{\n"
+ "+\tflush_work(&pool->free_work);\n"
+ "+\tif (pool->inode)\n"
+ "+\t\tiput(pool->inode);\n"
+ "+}\n"
+ "+\n"
+ "+/*\n"
+ "+ * Caller should hold page_lock of all pages in the zspage\n"
+ "+ * In here, we cannot use zspage meta data.\n"
+ "+ */\n"
+ "+static void async_free_zspage(struct work_struct *work)\n"
+ "+{\n"
+ "+\tint i;\n"
+ "+\tstruct size_class *class;\n"
+ "+\tunsigned int class_idx;\n"
+ "+\tenum fullness_group fullness;\n"
+ "+\tstruct zspage *zspage, *tmp;\n"
+ "+\tLIST_HEAD(free_pages);\n"
+ "+\tstruct zs_pool *pool = container_of(work, struct zs_pool,\n"
+ "+\t\t\t\t\tfree_work);\n"
+ "+\n"
+ "+\tfor (i = 0; i < zs_size_classes; i++) {\n"
+ "+\t\tclass = pool->size_class[i];\n"
+ "+\t\tif (class->index != i)\n"
+ "+\t\t\tcontinue;\n"
+ "+\n"
+ "+\t\tspin_lock(&class->lock);\n"
+ "+\t\tlist_splice_init(&class->fullness_list[ZS_EMPTY], &free_pages);\n"
+ "+\t\tspin_unlock(&class->lock);\n"
+ "+\t}\n"
+ "+\n"
+ "+\n"
+ "+\tlist_for_each_entry_safe(zspage, tmp, &free_pages, list) {\n"
+ "+\t\tlist_del(&zspage->list);\n"
+ "+\t\tlock_zspage(zspage);\n"
+ "+\n"
+ "+\t\tget_zspage_mapping(zspage, &class_idx, &fullness);\n"
+ "+\t\tVM_BUG_ON(fullness != ZS_EMPTY);\n"
+ "+\t\tclass = pool->size_class[class_idx];\n"
+ "+\t\tspin_lock(&class->lock);\n"
+ "+\t\t__free_zspage(pool, pool->size_class[class_idx], zspage);\n"
+ "+\t\tspin_unlock(&class->lock);\n"
+ "+\t}\n"
+ "+};\n"
+ "+\n"
+ "+static void kick_deferred_free(struct zs_pool *pool)\n"
+ "+{\n"
+ "+\tschedule_work(&pool->free_work);\n"
+ "+}\n"
+ "+\n"
+ "+static void init_deferred_free(struct zs_pool *pool)\n"
+ "+{\n"
+ "+\tINIT_WORK(&pool->free_work, async_free_zspage);\n"
+ "+}\n"
+ "+\n"
+ "+static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage)\n"
+ "+{\n"
+ "+\tstruct page *page = get_first_page(zspage);\n"
+ "+\n"
+ "+\tdo {\n"
+ "+\t\tWARN_ON(!trylock_page(page));\n"
+ "+\t\t__SetPageMovable(page, pool->inode->i_mapping);\n"
+ "+\t\tunlock_page(page);\n"
+ "+\t} while ((page = get_next_page(page)) != NULL);\n"
+ "+}\n"
+ "+#endif\n"
+ "+\n"
+ " /*\n"
+ "  *\n"
+ "  * Based on the number of unused allocated objects calculate\n"
+ "@@ -1745,10 +2287,10 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class)\n"
+ " \t\t\tbreak;\n"
+ " \n"
+ " \t\tcc.index = 0;\n"
+ "-\t\tcc.s_page = src_zspage->first_page;\n"
+ "+\t\tcc.s_page = get_first_page(src_zspage);\n"
+ " \n"
+ " \t\twhile ((dst_zspage = isolate_zspage(class, false))) {\n"
+ "-\t\t\tcc.d_page = dst_zspage->first_page;\n"
+ "+\t\t\tcc.d_page = get_first_page(dst_zspage);\n"
+ " \t\t\t/*\n"
+ " \t\t\t * If there is no more space in dst_page, resched\n"
+ " \t\t\t * and see if anyone had allocated another zspage.\n"
+ "@@ -1765,11 +2307,7 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class)\n"
+ " \n"
+ " \t\tputback_zspage(class, dst_zspage);\n"
+ " \t\tif (putback_zspage(class, src_zspage) == ZS_EMPTY) {\n"
+ "-\t\t\tzs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage(\n"
+ "-\t\t\t\t\tclass->size, class->pages_per_zspage));\n"
+ "-\t\t\tatomic_long_sub(class->pages_per_zspage,\n"
+ "-\t\t\t\t\t&pool->pages_allocated);\n"
+ "-\t\t\tfree_zspage(pool, src_zspage);\n"
+ "+\t\t\tfree_zspage(pool, class, src_zspage);\n"
+ " \t\t\tpool->stats.pages_compacted += class->pages_per_zspage;\n"
+ " \t\t}\n"
+ " \t\tspin_unlock(&class->lock);\n"
+ "@@ -1885,6 +2423,7 @@ struct zs_pool *zs_create_pool(const char *name)\n"
+ " \tif (!pool)\n"
+ " \t\treturn NULL;\n"
+ " \n"
+ "+\tinit_deferred_free(pool);\n"
+ " \tpool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *),\n"
+ " \t\t\tGFP_KERNEL);\n"
+ " \tif (!pool->size_class) {\n"
+ "@@ -1939,12 +2478,10 @@ struct zs_pool *zs_create_pool(const char *name)\n"
+ " \t\tclass->pages_per_zspage = pages_per_zspage;\n"
+ " \t\tclass->objs_per_zspage = class->pages_per_zspage *\n"
+ " \t\t\t\t\t\tPAGE_SIZE / class->size;\n"
+ "-\t\tif (pages_per_zspage == 1 && class->objs_per_zspage == 1)\n"
+ "-\t\t\tclass->huge = true;\n"
+ " \t\tspin_lock_init(&class->lock);\n"
+ " \t\tpool->size_class[i] = class;\n"
+ "-\t\tfor (fullness = ZS_ALMOST_FULL; fullness <= ZS_ALMOST_EMPTY;\n"
+ "-\t\t\t\t\t\t\t\tfullness++)\n"
+ "+\t\tfor (fullness = ZS_EMPTY; fullness < NR_ZS_FULLNESS;\n"
+ "+\t\t\t\t\t\t\tfullness++)\n"
+ " \t\t\tINIT_LIST_HEAD(&class->fullness_list[fullness]);\n"
+ " \n"
+ " \t\tprev_class = class;\n"
+ "@@ -1953,6 +2490,9 @@ struct zs_pool *zs_create_pool(const char *name)\n"
+ " \t/* debug only, don't abort if it fails */\n"
+ " \tzs_pool_stat_create(pool, name);\n"
+ " \n"
+ "+\tif (zs_register_migration(pool))\n"
+ "+\t\tgoto err;\n"
+ "+\n"
+ " \t/*\n"
+ " \t * Not critical, we still can use the pool\n"
+ " \t * and user can trigger compaction manually.\n"
+ "@@ -1972,6 +2512,7 @@ void zs_destroy_pool(struct zs_pool *pool)\n"
+ " \tint i;\n"
+ " \n"
+ " \tzs_unregister_shrinker(pool);\n"
+ "+\tzs_unregister_migration(pool);\n"
+ " \tzs_pool_stat_destroy(pool);\n"
+ " \n"
+ " \tfor (i = 0; i < zs_size_classes; i++) {\n"
+ "@@ -1984,7 +2525,7 @@ void zs_destroy_pool(struct zs_pool *pool)\n"
+ " \t\tif (class->index != i)\n"
+ " \t\t\tcontinue;\n"
+ " \n"
+ "-\t\tfor (fg = ZS_ALMOST_FULL; fg <= ZS_ALMOST_EMPTY; fg++) {\n"
+ "+\t\tfor (fg = ZS_EMPTY; fg < NR_ZS_FULLNESS; fg++) {\n"
+ " \t\t\tif (!list_empty(&class->fullness_list[fg])) {\n"
+ " \t\t\t\tpr_info(\"Freeing non-empty class with size %db, fullness group %d\\n\",\n"
+ " \t\t\t\t\tclass->size, fg);\n"
+ "@@ -2002,7 +2543,13 @@ EXPORT_SYMBOL_GPL(zs_destroy_pool);\n"
+ " \n"
+ " static int __init zs_init(void)\n"
+ " {\n"
+ "-\tint ret = zs_register_cpu_notifier();\n"
+ "+\tint ret;\n"
+ "+\n"
+ "+\tret = zsmalloc_mount();\n"
+ "+\tif (ret)\n"
+ "+\t\tgoto out;\n"
+ "+\n"
+ "+\tret = zs_register_cpu_notifier();\n"
+ " \n"
+ " \tif (ret)\n"
+ " \t\tgoto notifier_fail;\n"
+ "@@ -2019,7 +2566,8 @@ static int __init zs_init(void)\n"
+ " \n"
+ " notifier_fail:\n"
+ " \tzs_unregister_cpu_notifier();\n"
+ "-\n"
+ "+\tzsmalloc_unmount();\n"
+ "+out:\n"
+ " \treturn ret;\n"
+ " }\n"
+ " \n"
+ "@@ -2028,6 +2576,7 @@ static void __exit zs_exit(void)\n"
+ " #ifdef CONFIG_ZPOOL\n"
+ " \tzpool_unregister_driver(&zs_zpool_driver);\n"
+ " #endif\n"
+ "+\tzsmalloc_unmount();\n"
+ " \tzs_unregister_cpu_notifier();\n"
+ " \n"
+ " \tzs_stat_exit();\n"
+ "-- \n"
+ 1.9.1
 
-a1c2acab743285edd666efc0d67fbd835126a7da73632a7c5eac03d9c209f0dc
+6e6939e85ab37f8514cf4fca36b16778ab9643b1aada135fc6cbf491c610aaa9

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.