* [PATCHv4 0/3] z3fold: add shrinker
@ 2016-10-13 16:47 Vitaly Wool
  2016-10-13 16:49 ` [PATCH 1/3] z3fold: make counters atomic Vitaly Wool
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Vitaly Wool @ 2016-10-13 16:47 UTC (permalink / raw)
  To: Linux-MM, linux-kernel; +Cc: Dan Streetman, Andrew Morton, Dave Chinner


This patch set implements a shrinker for z3fold. The actual shrinker
implementation follows a few code optimizations and preparations that I
thought were reasonable to have as separate patches.


* [PATCH 1/3] z3fold: make counters atomic
  2016-10-13 16:47 [PATCHv4 0/3] z3fold: add shrinker Vitaly Wool
@ 2016-10-13 16:49 ` Vitaly Wool
  2016-10-13 16:50 ` [PATCH 2/3] z3fold: remove redundant locking Vitaly Wool
  2016-10-13 16:52 ` [PATCH 3/3] z3fold: add shrinker Vitaly Wool
  2 siblings, 0 replies; 7+ messages in thread
From: Vitaly Wool @ 2016-10-13 16:49 UTC (permalink / raw)
  To: Linux-MM, linux-kernel; +Cc: Dan Streetman, Andrew Morton, Dave Chinner

This patch converts the per-pool pages_nr counter to atomic64_t. It also
introduces a new counter, unbuddied_nr, also an atomic64_t, which tracks
the number of unbuddied (shrinkable) pages, as a preparation step for the
z3fold shrinker implementation.
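
For illustration, the change boils down to the following pattern (a
simplified sketch, not the exact hunks below):

	/* in struct z3fold_pool, u64 pages_nr becomes: */
	atomic64_t pages_nr;
	atomic64_t unbuddied_nr;

	/* counter updates no longer rely on pool->lock */
	atomic64_set(&pool->pages_nr, 0);	/* pool creation */
	atomic64_inc(&pool->pages_nr);		/* new z3fold page allocated */
	atomic64_dec(&pool->pages_nr);		/* z3fold page freed */
	return atomic64_read(&pool->pages_nr);	/* z3fold_get_pool_size() */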

Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
---
 mm/z3fold.c | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 8f9e89c..5197d7b 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -69,6 +69,7 @@ struct z3fold_ops {
  * @lru:	list tracking the z3fold pages in LRU order by most recently
  *		added buddy.
  * @pages_nr:	number of z3fold pages in the pool.
+ * @unbuddied_nr:	number of unbuddied z3fold pages in the pool.
  * @ops:	pointer to a structure of user defined operations specified at
  *		pool creation time.
  *
@@ -80,7 +81,8 @@ struct z3fold_pool {
 	struct list_head unbuddied[NCHUNKS];
 	struct list_head buddied;
 	struct list_head lru;
-	u64 pages_nr;
+	atomic64_t pages_nr;
+	atomic64_t unbuddied_nr;
 	const struct z3fold_ops *ops;
 	struct zpool *zpool;
 	const struct zpool_ops *zpool_ops;
@@ -234,7 +236,8 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
 		INIT_LIST_HEAD(&pool->unbuddied[i]);
 	INIT_LIST_HEAD(&pool->buddied);
 	INIT_LIST_HEAD(&pool->lru);
-	pool->pages_nr = 0;
+	atomic64_set(&pool->pages_nr, 0);
+	atomic64_set(&pool->unbuddied_nr, 0);
 	pool->ops = ops;
 	return pool;
 }
@@ -334,6 +337,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 					continue;
 				}
 				list_del(&zhdr->buddy);
+				atomic64_dec(&pool->unbuddied_nr);
 				goto found;
 			}
 		}
@@ -346,7 +350,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 	if (!page)
 		return -ENOMEM;
 	spin_lock(&pool->lock);
-	pool->pages_nr++;
+	atomic64_inc(&pool->pages_nr);
 	zhdr = init_z3fold_page(page);
 
 	if (bud == HEADLESS) {
@@ -369,6 +373,7 @@ found:
 		/* Add to unbuddied list */
 		freechunks = num_free_chunks(zhdr);
 		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+		atomic64_inc(&pool->unbuddied_nr);
 	} else {
 		/* Add to buddied list */
 		list_add(&zhdr->buddy, &pool->buddied);
@@ -412,6 +417,11 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		/* HEADLESS page stored */
 		bud = HEADLESS;
 	} else {
+		if (zhdr->first_chunks == 0 ||
+		    zhdr->middle_chunks == 0 ||
+		    zhdr->last_chunks == 0)
+			atomic64_dec(&pool->unbuddied_nr);
+
 		bud = handle_to_buddy(handle);
 
 		switch (bud) {
@@ -429,6 +439,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 			pr_err("%s: unknown bud %d\n", __func__, bud);
 			WARN_ON(1);
 			spin_unlock(&pool->lock);
+			atomic64_inc(&pool->unbuddied_nr);
 			return;
 		}
 	}
@@ -451,12 +462,13 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		list_del(&page->lru);
 		clear_bit(PAGE_HEADLESS, &page->private);
 		free_z3fold_page(zhdr);
-		pool->pages_nr--;
+		atomic64_dec(&pool->pages_nr);
 	} else {
 		z3fold_compact_page(zhdr);
 		/* Add to the unbuddied list */
 		freechunks = num_free_chunks(zhdr);
 		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+		atomic64_inc(&pool->unbuddied_nr);
 	}
 
 	spin_unlock(&pool->lock);
@@ -520,6 +532,11 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 		zhdr = page_address(page);
 		if (!test_bit(PAGE_HEADLESS, &page->private)) {
 			list_del(&zhdr->buddy);
+			if (zhdr->first_chunks == 0 ||
+			    zhdr->middle_chunks == 0 ||
+			    zhdr->last_chunks == 0)
+				atomic64_dec(&pool->unbuddied_nr);
+
 			/*
 			 * We need encode the handles before unlocking, since
 			 * we can race with free that will set
@@ -569,7 +586,7 @@ next:
 			 */
 			clear_bit(PAGE_HEADLESS, &page->private);
 			free_z3fold_page(zhdr);
-			pool->pages_nr--;
+			atomic64_dec(&pool->pages_nr);
 			spin_unlock(&pool->lock);
 			return 0;
 		}  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
@@ -584,6 +601,7 @@ next:
 				freechunks = num_free_chunks(zhdr);
 				list_add(&zhdr->buddy,
 					 &pool->unbuddied[freechunks]);
+				atomic64_inc(&pool->unbuddied_nr);
 			}
 		}
 
@@ -672,12 +690,11 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
  * z3fold_get_pool_size() - gets the z3fold pool size in pages
  * @pool:	pool whose size is being queried
  *
- * Returns: size in pages of the given pool.  The pool lock need not be
- * taken to access pages_nr.
+ * Returns: size in pages of the given pool.
  */
 static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
 {
-	return pool->pages_nr;
+	return atomic64_read(&pool->pages_nr);
 }
 
 /*****************
-- 
2.4.2


* [PATCH 2/3] z3fold: remove redundant locking
  2016-10-13 16:47 [PATCHv4 0/3] z3fold: add shrinker Vitaly Wool
  2016-10-13 16:49 ` [PATCH 1/3] z3fold: make counters atomic Vitaly Wool
@ 2016-10-13 16:50 ` Vitaly Wool
  2016-10-13 16:52 ` [PATCH 3/3] z3fold: add shrinker Vitaly Wool
  2 siblings, 0 replies; 7+ messages in thread
From: Vitaly Wool @ 2016-10-13 16:50 UTC (permalink / raw)
  To: Linux-MM, linux-kernel; +Cc: Dan Streetman, Andrew Morton, Dave Chinner

The per-pool z3fold spinlock should generally be taken only when
a non-atomic pool variable is modified. There's no need to take it
to map/unmap an object.
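
In other words, the rule being moved towards is roughly this (a sketch
using the existing z3fold names):

	/* pool->lock: only around pool-wide, non-atomic state (the lists) */
	spin_lock(&pool->lock);
	list_del(&zhdr->buddy);
	list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
	spin_unlock(&pool->lock);

	/* atomic counters and object map/unmap need no pool lock at all */
	atomic64_inc(&pool->unbuddied_nr);
	addr = z3fold_map(pool, handle);
	/* ... use the mapped object ... */
	z3fold_unmap(pool, handle);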

Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
---
 mm/z3fold.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 5197d7b..10513b5 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -580,6 +580,7 @@ next:
 		if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) ||
 		    (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 &&
 		     zhdr->middle_chunks == 0)) {
+			spin_unlock(&pool->lock);
 			/*
 			 * All buddies are now free, free the z3fold page and
 			 * return success.
@@ -587,7 +588,6 @@ next:
 			clear_bit(PAGE_HEADLESS, &page->private);
 			free_z3fold_page(zhdr);
 			atomic64_dec(&pool->pages_nr);
-			spin_unlock(&pool->lock);
 			return 0;
 		}  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
 			if (zhdr->first_chunks != 0 &&
@@ -629,7 +629,6 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
 	void *addr;
 	enum buddy buddy;
 
-	spin_lock(&pool->lock);
 	zhdr = handle_to_z3fold_header(handle);
 	addr = zhdr;
 	page = virt_to_page(zhdr);
@@ -656,7 +655,6 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
 		break;
 	}
 out:
-	spin_unlock(&pool->lock);
 	return addr;
 }
 
@@ -671,19 +669,14 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
 	struct page *page;
 	enum buddy buddy;
 
-	spin_lock(&pool->lock);
 	zhdr = handle_to_z3fold_header(handle);
 	page = virt_to_page(zhdr);
 
-	if (test_bit(PAGE_HEADLESS, &page->private)) {
-		spin_unlock(&pool->lock);
-		return;
+	if (!test_bit(PAGE_HEADLESS, &page->private)) {
+		buddy = handle_to_buddy(handle);
+		if (buddy == MIDDLE)
+			clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
 	}
-
-	buddy = handle_to_buddy(handle);
-	if (buddy == MIDDLE)
-		clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
-	spin_unlock(&pool->lock);
 }
 
 /**
-- 
2.4.2


* [PATCH 3/3] z3fold: add shrinker
  2016-10-13 16:47 [PATCHv4 0/3] z3fold: add shrinker Vitaly Wool
  2016-10-13 16:49 ` [PATCH 1/3] z3fold: make counters atomic Vitaly Wool
  2016-10-13 16:50 ` [PATCH 2/3] z3fold: remove redundant locking Vitaly Wool
@ 2016-10-13 16:52 ` Vitaly Wool
  2 siblings, 0 replies; 7+ messages in thread
From: Vitaly Wool @ 2016-10-13 16:52 UTC (permalink / raw)
  To: Linux-MM, linux-kernel; +Cc: Dan Streetman, Andrew Morton, Dave Chinner

This patch implements a shrinker for z3fold. The shrinker does not free
up any pages directly; instead, it allows for denser placement of
compressed objects, which results in fewer pages consumed and therefore a
higher compression ratio.

This patch has been tested against the latest Linus tree.
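
The shrinker is hooked into the generic shrinker framework at pool
creation and destruction time; in short (the full hunks are below):

	pool->shrinker.count_objects = z3fold_shrink_count;	/* # of unbuddied pages */
	pool->shrinker.scan_objects = z3fold_shrink_scan;	/* compact up to nr_to_scan of them */
	pool->shrinker.seeks = DEFAULT_SEEKS;
	pool->shrinker.batch = NCHUNKS - 4;
	if (register_shrinker(&pool->shrinker))
		goto out_free;		/* pool creation fails */

	/* ... and in z3fold_destroy_pool(): */
	unregister_shrinker(&pool->shrinker);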

Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
---
 mm/z3fold.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 111 insertions(+), 25 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 10513b5..0b2a0d3 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -27,6 +27,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/preempt.h>
+#include <linux/shrinker.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/zpool.h>
@@ -72,6 +73,7 @@ struct z3fold_ops {
  * @unbuddied_nr:	number of unbuddied z3fold pages in the pool.
  * @ops:	pointer to a structure of user defined operations specified at
  *		pool creation time.
+ * @shrinker:	shrinker structure to optimize page layout in background
  *
  * This structure is allocated at pool creation time and maintains metadata
  * pertaining to a particular z3fold pool.
@@ -86,6 +88,7 @@ struct z3fold_pool {
 	const struct z3fold_ops *ops;
 	struct zpool *zpool;
 	const struct zpool_ops *zpool_ops;
+	struct shrinker shrinker;
 };
 
 enum buddy {
@@ -136,6 +139,9 @@ static int size_to_chunks(size_t size)
 #define for_each_unbuddied_list(_iter, _begin) \
 	for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++)
 
+#define for_each_unbuddied_list_down(_iter, _end) \
+	for ((_iter) = (_end); (_iter) > 0; (_iter)--)
+
 /* Initializes the z3fold header of a newly allocated z3fold page */
 static struct z3fold_header *init_z3fold_page(struct page *page)
 {
@@ -211,6 +217,96 @@ static int num_free_chunks(struct z3fold_header *zhdr)
 	return nfree;
 }
 
+/* Has to be called with lock held */
+static int z3fold_compact_page(struct z3fold_header *zhdr, bool sync)
+{
+	struct page *page = virt_to_page(zhdr);
+	void *beg = zhdr;
+
+
+	if (!test_bit(MIDDLE_CHUNK_MAPPED, &page->private)) {
+		if (zhdr->middle_chunks != 0 &&
+		    zhdr->first_chunks == 0 &&
+		    zhdr->last_chunks == 0) {
+			memmove(beg + ZHDR_SIZE_ALIGNED,
+				beg + (zhdr->start_middle << CHUNK_SHIFT),
+				zhdr->middle_chunks << CHUNK_SHIFT);
+			zhdr->first_chunks = zhdr->middle_chunks;
+			zhdr->middle_chunks = 0;
+			zhdr->start_middle = 0;
+			zhdr->first_num++;
+			return 1;
+		}
+		if (sync)
+			goto out;
+
+		/* moving data is expensive, so let's only do that if
+		 * there's substantial gain (2+ chunks)
+		 */
+		if (zhdr->middle_chunks != 0 && zhdr->first_chunks != 0 &&
+		    zhdr->last_chunks == 0 &&
+		    zhdr->start_middle > zhdr->first_chunks + 2) {
+			unsigned short new_start = zhdr->first_chunks + 1;
+			memmove(beg + (new_start << CHUNK_SHIFT),
+				beg + (zhdr->start_middle << CHUNK_SHIFT),
+				zhdr->middle_chunks << CHUNK_SHIFT);
+			zhdr->start_middle = new_start;
+			return 1;
+		}
+		if (zhdr->middle_chunks != 0 && zhdr->last_chunks != 0 &&
+		    zhdr->first_chunks == 0 &&
+		    zhdr->middle_chunks + zhdr->last_chunks <=
+		    NCHUNKS - zhdr->start_middle - 2) {
+			unsigned short new_start = NCHUNKS - zhdr->last_chunks -
+				zhdr->middle_chunks;
+			memmove(beg + (new_start << CHUNK_SHIFT),
+				beg + (zhdr->start_middle << CHUNK_SHIFT),
+				zhdr->middle_chunks << CHUNK_SHIFT);
+			zhdr->start_middle = new_start;
+			return 1;
+		}
+	}
+out:
+	return 0;
+}
+
+static unsigned long z3fold_shrink_count(struct shrinker *shrink,
+				struct shrink_control *sc)
+{
+	struct z3fold_pool *pool = container_of(shrink, struct z3fold_pool,
+						shrinker);
+
+	return atomic64_read(&pool->unbuddied_nr);
+}
+
+static unsigned long z3fold_shrink_scan(struct shrinker *shrink,
+				struct shrink_control *sc)
+{
+	struct z3fold_pool *pool = container_of(shrink, struct z3fold_pool,
+						shrinker);
+	struct z3fold_header *zhdr;
+	int i, nr_to_scan = sc->nr_to_scan, nr_shrunk = 0;
+
+	spin_lock(&pool->lock);
+	for_each_unbuddied_list_down(i, NCHUNKS - 3) {
+		if (!list_empty(&pool->unbuddied[i])) {
+			zhdr = list_first_entry(&pool->unbuddied[i],
+						struct z3fold_header, buddy);
+			list_del(&zhdr->buddy);
+			spin_unlock(&pool->lock);
+			nr_shrunk += z3fold_compact_page(zhdr, false);
+			spin_lock(&pool->lock);
+			list_add(&zhdr->buddy,
+				&pool->unbuddied[num_free_chunks(zhdr)]);
+			if (!--nr_to_scan)
+				break;
+		}
+	}
+	spin_unlock(&pool->lock);
+	return nr_shrunk;
+}
+
+
 /*****************
  * API Functions
 *****************/
@@ -230,16 +326,27 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
 
 	pool = kzalloc(sizeof(struct z3fold_pool), gfp);
 	if (!pool)
-		return NULL;
+		goto out;
 	spin_lock_init(&pool->lock);
 	for_each_unbuddied_list(i, 0)
 		INIT_LIST_HEAD(&pool->unbuddied[i]);
 	INIT_LIST_HEAD(&pool->buddied);
 	INIT_LIST_HEAD(&pool->lru);
+	pool->shrinker.count_objects = z3fold_shrink_count;
+	pool->shrinker.scan_objects = z3fold_shrink_scan;
+	pool->shrinker.seeks = DEFAULT_SEEKS;
+	pool->shrinker.batch = NCHUNKS - 4;
+	if (register_shrinker(&pool->shrinker))
+		goto out_free;
 	atomic64_set(&pool->pages_nr, 0);
 	atomic64_set(&pool->unbuddied_nr, 0);
 	pool->ops = ops;
 	return pool;
+
+out_free:
+	kfree(pool);
+out:
+	return NULL;
 }
 
 /**
@@ -250,31 +357,10 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
  */
 static void z3fold_destroy_pool(struct z3fold_pool *pool)
 {
+	unregister_shrinker(&pool->shrinker);
 	kfree(pool);
 }
 
-/* Has to be called with lock held */
-static int z3fold_compact_page(struct z3fold_header *zhdr)
-{
-	struct page *page = virt_to_page(zhdr);
-	void *beg = zhdr;
-
-
-	if (!test_bit(MIDDLE_CHUNK_MAPPED, &page->private) &&
-	    zhdr->middle_chunks != 0 &&
-	    zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
-		memmove(beg + ZHDR_SIZE_ALIGNED,
-			beg + (zhdr->start_middle << CHUNK_SHIFT),
-			zhdr->middle_chunks << CHUNK_SHIFT);
-		zhdr->first_chunks = zhdr->middle_chunks;
-		zhdr->middle_chunks = 0;
-		zhdr->start_middle = 0;
-		zhdr->first_num++;
-		return 1;
-	}
-	return 0;
-}
-
 /**
  * z3fold_alloc() - allocates a region of a given size
  * @pool:	z3fold pool from which to allocate
@@ -464,7 +550,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		free_z3fold_page(zhdr);
 		atomic64_dec(&pool->pages_nr);
 	} else {
-		z3fold_compact_page(zhdr);
+		z3fold_compact_page(zhdr, true);
 		/* Add to the unbuddied list */
 		freechunks = num_free_chunks(zhdr);
 		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
@@ -596,7 +682,7 @@ next:
 				/* Full, add to buddied list */
 				list_add(&zhdr->buddy, &pool->buddied);
 			} else {
-				z3fold_compact_page(zhdr);
+				z3fold_compact_page(zhdr, true);
 				/* add to unbuddied list */
 				freechunks = num_free_chunks(zhdr);
 				list_add(&zhdr->buddy,
-- 
2.4.2


* [PATCH 2/3] z3fold: remove redundant locking
  2016-10-19 16:33 [PATCH 0/3] z3fold: background page compaction Vitaly Wool
@ 2016-10-19 16:35 ` Vitaly Wool
  2016-10-20 20:15   ` Dan Streetman
  0 siblings, 1 reply; 7+ messages in thread
From: Vitaly Wool @ 2016-10-19 16:35 UTC (permalink / raw)
  To: Linux-MM, linux-kernel; +Cc: Dan Streetman, Andrew Morton

The per-pool z3fold spinlock should generally be taken only when
a non-atomic pool variable is modified. There's no need to take it
to map/unmap an object. This patch introduces a per-page lock that is
used instead to protect per-page variables in the map/unmap functions.
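
The intended split is roughly the following (a sketch using the names
from this patch):

	/* per-page state (chunk sizes, first_num, MIDDLE_CHUNK_MAPPED):
	 * take the new per-page lock
	 */
	raw_spin_lock(&zhdr->page_lock);
	buddy = handle_to_buddy(handle);
	if (buddy == MIDDLE)
		clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
	raw_spin_unlock(&zhdr->page_lock);

	/* pool-wide state (buddy/LRU lists): still the pool lock */
	spin_lock(&pool->lock);
	list_del(&zhdr->buddy);
	spin_unlock(&pool->lock);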

Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
---
 mm/z3fold.c | 65 ++++++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 38 insertions(+), 27 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 5ac325a..329bc26 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -104,6 +104,7 @@ enum buddy {
  * @middle_chunks:	the size of the middle buddy in chunks, 0 if free
  * @last_chunks:	the size of the last buddy in chunks, 0 if free
  * @first_num:		the starting number (for the first handle)
+ * @page_lock:		per-page lock
  */
 struct z3fold_header {
 	struct list_head buddy;
@@ -112,6 +113,7 @@ struct z3fold_header {
 	unsigned short last_chunks;
 	unsigned short start_middle;
 	unsigned short first_num:NCHUNKS_ORDER;
+	raw_spinlock_t page_lock;
 };
 
 /*
@@ -152,6 +154,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
 	zhdr->first_num = 0;
 	zhdr->start_middle = 0;
 	INIT_LIST_HEAD(&zhdr->buddy);
+	raw_spin_lock_init(&zhdr->page_lock);
 	return zhdr;
 }
 
@@ -163,15 +166,17 @@ static void free_z3fold_page(struct z3fold_header *zhdr)
 
 /*
  * Encodes the handle of a particular buddy within a z3fold page
- * Pool lock should be held as this function accesses first_num
  */
 static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
 {
 	unsigned long handle;
 
 	handle = (unsigned long)zhdr;
-	if (bud != HEADLESS)
+	if (bud != HEADLESS) {
+		raw_spin_lock(&zhdr->page_lock);
 		handle += (bud + zhdr->first_num) & BUDDY_MASK;
+		raw_spin_unlock(&zhdr->page_lock);
+	}
 	return handle;
 }
 
@@ -181,7 +186,10 @@ static struct z3fold_header *handle_to_z3fold_header(unsigned long handle)
 	return (struct z3fold_header *)(handle & PAGE_MASK);
 }
 
-/* Returns buddy number */
+/*
+ * Returns buddy number.
+ * NB: can't be used with HEADLESS pages.
+ */
 static enum buddy handle_to_buddy(unsigned long handle)
 {
 	struct z3fold_header *zhdr = handle_to_z3fold_header(handle);
@@ -253,7 +261,6 @@ static void z3fold_destroy_pool(struct z3fold_pool *pool)
 	kfree(pool);
 }
 
-/* Has to be called with lock held */
 static int z3fold_compact_page(struct z3fold_header *zhdr)
 {
 	struct page *page = virt_to_page(zhdr);
@@ -263,6 +270,7 @@ static int z3fold_compact_page(struct z3fold_header *zhdr)
 	if (!test_bit(MIDDLE_CHUNK_MAPPED, &page->private) &&
 	    zhdr->middle_chunks != 0 &&
 	    zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
+		raw_spin_lock(&zhdr->page_lock);
 		memmove(beg + ZHDR_SIZE_ALIGNED,
 			beg + (zhdr->start_middle << CHUNK_SHIFT),
 			zhdr->middle_chunks << CHUNK_SHIFT);
@@ -270,6 +278,7 @@ static int z3fold_compact_page(struct z3fold_header *zhdr)
 		zhdr->middle_chunks = 0;
 		zhdr->start_middle = 0;
 		zhdr->first_num++;
+		raw_spin_unlock(&zhdr->page_lock);
 		return 1;
 	}
 	return 0;
@@ -385,9 +394,9 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 		list_del(&page->lru);
 
 	list_add(&page->lru, &pool->lru);
+	spin_unlock(&pool->lock);
 
 	*handle = encode_handle(zhdr, bud);
-	spin_unlock(&pool->lock);
 
 	return 0;
 }
@@ -409,15 +418,18 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 	struct page *page;
 	enum buddy bud;
 
-	spin_lock(&pool->lock);
 	zhdr = handle_to_z3fold_header(handle);
 	page = virt_to_page(zhdr);
 
 	if (test_bit(PAGE_HEADLESS, &page->private)) {
 		/* HEADLESS page stored */
 		bud = HEADLESS;
+		spin_lock(&pool->lock);
 	} else {
-		bool is_unbuddied = zhdr->first_chunks == 0 ||
+		bool is_unbuddied;
+
+		raw_spin_lock(&zhdr->page_lock);
+		is_unbuddied = zhdr->first_chunks == 0 ||
 				zhdr->middle_chunks == 0 ||
 				zhdr->last_chunks == 0;
 
@@ -436,12 +448,17 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 			break;
 		default:
 			pr_err("%s: unknown bud %d\n", __func__, bud);
+			raw_spin_unlock(&zhdr->page_lock);
 			WARN_ON(1);
-			spin_unlock(&pool->lock);
 			return;
 		}
+		raw_spin_unlock(&zhdr->page_lock);
 		if (is_unbuddied)
 			atomic64_dec(&pool->unbuddied_nr);
+
+		spin_lock(&pool->lock);
+		/* Remove from existing buddy list */
+		list_del(&zhdr->buddy);
 	}
 
 	if (test_bit(UNDER_RECLAIM, &page->private)) {
@@ -450,11 +467,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		return;
 	}
 
-	if (bud != HEADLESS) {
-		/* Remove from existing buddy list */
-		list_del(&zhdr->buddy);
-	}
-
+	/* We've got the page and it is not under reclaim */
 	if (bud == HEADLESS ||
 	    (zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 &&
 			zhdr->last_chunks == 0)) {
@@ -462,16 +475,16 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		list_del(&page->lru);
 		clear_bit(PAGE_HEADLESS, &page->private);
 		free_z3fold_page(zhdr);
+		spin_unlock(&pool->lock);
 		atomic64_dec(&pool->pages_nr);
 	} else {
 		z3fold_compact_page(zhdr);
 		/* Add to the unbuddied list */
 		freechunks = num_free_chunks(zhdr);
 		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+		spin_unlock(&pool->lock);
 		atomic64_inc(&pool->unbuddied_nr);
 	}
-
-	spin_unlock(&pool->lock);
 }
 
 /**
@@ -580,6 +593,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 		if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) ||
 		    (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 &&
 		     zhdr->middle_chunks == 0)) {
+			spin_unlock(&pool->lock);
 			/*
 			 * All buddies are now free, free the z3fold page and
 			 * return success.
@@ -587,7 +601,6 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 			clear_bit(PAGE_HEADLESS, &page->private);
 			free_z3fold_page(zhdr);
 			atomic64_dec(&pool->pages_nr);
-			spin_unlock(&pool->lock);
 			return 0;
 		}  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
 			if (zhdr->first_chunks != 0 &&
@@ -629,7 +642,6 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
 	void *addr;
 	enum buddy buddy;
 
-	spin_lock(&pool->lock);
 	zhdr = handle_to_z3fold_header(handle);
 	addr = zhdr;
 	page = virt_to_page(zhdr);
@@ -637,7 +649,9 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
 	if (test_bit(PAGE_HEADLESS, &page->private))
 		goto out;
 
+	raw_spin_lock(&zhdr->page_lock);
 	buddy = handle_to_buddy(handle);
+
 	switch (buddy) {
 	case FIRST:
 		addr += ZHDR_SIZE_ALIGNED;
@@ -655,8 +669,8 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
 		addr = NULL;
 		break;
 	}
+	raw_spin_unlock(&zhdr->page_lock);
 out:
-	spin_unlock(&pool->lock);
 	return addr;
 }
 
@@ -671,19 +685,16 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
 	struct page *page;
 	enum buddy buddy;
 
-	spin_lock(&pool->lock);
 	zhdr = handle_to_z3fold_header(handle);
 	page = virt_to_page(zhdr);
 
-	if (test_bit(PAGE_HEADLESS, &page->private)) {
-		spin_unlock(&pool->lock);
-		return;
+	if (!test_bit(PAGE_HEADLESS, &page->private)) {
+		raw_spin_lock(&zhdr->page_lock);
+		buddy = handle_to_buddy(handle);
+		if (buddy == MIDDLE)
+			clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
+		raw_spin_unlock(&zhdr->page_lock);
 	}
-
-	buddy = handle_to_buddy(handle);
-	if (buddy == MIDDLE)
-		clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
-	spin_unlock(&pool->lock);
 }
 
 /**
-- 
2.4.2


* Re: [PATCH 2/3] z3fold: remove redundant locking
  2016-10-19 16:35 ` [PATCH 2/3] z3fold: remove redundant locking Vitaly Wool
@ 2016-10-20 20:15   ` Dan Streetman
  2016-10-22 18:51     ` Vitaly Wool
  0 siblings, 1 reply; 7+ messages in thread
From: Dan Streetman @ 2016-10-20 20:15 UTC (permalink / raw)
  To: Vitaly Wool; +Cc: Linux-MM, linux-kernel, Andrew Morton

On Wed, Oct 19, 2016 at 12:35 PM, Vitaly Wool <vitalywool@gmail.com> wrote:
> The per-pool z3fold spinlock should generally be taken only when
> a non-atomic pool variable is modified. There's no need to take it
> to map/unmap an object. This patch introduces a per-page lock that is
> used instead to protect per-page variables in the map/unmap functions.

I think the per-page lock must be held around almost all access to any
page zhdr data; previously that was protected by the pool lock.

>
> Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
> ---
>  mm/z3fold.c | 65 ++++++++++++++++++++++++++++++++++++-------------------------
>  1 file changed, 38 insertions(+), 27 deletions(-)
>
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index 5ac325a..329bc26 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -104,6 +104,7 @@ enum buddy {
>   * @middle_chunks:     the size of the middle buddy in chunks, 0 if free
>   * @last_chunks:       the size of the last buddy in chunks, 0 if free
>   * @first_num:         the starting number (for the first handle)
> + * @page_lock:         per-page lock
>   */
>  struct z3fold_header {
>         struct list_head buddy;
> @@ -112,6 +113,7 @@ struct z3fold_header {
>         unsigned short last_chunks;
>         unsigned short start_middle;
>         unsigned short first_num:NCHUNKS_ORDER;
> +       raw_spinlock_t page_lock;
>  };
>
>  /*
> @@ -152,6 +154,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
>         zhdr->first_num = 0;
>         zhdr->start_middle = 0;
>         INIT_LIST_HEAD(&zhdr->buddy);
> +       raw_spin_lock_init(&zhdr->page_lock);
>         return zhdr;
>  }
>
> @@ -163,15 +166,17 @@ static void free_z3fold_page(struct z3fold_header *zhdr)
>
>  /*
>   * Encodes the handle of a particular buddy within a z3fold page
> - * Pool lock should be held as this function accesses first_num
>   */
>  static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
>  {
>         unsigned long handle;
>
>         handle = (unsigned long)zhdr;
> -       if (bud != HEADLESS)
> +       if (bud != HEADLESS) {
> +               raw_spin_lock(&zhdr->page_lock);
>                 handle += (bud + zhdr->first_num) & BUDDY_MASK;
> +               raw_spin_unlock(&zhdr->page_lock);
> +       }
>         return handle;
>  }
>
> @@ -181,7 +186,10 @@ static struct z3fold_header *handle_to_z3fold_header(unsigned long handle)
>         return (struct z3fold_header *)(handle & PAGE_MASK);
>  }
>
> -/* Returns buddy number */
> +/*
> + * Returns buddy number.
> + * NB: can't be used with HEADLESS pages.

Either indicate that this needs to be called with zhdr->page_lock held,
or ensure it never is and take the lock internally, like encode_handle()
above.
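
For the latter, something along these lines (untested sketch; the buddy
computation is just the existing one, taken under the new page lock):

	static enum buddy handle_to_buddy(unsigned long handle)
	{
		struct z3fold_header *zhdr = handle_to_z3fold_header(handle);
		enum buddy bud;

		raw_spin_lock(&zhdr->page_lock);
		bud = (handle - zhdr->first_num) & BUDDY_MASK;
		raw_spin_unlock(&zhdr->page_lock);
		return bud;
	}

(that would also mean z3fold_map() below must not already hold the page
lock when it calls this, or it will deadlock.)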

> + */
>  static enum buddy handle_to_buddy(unsigned long handle)
>  {
>         struct z3fold_header *zhdr = handle_to_z3fold_header(handle);
> @@ -253,7 +261,6 @@ static void z3fold_destroy_pool(struct z3fold_pool *pool)
>         kfree(pool);
>  }
>
> -/* Has to be called with lock held */
>  static int z3fold_compact_page(struct z3fold_header *zhdr)
>  {
>         struct page *page = virt_to_page(zhdr);
> @@ -263,6 +270,7 @@ static int z3fold_compact_page(struct z3fold_header *zhdr)
>         if (!test_bit(MIDDLE_CHUNK_MAPPED, &page->private) &&
>             zhdr->middle_chunks != 0 &&
>             zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
> +               raw_spin_lock(&zhdr->page_lock);

The MIDDLE_CHUNK_MAPPED test and the chunk checks above need to be
protected by the per-page lock.
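
i.e. take the lock before the checks — roughly (untested; the body is
unchanged, only the lock is widened):

	raw_spin_lock(&zhdr->page_lock);
	if (!test_bit(MIDDLE_CHUNK_MAPPED, &page->private) &&
	    zhdr->middle_chunks != 0 &&
	    zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
		memmove(beg + ZHDR_SIZE_ALIGNED,
			beg + (zhdr->start_middle << CHUNK_SHIFT),
			zhdr->middle_chunks << CHUNK_SHIFT);
		zhdr->first_chunks = zhdr->middle_chunks;
		zhdr->middle_chunks = 0;
		zhdr->start_middle = 0;
		zhdr->first_num++;
		raw_spin_unlock(&zhdr->page_lock);
		return 1;
	}
	raw_spin_unlock(&zhdr->page_lock);
	return 0;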

>                 memmove(beg + ZHDR_SIZE_ALIGNED,
>                         beg + (zhdr->start_middle << CHUNK_SHIFT),
>                         zhdr->middle_chunks << CHUNK_SHIFT);
> @@ -270,6 +278,7 @@ static int z3fold_compact_page(struct z3fold_header *zhdr)
>                 zhdr->middle_chunks = 0;
>                 zhdr->start_middle = 0;
>                 zhdr->first_num++;
> +               raw_spin_unlock(&zhdr->page_lock);
>                 return 1;
>         }
>         return 0;
> @@ -385,9 +394,9 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,

all the zhdr->*_chunks field checks in this function need to be
protected by the page_lock.

>                 list_del(&page->lru);
>
>         list_add(&page->lru, &pool->lru);
> +       spin_unlock(&pool->lock);
>
>         *handle = encode_handle(zhdr, bud);
> -       spin_unlock(&pool->lock);
>
>         return 0;
>  }
> @@ -409,15 +418,18 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>         struct page *page;
>         enum buddy bud;
>
> -       spin_lock(&pool->lock);
>         zhdr = handle_to_z3fold_header(handle);
>         page = virt_to_page(zhdr);
>
>         if (test_bit(PAGE_HEADLESS, &page->private)) {
>                 /* HEADLESS page stored */
>                 bud = HEADLESS;
> +               spin_lock(&pool->lock);
>         } else {
> -               bool is_unbuddied = zhdr->first_chunks == 0 ||
> +               bool is_unbuddied;
> +
> +               raw_spin_lock(&zhdr->page_lock);
> +               is_unbuddied = zhdr->first_chunks == 0 ||
>                                 zhdr->middle_chunks == 0 ||
>                                 zhdr->last_chunks == 0;
>
> @@ -436,12 +448,17 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>                         break;
>                 default:
>                         pr_err("%s: unknown bud %d\n", __func__, bud);
> +                       raw_spin_unlock(&zhdr->page_lock);
>                         WARN_ON(1);
> -                       spin_unlock(&pool->lock);
>                         return;
>                 }
> +               raw_spin_unlock(&zhdr->page_lock);
>                 if (is_unbuddied)
>                         atomic64_dec(&pool->unbuddied_nr);
> +
> +               spin_lock(&pool->lock);
> +               /* Remove from existing buddy list */
> +               list_del(&zhdr->buddy);

I think this needs to be left in place below, after the UNDER_RECLAIM
check; if it is being reclaimed, it's already been taken off of its
buddy list.
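
i.e. keep roughly the original ordering (sketch):

	spin_lock(&pool->lock);
	if (test_bit(UNDER_RECLAIM, &page->private)) {
		/* reclaim already took the page off its buddy list */
		spin_unlock(&pool->lock);
		return;
	}
	if (bud != HEADLESS)
		/* not under reclaim, so it is still on a buddy list */
		list_del(&zhdr->buddy);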

>         }
>
>         if (test_bit(UNDER_RECLAIM, &page->private)) {
> @@ -450,11 +467,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>                 return;
>         }
>
> -       if (bud != HEADLESS) {
> -               /* Remove from existing buddy list */
> -               list_del(&zhdr->buddy);
> -       }
> -
> +       /* We've got the page and it is not under reclaim */

Kind of. Another free for a different bud in this page could come in at
the same time, so we have no guarantee of exclusive access to it. We must
be careful going between the pool and page locks to avoid deadlock, and
to avoid removing the page from its buddy list if it has already been
removed.

>         if (bud == HEADLESS ||
>             (zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 &&
>                         zhdr->last_chunks == 0)) {
> @@ -462,16 +475,16 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>                 list_del(&page->lru);
>                 clear_bit(PAGE_HEADLESS, &page->private);
>                 free_z3fold_page(zhdr);
> +               spin_unlock(&pool->lock);
>                 atomic64_dec(&pool->pages_nr);
>         } else {
>                 z3fold_compact_page(zhdr);
>                 /* Add to the unbuddied list */
>                 freechunks = num_free_chunks(zhdr);
>                 list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
> +               spin_unlock(&pool->lock);
>                 atomic64_inc(&pool->unbuddied_nr);
>         }
> -
> -       spin_unlock(&pool->lock);
>  }
>
>  /**
> @@ -580,6 +593,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>                 if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) ||
>                     (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 &&
>                      zhdr->middle_chunks == 0)) {
> +                       spin_unlock(&pool->lock);

As in z3fold_compact_page(), the lock needs to protect all zhdr field accesses.

>                         /*
>                          * All buddies are now free, free the z3fold page and
>                          * return success.
> @@ -587,7 +601,6 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>                         clear_bit(PAGE_HEADLESS, &page->private);
>                         free_z3fold_page(zhdr);
>                         atomic64_dec(&pool->pages_nr);
> -                       spin_unlock(&pool->lock);
>                         return 0;
>                 }  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
>                         if (zhdr->first_chunks != 0 &&
> @@ -629,7 +642,6 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
>         void *addr;
>         enum buddy buddy;
>
> -       spin_lock(&pool->lock);
>         zhdr = handle_to_z3fold_header(handle);
>         addr = zhdr;
>         page = virt_to_page(zhdr);
> @@ -637,7 +649,9 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
>         if (test_bit(PAGE_HEADLESS, &page->private))
>                 goto out;
>
> +       raw_spin_lock(&zhdr->page_lock);
>         buddy = handle_to_buddy(handle);
> +
>         switch (buddy) {
>         case FIRST:
>                 addr += ZHDR_SIZE_ALIGNED;
> @@ -655,8 +669,8 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
>                 addr = NULL;
>                 break;
>         }
> +       raw_spin_unlock(&zhdr->page_lock);
>  out:
> -       spin_unlock(&pool->lock);
>         return addr;
>  }
>
> @@ -671,19 +685,16 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
>         struct page *page;
>         enum buddy buddy;
>
> -       spin_lock(&pool->lock);
>         zhdr = handle_to_z3fold_header(handle);
>         page = virt_to_page(zhdr);
>
> -       if (test_bit(PAGE_HEADLESS, &page->private)) {
> -               spin_unlock(&pool->lock);
> -               return;
> +       if (!test_bit(PAGE_HEADLESS, &page->private)) {
> +               raw_spin_lock(&zhdr->page_lock);
> +               buddy = handle_to_buddy(handle);
> +               if (buddy == MIDDLE)
> +                       clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
> +               raw_spin_unlock(&zhdr->page_lock);
>         }
> -
> -       buddy = handle_to_buddy(handle);
> -       if (buddy == MIDDLE)
> -               clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
> -       spin_unlock(&pool->lock);
>  }
>
>  /**
> --
> 2.4.2


* Re: [PATCH 2/3] z3fold: remove redundant locking
  2016-10-20 20:15   ` Dan Streetman
@ 2016-10-22 18:51     ` Vitaly Wool
  0 siblings, 0 replies; 7+ messages in thread
From: Vitaly Wool @ 2016-10-22 18:51 UTC (permalink / raw)
  To: Dan Streetman; +Cc: Linux-MM, linux-kernel, Andrew Morton

On Thu, Oct 20, 2016 at 10:15 PM, Dan Streetman <ddstreet@ieee.org> wrote:
> On Wed, Oct 19, 2016 at 12:35 PM, Vitaly Wool <vitalywool@gmail.com> wrote:
>> The per-pool z3fold spinlock should generally be taken only when
>> a non-atomic pool variable is modified. There's no need to take it
>> to map/unmap an object. This patch introduces a per-page lock that is
>> used instead to protect per-page variables in the map/unmap functions.
>
> I think the per-page lock must be held around almost all access to any
> page zhdr data; previously that was protected by the pool lock.

Right, except for list operations. At this point I think the per-page
locks will have to be thought over again, and there is some nice
performance gain from making the spinlock an rwlock anyway, so I'll stick
with the latest patchset, fixing small things like the wrong unbuddied_nr
increment in the other patch.

Best regards,
   Vitaly



Thread overview: 7+ messages
2016-10-13 16:47 [PATCHv4 0/3] z3fold: add shrinker Vitaly Wool
2016-10-13 16:49 ` [PATCH 1/3] z3fold: make counters atomic Vitaly Wool
2016-10-13 16:50 ` [PATCH 2/3] z3fold: remove redundant locking Vitaly Wool
2016-10-13 16:52 ` [PATCH 3/3] z3fold: add shrinker Vitaly Wool
  -- strict thread matches above, loose matches on Subject: below --
2016-10-19 16:33 [PATCH 0/3] z3fold: background page compaction Vitaly Wool
2016-10-19 16:35 ` [PATCH 2/3] z3fold: remove redundant locking Vitaly Wool
2016-10-20 20:15   ` Dan Streetman
2016-10-22 18:51     ` Vitaly Wool
