* [PATCH 0/2] prepare zbud to be used by zram as underlying allocator @ 2015-09-16 11:48 Vitaly Wool 2015-09-16 11:50 ` [PATCH 1/2] zbud: allow PAGE_SIZE allocations Vitaly Wool ` (2 more replies) 0 siblings, 3 replies; 16+ messages in thread From: Vitaly Wool @ 2015-09-16 11:48 UTC (permalink / raw) To: ddstreet, akpm, minchan, sergey.senozhatsky; +Cc: linux-kernel, linux-mm Hi, as a follow-up to my previous patchset, I decided to prepare the zbud/zpool-related patches first, then run testing rounds and performance measurements for zram on top of both allocators, and come up with improved and verified zram/zpool patches after that. So for now, here comes the zbud/zpool part. -- Vitaly Wool <vitalywool@gmail.com> -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: dont@kvack.org ^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH 1/2] zbud: allow PAGE_SIZE allocations 2015-09-16 11:48 [PATCH 0/2] prepare zbud to be used by zram as underlying allocator Vitaly Wool @ 2015-09-16 11:50 ` Vitaly Wool 2015-09-17 13:00 ` Vlastimil Babka 2015-09-21 16:17 ` Dan Streetman 2015-09-16 11:53 ` [PATCH 2/2] zpool/zsmalloc/zbud: align on interfaces Vitaly Wool 2015-09-17 1:30 ` [PATCH 0/2] prepare zbud to be used by zram as underlying allocator Sergey Senozhatsky 2 siblings, 2 replies; 16+ messages in thread From: Vitaly Wool @ 2015-09-16 11:50 UTC (permalink / raw) To: ddstreet, akpm, minchan, sergey.senozhatsky; +Cc: linux-kernel, linux-mm For zram to be able to use zbud via the common zpool API, allocations of size PAGE_SIZE should be allowed by zpool. zbud uses the beginning of an allocated page for its internal structure but it is not a problem as long as we keep track of such special pages using a newly introduced page flag. To be able to keep track of zbud pages in any case, struct page's lru pointer will be used for zbud page lists instead of the one that used to be part of the aforementioned internal structure. Signed-off-by: Vitaly Wool <vitalywool@gmail.com> --- include/linux/page-flags.h | 3 ++ mm/zbud.c | 71 ++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 62 insertions(+), 12 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 416509e..dd47cf0 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -134,6 +134,9 @@ enum pageflags { /* SLOB */ PG_slob_free = PG_private, + + /* ZBUD */ + PG_uncompressed = PG_owner_priv_1, }; #ifndef __GENERATING_BOUNDS_H diff --git a/mm/zbud.c b/mm/zbud.c index fa48bcdf..ee8b5d6 100644 --- a/mm/zbud.c +++ b/mm/zbud.c @@ -107,13 +107,11 @@ struct zbud_pool { * struct zbud_header - zbud page metadata occupying the first chunk of each * zbud page. 
* @buddy: links the zbud page into the unbuddied/buddied lists in the pool - * @lru: links the zbud page into the lru list in the pool * @first_chunks: the size of the first buddy in chunks, 0 if free * @last_chunks: the size of the last buddy in chunks, 0 if free */ struct zbud_header { struct list_head buddy; - struct list_head lru; unsigned int first_chunks; unsigned int last_chunks; bool under_reclaim; @@ -221,6 +219,7 @@ MODULE_ALIAS("zpool-zbud"); *****************/ /* Just to make the code easier to read */ enum buddy { + FULL, FIRST, LAST }; @@ -241,7 +240,7 @@ static struct zbud_header *init_zbud_page(struct page *page) zhdr->first_chunks = 0; zhdr->last_chunks = 0; INIT_LIST_HEAD(&zhdr->buddy); - INIT_LIST_HEAD(&zhdr->lru); + INIT_LIST_HEAD(&page->lru); zhdr->under_reclaim = 0; return zhdr; } @@ -267,11 +266,18 @@ static unsigned long encode_handle(struct zbud_header *zhdr, enum buddy bud) * over the zbud header in the first chunk. */ handle = (unsigned long)zhdr; - if (bud == FIRST) + switch (bud) { + case FIRST: /* skip over zbud header */ handle += ZHDR_SIZE_ALIGNED; - else /* bud == LAST */ + break; + case LAST: handle += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT); + break; + case FULL: + default: + break; + } return handle; } @@ -360,6 +366,24 @@ int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp, if (!size || (gfp & __GFP_HIGHMEM)) return -EINVAL; + + if (size == PAGE_SIZE) { + /* + * This is a special case. 
The page will be allocated + * and used to store uncompressed data + */ + page = alloc_page(gfp); + if (!page) + return -ENOMEM; + spin_lock(&pool->lock); + pool->pages_nr++; + INIT_LIST_HEAD(&page->lru); + page->flags |= PG_uncompressed; + list_add(&page->lru, &pool->lru); + spin_unlock(&pool->lock); + *handle = encode_handle(page_address(page), FULL); + return 0; + } if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE) return -ENOSPC; chunks = size_to_chunks(size); @@ -372,6 +396,7 @@ int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp, zhdr = list_first_entry(&pool->unbuddied[i], struct zbud_header, buddy); list_del(&zhdr->buddy); + page = virt_to_page(zhdr); if (zhdr->first_chunks == 0) bud = FIRST; else @@ -406,9 +431,9 @@ found: } /* Add/move zbud page to beginning of LRU */ - if (!list_empty(&zhdr->lru)) - list_del(&zhdr->lru); - list_add(&zhdr->lru, &pool->lru); + if (!list_empty(&page->lru)) + list_del(&page->lru); + list_add(&page->lru, &pool->lru); *handle = encode_handle(zhdr, bud); spin_unlock(&pool->lock); @@ -430,9 +455,21 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle) { struct zbud_header *zhdr; int freechunks; + struct page *page; spin_lock(&pool->lock); zhdr = handle_to_zbud_header(handle); + page = virt_to_page(zhdr); + + /* If it was an uncompressed full page, just free it */ + if (page->flags & PG_uncompressed) { + page->flags &= ~PG_uncompressed; + list_del(&page->lru); + __free_page(page); + pool->pages_nr--; + spin_unlock(&pool->lock); + return; + } /* If first buddy, handle will be page aligned */ if ((handle - ZHDR_SIZE_ALIGNED) & ~PAGE_MASK) @@ -451,7 +488,7 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle) if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) { /* zbud page is empty, free */ - list_del(&zhdr->lru); + list_del(&page->lru); free_zbud_page(zhdr); pool->pages_nr--; } else { @@ -505,6 +542,7 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries) { int i, ret, 
freechunks; struct zbud_header *zhdr; + struct page *page; unsigned long first_handle = 0, last_handle = 0; spin_lock(&pool->lock); @@ -514,8 +552,17 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries) return -EINVAL; } for (i = 0; i < retries; i++) { - zhdr = list_tail_entry(&pool->lru, struct zbud_header, lru); - list_del(&zhdr->lru); + page = list_tail_entry(&pool->lru, struct page, lru); + zhdr = page_address(page); + list_del(&page->lru); + /* Uncompressed zbud page? just run eviction and free it */ + if (page->flags & PG_uncompressed) { + page->flags &= ~PG_uncompressed; + spin_unlock(&pool->lock); + pool->ops->evict(pool, encode_handle(zhdr, FULL)); + __free_page(page); + return 0; + } list_del(&zhdr->buddy); /* Protect zbud page against free */ zhdr->under_reclaim = true; @@ -565,7 +612,7 @@ next: } /* add to beginning of LRU */ - list_add(&zhdr->lru, &pool->lru); + list_add(&page->lru, &pool->lru); } spin_unlock(&pool->lock); return -EAGAIN; -- 1.9.1 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply related [flat|nested] 16+ messages in thread
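The new FULL case added to encode_handle() in this patch can be modeled outside the kernel to check that the three handle kinds stay distinguishable. The sketch below is a userspace model, not kernel code: MODEL_PAGE_SIZE, CHUNK_SHIFT, and the header struct are assumed stand-ins chosen for illustration.

```c
#include <assert.h>

#define MODEL_PAGE_SIZE   4096UL
#define CHUNK_SHIFT       6                  /* assumed 64-byte chunks */
#define CHUNK_SIZE        (1UL << CHUNK_SHIFT)
#define ZHDR_SIZE_ALIGNED CHUNK_SIZE         /* header rounded up to one chunk */

enum buddy { FULL, FIRST, LAST };

struct zbud_header_model {
	unsigned int first_chunks;
	unsigned int last_chunks;
};

/* Mirrors the patched encode_handle(): a handle is the page address
 * plus an offset that identifies which buddy it refers to. */
static unsigned long encode_handle(unsigned long page_addr,
				   struct zbud_header_model *zhdr,
				   enum buddy bud)
{
	unsigned long handle = page_addr;

	switch (bud) {
	case FIRST:
		handle += ZHDR_SIZE_ALIGNED;	/* skip over the header */
		break;
	case LAST:
		handle += MODEL_PAGE_SIZE -
			((unsigned long)zhdr->last_chunks << CHUNK_SHIFT);
		break;
	case FULL:
	default:
		break;				/* page-aligned handle */
	}
	return handle;
}

/* The in-page offset alone is enough to tell the three cases apart. */
static enum buddy decode_bud(unsigned long handle)
{
	unsigned long off = handle & (MODEL_PAGE_SIZE - 1);

	if (off == 0)
		return FULL;
	if (off == ZHDR_SIZE_ALIGNED)
		return FIRST;
	return LAST;
}
```

With these values a FULL handle is page-aligned, a FIRST handle sits exactly one chunk into the page, and a LAST handle is counted back from the page end, so the handle itself already encodes the page type.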
* Re: [PATCH 1/2] zbud: allow PAGE_SIZE allocations 2015-09-16 11:50 ` [PATCH 1/2] zbud: allow PAGE_SIZE allocations Vitaly Wool @ 2015-09-17 13:00 ` Vlastimil Babka 2015-09-18 8:03 ` Vitaly Wool 2015-09-21 16:17 ` Dan Streetman 1 sibling, 1 reply; 16+ messages in thread From: Vlastimil Babka @ 2015-09-17 13:00 UTC (permalink / raw) To: Vitaly Wool, ddstreet, akpm, minchan, sergey.senozhatsky Cc: linux-kernel, linux-mm On 09/16/2015 01:50 PM, Vitaly Wool wrote: > For zram to be able to use zbud via the common zpool API, > allocations of size PAGE_SIZE should be allowed by zpool. > zbud uses the beginning of an allocated page for its internal > structure but it is not a problem as long as we keep track of > such special pages using a newly introduced page flag. > To be able to keep track of zbud pages in any case, struct page's > lru pointer will be used for zbud page lists instead of the one > that used to be part of the aforementioned internal structure. I don't know how zsmalloc handles uncompressible PAGE_SIZE allocations, but I wouldn't expect it to be any more clever than this? So why duplicate the functionality in zswap and zbud? This could be handled e.g. at the zpool level? Or maybe just in zram, as IIRC in zswap (frontswap) it's valid just to reject a page and it goes to physical swap. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/2] zbud: allow PAGE_SIZE allocations 2015-09-17 13:00 ` Vlastimil Babka @ 2015-09-18 8:03 ` Vitaly Wool 2015-09-21 15:27 ` Dan Streetman 0 siblings, 1 reply; 16+ messages in thread From: Vitaly Wool @ 2015-09-18 8:03 UTC (permalink / raw) To: Dan Streetman, Andrew Morton, Minchan Kim, Sergey Senozhatsky Cc: LKML, linux-mm > I don't know how zsmalloc handles uncompressible PAGE_SIZE allocations, but > I wouldn't expect it to be any more clever than this? So why duplicate the > functionality in zswap and zbud? This could be handled e.g. at the zpool > level? Or maybe just in zram, as IIRC in zswap (frontswap) it's valid just > to reject a page and it goes to physical swap. From what I can see, zsmalloc just allocates pages and puts them into a linked list. Using the beginning of a page for storing an internal struct is zbud-specific, and so is this patch. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/2] zbud: allow PAGE_SIZE allocations 2015-09-18 8:03 ` Vitaly Wool @ 2015-09-21 15:27 ` Dan Streetman 0 siblings, 0 replies; 16+ messages in thread From: Dan Streetman @ 2015-09-21 15:27 UTC (permalink / raw) To: Vitaly Wool Cc: Andrew Morton, Minchan Kim, Sergey Senozhatsky, LKML, Linux-MM On Fri, Sep 18, 2015 at 4:03 AM, Vitaly Wool <vitalywool@gmail.com> wrote: >> I don't know how zsmalloc handles uncompressible PAGE_SIZE allocations, but >> I wouldn't expect it to be any more clever than this? So why duplicate the >> functionality in zswap and zbud? This could be handled e.g. at the zpool >> level? Or maybe just in zram, as IIRC in zswap (frontswap) it's valid just >> to reject a page and it goes to physical swap. zpool doesn't actually store pages anywhere; zbud and zsmalloc do the storing, and they do it in completely different ways. Storing an uncompressed page has to be done in zbud and zsmalloc, not zpool. And zram can't do it either; zram doesn't actually store pages either, it relies on zsmalloc to store all its pages. > > From what I can see, zsmalloc just allocates pages and puts them into > a linked list. Using the beginning of a page for storing an internal > struct is zbud-specific, and so is this patch. zsmalloc has size "classes" that allow storing "objects" of a specific size range (i.e. the last class size + 1, up to class size). the max size class is: #define ZS_MAX_ALLOC_SIZE PAGE_SIZE so zsmalloc is able to store "objects" up to, and including, PAGE_SIZE. ^ permalink raw reply [flat|nested] 16+ messages in thread
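Dan's description of zsmalloc size classes can be sketched in userspace. The constants below only approximate mm/zsmalloc.c for 4 KiB pages (a 32-byte minimum and 16-byte class spacing are assumptions for illustration, not authoritative values):

```c
#include <assert.h>
#include <stddef.h>

#define SK_PAGE_SIZE        4096UL
#define ZS_MIN_ALLOC_SIZE   32UL                 /* assumed minimum object size */
#define ZS_MAX_ALLOC_SIZE   SK_PAGE_SIZE         /* whole pages fit, per Dan */
#define ZS_SIZE_CLASS_DELTA (SK_PAGE_SIZE >> 8)  /* assumed 16-byte spacing */

/* Class i stores objects up to ZS_MIN_ALLOC_SIZE + i * delta, so a
 * request is rounded up to the next class boundary. */
static size_t size_class_index(size_t size)
{
	if (size < ZS_MIN_ALLOC_SIZE)
		size = ZS_MIN_ALLOC_SIZE;
	return (size - ZS_MIN_ALLOC_SIZE + ZS_SIZE_CLASS_DELTA - 1) /
		ZS_SIZE_CLASS_DELTA;
}
```

The point of the thread is the boundary case: a full PAGE_SIZE request still maps onto a valid class, so zsmalloc can store uncompressed pages without any special-casing.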
* Re: [PATCH 1/2] zbud: allow PAGE_SIZE allocations 2015-09-16 11:50 ` [PATCH 1/2] zbud: allow PAGE_SIZE allocations Vitaly Wool 2015-09-17 13:00 ` Vlastimil Babka @ 2015-09-21 16:17 ` Dan Streetman 2015-09-22 10:18 ` Vitaly Wool 1 sibling, 1 reply; 16+ messages in thread From: Dan Streetman @ 2015-09-21 16:17 UTC (permalink / raw) To: Vitaly Wool Cc: Andrew Morton, Minchan Kim, Sergey Senozhatsky, linux-kernel, Linux-MM, Seth Jennings Please make sure to cc Seth also, he's the owner of zbud. On Wed, Sep 16, 2015 at 7:50 AM, Vitaly Wool <vitalywool@gmail.com> wrote: > For zram to be able to use zbud via the common zpool API, > allocations of size PAGE_SIZE should be allowed by zpool. > zbud uses the beginning of an allocated page for its internal > structure but it is not a problem as long as we keep track of > such special pages using a newly introduced page flag. > To be able to keep track of zbud pages in any case, struct page's > lru pointer will be used for zbud page lists instead of the one > that used to be part of the aforementioned internal structure. > > Signed-off-by: Vitaly Wool <vitalywool@gmail.com> > --- > include/linux/page-flags.h | 3 ++ > mm/zbud.c | 71 ++++++++++++++++++++++++++++++++++++++-------- > 2 files changed, 62 insertions(+), 12 deletions(-) > > diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h > index 416509e..dd47cf0 100644 > --- a/include/linux/page-flags.h > +++ b/include/linux/page-flags.h > @@ -134,6 +134,9 @@ enum pageflags { > > /* SLOB */ > PG_slob_free = PG_private, > + > + /* ZBUD */ > + PG_uncompressed = PG_owner_priv_1, you don't need a new page flag. and there's 0% chance it would be accepted even if you did. > }; > > #ifndef __GENERATING_BOUNDS_H > diff --git a/mm/zbud.c b/mm/zbud.c > index fa48bcdf..ee8b5d6 100644 > --- a/mm/zbud.c > +++ b/mm/zbud.c > @@ -107,13 +107,11 @@ struct zbud_pool { > * struct zbud_header - zbud page metadata occupying the first chunk of each > * zbud page. 
> * @buddy: links the zbud page into the unbuddied/buddied lists in the pool > - * @lru: links the zbud page into the lru list in the pool > * @first_chunks: the size of the first buddy in chunks, 0 if free > * @last_chunks: the size of the last buddy in chunks, 0 if free > */ > struct zbud_header { > struct list_head buddy; > - struct list_head lru; > unsigned int first_chunks; > unsigned int last_chunks; > bool under_reclaim; > @@ -221,6 +219,7 @@ MODULE_ALIAS("zpool-zbud"); > *****************/ > /* Just to make the code easier to read */ > enum buddy { > + FULL, > FIRST, > LAST > }; > @@ -241,7 +240,7 @@ static struct zbud_header *init_zbud_page(struct page *page) > zhdr->first_chunks = 0; > zhdr->last_chunks = 0; > INIT_LIST_HEAD(&zhdr->buddy); > - INIT_LIST_HEAD(&zhdr->lru); > + INIT_LIST_HEAD(&page->lru); > zhdr->under_reclaim = 0; > return zhdr; > } > @@ -267,11 +266,18 @@ static unsigned long encode_handle(struct zbud_header *zhdr, enum buddy bud) > * over the zbud header in the first chunk. > */ > handle = (unsigned long)zhdr; > - if (bud == FIRST) > + switch (bud) { > + case FIRST: > /* skip over zbud header */ > handle += ZHDR_SIZE_ALIGNED; > - else /* bud == LAST */ > + break; > + case LAST: > handle += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT); > + break; > + case FULL: > + default: Hmm, while it should be ok to treat a default (invalid) bud value as a full page (assuming the caller treats it as such), you should at least add a pr_warn() or pr_warn_ratelimited(), or maybe a WARN_ON() or WARN_ON_ONCE(). the default case should never happen, and a warning should be printed if it does. > + break; > + } > return handle; > } > > @@ -360,6 +366,24 @@ int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp, > > if (!size || (gfp & __GFP_HIGHMEM)) > return -EINVAL; > + > + if (size == PAGE_SIZE) { > + /* > + * This is a special case. 
The page will be allocated > + * and used to store uncompressed data > + */ well you shouldn't special case only PAGE_SIZE. If zram increases its max_zpage_size to a value > (PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE) then those compressed pages will fail to store here. I think it would be better to change the size check to a simple if (size > PAGE_SIZE) return -ENOSPC; then use the existing > if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE) to store the object (which is either a large compressed page, or an uncompressed page) into the full zbud page. And don't duplicate everything the function does inside an if (), just update the function to handle PAGE_SIZE storage. > + page = alloc_page(gfp); > + if (!page) > + return -ENOMEM; > + spin_lock(&pool->lock); > + pool->pages_nr++; > + INIT_LIST_HEAD(&page->lru); > + page->flags |= PG_uncompressed; > + list_add(&page->lru, &pool->lru); > + spin_unlock(&pool->lock); > + *handle = encode_handle(page_address(page), FULL); > + return 0; > + } > if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE) > return -ENOSPC; > chunks = size_to_chunks(size); > @@ -372,6 +396,7 @@ int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp, > zhdr = list_first_entry(&pool->unbuddied[i], > struct zbud_header, buddy); > list_del(&zhdr->buddy); > + page = virt_to_page(zhdr); > if (zhdr->first_chunks == 0) > bud = FIRST; > else > @@ -406,9 +431,9 @@ found: > } > > /* Add/move zbud page to beginning of LRU */ > - if (!list_empty(&zhdr->lru)) > - list_del(&zhdr->lru); > - list_add(&zhdr->lru, &pool->lru); > + if (!list_empty(&page->lru)) > + list_del(&page->lru); > + list_add(&page->lru, &pool->lru); > > *handle = encode_handle(zhdr, bud); > spin_unlock(&pool->lock); > @@ -430,9 +455,21 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle) > { > struct zbud_header *zhdr; > int freechunks; > + struct page *page; > > spin_lock(&pool->lock); > zhdr = handle_to_zbud_header(handle); > + page = virt_to_page(zhdr); > + > + /* 
If it was an uncompressed full page, just free it */ > + if (page->flags & PG_uncompressed) { > + page->flags &= ~PG_uncompressed; > + list_del(&page->lru); > + __free_page(page); > + pool->pages_nr--; > + spin_unlock(&pool->lock); > + return; > + } don't repeat this function inside an if() block. update the actual function to handle the new case. and you don't need a new page flag. you have 3 distinct cases: switch (handle & ~PAGE_MASK) { case 0: /* this is a full-sized page */ case ZHDR_SIZE_ALIGNED: /* this is the first buddy */ default: /* this is the last buddy */ } > > /* If first buddy, handle will be page aligned */ > if ((handle - ZHDR_SIZE_ALIGNED) & ~PAGE_MASK) > @@ -451,7 +488,7 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle) > > if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) { > /* zbud page is empty, free */ > - list_del(&zhdr->lru); > + list_del(&page->lru); > free_zbud_page(zhdr); > pool->pages_nr--; > } else { > @@ -505,6 +542,7 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries) > { > int i, ret, freechunks; > struct zbud_header *zhdr; > + struct page *page; > unsigned long first_handle = 0, last_handle = 0; > > spin_lock(&pool->lock); > @@ -514,8 +552,17 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries) > return -EINVAL; > } > for (i = 0; i < retries; i++) { > - zhdr = list_tail_entry(&pool->lru, struct zbud_header, lru); > - list_del(&zhdr->lru); > + page = list_tail_entry(&pool->lru, struct page, lru); > + zhdr = page_address(page); > + list_del(&page->lru); > + /* Uncompressed zbud page? just run eviction and free it */ > + if (page->flags & PG_uncompressed) { > + page->flags &= ~PG_uncompressed; > + spin_unlock(&pool->lock); > + pool->ops->evict(pool, encode_handle(zhdr, FULL)); > + __free_page(page); > + return 0; again, don't be redundant. change the function to handle full-sized pages, don't repeat the function in an if() block for a special case. 
> + } > list_del(&zhdr->buddy); > /* Protect zbud page against free */ > zhdr->under_reclaim = true; > @@ -565,7 +612,7 @@ next: > } > > /* add to beginning of LRU */ > - list_add(&zhdr->lru, &pool->lru); > + list_add(&page->lru, &pool->lru); > } > spin_unlock(&pool->lock); > return -EAGAIN; > -- > 1.9.1 ^ permalink raw reply [flat|nested] 16+ messages in thread
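The three-way dispatch Dan suggests, keyed only on the handle's offset within its page, can be sketched as follows. The page size and header size are assumed values for illustration:

```c
#include <assert.h>

#define SK_PAGE_SIZE    4096UL
#define SK_PAGE_MASK    (~(SK_PAGE_SIZE - 1))
#define SK_ZHDR_ALIGNED 64UL	/* assumed header size, one chunk */

enum handle_kind { HANDLE_FULL, HANDLE_FIRST, HANDLE_LAST };

/* A LAST handle can never land on SK_ZHDR_ALIGNED, because a last
 * buddy is limited to PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE,
 * so the three cases cannot collide. */
static enum handle_kind classify_handle(unsigned long handle)
{
	switch (handle & ~SK_PAGE_MASK) {
	case 0:
		return HANDLE_FULL;	/* page-aligned: uncompressed page */
	case SK_ZHDR_ALIGNED:
		return HANDLE_FIRST;	/* right after the header chunk */
	default:
		return HANDLE_LAST;	/* counted back from page end */
	}
}
```

This is what makes the extra page flag unnecessary: zbud_free() and zbud_reclaim_page() can branch on the handle itself.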
* Re: [PATCH 1/2] zbud: allow PAGE_SIZE allocations 2015-09-21 16:17 ` Dan Streetman @ 2015-09-22 10:18 ` Vitaly Wool 2015-09-22 17:09 ` Dan Streetman 0 siblings, 1 reply; 16+ messages in thread From: Vitaly Wool @ 2015-09-22 10:18 UTC (permalink / raw) To: Dan Streetman Cc: Andrew Morton, Minchan Kim, Sergey Senozhatsky, linux-kernel, Linux-MM, Seth Jennings Hi Dan, On Mon, Sep 21, 2015 at 6:17 PM, Dan Streetman <ddstreet@ieee.org> wrote: > Please make sure to cc Seth also, he's the owner of zbud. Sure :) <snip> >> @@ -514,8 +552,17 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries) >> return -EINVAL; >> } >> for (i = 0; i < retries; i++) { >> - zhdr = list_tail_entry(&pool->lru, struct zbud_header, lru); >> - list_del(&zhdr->lru); >> + page = list_tail_entry(&pool->lru, struct page, lru); >> + zhdr = page_address(page); >> + list_del(&page->lru); >> + /* Uncompressed zbud page? just run eviction and free it */ >> + if (page->flags & PG_uncompressed) { >> + page->flags &= ~PG_uncompressed; >> + spin_unlock(&pool->lock); >> + pool->ops->evict(pool, encode_handle(zhdr, FULL)); >> + __free_page(page); >> + return 0; > > again, don't be redundant. change the function to handle full-sized > pages, don't repeat the function in an if() block for a special case. Well, this case is a little tricky. How to process a zbud page in zbud_reclaim_page() is decided based on the assumption that there is a zhdr at the beginning of the page. What can be done here IMV is one of the following: * add a constant magic number to zhdr and check for it; if the check fails, it is a type FULL page * add a CRC field to zhdr; if the CRC check over the assumed zhdr fails, it is a type FULL page * use a field from struct page to indicate its type The last option still looks better to me. ~vitaly
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/2] zbud: allow PAGE_SIZE allocations 2015-09-22 10:18 ` Vitaly Wool @ 2015-09-22 17:09 ` Dan Streetman 0 siblings, 0 replies; 16+ messages in thread From: Dan Streetman @ 2015-09-22 17:09 UTC (permalink / raw) To: Vitaly Wool Cc: Andrew Morton, Minchan Kim, Sergey Senozhatsky, linux-kernel, Linux-MM, Seth Jennings On Tue, Sep 22, 2015 at 6:18 AM, Vitaly Wool <vitalywool@gmail.com> wrote: > Hi Dan, > > On Mon, Sep 21, 2015 at 6:17 PM, Dan Streetman <ddstreet@ieee.org> wrote: >> Please make sure to cc Seth also, he's the owner of zbud. > > Sure :) > > <snip> >>> @@ -514,8 +552,17 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries) >>> return -EINVAL; >>> } >>> for (i = 0; i < retries; i++) { >>> - zhdr = list_tail_entry(&pool->lru, struct zbud_header, lru); >>> - list_del(&zhdr->lru); >>> + page = list_tail_entry(&pool->lru, struct page, lru); >>> + zhdr = page_address(page); >>> + list_del(&page->lru); >>> + /* Uncompressed zbud page? just run eviction and free it */ >>> + if (page->flags & PG_uncompressed) { >>> + page->flags &= ~PG_uncompressed; >>> + spin_unlock(&pool->lock); >>> + pool->ops->evict(pool, encode_handle(zhdr, FULL)); >>> + __free_page(page); >>> + return 0; >> >> again, don't be redundant. change the function to handle full-sized >> pages, don't repeat the function in an if() block for a special case. > > Well, this case is a little tricky. How to process a zbud page in > zbud_reclaim_page() is defined basing on the assumption there is a > zhdr at the beginning of the page. What can be done here IMV is either > of the following: aha, this is why you used the page flag. > * add a constant magic number to zhdr and check for it, if the check > fails, it is a type FULL page > * add a CRC field to zhdr, if CRC check over assumed zhdr fails, it > is a type FULL page neither of those; you can't guarantee the magic number won't naturally occur in a page. 
> * use a field from struct page to indicate its type sure, you could use a pre-existing field from struct page, like the page->private field. > > The last option still looks better to me. > > ~vitaly ^ permalink raw reply [flat|nested] 16+ messages in thread
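The page->private idea can be mocked in userspace. Here struct mock_page is a stand-in for the kernel's struct page, and the point is only that an existing per-page field can carry the full-vs-buddied distinction without consuming a page flag:

```c
#include <assert.h>

/* Stand-in for the kernel's struct page; only the private field matters
 * here. In the kernel, private is free for the owner's use while the
 * page is allocated to it. */
struct mock_page {
	unsigned long private;
};

enum zbud_page_type { ZBUD_PAGE_BUDDIED = 0, ZBUD_PAGE_FULL = 1 };

static void set_zbud_page_type(struct mock_page *page, enum zbud_page_type type)
{
	page->private = type;
}

static enum zbud_page_type zbud_page_type(const struct mock_page *page)
{
	return (enum zbud_page_type)page->private;
}
```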
* [PATCH 2/2] zpool/zsmalloc/zbud: align on interfaces 2015-09-16 11:48 [PATCH 0/2] prepare zbud to be used by zram as underlying allocator Vitaly Wool 2015-09-16 11:50 ` [PATCH 1/2] zbud: allow PAGE_SIZE allocations Vitaly Wool @ 2015-09-16 11:53 ` Vitaly Wool 2015-09-21 17:15 ` Dan Streetman 2015-09-17 1:30 ` [PATCH 0/2] prepare zbud to be used by zram as underlying allocator Sergey Senozhatsky 2 siblings, 1 reply; 16+ messages in thread From: Vitaly Wool @ 2015-09-16 11:53 UTC (permalink / raw) To: ddstreet, akpm, minchan, sergey.senozhatsky; +Cc: linux-kernel, linux-mm As a preparation step for zram to be able to use common zpool API, there has to be some alignment done on it. This patch adds functions that correspond to zsmalloc-specific API to the common zpool API and takes care of the callbacks that have to be introduced, too. This version of the patch uses simplified 'compact' API/callbacks. Signed-off-by: Vitaly Wool <vitalywool@gmail.com> --- drivers/block/zram/zram_drv.c | 4 ++-- include/linux/zpool.h | 14 ++++++++++++++ include/linux/zsmalloc.h | 8 ++------ mm/zbud.c | 12 ++++++++++++ mm/zpool.c | 21 +++++++++++++++++++++ mm/zsmalloc.c | 19 ++++++++++++++++--- 6 files changed, 67 insertions(+), 11 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 9fa15bb..a0a786e 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -426,12 +426,12 @@ static ssize_t mm_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { struct zram *zram = dev_to_zram(dev); - struct zs_pool_stats pool_stats; + struct zpool_stats pool_stats; u64 orig_size, mem_used = 0; long max_used; ssize_t ret; - memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats)); + memset(&pool_stats, 0x00, sizeof(struct zpool_stats)); down_read(&zram->init_lock); if (init_done(zram)) { diff --git a/include/linux/zpool.h b/include/linux/zpool.h index 42f8ec9..a2a5bc4 100644 --- a/include/linux/zpool.h +++ 
b/include/linux/zpool.h @@ -17,6 +17,11 @@ struct zpool_ops { int (*evict)(struct zpool *pool, unsigned long handle); }; +struct zpool_stats { + /* How many pages were migrated (freed) */ + unsigned long pages_compacted; +}; + /* * Control how a handle is mapped. It will be ignored if the * implementation does not support it. Its use is optional. @@ -58,6 +63,10 @@ void *zpool_map_handle(struct zpool *pool, unsigned long handle, void zpool_unmap_handle(struct zpool *pool, unsigned long handle); +unsigned long zpool_compact(struct zpool *pool); + +void zpool_stats(struct zpool *pool, struct zpool_stats *zstats); + u64 zpool_get_total_size(struct zpool *pool); @@ -72,6 +81,8 @@ u64 zpool_get_total_size(struct zpool *pool); * @shrink: shrink the pool. * @map: map a handle. * @unmap: unmap a handle. + * @compact: try to run compaction for the pool + * @stats: get statistics for the pool * @total_size: get total size of a pool. * * This is created by a zpool implementation and registered @@ -98,6 +109,9 @@ struct zpool_driver { enum zpool_mapmode mm); void (*unmap)(void *pool, unsigned long handle); + unsigned long (*compact)(void *pool); + void (*stats)(void *pool, struct zpool_stats *stats); + u64 (*total_size)(void *pool); }; diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index 6398dfa..5aee1c7 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -15,6 +15,7 @@ #define _ZS_MALLOC_H_ #include <linux/types.h> +#include <linux/zpool.h> /* * zsmalloc mapping modes @@ -34,11 +35,6 @@ enum zs_mapmode { */ }; -struct zs_pool_stats { - /* How many pages were migrated (freed) */ - unsigned long pages_compacted; -}; - struct zs_pool; struct zs_pool *zs_create_pool(char *name, gfp_t flags); @@ -54,5 +50,5 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle); unsigned long zs_get_total_pages(struct zs_pool *pool); unsigned long zs_compact(struct zs_pool *pool); -void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats 
*stats); +void zs_pool_stats(struct zs_pool *pool, struct zpool_stats *stats); #endif diff --git a/mm/zbud.c b/mm/zbud.c index ee8b5d6..23cfc76 100644 --- a/mm/zbud.c +++ b/mm/zbud.c @@ -193,6 +193,16 @@ static void zbud_zpool_unmap(void *pool, unsigned long handle) zbud_unmap(pool, handle); } +static unsigned long zbud_zpool_compact(void *pool) +{ + return 0; +} + +static void zbud_zpool_stats(void *pool, struct zpool_stats *stats) +{ + /* no-op */ +} + static u64 zbud_zpool_total_size(void *pool) { return zbud_get_pool_size(pool) * PAGE_SIZE; @@ -208,6 +218,8 @@ static struct zpool_driver zbud_zpool_driver = { .shrink = zbud_zpool_shrink, .map = zbud_zpool_map, .unmap = zbud_zpool_unmap, + .compact = zbud_zpool_compact, + .stats = zbud_zpool_stats, .total_size = zbud_zpool_total_size, }; diff --git a/mm/zpool.c b/mm/zpool.c index 8f670d3..d454f37 100644 --- a/mm/zpool.c +++ b/mm/zpool.c @@ -341,6 +341,27 @@ void zpool_unmap_handle(struct zpool *zpool, unsigned long handle) } /** + * zpool_compact() - try to run compaction over zpool + * @pool The zpool to compact + * + * Returns: the number of migrated pages (0 if nothing happened) + */ +unsigned long zpool_compact(struct zpool *zpool) +{ + return zpool->driver->compact(zpool->pool); +} + +/** + * zpool_stats() - obtain zpool statistics + * @pool The zpool to get statistics for + * @zstats stats to fill in + */ +void zpool_stats(struct zpool *zpool, struct zpool_stats *zstats) +{ + zpool->driver->stats(zpool->pool, zstats); +} + +/** * zpool_get_total_size() - The total size of the pool * @pool The zpool to check * diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index f135b1b..3ab0515 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -245,7 +245,7 @@ struct zs_pool { gfp_t flags; /* allocation flags used when growing pool */ atomic_long_t pages_allocated; - struct zs_pool_stats stats; + struct zpool_stats stats; /* Compact classes */ struct shrinker shrinker; @@ -365,6 +365,17 @@ static void zs_zpool_unmap(void *pool, 
unsigned long handle) zs_unmap_object(pool, handle); } +static unsigned long zs_zpool_compact(void *pool) +{ + return zs_compact(pool); +} + + +static void zs_zpool_stats(void *pool, struct zpool_stats *stats) +{ + zs_pool_stats(pool, stats); +} + static u64 zs_zpool_total_size(void *pool) { return zs_get_total_pages(pool) << PAGE_SHIFT; @@ -380,6 +391,8 @@ static struct zpool_driver zs_zpool_driver = { .shrink = zs_zpool_shrink, .map = zs_zpool_map, .unmap = zs_zpool_unmap, + .compact = zs_zpool_compact, + .stats = zs_zpool_stats, .total_size = zs_zpool_total_size, }; @@ -1789,9 +1802,9 @@ unsigned long zs_compact(struct zs_pool *pool) } EXPORT_SYMBOL_GPL(zs_compact); -void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats) +void zs_pool_stats(struct zs_pool *pool, struct zpool_stats *stats) { - memcpy(stats, &pool->stats, sizeof(struct zs_pool_stats)); + memcpy(stats, &pool->stats, sizeof(struct zpool_stats)); } EXPORT_SYMBOL_GPL(zs_pool_stats); -- 1.9.1 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply related [flat|nested] 16+ messages in thread
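The driver indirection this patch adds can be sketched in userspace. The mock below mirrors the zpool_compact()/zpool_stats() forwarding and zbud's no-op callbacks; all of the scaffolding is assumed for illustration, not the kernel's actual definitions:

```c
#include <assert.h>
#include <stddef.h>

struct zpool_stats {
	unsigned long pages_compacted;	/* pages migrated (freed) */
};

struct zpool_driver {
	unsigned long (*compact)(void *pool);
	void (*stats)(void *pool, struct zpool_stats *stats);
};

struct zpool {
	struct zpool_driver *driver;
	void *pool;
};

/* The zpool layer only forwards to whichever driver backs the pool. */
static unsigned long zpool_compact(struct zpool *zpool)
{
	return zpool->driver->compact(zpool->pool);
}

static void zpool_stats(struct zpool *zpool, struct zpool_stats *zstats)
{
	zpool->driver->stats(zpool->pool, zstats);
}

/* A zbud-like driver that cannot compact: report zero, leave stats alone. */
static unsigned long noop_compact(void *pool)
{
	(void)pool;
	return 0;
}

static void noop_stats(void *pool, struct zpool_stats *stats)
{
	(void)pool;
	(void)stats;
}

static struct zpool_driver noop_driver = {
	.compact = noop_compact,
	.stats = noop_stats,
};
```

This is why the zbud implementations can be trivial: callers of zpool_compact() just see zero migrated pages, while zsmalloc plugs in its real zs_compact()/zs_pool_stats().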
* Re: [PATCH 2/2] zpool/zsmalloc/zbud: align on interfaces 2015-09-16 11:53 ` [PATCH 2/2] zpool/zsmalloc/zbud: align on interfaces Vitaly Wool @ 2015-09-21 17:15 ` Dan Streetman 0 siblings, 0 replies; 16+ messages in thread From: Dan Streetman @ 2015-09-21 17:15 UTC (permalink / raw) To: Vitaly Wool Cc: Andrew Morton, Minchan Kim, Sergey Senozhatsky, linux-kernel, Linux-MM, Seth Jennings On Wed, Sep 16, 2015 at 7:53 AM, Vitaly Wool <vitalywool@gmail.com> wrote: > As a preparation step for zram to be able to use common zpool API, > there has to be some alignment done on it. This patch adds > functions that correspond to zsmalloc-specific API to the common > zpool API and takes care of the callbacks that have to be > introduced, too. > > This version of the patch uses simplified 'compact' API/callbacks. > > Signed-off-by: Vitaly Wool <vitalywool@gmail.com> > --- > drivers/block/zram/zram_drv.c | 4 ++-- > include/linux/zpool.h | 14 ++++++++++++++ > include/linux/zsmalloc.h | 8 ++------ > mm/zbud.c | 12 ++++++++++++ > mm/zpool.c | 21 +++++++++++++++++++++ > mm/zsmalloc.c | 19 ++++++++++++++++--- > 6 files changed, 67 insertions(+), 11 deletions(-) > > diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c > index 9fa15bb..a0a786e 100644 > --- a/drivers/block/zram/zram_drv.c > +++ b/drivers/block/zram/zram_drv.c > @@ -426,12 +426,12 @@ static ssize_t mm_stat_show(struct device *dev, > struct device_attribute *attr, char *buf) > { > struct zram *zram = dev_to_zram(dev); > - struct zs_pool_stats pool_stats; > + struct zpool_stats pool_stats; > u64 orig_size, mem_used = 0; > long max_used; > ssize_t ret; > > - memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats)); > + memset(&pool_stats, 0x00, sizeof(struct zpool_stats)); > > down_read(&zram->init_lock); > if (init_done(zram)) { you don't need to change zram in this patch. save this part for the patch to update zram to use zpool. 
> diff --git a/include/linux/zpool.h b/include/linux/zpool.h > index 42f8ec9..a2a5bc4 100644 > --- a/include/linux/zpool.h > +++ b/include/linux/zpool.h > @@ -17,6 +17,11 @@ struct zpool_ops { > int (*evict)(struct zpool *pool, unsigned long handle); > }; > > +struct zpool_stats { > + /* How many pages were migrated (freed) */ > + unsigned long pages_compacted; > +}; > + > /* > * Control how a handle is mapped. It will be ignored if the > * implementation does not support it. Its use is optional. > @@ -58,6 +63,10 @@ void *zpool_map_handle(struct zpool *pool, unsigned long handle, > > void zpool_unmap_handle(struct zpool *pool, unsigned long handle); > > +unsigned long zpool_compact(struct zpool *pool); > + > +void zpool_stats(struct zpool *pool, struct zpool_stats *zstats); > + > u64 zpool_get_total_size(struct zpool *pool); > > > @@ -72,6 +81,8 @@ u64 zpool_get_total_size(struct zpool *pool); > * @shrink: shrink the pool. > * @map: map a handle. > * @unmap: unmap a handle. > + * @compact: try to run compaction for the pool > + * @stats: get statistics for the pool > * @total_size: get total size of a pool. 
> * > * This is created by a zpool implementation and registered > @@ -98,6 +109,9 @@ struct zpool_driver { > enum zpool_mapmode mm); > void (*unmap)(void *pool, unsigned long handle); > > + unsigned long (*compact)(void *pool); > + void (*stats)(void *pool, struct zpool_stats *stats); > + > u64 (*total_size)(void *pool); > }; > > diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h > index 6398dfa..5aee1c7 100644 > --- a/include/linux/zsmalloc.h > +++ b/include/linux/zsmalloc.h > @@ -15,6 +15,7 @@ > #define _ZS_MALLOC_H_ > > #include <linux/types.h> > +#include <linux/zpool.h> > > /* > * zsmalloc mapping modes > @@ -34,11 +35,6 @@ enum zs_mapmode { > */ > }; > > -struct zs_pool_stats { > - /* How many pages were migrated (freed) */ > - unsigned long pages_compacted; > -}; > - > struct zs_pool; > > struct zs_pool *zs_create_pool(char *name, gfp_t flags); > @@ -54,5 +50,5 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle); > unsigned long zs_get_total_pages(struct zs_pool *pool); > unsigned long zs_compact(struct zs_pool *pool); > > -void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats); > +void zs_pool_stats(struct zs_pool *pool, struct zpool_stats *stats); > #endif > diff --git a/mm/zbud.c b/mm/zbud.c > index ee8b5d6..23cfc76 100644 > --- a/mm/zbud.c > +++ b/mm/zbud.c > @@ -193,6 +193,16 @@ static void zbud_zpool_unmap(void *pool, unsigned long handle) > zbud_unmap(pool, handle); > } > > +static unsigned long zbud_zpool_compact(void *pool) > +{ > + return 0; > +} > + > +static void zbud_zpool_stats(void *pool, struct zpool_stats *stats) > +{ > + /* no-op */ > +} > + > static u64 zbud_zpool_total_size(void *pool) > { > return zbud_get_pool_size(pool) * PAGE_SIZE; > @@ -208,6 +218,8 @@ static struct zpool_driver zbud_zpool_driver = { > .shrink = zbud_zpool_shrink, > .map = zbud_zpool_map, > .unmap = zbud_zpool_unmap, > + .compact = zbud_zpool_compact, > + .stats = zbud_zpool_stats, > .total_size = zbud_zpool_total_size, > 
}; > > diff --git a/mm/zpool.c b/mm/zpool.c > index 8f670d3..d454f37 100644 > --- a/mm/zpool.c > +++ b/mm/zpool.c > @@ -341,6 +341,27 @@ void zpool_unmap_handle(struct zpool *zpool, unsigned long handle) > } > > /** > + * zpool_compact() - try to run compaction over zpool > + * @pool The zpool to compact > + * > + * Returns: the number of migrated pages (0 if nothing happened) don't say "nothing happened", 0 doesn't necessarily mean that. 0 means no pages were compacted. > + */ > +unsigned long zpool_compact(struct zpool *zpool) > +{ > + return zpool->driver->compact(zpool->pool); > +} > + > +/** > + * zpool_stats() - obtain zpool statistics > + * @pool The zpool to get statistics for > + * @zstats stats to fill in this doc needs more. how is this function used? what should the caller expect in the zstats fields before and after the call? for the common zpool interface, it might make more sense to just include a direct function to access the pages_compacted, instead of the indirect stats call. If more stats get added in the future, the call can be changed into a zpool_stats function. > + */ > +void zpool_stats(struct zpool *zpool, struct zpool_stats *zstats) > +{ > + zpool->driver->stats(zpool->pool, zstats); > +} > + > +/** > * zpool_get_total_size() - The total size of the pool > * @pool The zpool to check > * > diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c > index f135b1b..3ab0515 100644 > --- a/mm/zsmalloc.c > +++ b/mm/zsmalloc.c > @@ -245,7 +245,7 @@ struct zs_pool { > gfp_t flags; /* allocation flags used when growing pool */ > atomic_long_t pages_allocated; > > - struct zs_pool_stats stats; > + struct zpool_stats stats; we haven't made zsmalloc completely dependent on zpool just yet :-) this won't compile when CONFIG_ZPOOL isn't set. leave this as a zs_pool_stats type and just handle the translation in the zs_zpool_stats function call. 
> > /* Compact classes */
> struct shrinker shrinker;
> @@ -365,6 +365,17 @@ static void zs_zpool_unmap(void *pool, unsigned long handle)
> zs_unmap_object(pool, handle);
> }
>
> +static unsigned long zs_zpool_compact(void *pool)
> +{
> + return zs_compact(pool);
> +}
> +
> +
> +static void zs_zpool_stats(void *pool, struct zpool_stats *stats)
> +{
> + zs_pool_stats(pool, stats);
> +}
> +
> static u64 zs_zpool_total_size(void *pool)
> {
> return zs_get_total_pages(pool) << PAGE_SHIFT;
> }
>
> @@ -380,6 +391,8 @@ static struct zpool_driver zs_zpool_driver = {
> .shrink = zs_zpool_shrink,
> .map = zs_zpool_map,
> .unmap = zs_zpool_unmap,
> + .compact = zs_zpool_compact,
> + .stats = zs_zpool_stats,
> .total_size = zs_zpool_total_size,
> };
>
> @@ -1789,9 +1802,9 @@ unsigned long zs_compact(struct zs_pool *pool)
> }
> EXPORT_SYMBOL_GPL(zs_compact);
>
> -void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats)
> +void zs_pool_stats(struct zs_pool *pool, struct zpool_stats *stats)
> {
> - memcpy(stats, &pool->stats, sizeof(struct zs_pool_stats));
> + memcpy(stats, &pool->stats, sizeof(struct zpool_stats));
> }
> EXPORT_SYMBOL_GPL(zs_pool_stats);
>
> --
> 1.9.1
* Re: [PATCH 0/2] prepare zbud to be used by zram as underlying allocator
2015-09-16 11:48 [PATCH 0/2] prepare zbud to be used by zram as underlying allocator Vitaly Wool
2015-09-16 11:50 ` [PATCH 1/2] zbud: allow PAGE_SIZE allocations Vitaly Wool
2015-09-16 11:53 ` [PATCH 2/2] zpool/zsmalloc/zbud: align on interfaces Vitaly Wool
@ 2015-09-17 1:30 ` Sergey Senozhatsky
2015-09-17 10:26 ` Vitaly Wool
2 siblings, 1 reply; 16+ messages in thread
From: Sergey Senozhatsky @ 2015-09-17 1:30 UTC (permalink / raw)
To: Vitaly Wool
Cc: ddstreet, akpm, minchan, sergey.senozhatsky, linux-kernel, linux-mm

On (09/16/15 13:48), Vitaly Wool wrote:
> as a follow-up to my previous patchset, I decided to first prepare
> zbud/zpool related patches and then have some testing rounds and
> performance measurements for zram running over both, and come up
> with improved/verified zram/zpool patches then.

Hi,

just a side note,
I'm afraid this is not how it works. numbers go first, to justify
the patch set.

	-ss

> So for now, here comes the zbud/zpool part.
* Re: [PATCH 0/2] prepare zbud to be used by zram as underlying allocator
2015-09-17 1:30 ` [PATCH 0/2] prepare zbud to be used by zram as underlying allocator Sergey Senozhatsky
@ 2015-09-17 10:26 ` Vitaly Wool
2015-09-21 4:18 ` Minchan Kim
0 siblings, 1 reply; 16+ messages in thread
From: Vitaly Wool @ 2015-09-17 10:26 UTC (permalink / raw)
To: Sergey Senozhatsky
Cc: Dan Streetman, Andrew Morton, Minchan Kim, Sergey Senozhatsky, LKML, linux-mm

On Thu, Sep 17, 2015 at 1:30 AM, Sergey Senozhatsky
<sergey.senozhatsky.work@gmail.com> wrote:
>
> just a side note,
> I'm afraid this is not how it works. numbers go first, to justify
> the patch set.
>

These patches are extension/alignment patches; why would anyone need
to justify that?

But just to help you understand where I am coming from, here are some numbers:

                                 zsmalloc    zbud
kswapd_low_wmark_hit_quickly         4513    5696
kswapd_high_wmark_hit_quickly         861     902
allocstall                           2236    1122
pgmigrate_success                   78229   31244
compact_stall                        1172     634
compact_fail                          194      95
compact_success                       464     210

These are results from an Android device having run 3 'monkey' tests,
each 20 minutes, with user switch to guest and back in between.
* Re: [PATCH 0/2] prepare zbud to be used by zram as underlying allocator
2015-09-17 10:26 ` Vitaly Wool
@ 2015-09-21 4:18 ` Minchan Kim
2015-09-21 21:11 ` Vitaly Wool
0 siblings, 1 reply; 16+ messages in thread
From: Minchan Kim @ 2015-09-21 4:18 UTC (permalink / raw)
To: Vitaly Wool
Cc: Sergey Senozhatsky, Dan Streetman, Andrew Morton, Sergey Senozhatsky, LKML, linux-mm

Hello Vitaly,

On Thu, Sep 17, 2015 at 12:26:12PM +0200, Vitaly Wool wrote:
> On Thu, Sep 17, 2015 at 1:30 AM, Sergey Senozhatsky
> <sergey.senozhatsky.work@gmail.com> wrote:
> >
> > just a side note,
> > I'm afraid this is not how it works. numbers go first, to justify
> > the patch set.

I totally agree with Sergey's opinion.

> >
> These patches are extension/alignment patches, why would anyone need
> to justify that?

Sorry, because you wrote up "zram" in the title.
As I said earlier, we need several numbers to investigate.

First of all, what is the culprit of your latency? It seems you are
thinking about compaction; so, within compaction, what exactly?
Frequent scanning? Lock contention? Or frequent sleeping somewhere in
the compaction code? And then, why does zbud solve it? If we use zbud
for zram, we lose memory efficiency, so there has to be something to
justify that.

The reason I am asking is that I have investigated similar problems on
Android and other platforms, and the cause of the latency was not
zsmalloc but aggressive high-order allocations from subsystems,
watermark check races, deferring of compaction, LMK not working, and
too much swapout causing reclaim of lots of page cache pages, which was
the main culprit in my cases. When I checked with perf, the compaction
stall count was increased, but the time spent there was not huge, so it
was not the main factor in the latency.

Your problem might be different from mine, so to convince us, you
should give us real data and the investigation story.

Thanks.
>
> But just to help you understand where I am coming from, here are some numbers:
>                                  zsmalloc    zbud
> kswapd_low_wmark_hit_quickly         4513    5696
> kswapd_high_wmark_hit_quickly         861     902
> allocstall                           2236    1122
> pgmigrate_success                   78229   31244
> compact_stall                        1172     634
> compact_fail                          194      95
> compact_success                       464     210
>
> These are results from an Android device having run 3 'monkey' tests
> each 20 minutes, with user switch to guest and back in between.
* Re: [PATCH 0/2] prepare zbud to be used by zram as underlying allocator 2015-09-21 4:18 ` Minchan Kim @ 2015-09-21 21:11 ` Vitaly Wool 2015-09-22 15:36 ` Minchan Kim 0 siblings, 1 reply; 16+ messages in thread From: Vitaly Wool @ 2015-09-21 21:11 UTC (permalink / raw) To: Minchan Kim Cc: Sergey Senozhatsky, Dan Streetman, Andrew Morton, Sergey Senozhatsky, LKML, linux-mm Hello Minchan, > Sorry, because you wrote up "zram" in the title. > As I said earlier, we need several numbers to investigate. > > First of all, what is culprit of your latency? > It seems you are thinking about compaction. so compaction what? > Frequent scanning? lock collision? or frequent sleeping in compaction > code somewhere? And then why does zbud solve it? If we use zbud for zram, > we lose memory efficiency so there is something to justify it. The data I've got so far strongly suggests that in some use cases (see below) with zsmalloc * there are more allocstalls * memory compaction is triggered more frequently * allocstalls happen more often * page migrations are way more frequent, too. Please also keep in mind that I do not advise you or anyone to use zbud instead of zsmalloc. The point I'm trying to make is that zbud fits my particular case better and I want to be able to choose it in the kernel without hacking it with my private patches. FWIW, given that I am not an author of either, I don't see why anyone would consider me biased. :-) As of the memory efficiency, you seem to be quite comfortable with storing uncompressed pages when they compress to more than 3/4 of a page. I observed ~13% reported ratio increase (3.8x to 4.3x) when I increased max_zpage_size to PAGE_SIZE / 32 * 31. Doesn't look like a fight for every byte to me. 
Doesn't look like a fight for every byte to me.
> The reason I am asking is I have investigated similar problems
> in android and other platforms and the reason of latency was not zsmalloc
> but aggressive high-order allocations from subsystems, watermark check
> race, deferring of compaction, LMK not working and too much swapout so
> it causes to reclaim lots of page cache pages which was main culprit
> in my cases. When I checks with perf, compaction stall count is increased,
> the time spent in there is not huge so it was not main factor of latency.

The main use case where the difference is seen is switching between
users on an Android device. It does cause a lot of reclaim, too, as
you say, but it is in the nature of zbud that reclaim happens in a
more deterministic way and the worst case looks substantially nicer. That
said, the standard deviation calculated over 20 iterations of a
change-user-multiple-times test is 2x smaller for zbud than that of
zsmalloc.

I'll post some numbers in the next patch respin so they won't get lost :)

~vitaly
* Re: [PATCH 0/2] prepare zbud to be used by zram as underlying allocator
2015-09-21 21:11 ` Vitaly Wool
@ 2015-09-22 15:36 ` Minchan Kim
2015-09-22 16:49 ` Austin S Hemmelgarn
0 siblings, 1 reply; 16+ messages in thread
From: Minchan Kim @ 2015-09-22 15:36 UTC (permalink / raw)
To: Vitaly Wool
Cc: Sergey Senozhatsky, Dan Streetman, Andrew Morton, Sergey Senozhatsky, LKML, linux-mm

Hi Vitaly,

On Mon, Sep 21, 2015 at 11:11:00PM +0200, Vitaly Wool wrote:
> Hello Minchan,
>
> > Sorry, because you wrote up "zram" in the title.
> > As I said earlier, we need several numbers to investigate.
> >
> > First of all, what is culprit of your latency?
> > It seems you are thinking about compaction. so compaction what?
> > Frequent scanning? lock collision? or frequent sleeping in compaction
> > code somewhere? And then why does zbud solve it? If we use zbud for zram,
> > we lose memory efficiency so there is something to justify it.
>
> The data I've got so far strongly suggests that in some use cases (see
> below) with zsmalloc
> * there are more allocstalls
> * memory compaction is triggered more frequently
> * allocstalls happen more often
> * page migrations are way more frequent, too.
>
> Please also keep in mind that I do not advise you or anyone to use
> zbud instead of zsmalloc. The point I'm trying to make is that zbud
> fits my particular case better and I want to be able to choose it in
> the kernel without hacking it with my private patches.

I understand your goal well. ;-) But please understand my goal, which
is to find the fundamental reason why zbud removes the latency.

You gave some compaction-related stats, but they are just one of the
results, not the cause. I guess you could find other stats besides the
compaction stats which affect your workload. Once you find them all,
please investigate which of them is the major factor in your latency.
IOW, I should maintain zram, so I need to know when, where and how
using zbud with zram is useful so that I can guide zram users, and you
should *justify* the overhead to me. Overhead means I would have to
maintain two allocators for zram from now on: whenever I want to add
some feature to zsmalloc, I would have to take care of zbud and watch
zbud patches too, which could be very painful and a starting point of
divergence for zram.

zsmalloc packs lots of compressed objects into a page while zbud just
stores two objects, so if objects with different lifetimes share a
page, zsmalloc may cause higher fragmentation; but zbud is not a good
choice for memory efficiency either, so my concern starts from here.

For solving such problems, we added compaction into recent zram to
reduce wasted memory space, so it should solve the internal
fragmentation problem. The other problem we don't solve now is external
fragmentation, which is related to the compaction stats you are seeing
now. Although you are seeing mitigation with zbud, it would still be a
problem once you begin to use more memory with zbud. As one example, a
few years ago some guys tried to support zbud page migration.

If external fragmentation is really the problem here, we should provide
a feature so the VM can migrate zsmalloc pages; it was almost done, as
I told you in the previous thread, and I think it is really the way to
go.

We are even trying to add zlib, which is a better-compression-ratio
algorithm, to reduce working memory size, so without that feature the
problem would become more severe.

So I am thinking now that we should enhance zsmalloc rather than hide
the problem by merging zbud.

> FWIW, given that I am not an author of either, I don't see why anyone
> would consider me biased. :-)
>
> As of the memory efficiency, you seem to be quite comfortable with
> storing uncompressed pages when they compress to more than 3/4 of a
> page. I observed ~13% reported ratio increase (3.8x to 4.3x) when I
> increased max_zpage_size to PAGE_SIZE / 32 * 31.
> Doesn't look like a
> fight for every byte to me.

Thanks for the report. It could be another patch. :)

>
> > The reason I am asking is I have investigated similar problems
> > in android and other platforms and the reason of latency was not zsmalloc
> > but aggressive high-order allocations from subsystems, watermark check
> > race, deferring of compaction, LMK not working and too much swapout so
> > it causes to reclaim lots of page cache pages which was main culprit
> > in my cases. When I checks with perf, compaction stall count is increased,
> > the time spent in there is not huge so it was not main factor of latency.
>
> The main use case where the difference is seen is switching between
> users on an Android device. It does cause a lot of reclaim, too, as
> you say, but this is in the nature of zbud that reclaim happens in a
> more deterministic way and worst-case looks substantially nicer. That

Interesting!
Why is reclaim more deterministic with zbud?
That's really one part of what I want, with data.

> said, the standard deviation calculated over 20 iterations of a
> change-user-multiple-times-test is 2x less for zbud than the one of
> zsmalloc.

One thing I can guess is that a page could be freed easily if just the
two objects in it are freed by munmap or kill. IOW, we could remove a
pinned page easily, so we could get higher-order pages easily.

However, it would be different once zbud's memory usage is higher, as I
mentioned. As well, we lose memory efficiency significantly for zram. :(

IMO, the more fundamental solution is to support VM-aware compaction of
zsmalloc/zbud rather than hiding the problem with zbud.

Thanks.

> I'll post some numbers in the next patch respin so they won't get lost :)
>
> ~vitaly
* Re: [PATCH 0/2] prepare zbud to be used by zram as underlying allocator
2015-09-22 15:36 ` Minchan Kim
@ 2015-09-22 16:49 ` Austin S Hemmelgarn
0 siblings, 0 replies; 16+ messages in thread
From: Austin S Hemmelgarn @ 2015-09-22 16:49 UTC (permalink / raw)
To: Minchan Kim, Vitaly Wool
Cc: Sergey Senozhatsky, Dan Streetman, Andrew Morton, Sergey Senozhatsky, LKML, linux-mm

On 2015-09-22 11:36, Minchan Kim wrote:
> Hi Vitaly,
>
> On Mon, Sep 21, 2015 at 11:11:00PM +0200, Vitaly Wool wrote:
>> Hello Minchan,
>>
>>> Sorry, because you wrote up "zram" in the title.
>>> As I said earlier, we need several numbers to investigate.
>>>
>>> First of all, what is culprit of your latency?
>>> It seems you are thinking about compaction. so compaction what?
>>> Frequent scanning? lock collision? or frequent sleeping in compaction
>>> code somewhere? And then why does zbud solve it? If we use zbud for zram,
>>> we lose memory efficiency so there is something to justify it.
>>
>> The data I've got so far strongly suggests that in some use cases (see
>> below) with zsmalloc
>> * there are more allocstalls
>> * memory compaction is triggered more frequently
>> * allocstalls happen more often
>> * page migrations are way more frequent, too.
>>
>> Please also keep in mind that I do not advise you or anyone to use
>> zbud instead of zsmalloc. The point I'm trying to make is that zbud
>> fits my particular case better and I want to be able to choose it in
>> the kernel without hacking it with my private patches.
>
> I understand your goal well. ;-) But, please understand my goal which
> is to find fundamental reason why zbud removes latency.
>
> You gave some compaction-related stats but it is just one of result,
> not the cause. I guess you could find another stats as well as compaction
> stats which affect your workload. Once you find them all, please
> investigate what is major factor for your latency among them.
> Then, we should think over what is best solution for it and if zbud is > best to remove the cause, yes, why not. I can merge it into zram. > > IOW, I should maintain zram so I need to know when,where,how to use zbud > with zram is useful so that I can guide it to zram users and you should > *justify* the overhead to me. Overhead means I should maintain two allocators > for zram from now on. It means when I want to add some feature for zsmalloc, > I should take care of zbud and I should watch zbud patches, too which could > be very painful and starting point of diverge for zram. > > Compared to zsmalloc, zsmalloc packs lots of compressed objects into > a page while zbud just stores two objects so if there are different > life time objects in a page, zsmalloc may make higher fragmentation > but zbud is not a good choice for memory efficiency either so my concern > starts from here. > > For solving such problem, we added compaction into recent zram to > reduce waste memory space so it should solve internal fragment problem. > Other problem we don't solve now is external fragmentation which > is related to compaction stats you are seeing now. > Although you are seeing mitigation with zbud, it would be still problem > if you begin to use more memory for zbud. One of example, a few years > ago, some guys tried to support zbud page migration. > > If external fragmentation is really problem in here, we should proivde > a feature VM can migrate zsmalloc page and it was alomost done as I told > you previous thread and I think it is really way to go. > > Even, we are trying to add zlib which is better compress ratio algorithm > to reduce working memory size so without the feature, the problem would be > more severe. > > So, I am thinking now we should enhance it rather than hiding a problem > by merging zbud. 
You know, from my perspective, most of the above argument 'against'
zbud being supported really provides no actual reason not to support
zbud without reading between the lines (which, as far as I can tell, is
'I just don't want to support it'); it just points out all the
differences between zsmalloc and zbud. They are different APIs, they
have different semantics, they are intended for different workloads and
use cases, and this will always be the case. To be entirely honest, the
more complicated you make zsmalloc in an attempt to obviate the need to
support zbud, the more attractive zbud looks as far as I'm concerned.

>
>> FWIW, given that I am not an author of either, I don't see why anyone
>> would consider me biased. :-)
>>
>> As of the memory efficiency, you seem to be quite comfortable with
>> storing uncompressed pages when they compress to more than 3/4 of a
>> page. I observed ~13% reported ratio increase (3.8x to 4.3x) when I
>> increased max_zpage_size to PAGE_SIZE / 32 * 31. Doesn't look like a
>> fight for every byte to me.
>
> Thanks for the report. It could be another patch. :)
>
>>
>>> The reason I am asking is I have investigated similar problems
>>> in android and other platforms and the reason of latency was not zsmalloc
>>> but aggressive high-order allocations from subsystems, watermark check
>>> race, deferring of compaction, LMK not working and too much swapout so
>>> it causes to reclaim lots of page cache pages which was main culprit
>>> in my cases. When I checks with perf, compaction stall count is increased,
>>> the time spent in there is not huge so it was not main factor of latency.
>>
>> The main use case where the difference is seen is switching between
>> users on an Android device. It does cause a lot of reclaim, too, as
>> you say, but this is in the nature of zbud that reclaim happens in a
>> more deterministic way and worst-case looks substantially nicer. That
>
> Interesting!
> Why is reclaim more deterministic with zbud?
> That's really one of part what I want with data.

I'm pretty sure this is due to the fact that zbud stores at most 2
compressed pages in a given page, combined with fewer conditionals in
the reclaim path.

>
>> said, the standard deviation calculated over 20 iterations of a
>> change-user-multiple-times-test is 2x less for zbud than the one of
>> zsmalloc.
>
> One thing I can guess is a page could be freed easily if just two objects
> in a page are freed by munmap or kill. IOW, we could remove pinned page
> easily so we could get higher-order page easily.
>
> However, it would be different once zbud's memory usage is higher
> as I mentioned. As well, we lose memory efficiency significantly for zram. :(

Not everyone who uses zram cares hugely about memory efficiency; I, for
one, would rather have a more deterministic compression ratio (which
zbud achieves) than slightly better memory efficiency.

> IMO, more fundamental solution is to support VM-aware compaction of
> zsmalloc/zbud rather than hiding a problem with zbud.
end of thread, other threads:[~2015-09-22 17:09 UTC | newest]

Thread overview: 16+ messages:
2015-09-16 11:48 [PATCH 0/2] prepare zbud to be used by zram as underlying allocator Vitaly Wool
2015-09-16 11:50 ` [PATCH 1/2] zbud: allow PAGE_SIZE allocations Vitaly Wool
2015-09-17 13:00   ` Vlastimil Babka
2015-09-18 8:03     ` Vitaly Wool
2015-09-21 15:27       ` Dan Streetman
2015-09-21 16:17   ` Dan Streetman
2015-09-22 10:18     ` Vitaly Wool
2015-09-22 17:09       ` Dan Streetman
2015-09-16 11:53 ` [PATCH 2/2] zpool/zsmalloc/zbud: align on interfaces Vitaly Wool
2015-09-21 17:15   ` Dan Streetman
2015-09-17 1:30 ` [PATCH 0/2] prepare zbud to be used by zram as underlying allocator Sergey Senozhatsky
2015-09-17 10:26   ` Vitaly Wool
2015-09-21 4:18     ` Minchan Kim
2015-09-21 21:11       ` Vitaly Wool
2015-09-22 15:36         ` Minchan Kim
2015-09-22 16:49           ` Austin S Hemmelgarn