* [PATCH 1/2] btrfs: use page alloc/free wrapeprs for compression pages
2023-11-15 16:59 [PATCH 0/2] Pool for compression pages David Sterba
@ 2023-11-15 16:59 ` David Sterba
2023-11-16 9:15 ` Johannes Thumshirn
2023-11-15 16:59 ` [PATCH 2/2] btrfs: use shrinker for compression page pool David Sterba
2023-11-20 17:05 ` [PATCH 0/2] Pool for compression pages Josef Bacik
2 siblings, 1 reply; 6+ messages in thread
From: David Sterba @ 2023-11-15 16:59 UTC (permalink / raw)
To: linux-btrfs; +Cc: David Sterba
This is a preparation for managing compression pages in a cache-like
manner, instead of asking the allocator each time. The common allocation
and free wrappers are introduced and are functionally equivalent to the
current code.
The freeing helpers need to be carefully placed where the last reference
is dropped. This is either after directly allocating (error handling)
or when there are no other users of the pages (after copying the contents).
It's safe to not use the helper and use put_page() that will handle the
reference count. Not using the helper means there's lower number of
pages that could be reused without passing them back to allocator.
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/compression.c | 16 +++++++++++++++-
fs/btrfs/compression.h | 5 +++++
fs/btrfs/inode.c | 4 ++--
fs/btrfs/lzo.c | 4 ++--
fs/btrfs/zlib.c | 6 +++---
fs/btrfs/zstd.c | 7 +++----
6 files changed, 30 insertions(+), 12 deletions(-)
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 19b22b4653c8..1cd15d6a9c49 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -163,12 +163,26 @@ static int compression_decompress(int type, struct list_head *ws,
static void btrfs_free_compressed_pages(struct compressed_bio *cb)
{
for (unsigned int i = 0; i < cb->nr_pages; i++)
- put_page(cb->compressed_pages[i]);
+ btrfs_free_compr_page(cb->compressed_pages[i]);
kfree(cb->compressed_pages);
}
static int btrfs_decompress_bio(struct compressed_bio *cb);
+/*
+ * Common wrappers for page allocation from compression wrappers
+ */
+struct page *btrfs_alloc_compr_page(void)
+{
+ return alloc_page(GFP_NOFS);
+}
+
+void btrfs_free_compr_page(struct page *page)
+{
+ ASSERT(page_ref_count(page) == 1);
+ put_page(page);
+}
+
static void end_compressed_bio_read(struct btrfs_bio *bbio)
{
struct compressed_bio *cb = to_compressed_bio(bbio);
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 03bb9d143fa7..93cc92974dee 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -32,6 +32,8 @@ static_assert((BTRFS_MAX_COMPRESSED % PAGE_SIZE) == 0);
#define BTRFS_ZLIB_DEFAULT_LEVEL 3
+struct page;
+
struct compressed_bio {
/* Number of compressed pages in the array */
unsigned int nr_pages;
@@ -96,6 +98,9 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio);
unsigned int btrfs_compress_str2level(unsigned int type, const char *str);
+struct page *btrfs_alloc_compr_page(void);
+void btrfs_free_compr_page(struct page *page);
+
enum btrfs_compression_type {
BTRFS_COMPRESS_NONE = 0,
BTRFS_COMPRESS_ZLIB = 1,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9f5a9894f88f..82cb811a597f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1037,7 +1037,7 @@ static void compress_file_range(struct btrfs_work *work)
if (pages) {
for (i = 0; i < nr_pages; i++) {
WARN_ON(pages[i]->mapping);
- put_page(pages[i]);
+ btrfs_free_compr_page(pages[i]);
}
kfree(pages);
}
@@ -1052,7 +1052,7 @@ static void free_async_extent_pages(struct async_extent *async_extent)
for (i = 0; i < async_extent->nr_pages; i++) {
WARN_ON(async_extent->pages[i]->mapping);
- put_page(async_extent->pages[i]);
+ btrfs_free_compr_page(async_extent->pages[i]);
}
kfree(async_extent->pages);
async_extent->nr_pages = 0;
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index d3fcfc628a4f..1131d5a29d61 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -152,7 +152,7 @@ static int copy_compressed_data_to_page(char *compressed_data,
cur_page = out_pages[*cur_out / PAGE_SIZE];
/* Allocate a new page */
if (!cur_page) {
- cur_page = alloc_page(GFP_NOFS);
+ cur_page = btrfs_alloc_compr_page();
if (!cur_page)
return -ENOMEM;
out_pages[*cur_out / PAGE_SIZE] = cur_page;
@@ -178,7 +178,7 @@ static int copy_compressed_data_to_page(char *compressed_data,
cur_page = out_pages[*cur_out / PAGE_SIZE];
/* Allocate a new page */
if (!cur_page) {
- cur_page = alloc_page(GFP_NOFS);
+ cur_page = btrfs_alloc_compr_page();
if (!cur_page)
return -ENOMEM;
out_pages[*cur_out / PAGE_SIZE] = cur_page;
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 6c231a116a29..36cf1f0e338e 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -121,7 +121,7 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
workspace->strm.total_in = 0;
workspace->strm.total_out = 0;
- out_page = alloc_page(GFP_NOFS);
+ out_page = btrfs_alloc_compr_page();
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
@@ -200,7 +200,7 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
ret = -E2BIG;
goto out;
}
- out_page = alloc_page(GFP_NOFS);
+ out_page = btrfs_alloc_compr_page();
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
@@ -236,7 +236,7 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
ret = -E2BIG;
goto out;
}
- out_page = alloc_page(GFP_NOFS);
+ out_page = btrfs_alloc_compr_page();
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
index 5511766485cd..0d66db8bc1d4 100644
--- a/fs/btrfs/zstd.c
+++ b/fs/btrfs/zstd.c
@@ -410,9 +410,8 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
workspace->in_buf.pos = 0;
workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE);
-
/* Allocate and map in the output buffer */
- out_page = alloc_page(GFP_NOFS);
+ out_page = btrfs_alloc_compr_page();
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
@@ -457,7 +456,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
ret = -E2BIG;
goto out;
}
- out_page = alloc_page(GFP_NOFS);
+ out_page = btrfs_alloc_compr_page();
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
@@ -514,7 +513,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
ret = -E2BIG;
goto out;
}
- out_page = alloc_page(GFP_NOFS);
+ out_page = btrfs_alloc_compr_page();
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
--
2.42.1
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH 2/2] btrfs: use shrinker for compression page pool
2023-11-15 16:59 [PATCH 0/2] Pool for compression pages David Sterba
2023-11-15 16:59 ` [PATCH 1/2] btrfs: use page alloc/free wrapeprs " David Sterba
@ 2023-11-15 16:59 ` David Sterba
2023-11-20 17:05 ` [PATCH 0/2] Pool for compression pages Josef Bacik
2 siblings, 0 replies; 6+ messages in thread
From: David Sterba @ 2023-11-15 16:59 UTC (permalink / raw)
To: linux-btrfs; +Cc: David Sterba
The pages are now allocated and freed centrally, so we can extend the
logic to manage the lifetime. The main idea is to keep a few recently
used pages and hand them to all writers. Ideally we won't have to go to
allocator at all (a slight performance gain) and also raise chance that
we'll have the pages available (slightly increased reliability).
In order to avoid gathering too many pages, the shrinker is attached to
the cache so we can free them on when MM demands that. The first
implementation will drain the whole cache. Further this can be refined
to keep some minimal number of pages for emergency purposes. The
ultimate goal to avoid memory allocation failures on the write out path
from the compression.
The pool threshold is set to cover full BTRFS_MAX_COMPRESSED / PAGE_SIZE
for minimal thread pool, which is 8 (btrfs_init_fs_info()). This is 128K
/ 4K * 8 = 256 pages at maximum, which is 1MiB.
This is for all filesystems currently mounted, with heavy use of
compression IO the allocator is still needed. The cache helps for short
burst IO.
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/compression.c | 102 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 102 insertions(+)
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 1cd15d6a9c49..05595d113ff8 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -20,6 +20,7 @@
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/log2.h>
+#include <linux/shrinker.h>
#include <crypto/hash.h>
#include "misc.h"
#include "ctree.h"
@@ -169,16 +170,96 @@ static void btrfs_free_compressed_pages(struct compressed_bio *cb)
static int btrfs_decompress_bio(struct compressed_bio *cb);
+/*
+ * Global cache of last unused pages for compression/decompression.
+ */
+static struct btrfs_compr_pool {
+ struct shrinker *shrinker;
+ spinlock_t lock;
+ struct list_head list;
+ int count;
+ int thresh;
+} compr_pool;
+
+static unsigned long btrfs_compr_pool_count(struct shrinker *sh, struct shrink_control *sc)
+{
+ int ret;
+
+ /*
+ * We must not read the values more than once if 'ret' gets expanded in
+ * the return statement so we don't accidentally return a negative
+ * number, even if the first condition finds it positive.
+ */
+ ret = READ_ONCE(compr_pool.count) - READ_ONCE(compr_pool.thresh);
+
+ return ret > 0 ? ret : 0;
+}
+
+static unsigned long btrfs_compr_pool_scan(struct shrinker *sh, struct shrink_control *sc)
+{
+ struct list_head remove;
+ struct list_head *tmp, *next;
+ int freed;
+
+ if (compr_pool.count == 0)
+ return SHRINK_STOP;
+
+ INIT_LIST_HEAD(&remove);
+
+ /* For now, just simply drain the whole list. */
+ spin_lock(&compr_pool.lock);
+ list_splice_init(&compr_pool.list, &remove);
+ freed = compr_pool.count;
+ compr_pool.count = 0;
+ spin_unlock(&compr_pool.lock);
+
+ list_for_each_safe(tmp, next, &remove) {
+ struct page *page = list_entry(tmp, struct page, lru);
+
+ ASSERT(page_ref_count(page) == 1);
+ put_page(page);
+ }
+
+ return freed;
+}
+
/*
* Common wrappers for page allocation from compression wrappers
*/
struct page *btrfs_alloc_compr_page(void)
{
+ struct page *page = NULL;
+
+ spin_lock(&compr_pool.lock);
+ if (compr_pool.count > 0) {
+ page = list_first_entry(&compr_pool.list, struct page, lru);
+ list_del_init(&page->lru);
+ compr_pool.count--;
+ }
+ spin_unlock(&compr_pool.lock);
+
+ if (page)
+ return page;
+
return alloc_page(GFP_NOFS);
}
void btrfs_free_compr_page(struct page *page)
{
+ bool do_free = false;
+
+ spin_lock(&compr_pool.lock);
+ if (compr_pool.count > compr_pool.thresh) {
+ do_free = true;
+ } else {
+ list_add(&page->lru, &compr_pool.list);
+ compr_pool.count++;
+ }
+ spin_unlock(&compr_pool.lock);
+
+ if (!do_free)
+ return;
+
ASSERT(page_ref_count(page) == 1);
put_page(page);
}
@@ -974,15 +1055,36 @@ int __init btrfs_init_compress(void)
offsetof(struct compressed_bio, bbio.bio),
BIOSET_NEED_BVECS))
return -ENOMEM;
+
+ compr_pool.shrinker = shrinker_alloc(SHRINKER_NONSLAB, "btrfs-compr-pages");
+ if (!compr_pool.shrinker)
+ return -ENOMEM;
+
btrfs_init_workspace_manager(BTRFS_COMPRESS_NONE);
btrfs_init_workspace_manager(BTRFS_COMPRESS_ZLIB);
btrfs_init_workspace_manager(BTRFS_COMPRESS_LZO);
zstd_init_workspace_manager();
+
+ spin_lock_init(&compr_pool.lock);
+ INIT_LIST_HEAD(&compr_pool.list);
+ compr_pool.count = 0;
+ /* 128K / 4K = 32, for 8 threads is 256 pages. */
+ compr_pool.thresh = BTRFS_MAX_COMPRESSED / PAGE_SIZE * 8;
+ compr_pool.shrinker->count_objects = btrfs_compr_pool_count;
+ compr_pool.shrinker->scan_objects = btrfs_compr_pool_scan;
+ compr_pool.shrinker->batch = 32;
+ compr_pool.shrinker->seeks = DEFAULT_SEEKS;
+ shrinker_register(compr_pool.shrinker);
+
return 0;
}
void __cold btrfs_exit_compress(void)
{
+ /* For now scan drains all pages and does not touch the parameters. */
+ btrfs_compr_pool_scan(NULL, NULL);
+ shrinker_free(compr_pool.shrinker);
+
btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_NONE);
btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_ZLIB);
btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_LZO);
--
2.42.1
^ permalink raw reply related [flat|nested] 6+ messages in thread