public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* zram: Optimize LZ4 dictionary compression performance
@ 2026-03-10  2:54 gao xu
  2026-03-10  5:39 ` Sergey Senozhatsky
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: gao xu @ 2026-03-10  2:54 UTC (permalink / raw)
  To: Sergey Senozhatsky
  Cc: Minchan Kim, Jens Axboe, linux-block@vger.kernel.org,
	linux-kernel@vger.kernel.org, Andrew Morton, surenb@google.com,
	zhouxiaolong

Calling `LZ4_loadDict()` for every compression in zram causes significant
overhead, because each call re-runs LZ4's internal dictionary
pre-processing. Introduce a template stream that is pre-processed only
once, on the first compression after the dictionary is set or changed
(tracked by a dictionary generation counter). Each subsequent compression
then copies this pre-built state into its own stream with memcpy()
instead of reloading the dictionary.

This optimization improves LZ4 dictionary compression performance by
over 50% in zram I/O tests.
---
 drivers/block/zram/backend_lz4.c | 55 ++++++++++++++++++++++++++++++--
 drivers/block/zram/zcomp.h       |  1 +
 drivers/block/zram/zram_drv.c    |  1 +
 3 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/drivers/block/zram/backend_lz4.c b/drivers/block/zram/backend_lz4.c
index 04e186614..b353f74a8 100644
--- a/drivers/block/zram/backend_lz4.c
+++ b/drivers/block/zram/backend_lz4.c
@@ -12,15 +12,36 @@ struct lz4_ctx {
 	LZ4_stream_t *cstrm;
 };
 
+struct lz4_drv {
+	/* template compression stream with dictionary */
+	LZ4_stream_t base_cstream;
+	/* dictionary generation the current template was built from */
+	u32 dict_gen;
+	bool base_c_valid;
+};
+
 static void lz4_release_params(struct zcomp_params *params)
 {
+	kfree(params->drv_data);
+	params->drv_data = NULL;
 }
 
 static int lz4_setup_params(struct zcomp_params *params)
 {
+	struct lz4_drv *drv;
+
 	if (params->level == ZCOMP_PARAM_NOT_SET)
 		params->level = LZ4_ACCELERATION_DEFAULT;
 
+	drv = kzalloc_obj(drv, GFP_KERNEL);
+	if (!drv)
+		return -ENOMEM;
+
+	drv->dict_gen = 0;
+	drv->base_c_valid = false;
+
+	params->drv_data = drv;
+
 	return 0;
 }
 
@@ -67,10 +88,32 @@ static int lz4_create(struct zcomp_params *params, struct zcomp_ctx *ctx)
 	return -ENOMEM;
 }
 
+static int lz4_build_base_cstream(struct zcomp_params *params)
+{
+	struct lz4_drv *drv = params->drv_data;
+	int ret;
+
+	if (!params->dict || !params->dict_sz)
+		return -EINVAL;
+
+	memset(&drv->base_cstream, 0, sizeof(drv->base_cstream));
+
+	ret = LZ4_loadDict(&drv->base_cstream,
+			   params->dict, params->dict_sz);
+	if (ret != params->dict_sz)
+		return -EINVAL;
+
+	drv->dict_gen = params->dict_gen;
+	drv->base_c_valid = true;
+
+	return 0;
+}
+
 static int lz4_compress(struct zcomp_params *params, struct zcomp_ctx *ctx,
 			struct zcomp_req *req)
 {
 	struct lz4_ctx *zctx = ctx->context;
+	struct lz4_drv *drv  = params->drv_data;
 	int ret;
 
 	if (!zctx->cstrm) {
@@ -78,10 +121,16 @@ static int lz4_compress(struct zcomp_params *params, struct zcomp_ctx *ctx,
 					req->dst_len, params->level,
 					zctx->mem);
 	} else {
+		/* rebuild base_cstream when the dictionary changes */
+		if (!drv->base_c_valid || drv->dict_gen != params->dict_gen) {
+			ret = lz4_build_base_cstream(params);
+			if (ret)
+				return ret;
+		}
+
 		/* Cstrm needs to be reset */
-		ret = LZ4_loadDict(zctx->cstrm, params->dict, params->dict_sz);
-		if (ret != params->dict_sz)
-			return -EINVAL;
+		memcpy(zctx->cstrm, &drv->base_cstream,
+		   sizeof(*zctx->cstrm));
 		ret = LZ4_compress_fast_continue(zctx->cstrm, req->src,
 						 req->dst, req->src_len,
 						 req->dst_len, params->level);
diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h
index eacfd3f7d..5b7ff9fdd 100644
--- a/drivers/block/zram/zcomp.h
+++ b/drivers/block/zram/zcomp.h
@@ -21,6 +21,7 @@ struct zcomp_params {
 	void *dict;
 	size_t dict_sz;
 	s32 level;
+	u32 dict_gen;
 	union {
 		struct deflate_params deflate;
 	};
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index bca33403f..f34f3fa43 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1709,6 +1709,7 @@ static int comp_params_store(struct zram *zram, u32 prio, s32 level,
 	zram->params[prio].dict_sz = sz;
 	zram->params[prio].level = level;
 	zram->params[prio].deflate.winbits = deflate_params->winbits;
+	zram->params[prio].dict_gen++;
 	return 0;
 }
 
--

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2026-03-11  5:22 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-10  2:54 zram: Optimize LZ4 dictionary compression performance gao xu
2026-03-10  5:39 ` Sergey Senozhatsky
2026-03-10  6:13 ` Sergey Senozhatsky
2026-03-10  6:22   ` Sergey Senozhatsky
2026-03-10  8:32   ` gao xu
2026-03-11  1:22     ` Sergey Senozhatsky
2026-03-11  2:55       ` gao xu
2026-03-10  6:15 ` Sergey Senozhatsky
2026-03-11  4:00   ` gao xu
2026-03-11  5:22     ` Sergey Senozhatsky
2026-03-11  1:25 ` Sergey Senozhatsky
2026-03-11  2:56   ` gao xu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox