From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-wr0-f194.google.com ([209.85.128.194]:36285 "EHLO mail-wr0-f194.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753332AbdHTSL3 (ORCPT ); Sun, 20 Aug 2017 14:11:29 -0400 Received: by mail-wr0-f194.google.com with SMTP id f8so9040557wrf.3 for ; Sun, 20 Aug 2017 11:11:28 -0700 (PDT) From: Timofey Titovets To: linux-btrfs@vger.kernel.org Cc: Timofey Titovets Subject: [PATCH v4 3/3] Btrfs: heuristic add byte core set calculation Date: Sun, 20 Aug 2017 21:11:16 +0300 Message-Id: <20170820181116.5131-4-nefelim4ag@gmail.com> In-Reply-To: <20170820181116.5131-1-nefelim4ag@gmail.com> References: <20170820181116.5131-1-nefelim4ag@gmail.com> Sender: linux-btrfs-owner@vger.kernel.org List-ID: Calculate byte core set for data sample: Sort bucket's numbers in decreasing order Count how many numbers use 90% of sample If core set are low (<=25%), data are easily compressible If core set high (>=80%), data are not compressible Signed-off-by: Timofey Titovets --- fs/btrfs/compression.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/compression.h | 2 ++ 2 files changed, 60 insertions(+) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index fe26a44bcc9b..8dc92f42f7c2 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -1069,6 +1070,42 @@ static inline int byte_set_size(const struct heuristic_bucket_item *bucket) return byte_set_size; } +/* For bucket sorting */ +static inline int heuristic_bucket_compare(const void *lv, const void *rv) +{ + struct heuristic_bucket_item *l = (struct heuristic_bucket_item *)(lv); + struct heuristic_bucket_item *r = (struct heuristic_bucket_item *)(rv); + + return r->count - l->count; +} + +/* + * Byte Core set size + * How many bytes use 90% of sample + */ +static inline int byte_core_set_size(struct heuristic_bucket_item *bucket, + u32 core_set_threshold) +{ + int a = 0; + u32 coreset_sum = 0; + + for (; a < BTRFS_HEURISTIC_BYTE_CORE_SET_LOW; a++) + coreset_sum += bucket[a].count; + + if (coreset_sum > core_set_threshold) + return a; + + for (; a < BTRFS_HEURISTIC_BYTE_CORE_SET_HIGH; a++) { + if (bucket[a].count == 0) + break; + coreset_sum += bucket[a].count; + if (coreset_sum > core_set_threshold) + break; + } + + return a; +} + /* * Compression heuristic. * @@ -1092,6 +1129,8 @@ int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end) struct heuristic_bucket_item *bucket; int a, b, ret; u8 symbol, *input_data; + u32 core_set_threshold; + u32 input_size = end - start; ret = 1; @@ -1123,6 +1162,25 @@ int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end) goto out; } + /* Sort in reverse order */ + sort(bucket, BTRFS_HEURISTIC_BUCKET_SIZE, + sizeof(struct heuristic_bucket_item), &heuristic_bucket_compare, + NULL); + + core_set_threshold = (input_size*90)/(BTRFS_HEURISTIC_ITER_OFFSET*100); + core_set_threshold *= BTRFS_HEURISTIC_READ_SIZE; + + a = byte_core_set_size(bucket, core_set_threshold); + if (a <= BTRFS_HEURISTIC_BYTE_CORE_SET_LOW) { + ret = 2; + goto out; + } + + if (a >= BTRFS_HEURISTIC_BYTE_CORE_SET_HIGH) { + ret = 0; + goto out; + } + out: kfree(bucket); return ret; diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 07e3d0652e62..b369a95c1241 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -136,6 +136,8 @@ struct heuristic_bucket_item { #define BTRFS_HEURISTIC_ITER_OFFSET 256 #define BTRFS_HEURISTIC_BUCKET_SIZE 256 #define BTRFS_HEURISTIC_BYTE_SET_THRESHOLD 64 +#define BTRFS_HEURISTIC_BYTE_CORE_SET_LOW BTRFS_HEURISTIC_BYTE_SET_THRESHOLD +#define BTRFS_HEURISTIC_BYTE_CORE_SET_HIGH 200 // 80% int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end); -- 2.14.1