* [PATCH v4 1/3] Btrfs: heuristic add simple sampling logic
2017-08-20 18:11 [PATCH v4 0/3] Btrfs: populate heuristic with detection logic Timofey Titovets
@ 2017-08-20 18:11 ` Timofey Titovets
2017-08-20 18:11 ` [PATCH v4 2/3] Btrfs: heuristic add byte set calculation Timofey Titovets
2017-08-20 18:11 ` [PATCH v4 3/3] Btrfs: heuristic add byte core " Timofey Titovets
2 siblings, 0 replies; 4+ messages in thread
From: Timofey Titovets @ 2017-08-20 18:11 UTC (permalink / raw)
To: linux-btrfs; +Cc: Timofey Titovets
Get small sample from input data and calculate
byte type count for that sample into bucket.
Bucket will store info about which bytes
and how many has been detected in sample
Signed-off-by: Timofey Titovets <nefelim4ag@gmail.com>
---
fs/btrfs/compression.c | 24 ++++++++++++++++++++++--
fs/btrfs/compression.h | 8 ++++++++
2 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 883ecc58fd0d..c078c8d8c034 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -1068,15 +1068,35 @@ int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end)
u64 index = start >> PAGE_SHIFT;
u64 end_index = end >> PAGE_SHIFT;
struct page *page;
- int ret = 1;
+ struct heuristic_bucket_item *bucket;
+ int a, b, ret;
+ u8 symbol, *input_data;
+
+ ret = 1;
+
+ bucket = kcalloc(BTRFS_HEURISTIC_BUCKET_SIZE,
+ sizeof(struct heuristic_bucket_item), GFP_NOFS);
+
+ if (!bucket)
+ goto out;
while (index <= end_index) {
page = find_get_page(inode->i_mapping, index);
- kmap(page);
+ input_data = kmap(page);
+ a = 0;
+ while (a < PAGE_SIZE) {
+ for (b = 0; b < BTRFS_HEURISTIC_READ_SIZE; b++) {
+ symbol = input_data[a+b];
+ bucket[symbol].count++;
+ }
+ a += BTRFS_HEURISTIC_ITER_OFFSET;
+ }
kunmap(page);
put_page(page);
index++;
}
+out:
+ kfree(bucket);
return ret;
}
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 3b1b0ac15fdc..e0421705b80b 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -128,6 +128,14 @@ struct btrfs_compress_op {
extern const struct btrfs_compress_op btrfs_zlib_compress;
extern const struct btrfs_compress_op btrfs_lzo_compress;
+struct heuristic_bucket_item {
+ u32 count;
+};
+
+#define BTRFS_HEURISTIC_READ_SIZE 16
+#define BTRFS_HEURISTIC_ITER_OFFSET 256
+#define BTRFS_HEURISTIC_BUCKET_SIZE 256
+
int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end);
#endif
--
2.14.1
^ permalink raw reply related [flat|nested] 4+ messages in thread* [PATCH v4 2/3] Btrfs: heuristic add byte set calculation
2017-08-20 18:11 [PATCH v4 0/3] Btrfs: populate heuristic with detection logic Timofey Titovets
2017-08-20 18:11 ` [PATCH v4 1/3] Btrfs: heuristic add simple sampling logic Timofey Titovets
@ 2017-08-20 18:11 ` Timofey Titovets
2017-08-20 18:11 ` [PATCH v4 3/3] Btrfs: heuristic add byte core " Timofey Titovets
2 siblings, 0 replies; 4+ messages in thread
From: Timofey Titovets @ 2017-08-20 18:11 UTC (permalink / raw)
To: linux-btrfs; +Cc: Timofey Titovets
Calculate byte set size for data sample:
Calculate how many unique bytes has been in sample
By count all bytes in bucket with count > 0
If byte set low (~25%), data are easily compressible
Signed-off-by: Timofey Titovets <nefelim4ag@gmail.com>
---
fs/btrfs/compression.c | 27 +++++++++++++++++++++++++++
fs/btrfs/compression.h | 1 +
2 files changed, 28 insertions(+)
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index c078c8d8c034..fe26a44bcc9b 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -1048,6 +1048,27 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
return 1;
}
+static inline int byte_set_size(const struct heuristic_bucket_item *bucket)
+{
+ int a = 0;
+ int byte_set_size = 0;
+
+ for (; a < BTRFS_HEURISTIC_BYTE_SET_THRESHOLD; a++) {
+ if (bucket[a].count > 0)
+ byte_set_size++;
+ }
+
+ for (; a < BTRFS_HEURISTIC_BUCKET_SIZE; a++) {
+ if (bucket[a].count > 0) {
+ byte_set_size++;
+ if (byte_set_size > BTRFS_HEURISTIC_BYTE_SET_THRESHOLD)
+ return byte_set_size;
+ }
+ }
+
+ return byte_set_size;
+}
+
/*
* Compression heuristic.
*
@@ -1096,6 +1117,12 @@ int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end)
index++;
}
+ a = byte_set_size(bucket);
+ if (a > BTRFS_HEURISTIC_BYTE_SET_THRESHOLD) {
+ ret = 1;
+ goto out;
+ }
+
out:
kfree(bucket);
return ret;
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index e0421705b80b..07e3d0652e62 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -135,6 +135,7 @@ struct heuristic_bucket_item {
#define BTRFS_HEURISTIC_READ_SIZE 16
#define BTRFS_HEURISTIC_ITER_OFFSET 256
#define BTRFS_HEURISTIC_BUCKET_SIZE 256
+#define BTRFS_HEURISTIC_BYTE_SET_THRESHOLD 64
int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end);
--
2.14.1
^ permalink raw reply related [flat|nested] 4+ messages in thread* [PATCH v4 3/3] Btrfs: heuristic add byte core set calculation
2017-08-20 18:11 [PATCH v4 0/3] Btrfs: populate heuristic with detection logic Timofey Titovets
2017-08-20 18:11 ` [PATCH v4 1/3] Btrfs: heuristic add simple sampling logic Timofey Titovets
2017-08-20 18:11 ` [PATCH v4 2/3] Btrfs: heuristic add byte set calculation Timofey Titovets
@ 2017-08-20 18:11 ` Timofey Titovets
2 siblings, 0 replies; 4+ messages in thread
From: Timofey Titovets @ 2017-08-20 18:11 UTC (permalink / raw)
To: linux-btrfs; +Cc: Timofey Titovets
Calculate byte core set for data sample:
Sort bucket's numbers in decreasing order
Count how many numbers use 90% of sample
If core set are low (<=25%), data are easily compressible
If core set high (>=80%), data are not compressible
Signed-off-by: Timofey Titovets <nefelim4ag@gmail.com>
---
fs/btrfs/compression.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++
fs/btrfs/compression.h | 2 ++
2 files changed, 60 insertions(+)
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index fe26a44bcc9b..8dc92f42f7c2 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -33,6 +33,7 @@
#include <linux/bit_spinlock.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
+#include <linux/sort.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -1069,6 +1070,42 @@ static inline int byte_set_size(const struct heuristic_bucket_item *bucket)
return byte_set_size;
}
+/* For bucket sorting */
+static inline int heuristic_bucket_compare(const void *lv, const void *rv)
+{
+ struct heuristic_bucket_item *l = (struct heuristic_bucket_item *)(lv);
+ struct heuristic_bucket_item *r = (struct heuristic_bucket_item *)(rv);
+
+ return r->count - l->count;
+}
+
+/*
+ * Byte Core set size
+ * How many bytes use 90% of sample
+ */
+static inline int byte_core_set_size(struct heuristic_bucket_item *bucket,
+ u32 core_set_threshold)
+{
+ int a = 0;
+ u32 coreset_sum = 0;
+
+ for (; a < BTRFS_HEURISTIC_BYTE_CORE_SET_LOW; a++)
+ coreset_sum += bucket[a].count;
+
+ if (coreset_sum > core_set_threshold)
+ return a;
+
+ for (; a < BTRFS_HEURISTIC_BYTE_CORE_SET_HIGH; a++) {
+ if (bucket[a].count == 0)
+ break;
+ coreset_sum += bucket[a].count;
+ if (coreset_sum > core_set_threshold)
+ break;
+ }
+
+ return a;
+}
+
/*
* Compression heuristic.
*
@@ -1092,6 +1129,8 @@ int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end)
struct heuristic_bucket_item *bucket;
int a, b, ret;
u8 symbol, *input_data;
+ u32 core_set_threshold;
+ u32 input_size = end - start;
ret = 1;
@@ -1123,6 +1162,25 @@ int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end)
goto out;
}
+ /* Sort in reverse order */
+ sort(bucket, BTRFS_HEURISTIC_BUCKET_SIZE,
+ sizeof(struct heuristic_bucket_item), &heuristic_bucket_compare,
+ NULL);
+
+ core_set_threshold = (input_size*90)/(BTRFS_HEURISTIC_ITER_OFFSET*100);
+ core_set_threshold *= BTRFS_HEURISTIC_READ_SIZE;
+
+ a = byte_core_set_size(bucket, core_set_threshold);
+ if (a <= BTRFS_HEURISTIC_BYTE_CORE_SET_LOW) {
+ ret = 2;
+ goto out;
+ }
+
+ if (a >= BTRFS_HEURISTIC_BYTE_CORE_SET_HIGH) {
+ ret = 0;
+ goto out;
+ }
+
out:
kfree(bucket);
return ret;
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 07e3d0652e62..b369a95c1241 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -136,6 +136,8 @@ struct heuristic_bucket_item {
#define BTRFS_HEURISTIC_ITER_OFFSET 256
#define BTRFS_HEURISTIC_BUCKET_SIZE 256
#define BTRFS_HEURISTIC_BYTE_SET_THRESHOLD 64
+#define BTRFS_HEURISTIC_BYTE_CORE_SET_LOW BTRFS_HEURISTIC_BYTE_SET_THRESHOLD
+#define BTRFS_HEURISTIC_BYTE_CORE_SET_HIGH 200 // 80%
int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end);
--
2.14.1
^ permalink raw reply related [flat|nested] 4+ messages in thread