* [PATCH v6 1/6] Btrfs: heuristic make use compression workspaces
From: Timofey Titovets @ 2017-08-23 22:45 UTC
To: linux-btrfs; +Cc: Timofey Titovets
Move the heuristic into its own file.
Implement compression workspace support for the
heuristic's resources.
Signed-off-by: Timofey Titovets <nefelim4ag@gmail.com>
---
fs/btrfs/heuristic.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/heuristic.c b/fs/btrfs/heuristic.c
index 96ae3e9334bc..d0e9f9112b7e 100644
--- a/fs/btrfs/heuristic.c
+++ b/fs/btrfs/heuristic.c
@@ -48,14 +48,20 @@ static int heuristic(struct list_head *ws, struct inode *inode,
{
struct page *page;
u64 index, index_end;
- u8 *input_data;
index = start >> PAGE_SHIFT;
index_end = end >> PAGE_SHIFT;
+ /*
+ * If end is aligned to PAGE_SIZE,
+ * it's useless to check one page past the range.
+ */
+ if (end % PAGE_SIZE == 0)
+ index_end--;
+
for (; index <= index_end; index++) {
page = find_get_page(inode->i_mapping, index);
- input_data = kmap(page);
+ kmap(page);
kunmap(page);
put_page(page);
}
--
2.14.1
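An aside on the new alignment check: assuming end is exclusive of the range (which the check implies), a page-aligned end would otherwise make the loop touch one page past the data. A minimal user-space sketch of the index math, not part of the patch; the PAGE_SHIFT value and the example offsets are picked purely for illustration:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12			/* assumed 4 KiB pages */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
	uint64_t start = 0, end = 8192;	/* hypothetical 2-page range, end exclusive */
	uint64_t index = start >> PAGE_SHIFT;
	uint64_t index_end = end >> PAGE_SHIFT;

	/* A page-aligned end would otherwise add one extra page to the walk */
	if (end % PAGE_SIZE == 0)
		index_end--;

	printf("walk pages %llu..%llu\n",
	       (unsigned long long)index, (unsigned long long)index_end);
	return 0;
}

With end = 8192 the loop walks pages 0..1 instead of 0..2.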
* [PATCH v6 2/6] Btrfs: heuristic workspace add bucket and sample items
From: Timofey Titovets @ 2017-08-23 22:45 UTC
To: linux-btrfs; +Cc: Timofey Titovets
Heuristic workspace:
- Add a bucket for storing byte type counters
- Add a sample array for storing a partial copy of
  the input data range
- Add a counter for the current sample size to the workspace
Signed-off-by: Timofey Titovets <nefelim4ag@gmail.com>
---
fs/btrfs/heuristic.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/fs/btrfs/heuristic.c b/fs/btrfs/heuristic.c
index d0e9f9112b7e..9a212674f527 100644
--- a/fs/btrfs/heuristic.c
+++ b/fs/btrfs/heuristic.c
@@ -20,13 +20,29 @@
#include <linux/bio.h>
#include "compression.h"
+#define READ_SIZE 16
+#define ITER_SHIFT 256
+#define BUCKET_SIZE 256
+#define MAX_SAMPLE_SIZE (BTRFS_MAX_UNCOMPRESSED * READ_SIZE / ITER_SHIFT)
+
+struct bucket_item {
+ u32 count;
+};
+
struct workspace {
+ /* Partial copy of input data */
+ u8 *sample;
+ u32 sample_size;
+ /* Buckets store counters for each byte type */
+ struct bucket_item bucket[BUCKET_SIZE];
struct list_head list;
};
static void heuristic_free_workspace(struct list_head *ws)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
+
+ kvfree(workspace->sample);
kfree(workspace);
}
@@ -38,9 +54,16 @@ static struct list_head *heuristic_alloc_workspace(void)
if (!workspace)
return ERR_PTR(-ENOMEM);
+ workspace->sample = kvmalloc(MAX_SAMPLE_SIZE, GFP_KERNEL);
+ if (!workspace->sample)
+ goto fail;
+
INIT_LIST_HEAD(&workspace->list);
return &workspace->list;
+fail:
+ heuristic_free_workspace(&workspace->list);
+ return ERR_PTR(-ENOMEM);
}
static int heuristic(struct list_head *ws, struct inode *inode,
--
2.14.1
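For scale, BTRFS_MAX_UNCOMPRESSED is 128 KiB in the kernel tree, so MAX_SAMPLE_SIZE works out to 131072 * 16 / 256 = 8192 bytes per workspace. A standalone check of that arithmetic, with the constants copied from the patch and BTRFS_MAX_UNCOMPRESSED redefined locally:

#include <stdio.h>

#define BTRFS_MAX_UNCOMPRESSED	(128 * 1024)
#define READ_SIZE		16
#define ITER_SHIFT		256
#define MAX_SAMPLE_SIZE		(BTRFS_MAX_UNCOMPRESSED * READ_SIZE / ITER_SHIFT)

int main(void)
{
	/* 131072 * 16 / 256 = 8192: one 16-byte chunk per 256 bytes of input */
	printf("MAX_SAMPLE_SIZE = %d bytes\n", MAX_SAMPLE_SIZE);
	return 0;
}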
* [PATCH v6 3/6] Btrfs: implement heuristic sampling logic
From: Timofey Titovets @ 2017-08-23 22:45 UTC
To: linux-btrfs; +Cc: Timofey Titovets
Copy sample data from the input data range into the sample buffer,
then calculate byte type counts for that sample into the bucket.
Signed-off-by: Timofey Titovets <nefelim4ag@gmail.com>
---
fs/btrfs/heuristic.c | 38 +++++++++++++++++++++++++++++++++++++-
1 file changed, 37 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/heuristic.c b/fs/btrfs/heuristic.c
index 9a212674f527..001118f98143 100644
--- a/fs/btrfs/heuristic.c
+++ b/fs/btrfs/heuristic.c
@@ -69,8 +69,20 @@ static struct list_head *heuristic_alloc_workspace(void)
static int heuristic(struct list_head *ws, struct inode *inode,
u64 start, u64 end)
{
+ struct workspace *workspace = list_entry(ws, struct workspace, list);
struct page *page;
u64 index, index_end;
+ u32 a, b;
+ u8 *in_data, *sample = workspace->sample;
+ u8 byte;
+
+ /*
+ * Compression handles only the first 128 KiB of the input range
+ * and just shifts over the range in a loop while compressing it.
+ * Let's do the same here.
+ */
+ if (end - start > BTRFS_MAX_UNCOMPRESSED)
+ end = start + BTRFS_MAX_UNCOMPRESSED;
index = start >> PAGE_SHIFT;
index_end = end >> PAGE_SHIFT;
@@ -82,13 +94,37 @@ static int heuristic(struct list_head *ws, struct inode *inode,
if (end % PAGE_SIZE == 0)
index_end--;
+ b = 0;
for (; index <= index_end; index++) {
page = find_get_page(inode->i_mapping, index);
- kmap(page);
+ in_data = kmap(page);
+ /* Handle the case where start is unaligned to PAGE_SIZE */
+ a = start % PAGE_SIZE;
+ while (a < PAGE_SIZE - READ_SIZE) {
+ /* Prevent sample overflow */
+ if (b >= MAX_SAMPLE_SIZE)
+ break;
+ /* Don't sample memory garbage from the last page */
+ if (start > end - READ_SIZE)
+ break;
+ memcpy(&sample[b], &in_data[a], READ_SIZE);
+ a += ITER_SHIFT;
+ start += ITER_SHIFT;
+ b += READ_SIZE;
+ }
kunmap(page);
put_page(page);
}
+ workspace->sample_size = b;
+
+ memset(workspace->bucket, 0, sizeof(*workspace->bucket) * BUCKET_SIZE);
+
+ for (a = 0; a < workspace->sample_size; a++) {
+ byte = sample[a];
+ workspace->bucket[byte].count++;
+ }
+
return 1;
}
--
2.14.1
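The sampling itself is simple: take a READ_SIZE (16 byte) chunk from every ITER_SHIFT (256 byte) step of the input and histogram the byte values. A minimal user-space sketch of the same idea, without the page-cache handling; function and variable names are illustrative, not from the patch:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define READ_SIZE	16
#define ITER_SHIFT	256
#define BUCKET_SIZE	256

static uint32_t sample_and_count(const uint8_t *in, size_t len,
				 uint8_t *sample, uint32_t *bucket)
{
	size_t src = 0;
	uint32_t out = 0, i;

	memset(bucket, 0, BUCKET_SIZE * sizeof(*bucket));

	/* Take a 16-byte chunk from every 256-byte step of the input */
	while (src + READ_SIZE <= len) {
		memcpy(&sample[out], &in[src], READ_SIZE);
		src += ITER_SHIFT;
		out += READ_SIZE;
	}

	/* Count how often each byte value occurs in the sample */
	for (i = 0; i < out; i++)
		bucket[sample[i]]++;

	return out;	/* sample size in bytes */
}

int main(void)
{
	uint8_t in[4096], sample[sizeof(in) * READ_SIZE / ITER_SHIFT];
	uint32_t bucket[BUCKET_SIZE];
	size_t i;

	for (i = 0; i < sizeof(in); i++)
		in[i] = "abcd"[i % 4];	/* trivially compressible input */

	printf("sample size: %u\n", sample_and_count(in, sizeof(in), sample, bucket));
	printf("count of 'a': %u\n", bucket['a']);
	return 0;
}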
* [PATCH v6 4/6] Btrfs: heuristic add detection of repeated data patterns
From: Timofey Titovets @ 2017-08-23 22:45 UTC
To: linux-btrfs; +Cc: Timofey Titovets
Walk over the data sample and use memcmp() to detect
repeated data (such as zeroed ranges).
Signed-off-by: Timofey Titovets <nefelim4ag@gmail.com>
---
fs/btrfs/heuristic.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/fs/btrfs/heuristic.c b/fs/btrfs/heuristic.c
index 001118f98143..8df56ea93064 100644
--- a/fs/btrfs/heuristic.c
+++ b/fs/btrfs/heuristic.c
@@ -66,6 +66,19 @@ static struct list_head *heuristic_alloc_workspace(void)
return ERR_PTR(-ENOMEM);
}
+static bool sample_repeated_patterns(struct workspace *ws)
+{
+ u32 i = 0;
+ u8 *p = ws->sample;
+
+ for (; i < ws->sample_size - READ_SIZE; i += READ_SIZE) {
+ if (memcmp(&p[i], &p[i + READ_SIZE], READ_SIZE))
+ return false;
+ }
+
+ return true;
+}
+
static int heuristic(struct list_head *ws, struct inode *inode,
u64 start, u64 end)
{
@@ -118,6 +131,9 @@ static int heuristic(struct list_head *ws, struct inode *inode,
workspace->sample_size = b;
+ if (sample_repeated_patterns(workspace))
+ return 1;
+
memset(workspace->bucket, 0, sizeof(*workspace->bucket)*BUCKET_SIZE);
for (a = 0; a < workspace->sample_size; a++) {
--
2.14.1
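The check declares the sample repetitive only when every 16-byte chunk matches the chunk right after it, so a run of zeroes (or any repeating 16-byte pattern) passes and a single differing byte fails it. A user-space sketch of that logic; names are illustrative, not from the patch:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define READ_SIZE 16

static bool repeated_pattern(const uint8_t *sample, uint32_t sample_size)
{
	uint32_t i;

	for (i = 0; i + READ_SIZE < sample_size; i += READ_SIZE) {
		/* Any mismatch between neighbouring chunks ends the check */
		if (memcmp(&sample[i], &sample[i + READ_SIZE], READ_SIZE))
			return false;
	}

	return true;
}

int main(void)
{
	uint8_t zeroes[256] = { 0 };
	uint8_t mixed[256] = { 0 };

	mixed[200] = 1;	/* a single differing byte breaks the pattern */

	printf("zeroes repeated: %d\n", repeated_pattern(zeroes, sizeof(zeroes)));
	printf("mixed repeated:  %d\n", repeated_pattern(mixed, sizeof(mixed)));
	return 0;
}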
* [PATCH v6 5/6] Btrfs: heuristic add byte set calculation
From: Timofey Titovets @ 2017-08-23 22:45 UTC
To: linux-btrfs; +Cc: Timofey Titovets
Calculate the byte set size for the data sample:
count how many unique byte values occur in the sample
by counting all bucket entries with count > 0.
If the byte set is small (~25% of 256 or less), the data is likely easily compressible.
Signed-off-by: Timofey Titovets <nefelim4ag@gmail.com>
---
fs/btrfs/heuristic.c | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/fs/btrfs/heuristic.c b/fs/btrfs/heuristic.c
index 8df56ea93064..4815d1becf7e 100644
--- a/fs/btrfs/heuristic.c
+++ b/fs/btrfs/heuristic.c
@@ -24,6 +24,7 @@
#define ITER_SHIFT 256
#define BUCKET_SIZE 256
#define MAX_SAMPLE_SIZE (BTRFS_MAX_UNCOMPRESSED*READ_SIZE/ITER_SHIFT)
+#define BYTE_SET_THRESHOLD 64
struct bucket_item {
u32 count;
@@ -66,6 +67,27 @@ static struct list_head *heuristic_alloc_workspace(void)
return ERR_PTR(-ENOMEM);
}
+static u32 byte_set_size(const struct workspace *ws)
+{
+ u32 a = 0;
+ u32 byte_set_size = 0;
+
+ for (; a < BYTE_SET_THRESHOLD; a++) {
+ if (ws->bucket[a].count > 0)
+ byte_set_size++;
+ }
+
+ for (; a < BUCKET_SIZE; a++) {
+ if (ws->bucket[a].count > 0) {
+ byte_set_size++;
+ if (byte_set_size > BYTE_SET_THRESHOLD)
+ return byte_set_size;
+ }
+ }
+
+ return byte_set_size;
+}
+
static bool sample_repeated_patterns(struct workspace *ws)
{
u32 i = 0;
@@ -141,6 +163,10 @@ static int heuristic(struct list_head *ws, struct inode *inode,
workspace->bucket[byte].count++;
}
+ a = byte_set_size(workspace);
+ if (a > BYTE_SET_THRESHOLD)
+ return 2;
+
return 1;
}
--
2.14.1
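The idea: text-like data touches only a few dozen of the 256 possible byte values, while random or already-compressed data touches most of them. The kernel helper splits the loop so it can bail out once the threshold is crossed; the sketch below keeps a single loop for clarity, and its names and sample inputs are illustrative only:

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define BUCKET_SIZE		256
#define BYTE_SET_THRESHOLD	64	/* ~25% of 256 */

static uint32_t byte_set_size(const uint32_t *bucket)
{
	uint32_t i, set = 0;

	/* Count distinct byte values that occur at least once */
	for (i = 0; i < BUCKET_SIZE; i++)
		if (bucket[i] > 0)
			set++;
	return set;
}

int main(void)
{
	uint32_t bucket[BUCKET_SIZE] = { 0 };
	uint32_t i;
	const char *text = "the quick brown fox jumps over the lazy dog";

	for (i = 0; text[i]; i++)
		bucket[(uint8_t)text[i]]++;
	printf("text byte set: %u (<= %u => likely compressible)\n",
	       byte_set_size(bucket), BYTE_SET_THRESHOLD);

	memset(bucket, 0, sizeof(bucket));
	srand(1);
	for (i = 0; i < 8192; i++)
		bucket[rand() & 0xff]++;
	printf("random byte set: %u (> %u => keep checking)\n",
	       byte_set_size(bucket), BYTE_SET_THRESHOLD);
	return 0;
}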
* [PATCH v6 6/6] Btrfs: heuristic add byte core set calculation
From: Timofey Titovets @ 2017-08-23 22:45 UTC
To: linux-btrfs; +Cc: Timofey Titovets
Calculate the byte core set for the data sample:
sort the bucket counters in decreasing order and
count how many of them cover 90% of the sample.
If the core set is small (<= ~25% of 256), the data is likely easily compressible.
If the core set is large (>= ~80% of 256), the data is likely not compressible.
Signed-off-by: Timofey Titovets <nefelim4ag@gmail.com>
---
fs/btrfs/heuristic.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 50 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/heuristic.c b/fs/btrfs/heuristic.c
index 4815d1becf7e..8a5397b74597 100644
--- a/fs/btrfs/heuristic.c
+++ b/fs/btrfs/heuristic.c
@@ -18,6 +18,7 @@
#include <linux/pagemap.h>
#include <linux/string.h>
#include <linux/bio.h>
+#include <linux/sort.h>
#include "compression.h"
#define READ_SIZE 16
@@ -25,6 +26,8 @@
#define BUCKET_SIZE 256
#define MAX_SAMPLE_SIZE (BTRFS_MAX_UNCOMPRESSED*READ_SIZE/ITER_SHIFT)
#define BYTE_SET_THRESHOLD 64
+#define BYTE_CORE_SET_LOW BYTE_SET_THRESHOLD
+#define BYTE_CORE_SET_HIGH 200 /* ~80% */
struct bucket_item {
u32 count;
@@ -67,6 +70,45 @@ static struct list_head *heuristic_alloc_workspace(void)
return ERR_PTR(-ENOMEM);
}
+/* For bucket sorting */
+static inline int bucket_compare(const void *lv, const void *rv)
+{
+ const struct bucket_item *l = lv;
+ const struct bucket_item *r = rv;
+
+ return r->count - l->count;
+}
+
+/*
+ * Byte Core set size
+ * How many bytes use 90% of sample
+ */
+static int byte_core_set_size(struct workspace *ws)
+{
+ u32 a = 0;
+ u32 coreset_sum = 0;
+ u32 core_set_threshold = ws->sample_size * 90 / 100;
+ struct bucket_item *bucket = ws->bucket;
+
+ /* Sort in reverse order */
+ sort(bucket, BUCKET_SIZE, sizeof(*bucket),
+ &bucket_compare, NULL);
+
+ for (; a < BYTE_CORE_SET_LOW; a++)
+ coreset_sum += bucket[a].count;
+
+ if (coreset_sum > core_set_threshold)
+ return a;
+
+ for (; a < BYTE_CORE_SET_HIGH && bucket[a].count > 0; a++) {
+ coreset_sum += bucket[a].count;
+ if (coreset_sum > core_set_threshold)
+ break;
+ }
+
+ return a;
+}
+
static u32 byte_set_size(const struct workspace *ws)
{
u32 a = 0;
@@ -167,7 +209,14 @@ static int heuristic(struct list_head *ws, struct inode *inode,
if (a > BYTE_SET_THRESHOLD)
return 2;
- return 1;
+ a = byte_core_set_size(workspace);
+ if (a <= BYTE_CORE_SET_LOW)
+ return 3;
+
+ if (a >= BYTE_CORE_SET_HIGH)
+ return 0;
+
+ return 4;
}
const struct btrfs_compress_op btrfs_heuristic = {
--
2.14.1
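The core set measures how concentrated the byte histogram is: sort the counters in descending order and see how many values it takes to cover 90% of the sample. Per the patch, a small core set maps to return 3 (easily compressible), a large one to return 0 (not compressible), and anything in between to return 4. A user-space sketch of the calculation; it counts the covering values directly instead of capping the scan at BYTE_CORE_SET_HIGH, and its names and example data are illustrative only:

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

#define BUCKET_SIZE		256
#define BYTE_CORE_SET_LOW	64	/* ~25% of 256 */
#define BYTE_CORE_SET_HIGH	200	/* ~80% of 256 */

static int cmp_desc(const void *lv, const void *rv)
{
	const uint32_t *l = lv, *r = rv;

	return (*r > *l) - (*r < *l);	/* descending order */
}

static uint32_t byte_core_set_size(uint32_t *bucket, uint32_t sample_size)
{
	uint32_t threshold = sample_size * 90 / 100;
	uint32_t sum = 0, i;

	qsort(bucket, BUCKET_SIZE, sizeof(*bucket), cmp_desc);

	for (i = 0; i < BUCKET_SIZE && bucket[i] > 0; i++) {
		sum += bucket[i];
		if (sum > threshold)
			return i + 1;	/* number of values covering 90% */
	}

	return i;
}

int main(void)
{
	uint32_t bucket[BUCKET_SIZE] = { 0 };
	uint32_t i, core, sample_size = 8192;

	/* Hypothetical skewed sample: four byte values dominate */
	for (i = 0; i < sample_size; i++)
		bucket[i & 3]++;

	core = byte_core_set_size(bucket, sample_size);
	printf("core set: %u (low <= %u, high >= %u)\n",
	       core, BYTE_CORE_SET_LOW, BYTE_CORE_SET_HIGH);
	return 0;
}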