From fb2a329828e64ad0e224a8cb97dbc17147149629 Mon Sep 17 00:00:00 2001 From: Timofey Titovets Date: Mon, 23 Oct 2017 21:24:29 +0300 Subject: [PATCH] Btrfs: heuristic: try to avoid bucket sorting on edge data cases Heap sort as used in the kernel is too slow and costly, so let's make some statistical assumptions about edge input data cases, based on the observed difference between the min/max values in the bucket. Signed-off-by: Timofey Titovets --- fs/btrfs/compression.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 0ca16909894e..56b67ec4fb5b 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1310,8 +1310,46 @@ static int byte_core_set_size(struct heuristic_ws *ws) u32 i; u32 coreset_sum = 0; const u32 core_set_threshold = ws->sample_size * 90 / 100; + struct bucket_item *max, *min; + struct bucket_item tmp; struct bucket_item *bucket = ws->bucket; + + /* Presort to find the min/max values */ + max = &bucket[0]; + min = &bucket[BUCKET_SIZE - 1]; + for (i = 1; i < BUCKET_SIZE - 1; i++) { + if (bucket[i].count > max->count) { + tmp = *max; + *max = bucket[i]; + bucket[i] = tmp; + } + if (bucket[i].count < min->count) { + tmp = *min; + *min = bucket[i]; + bucket[i] = tmp; + } + } + + /* + * Hack to avoid sorting on edge data cases (sorting is too costly), + * i.e. heuristics
that will quickly filter easily compressible + * and badly compressible data + * Based on the observed number distribution on different data sets + * + * Assumption 1: For badly compressible data, the difference between min/max + * will be less than 0.6% of the sample size + * + * Assumption 2: For well compressible data, the difference between min/max + * will be far bigger than 4% of the sample size + */ + + if (max->count - min->count < ws->sample_size * 6 / 1000) + return BYTE_CORE_SET_HIGH + 1; + + if (max->count - min->count > ws->sample_size * 4 / 100) + return BYTE_CORE_SET_LOW - 1; + /* Sort in reverse order */ sort(bucket, BUCKET_SIZE, sizeof(*bucket), &bucket_comp_rev, NULL); -- 2.14.2