From: "Benoît Canet" <benoit@irqsave.net>
To: qemu-devel@nongnu.org
Cc: kwolf@redhat.com, "Benoît Canet" <benoit@irqsave.net>,
stefanha@redhat.com
Subject: [Qemu-devel] [RFC V3 06/24] qcow2: Add qcow2_dedup and related functions.
Date: Mon, 26 Nov 2012 14:05:05 +0100 [thread overview]
Message-ID: <1353935123-24199-7-git-send-email-benoit@irqsave.net> (raw)
In-Reply-To: <1353935123-24199-1-git-send-email-benoit@irqsave.net>
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/qcow2-dedup.c | 312 +++++++++++++++++++++++++++++++++++++++++++++++++++
block/qcow2.h | 13 +++
2 files changed, 325 insertions(+)
diff --git a/block/qcow2-dedup.c b/block/qcow2-dedup.c
index e4e6108..a7c7202 100644
--- a/block/qcow2-dedup.c
+++ b/block/qcow2-dedup.c
@@ -29,6 +29,8 @@
#include "qemu-common.h"
#include "qcow2.h"
+#define HASH_LENGTH 32
+
/**
* Read some data from the QCOW2 file
*
@@ -148,3 +150,313 @@ int qcow2_dedup_read_missing_and_concatenate(BlockDriverState *bs,
fail:
return ret;
}
+
+/*
+ * Build a QCowHashNode structure
+ *
+ * @hash: the given hash
+ * @physical_offset: the cluster offset in the QCOW2 file
+ * @first_offset: the first logical cluster offset written
+ * @ret: the build QCowHashNode
+ */
+static QCowHashNode *qcow2_dedup_build_qcow_hash_node(uint8_t *hash,
+ uint64_t physical_offset,
+ uint64_t first_offset)
+{
+ QCowHashNode *data;
+
+ data = g_new0(QCowHashNode, 1);
+ data->hash = hash;
+ data->offset = physical_offset;
+ data->first_logical_offset = first_offset;
+
+ return data;
+}
+
+/*
+ * Compute the hash of a given cluster
+ *
+ * @data: a buffer containing the cluster data
+ * @ret: a HASH_LENGTH long dynamically allocated array containing the hash
+ */
+static uint8_t *qcow2_compute_cluster_hash(BlockDriverState *bs,
+ uint8_t *data)
+{
+ return NULL;
+}
+
+/* Try to find the offset of a given cluster if it's duplicated
+ * Exceptionally we cast return value to int64_t to use as error code.
+ *
+ * @data: a buffer containing the cluster
+ * @skip_cluster_nr: the number of cluster to skip in the buffer
+ * @hash: if hash is provided it's used else it's computed
+ * @ret: QCowHashNode of the duplicated cluster or NULL
+ */
+static QCowHashNode *qcow2_build_hash_lookup_offset(BlockDriverState *bs,
+ uint8_t *data,
+ int skip_cluster_nr,
+ uint8_t **hash)
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowHashNode *hash_node;
+ if (!*hash) {
+ /* no hash has been provided compute it and store it for caller usage */
+ *hash = qcow2_compute_cluster_hash(bs,
+ data + skip_cluster_nr *
+ s->cluster_size);
+ }
+ hash_node = g_tree_lookup(s->dedup_tree_by_hash, *hash);
+ if (hash_node) {
+ return hash_node;
+ }
+
+ /* cluster not duplicated */
+ hash_node = qcow2_dedup_build_qcow_hash_node(*hash,
+ QCOW_FLAG_EMPTY,
+ QCOW_FLAG_EMPTY);
+ g_tree_insert(s->dedup_tree_by_hash, *hash, hash_node);
+
+ return NULL;
+}
+
+/*
+ * Helper used to build a QCowHashElement
+ *
+ * @hash: the hash to use
+ * @ret: a newly allocated QCowHashElement pointing to the given hash
+ */
+static QCowHashElement *qcow2_build_dedup_hash(uint8_t *hash)
+{
+ QCowHashElement *dedup_hash;
+ dedup_hash = g_new0(QCowHashElement, 1);
+ dedup_hash->hash = hash;
+ return dedup_hash;
+}
+
+/*
+ * Helper used to link a deduplicated cluster in the l2
+ *
+ * @logical_cluster_offset: the cluster offset seen by the guest (in sectors)
+ * @physical_cluster_offset: the cluster offset in the QCOW2 file (in sectors)
+ * @overwrite: true if we are overwriting an l2 table
+ * @ret:
+ */
+static int qcow2_dedup_link_l2(BlockDriverState *bs,
+ uint64_t logical_cluster_offset,
+ uint64_t physical_cluster_offset,
+ bool overwrite)
+{
+ QCowL2Meta m;
+ /* function correctness regarding copy on write ? */
+ m.offset = logical_cluster_offset << 9;
+ m.alloc_offset = physical_cluster_offset << 9;
+ m.nb_clusters = 1; /* we are linking only one cluster in l2 */
+ m.cluster_offset = 0;
+ m.n_start = 0;
+ m.nb_available = 0;
+ m.oflag_copied = false;
+ m.overwrite = overwrite;
+ return qcow2_alloc_cluster_link_l2(bs, &m);
+}
+
+/* This function tries to deduplicate a given cluster.
+ *
+ * @sector_num: the logical sector number we are trying to deduplicate
+ * @precomputed_hash: Used instead of computing the hash if provided
+ * @data: the buffer in which to look for a duplicated cluster
+ * @skip_clusters_nr: the number of cluster that must be skipped in data
+ * @non_duplicated_dedup_hash: returned if the cluster is not deduplicated
+ * @ret: ret < 0 on error, 1 on deduplication else 0
+ */
+static int qcow2_dedup_cluster(BlockDriverState *bs,
+ uint64_t sector_num,
+ uint8_t **precomputed_hash,
+ uint8_t *data,
+ int skip_clusters_nr,
+ QCowHashElement **non_duplicated_dedup_hash)
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowHashNode *hash_node;
+ int64_t physical_cluster_offset;
+ uint64_t first_logical_offset;
+ uint64_t logical_cluster_offset;
+ uint64_t existing_physical_cluster_offset;
+ int ret;
+ int pnum = s->cluster_sectors;
+ *non_duplicated_dedup_hash = NULL;
+
+ /* look if the cluster is duplicated */
+ hash_node = qcow2_build_hash_lookup_offset(bs,
+ data,
+ skip_clusters_nr,
+ precomputed_hash);
+
+ /* round the logical cluster offset to cluster boundaries */
+ logical_cluster_offset = sector_num & ~(s->cluster_sectors - 1);
+ ret = qcow2_get_cluster_offset(bs, logical_cluster_offset << 9,
+ &pnum, &existing_physical_cluster_offset);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ if (!hash_node) {
+ /* no duplicated cluster found, store the hash for later usage */
+ *non_duplicated_dedup_hash = qcow2_build_dedup_hash(*precomputed_hash);
+ return 0;
+ } else {
+ /* duplicated cluster found */
+ physical_cluster_offset = hash_node->offset;
+ first_logical_offset = hash_node->first_logical_offset;
+
+ if (existing_physical_cluster_offset != physical_cluster_offset << 9) {
+
+ ret = update_cluster_refcount(bs,
+ physical_cluster_offset /
+ s->cluster_sectors,
+ 1,
+ false);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ ret = qcow2_dedup_link_l2(bs, logical_cluster_offset,
+ physical_cluster_offset,
+ false);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ /* if refcount was one remove the QCOW_FLAG_FIRST flag */
+ if (first_logical_offset & QCOW_FLAG_FIRST) {
+ first_logical_offset &= ~QCOW_FLAG_FIRST;
+ ret = qcow2_dedup_link_l2(bs, first_logical_offset,
+ physical_cluster_offset,
+ true);
+ if (ret < 0) {
+ goto exit;
+ }
+ hash_node->first_logical_offset = first_logical_offset;
+ }
+ }
+ }
+
+ ret = 1;
+exit:
+ g_free(*precomputed_hash);
+ *precomputed_hash = NULL;
+ return ret;
+}
+
+/*
+ * Deduplicate all the cluster that can be deduplicated.
+ *
+ * Next it compute the number of non deduplicable sectors to come while storing
+ * the hashes of these sectors in a linked list for later usage.
+ * Then it compute the first duplicated cluster hash that come after non
+ * deduplicable cluster, this hash will be used at next call of the function
+ *
+ * @u: where to store the list of undedupable hashes
+ * @sector_num: the first sector to deduplicate (in sectors)
+ * @data: the buffer containing the data to deduplicate
+ * @data_nr: the size of the buffer in sectors
+ * @skip_cluster_nr: the number of cluster to skip at the begining of data
+ * @next_non_dedupable_sectors_nr: result containing the number of non
+ * deduplicable sectors to come
+ * next_call_first_hash: hash saved between the function call
+ *
+ *
+ *
+ */
+int qcow2_dedup(BlockDriverState *bs,
+ UndedupableHashes *u,
+ uint64_t sector_num,
+ uint8_t *data,
+ int data_nr,
+ int *skip_clusters_nr,
+ int *next_non_dedupable_sectors_nr,
+ uint8_t **next_call_first_hash)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret;
+ int deduped_clusters_nr = 0;
+ int left_to_test_clusters_nr;
+ int begining_index;
+ uint8_t *hash = NULL;
+ QCowHashElement *non_duplicated_dedup_hash = NULL;
+
+ /* should already be zero when entering this function */
+ assert(*next_non_dedupable_sectors_nr == 0);
+
+ begining_index = sector_num & (s->cluster_sectors - 1);
+
+ left_to_test_clusters_nr = (data_nr / s->cluster_sectors) -
+ *skip_clusters_nr;
+
+ /* Deduplicate all that can be */
+ while (left_to_test_clusters_nr-- &&
+ (ret = qcow2_dedup_cluster(bs,
+ sector_num,
+ next_call_first_hash,
+ data,
+ (*skip_clusters_nr)++,
+ &non_duplicated_dedup_hash)) == 1) {
+ sector_num += s->cluster_sectors;
+ deduped_clusters_nr++;
+ }
+
+ if (ret < 0) {
+ *next_call_first_hash = NULL;
+ goto exit;
+ }
+
+ /* We deduped everything till the end */
+ if (!non_duplicated_dedup_hash) {
+ *next_call_first_hash = NULL;
+ *next_non_dedupable_sectors_nr = 0;
+ goto exit;
+ }
+
+ /* We consumed the precomputed hash */
+ *next_call_first_hash = NULL;
+
+ /* remember that the last sector we tested in non deduplicable */
+ *next_non_dedupable_sectors_nr += s->cluster_sectors;
+
+ /* Memorize the hash of the first non duplicated cluster.
+ * we will store it before writing the cluster to disk.
+ */
+ QTAILQ_INSERT_TAIL(&u->undedupable_hashes, non_duplicated_dedup_hash, next);
+
+ /* Count how many non duplicated sector can be written and memorize the
+ * hashes for later.
+ * We make sure we pass hash == NULL to force computation of the hash.
+ */
+ hash = NULL;
+ while (left_to_test_clusters_nr-- > 0 &&
+ !qcow2_build_hash_lookup_offset(bs,
+ data,
+ *skip_clusters_nr,
+ &hash)) {
+ *next_non_dedupable_sectors_nr += s->cluster_sectors;
+ non_duplicated_dedup_hash = qcow2_build_dedup_hash(hash);
+ QTAILQ_INSERT_TAIL(&u->undedupable_hashes,
+ non_duplicated_dedup_hash, next);
+ hash = NULL;
+ (*skip_clusters_nr)++;
+ }
+
+ /* We find a duplicated cluster before stopping iterating store it for
+ * next call.
+ */
+ if (hash && non_duplicated_dedup_hash &&
+ non_duplicated_dedup_hash->hash != hash) {
+ *next_call_first_hash = hash;
+ }
+
+exit:
+ if (!deduped_clusters_nr) {
+ return 0;
+ }
+ return deduped_clusters_nr * s->cluster_sectors - begining_index;
+}
diff --git a/block/qcow2.h b/block/qcow2.h
index ccb24ad..5c18425 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -58,6 +58,11 @@
#define DEFAULT_CLUSTER_SIZE 65536
+/* indicate that the hash structure is empty and miss offset */
+#define QCOW_FLAG_EMPTY (1LL << 62)
+/* indicate that the cluster for this hash has QCOW_OFLAG_COPIED on disk */
+#define QCOW_FLAG_FIRST (1LL << 63)
+
/* deduplication node */
typedef struct {
uint8_t *hash; /* 32 bytes hash of a given cluster */
@@ -372,5 +377,13 @@ int qcow2_dedup_read_missing_and_concatenate(BlockDriverState *bs,
int sectors_nr,
uint8_t **dedup_cluster_data,
int *dedup_cluster_data_nr);
+int qcow2_dedup(BlockDriverState *bs,
+ UndedupableHashes *u,
+ uint64_t sector_num,
+ uint8_t *data,
+ int data_nr,
+ int *skip_clusters_nr,
+ int *next_non_dedupable_sectors_nr,
+ uint8_t **next_call_first_hash);
#endif
--
1.7.10.4
next prev parent reply other threads:[~2012-11-26 13:06 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-11-26 13:04 [Qemu-devel] [RFC V3 00/24] QCOW2 deduplication Benoît Canet
2012-11-26 13:05 ` [Qemu-devel] [RFC V3 01/24] qcow2: Add deduplication to the qcow2 specification Benoît Canet
2012-12-11 11:28 ` Stefan Hajnoczi
2012-12-11 11:32 ` Stefan Hajnoczi
2012-12-12 15:57 ` Benoît Canet
2012-12-18 13:38 ` Stefan Hajnoczi
2012-12-11 23:03 ` Eric Blake
2012-12-12 15:59 ` Benoît Canet
2012-11-26 13:05 ` [Qemu-devel] [RFC V3 02/24] qcow2: Add deduplication structures and fields Benoît Canet
2012-12-11 11:34 ` Stefan Hajnoczi
2012-11-26 13:05 ` [Qemu-devel] [RFC V3 03/24] qcow2: Add qcow2_dedup_read_missing_and_concatenate Benoît Canet
2012-12-11 11:52 ` Stefan Hajnoczi
2012-11-26 13:05 ` [Qemu-devel] [RFC V3 04/24] qcow2: Make update_cluster_refcount public Benoît Canet
2012-11-26 13:05 ` [Qemu-devel] [RFC V3 05/24] qcow2: Create a way to link to l2 tables in dedup Benoît Canet
2012-11-26 13:05 ` Benoît Canet [this message]
2012-12-11 13:16 ` [Qemu-devel] [RFC V3 06/24] qcow2: Add qcow2_dedup and related functions Stefan Hajnoczi
2012-11-26 13:05 ` [Qemu-devel] [RFC V3 07/24] qcow2: Add qcow2_dedup_write_new_hashes Benoît Canet
2012-11-26 13:05 ` [Qemu-devel] [RFC V3 08/24] qcow2: Implement qcow2_compute_cluster_hash Benoît Canet
2012-12-11 13:28 ` Stefan Hajnoczi
2012-11-26 13:05 ` [Qemu-devel] [RFC V3 09/24] qcow2: Extract qcow2_dedup_grow_table Benoît Canet
2012-11-26 13:05 ` [Qemu-devel] [RFC V3 10/24] qcow2: create function to load deduplication hashes at startup Benoît Canet
2012-11-26 13:05 ` [Qemu-devel] [RFC V3 11/24] qcow2: Load and save deduplication table header extension Benoît Canet
2012-11-26 13:05 ` [Qemu-devel] [RFC V3 12/24] qcow2: Extract qcow2_do_table_init Benoît Canet
2012-11-26 13:05 ` [Qemu-devel] [RFC V3 13/24] qcow2: Add qcow2_dedup_init and qcow2_dedup_close Benoît Canet
2012-11-26 13:05 ` [Qemu-devel] [RFC V3 14/24] qcow2: Extract qcow2_add_feature and qcow2_remove_feature Benoît Canet
2012-11-26 13:05 ` [Qemu-devel] [RFC V3 15/24] block: Add dedup image create option Benoît Canet
2012-11-26 13:05 ` [Qemu-devel] [RFC V3 16/24] qcow2: Allow creation of images using deduplication Benoît Canet
2012-11-26 13:05 ` [Qemu-devel] [RFC V3 17/24] qcow2: Behave correctly when refcount reach 0 or 2^16 Benoît Canet
2012-11-26 13:05 ` [Qemu-devel] [RFC V3 18/24] qcow2: Integrate deduplication in qcow2_co_writev loop Benoît Canet
2012-11-26 13:05 ` [Qemu-devel] [RFC V3 19/24] qcow2: Add verification of dedup table Benoît Canet
2012-11-26 13:05 ` [Qemu-devel] [RFC V3 20/24] qcow2: Adapt checking of QCOW_OFLAG_COPIED for dedup Benoît Canet
2012-11-26 13:05 ` [Qemu-devel] [RFC V3 21/24] qcow2: Add check_dedup_l2 in order to check l2 of dedup table Benoît Canet
2012-11-26 13:05 ` [Qemu-devel] [RFC V3 22/24] qcow2: Do not overwrite existing entries with QCOW_OFLAG_COPIED Benoît Canet
2012-11-26 13:05 ` [Qemu-devel] [RFC V3 23/24] qcow2: init and cleanup deduplication Benoît Canet
2012-11-26 13:05 ` [Qemu-devel] [RFC V3 24/24] qemu-iotests: Filter dedup=on/off so existing tests don't break Benoît Canet
2012-12-11 14:19 ` [Qemu-devel] [RFC V3 00/24] QCOW2 deduplication Stefan Hajnoczi
2012-12-11 14:38 ` Stefan Hajnoczi
2012-12-12 16:14 ` Benoît Canet
2012-12-18 13:42 ` Stefan Hajnoczi
2012-12-24 12:26 ` Benoît Canet
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1353935123-24199-7-git-send-email-benoit@irqsave.net \
--to=benoit@irqsave.net \
--cc=kwolf@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).