qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Benoît Canet" <benoit@irqsave.net>
To: qemu-devel@nongnu.org
Cc: kwolf@redhat.com, pbonzini@redhat.com,
	"Benoît Canet" <benoit@irqsave.net>,
	stefanha@redhat.com
Subject: [Qemu-devel] [RFC V4 06/30] qcow2: Add qcow2_dedup and related functions
Date: Wed,  2 Jan 2013 17:16:09 +0100	[thread overview]
Message-ID: <1357143393-29832-7-git-send-email-benoit@irqsave.net> (raw)
In-Reply-To: <1357143393-29832-1-git-send-email-benoit@irqsave.net>

Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
 block/qcow2-dedup.c |  436 +++++++++++++++++++++++++++++++++++++++++++++++++++
 block/qcow2.h       |    5 +
 2 files changed, 441 insertions(+)

diff --git a/block/qcow2-dedup.c b/block/qcow2-dedup.c
index 4e99eb1..5901749 100644
--- a/block/qcow2-dedup.c
+++ b/block/qcow2-dedup.c
@@ -117,3 +117,439 @@ fail:
     *data = NULL;
     return ret;
 }
+
+/*
+ * Build a QCowHashNode structure
+ *
+ * @hash:               the given hash
+ * @physical_sect:      the cluster offset in the QCOW2 file
+ * @first_logical_sect: the first logical cluster offset written
+ * @ret:                the build QCowHashNode
+ */
+static QCowHashNode *qcow2_dedup_build_qcow_hash_node(QCowHash *hash,
+                                                  uint64_t physical_sect,
+                                                  uint64_t first_logical_sect)
+{
+    QCowHashNode *hash_node;
+
+    hash_node = g_new0(QCowHashNode, 1);
+    memcpy(hash_node->hash.data, hash->data, HASH_LENGTH);
+    hash_node->physical_sect = physical_sect;
+    hash_node->first_logical_sect = first_logical_sect;
+
+    return hash_node;
+}
+
+/*
+ * Compute the hash of a given cluster
+ *
+ * @data: a buffer containing the cluster data
+ * @hash: a QCowHash where to store the computed hash
+ * @ret:  0 on success, negative on error
+ */
+static int qcow2_compute_cluster_hash(BlockDriverState *bs,
+                                       QCowHash *hash,
+                                       uint8_t *data)
+{
+    return 0;
+}
+
+/*
+ * Get a QCowHashNode corresponding to a cluster data
+ *
+ * @phash:           if phash can be used no hash is computed
+ * @data:            a buffer containing the cluster
+ * @nb_clusters_processed: the number of cluster to skip in the buffer
+ * @err:             Error code if any
+ * @ret:             QCowHashNode of the duplicated cluster or NULL if not found
+ */
+static QCowHashNode *qcow2_get_hash_node_for_cluster(BlockDriverState *bs,
+                                                     QcowPersistantHash *phash,
+                                                     uint8_t *data,
+                                                     int nb_clusters_processed,
+                                                     int *err)
+{
+    BDRVQcowState *s = bs->opaque;
+    int ret = 0;
+    *err = 0;
+
+    /* no hash has been provided compute it and store it for later usage */
+    if (!phash->reuse) {
+        ret = qcow2_compute_cluster_hash(bs,
+                                         &phash->hash,
+                                         data +
+                                         nb_clusters_processed *
+                                         s->cluster_size);
+    }
+
+    /* do not reuse the hash anymore if it was precomputed */
+    phash->reuse = false;
+
+    if (ret < 0) {
+        *err = ret;
+        return NULL;
+    }
+
+    return g_tree_lookup(s->dedup_tree_by_hash, &phash->hash);
+}
+
+/*
+ * Build a QCowHashNode from a given QCowHash and insert it into the tree
+ *
+ * @hash: the given QCowHash
+ */
+static void qcow2_build_and_insert_hash_node(BlockDriverState *bs,
+                                             QCowHash *hash)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowHashNode *hash_node;
+
+    /* build the hash node with QCOW_FLAG_EMPTY as offsets so we will remember
+     * to fill these field later with real values.
+     */
+    hash_node = qcow2_dedup_build_qcow_hash_node(hash,
+                                                 QCOW_FLAG_EMPTY,
+                                                 QCOW_FLAG_EMPTY);
+    g_tree_insert(s->dedup_tree_by_hash, &hash_node->hash, hash_node);
+}
+
+/*
+ * Helper used to build a QCowHashElement
+ *
+ * @hash: the QCowHash to use
+ * @ret:  a newly allocated QCowHashElement containing the given hash
+ */
+static QCowHashElement *qcow2_build_dedup_hash(QCowHash *hash)
+{
+    QCowHashElement *dedup_hash;
+    dedup_hash = g_new0(QCowHashElement, 1);
+    memcpy(dedup_hash->hash.data, hash->data, HASH_LENGTH);
+    return dedup_hash;
+}
+
+/*
+ * Helper used to link a deduplicated cluster in the l2
+ *
+ * @logical_sect:  the cluster sector seen by the guest
+ * @physical_sect: the cluster sector in the QCOW2 file
+ * @overwrite:     true if we must overwrite the L2 table entry
+ * @ret:
+ */
+static int qcow2_dedup_link_l2(BlockDriverState *bs,
+                               uint64_t logical_sect,
+                               uint64_t physical_sect,
+                               bool overwrite)
+{
+    QCowL2Meta m = {
+        .alloc_offset   = physical_sect << 9,
+        .offset         = logical_sect << 9,
+        .nb_clusters    = 1,
+        .nb_available   = 0,
+        .cow_start = {
+            .offset     = 0,
+            .nb_sectors = 0,
+        },
+        .cow_end = {
+            .offset     = 0,
+            .nb_sectors = 0,
+        },
+        .oflag_copied   = false,
+        .overwrite      = overwrite,
+    };
+    return qcow2_alloc_cluster_link_l2(bs, &m);
+}
+
+/* Clear the QCOW_OFLAG_COPIED from the first L2 entry written for a physical
+ * cluster.
+ *
+ * @hash_node: the duplicated hash node
+ * @ret:       0 on success, negative on error
+ */
+static int qcow2_clear_l2_copied_flag_if_needed(BlockDriverState *bs,
+                                                QCowHashNode *hash_node)
+{
+    int ret = 0;
+    uint64_t first_logical_sect = hash_node->first_logical_sect;
+
+    /* QCOW_OFLAG_COPIED already cleared -> do nothing */
+    if (!(first_logical_sect & QCOW_FLAG_FIRST)) {
+        return 0;
+    }
+
+    /* note : QCOW_FLAG_FIRST == QCOW_OFLAG_COPIED */
+    first_logical_sect &= ~QCOW_FLAG_FIRST;
+
+    /* overwrite first L2 entry to clear QCOW_FLAG_COPIED */
+    ret = qcow2_dedup_link_l2(bs, first_logical_sect,
+                              hash_node->physical_sect,
+                              true);
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* remember that we dont't need to clear QCOW_OFLAG_COPIED again */
+    hash_node->first_logical_sect &= first_logical_sect;
+
+    return 0;
+}
+
+/* This function deduplicate a cluster
+ *
+ * @logical_sect: The logical sector of the write
+ * @hash_node:    The duplicated cluster hash node
+ * @ret:          0 on success, negative on error
+ */
+static int qcow2_deduplicate_cluster(BlockDriverState *bs,
+                                     uint64_t logical_sect,
+                                     QCowHashNode *hash_node)
+{
+    BDRVQcowState *s = bs->opaque;
+    int ret = 0;
+
+    /* create new L2 entry */
+    ret = qcow2_dedup_link_l2(bs, logical_sect,
+                              hash_node->physical_sect,
+                              false);
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* Increment the refcount of the cluster */
+    return update_refcount(bs,
+                           (hash_node->physical_sect /
+                            s->cluster_sectors) << s->cluster_bits,
+                            1, 1);
+}
+
+/* This function tries to deduplicate a given cluster.
+ *
+ * @sector_num:           the logical sector number we are trying to deduplicate
+ * @phash:                Used instead of computing the hash if provided
+ * @data:                 the buffer in which to look for a duplicated cluster
+ * @nb_clusters_processed: the number of cluster that must be skipped in data
+ * @ret:                  ret < 0 on error, 1 on deduplication else 0
+ */
+static int qcow2_try_dedup_cluster(BlockDriverState *bs,
+                                   QcowPersistantHash *phash,
+                                   uint64_t sector_num,
+                                   uint8_t *data,
+                                   int nb_clusters_processed)
+{
+    BDRVQcowState *s = bs->opaque;
+    int ret = 0;
+    QCowHashNode *hash_node;
+    uint64_t logical_sect;
+    uint64_t existing_physical_offset;
+    int pnum = s->cluster_sectors;
+
+    /* search the tree for duplicated cluster */
+    hash_node = qcow2_get_hash_node_for_cluster(bs,
+                                                phash,
+                                                data,
+                                                nb_clusters_processed,
+                                                &ret);
+
+    /* we won't reuse the hash on error */
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* if cluster is not duplicated store hash for later usage */
+    if (!hash_node) {
+        qcow2_build_and_insert_hash_node(bs, &phash->hash);
+        return 0;
+    }
+
+    logical_sect = sector_num & ~(s->cluster_sectors - 1);
+    ret = qcow2_get_cluster_offset(bs, logical_sect << 9,
+                                   &pnum, &existing_physical_offset);
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* if we are rewriting the same cluster at the same place do nothing */
+    if (existing_physical_offset == hash_node->physical_sect << 9) {
+        return 1;
+    }
+
+    /* take care of not having refcount > 1 and QCOW_OFLAG_COPIED at once */
+    ret = qcow2_clear_l2_copied_flag_if_needed(bs, hash_node);
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* do the deduplication */
+    ret = qcow2_deduplicate_cluster(bs, logical_sect,
+                                    hash_node);
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    return 1;
+}
+
+
+static void add_hash_to_undedupable_list(BlockDriverState *bs,
+                                                    QCowDedupState *ds)
+{
+    /* memorise hash for later storage in gtree and disk */
+    QCowHashElement *dedup_hash = qcow2_build_dedup_hash(&ds->phash.hash);
+    QTAILQ_INSERT_TAIL(&ds->undedupables, dedup_hash, next);
+}
+
+static int qcow2_dedup_starting_from_begining(BlockDriverState *bs,
+                                              QCowDedupState *ds,
+                                              uint64_t sector_num,
+                                              uint8_t *data,
+                                              int left_to_process)
+{
+    BDRVQcowState *s = bs->opaque;
+    int i;
+    int ret = 0;
+
+    for (i = 0; i < left_to_process; i++) {
+        ret = qcow2_try_dedup_cluster(bs,
+                                      &ds->phash,
+                                      sector_num + i * s->cluster_sectors,
+                                      data,
+                                      ds->nb_clusters_processed + i);
+
+        if (ret < 0) {
+            return ret;
+        }
+
+        /* stop if a cluster has not been deduplicated */
+        if (ret != 1) {
+            break;
+        }
+    }
+
+    return i;
+}
+
+static int qcow2_count_next_non_dedupable_clusters(BlockDriverState *bs,
+                                                   QCowDedupState *ds,
+                                                   uint8_t *data,
+                                                   int left_to_process)
+{
+    int i;
+    int ret = 0;
+    QCowHashNode *hash_node;
+
+    for (i = 0; i < left_to_process; i++) {
+        hash_node = qcow2_get_hash_node_for_cluster(bs,
+                                                  &ds->phash,
+                                                  data,
+                                                  ds->nb_clusters_processed + i,
+                                                  &ret);
+
+        if (ret < 0) {
+            return ret;
+        }
+
+        /* found a duplicated cluster : stop here */
+        if (hash_node) {
+            break;
+        }
+
+        qcow2_build_and_insert_hash_node(bs, &ds->phash.hash);
+        add_hash_to_undedupable_list(bs, ds);
+    }
+
+    return i;
+}
+
+
+/* Deduplicate all the cluster that can be deduplicated.
+ *
+ * Next it compute the number of non deduplicable sectors to come while storing
+ * the hashes of these sectors in a linked list for later usage.
+ * Then it compute the first duplicated cluster hash that come after non
+ * deduplicable cluster, this hash will be used at next call of the function
+ *
+ * @ds:              a structure containing the state of the deduplication
+ *                   for this write request
+ * @sector_num:      The logical sector
+ * @data:            the buffer containing the data to deduplicate
+ * @data_nr:         the size of the buffer in sectors
+ *
+ */
+int qcow2_dedup(BlockDriverState *bs,
+                QCowDedupState *ds,
+                uint64_t sector_num,
+                uint8_t *data,
+                int data_nr)
+{
+    BDRVQcowState *s = bs->opaque;
+    int ret = 0;
+    int deduped_clusters_nr = 0;
+    int left_to_process;
+    int begining_index;
+
+    begining_index = sector_num & (s->cluster_sectors - 1);
+
+    left_to_process = (data_nr / s->cluster_sectors) -
+                      ds->nb_clusters_processed;
+
+    /* start deduplicating all that can be cluster after cluster */
+    ret = qcow2_dedup_starting_from_begining(bs,
+                                             ds,
+                                             sector_num,
+                                             data,
+                                             left_to_process);
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    deduped_clusters_nr = ret;
+
+    left_to_process -= ret;
+    ds->nb_clusters_processed += ret;
+
+    /* We deduped everything till the end */
+    if (!left_to_process) {
+        ds->nb_undedupable_sectors = 0;
+        goto exit;
+    }
+
+    /* skip and account the first undedupable cluster found */
+    left_to_process--;
+    ds->nb_clusters_processed++;
+    ds->nb_undedupable_sectors += s->cluster_sectors;
+
+    add_hash_to_undedupable_list(bs, ds);
+
+    /* Count how many non duplicated sector can be written and memorize hashes
+     * to write them after data has reached disk.
+     */
+    ret = qcow2_count_next_non_dedupable_clusters(bs,
+                                                  ds,
+                                                  data,
+                                                  left_to_process);
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    left_to_process -= ret;
+    ds->nb_clusters_processed += ret;
+    ds->nb_undedupable_sectors += ret * s->cluster_sectors;
+
+    /* remember to reuse the last hash computed at new qcow2_dedup call */
+    if (left_to_process) {
+        ds->phash.reuse = true;
+    }
+
+exit:
+    if (!deduped_clusters_nr) {
+        return 0;
+    }
+
+    return deduped_clusters_nr * s->cluster_sectors - begining_index;
+}
diff --git a/block/qcow2.h b/block/qcow2.h
index 9403431..a61e004 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -451,5 +451,10 @@ int qcow2_dedup_read_missing_and_concatenate(BlockDriverState *bs,
                                              int sectors_nr,
                                              uint8_t **dedup_cluster_data,
                                              int *dedup_cluster_data_nr);
+int qcow2_dedup(BlockDriverState *bs,
+                QCowDedupState *ds,
+                uint64_t sector_num,
+                uint8_t *data,
+                int data_nr);
 
 #endif
-- 
1.7.10.4

  parent reply	other threads:[~2013-01-02 16:17 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-01-02 16:16 [Qemu-devel] [RFC V4 00/30] QCOW2 deduplication Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 01/30] qcow2: Add deduplication to the qcow2 specification Benoît Canet
2013-01-03 18:18   ` Eric Blake
2013-01-04 14:49     ` Benoît Canet
2013-01-16 14:50     ` Benoît Canet
2013-01-16 15:58       ` Eric Blake
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 02/30] qcow2: Add deduplication structures and fields Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 03/30] qcow2: Add qcow2_dedup_read_missing_and_concatenate Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 04/30] qcow2: Make update_refcount public Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 05/30] qcow2: Create a way to link to l2 tables when deduplicating Benoît Canet
2013-01-02 16:16 ` Benoît Canet [this message]
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 07/30] qcow2: Add qcow2_dedup_store_new_hashes Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 08/30] qcow2: Implement qcow2_compute_cluster_hash Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 09/30] qcow2: Extract qcow2_dedup_grow_table Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 10/30] qcow2: Add qcow2_dedup_grow_table and use it Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 11/30] qcow2: create function to load deduplication hashes at startup Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 12/30] qcow2: Load and save deduplication table header extension Benoît Canet
2013-01-05  0:02   ` Eric Blake
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 13/30] qcow2: Extract qcow2_do_table_init Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 14/30] qcow2-cache: Allow to choose table size at creation Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 15/30] qcow2: Add qcow2_dedup_init and qcow2_dedup_close Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 16/30] qcow2: Extract qcow2_add_feature and qcow2_remove_feature Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 17/30] block: Add qemu-img dedup create option Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 18/30] qcow2: Behave correctly when refcount reach 0 or 2^16 Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 19/30] qcow2: Integrate deduplication in qcow2_co_writev loop Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 20/30] qcow2: Serialize write requests when deduplication is activated Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 21/30] qcow2: Add verification of dedup table Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 22/30] qcow2: Adapt checking of QCOW_OFLAG_COPIED for dedup Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 23/30] qcow2: Add check_dedup_l2 in order to check l2 of dedup table Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 24/30] qcow2: Do not overwrite existing entries with QCOW_OFLAG_COPIED Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 25/30] qcow2: Integrate SKEIN hash algorithm in deduplication Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 26/30] qcow2: Add lazy refcounts to deduplication to prevent qcow2_cache_set_dependency loops Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 27/30] qcow2: Use large L2 table for deduplication Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 28/30] qcow: Set dedup cluster block size to 64KB Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 29/30] qcow2: init and cleanup deduplication Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 30/30] qemu-iotests: Filter dedup=on/off so existing tests don't break Benoît Canet
2013-01-02 16:42   ` Eric Blake
2013-01-02 16:50     ` Benoît Canet
2013-01-02 17:10 ` [Qemu-devel] [RFC V4 00/30] QCOW2 deduplication Troy Benjegerdes
2013-01-02 17:33   ` Benoît Canet
2013-01-02 18:01     ` Eric Blake
2013-01-02 18:16       ` Benoît Canet
2013-01-02 18:26     ` Troy Benjegerdes
2013-01-02 18:40       ` Benoît Canet
2013-01-02 18:47         ` ronnie sahlberg
2013-01-02 18:55           ` Benoît Canet
2013-01-02 19:18           ` Troy Benjegerdes
2013-01-03  2:16             ` ronnie sahlberg
2013-01-03 12:39       ` Stefan Hajnoczi
2013-01-03 19:51         ` Troy Benjegerdes
2013-01-04  7:09           ` Dietmar Maurer
2013-01-04  9:49           ` Stefan Hajnoczi
2013-01-03 17:18 ` Benoît Canet

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1357143393-29832-7-git-send-email-benoit@irqsave.net \
    --to=benoit@irqsave.net \
    --cc=kwolf@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).