qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Benoît Canet" <benoit@irqsave.net>
To: qemu-devel@nongnu.org
Cc: kwolf@redhat.com, pbonzini@redhat.com,
	"Benoît Canet" <benoit@irqsave.net>,
	stefanha@redhat.com
Subject: [Qemu-devel] [RFC V4 07/30] qcow2: Add qcow2_dedup_store_new_hashes.
Date: Wed,  2 Jan 2013 17:16:10 +0100	[thread overview]
Message-ID: <1357143393-29832-8-git-send-email-benoit@irqsave.net> (raw)
In-Reply-To: <1357143393-29832-1-git-send-email-benoit@irqsave.net>

Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
 block/qcow2-dedup.c |  315 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 block/qcow2.h       |    5 +
 2 files changed, 319 insertions(+), 1 deletion(-)

diff --git a/block/qcow2-dedup.c b/block/qcow2-dedup.c
index 5901749..2a444f5 100644
--- a/block/qcow2-dedup.c
+++ b/block/qcow2-dedup.c
@@ -29,6 +29,12 @@
 #include "qemu-common.h"
 #include "qcow2.h"
 
+static int qcow2_dedup_read_write_hash(BlockDriverState *bs,
+                                       QCowHash *hash,
+                                       uint64_t *first_logical_sect,
+                                       uint64_t physical_sect,
+                                       bool write);
+
 /*
  * Prepare a buffer containing all the required data required to compute cluster
  * sized deduplication hashes.
@@ -291,7 +297,11 @@ static int qcow2_clear_l2_copied_flag_if_needed(BlockDriverState *bs,
     /* remember that we dont't need to clear QCOW_OFLAG_COPIED again */
     hash_node->first_logical_sect &= first_logical_sect;
 
-    return 0;
+    /* clear the QCOW_FLAG_FIRST flag from disk */
+    return qcow2_dedup_read_write_hash(bs, &hash_node->hash,
+                                       &hash_node->first_logical_sect,
+                                       hash_node->physical_sect,
+                                       true);
 }
 
 /* This function deduplicate a cluster
@@ -553,3 +563,306 @@ exit:
 
     return deduped_clusters_nr * s->cluster_sectors - begining_index;
 }
+
+
+/* Create a deduplication table hash block, write its offset to disk and
+ * reference it in the RAM deduplication table
+ *
+ * When a step following the allocation fails the new block is freed again
+ *
+ * @index: index in the RAM deduplication table
+ * @ret:   offset on success, negative on error
+ */
+static int64_t qcow2_create_block(BlockDriverState *bs,
+                                  int32_t index)
+{
+    BDRVQcowState *s = bs->opaque;
+    int64_t offset;
+    uint64_t data64;
+    int ret = 0;
+
+    /* allocate a new dedup table hash block */
+    offset = qcow2_alloc_clusters(bs, s->hash_block_size);
+
+    if (offset < 0) {
+        return offset;
+    }
+
+    /* flush the refcount block cache before referencing the new block */
+    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+    if (ret < 0) {
+        goto free_fail;
+    }
+
+    /* write the new block offset (big endian) in the dedup table L1 */
+    data64 = cpu_to_be64(offset);
+    ret = bdrv_pwrite_sync(bs->file,
+                           s->dedup_table_offset +
+                           index * sizeof(uint64_t),
+                           &data64, sizeof(data64));
+
+    if (ret < 0) {
+        goto free_fail;
+    }
+
+    s->dedup_table[index] = offset;
+
+    return offset;
+
+free_fail:
+    qcow2_free_clusters(bs, offset, s->hash_block_size);
+    return ret;
+}
+
+/* Allocate a hash block for @index and return a zeroed cache entry for it
+ *
+ * @index: index in the RAM deduplication table
+ * @block: receives the entry obtained from qcow2_cache_get_empty
+ * @ret:   0 on success, negative on error
+ */
+static int qcow2_create_and_get_block(BlockDriverState *bs,
+                                      uint32_t index,
+                                      uint8_t **block)
+{
+    BDRVQcowState *s = bs->opaque;
+    int64_t new_offset = qcow2_create_block(bs, index);
+    int err;
+
+    if (new_offset < 0) {
+        return new_offset;
+    }
+
+    /* ask the dedup cache for an empty entry at the new block offset */
+    err = qcow2_cache_get_empty(bs, s->dedup_cluster_cache, new_offset,
+                                (void **) block);
+    if (err < 0) {
+        return err;
+    }
+
+    /* start from an all-zero block */
+    memset(*block, 0, s->hash_block_size);
+    return 0;
+}
+
+/* true when the RAM dedup table holds a non zero offset for @index */
+static inline bool qcow2_has_dedup_block(BlockDriverState *bs, uint32_t index)
+{
+    BDRVQcowState *s = bs->opaque;
+    return s->dedup_table[index] != 0;
+}
+
+/* Store @hash then big endian *@logical_sect at @offset; dirty the entry */
+static inline void qcow2_write_hash_to_block_and_dirty(BlockDriverState *bs,
+                                                       uint8_t *block,
+                                                       QCowHash *hash,
+                                                       int offset,
+                                                       uint64_t *logical_sect)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t be_sect = cpu_to_be64(*logical_sect);
+    memcpy(&block[offset], hash->data, HASH_LENGTH);
+    memcpy(&block[offset + HASH_LENGTH], &be_sect, sizeof(be_sect));
+    qcow2_cache_entry_mark_dirty(s->dedup_cluster_cache, block);
+}
+
+/* Load the hash stored at @offset in @block; return its sector field */
+static inline uint64_t qcow2_read_hash_from_block(uint8_t *block,
+                                                  QCowHash *hash, int offset)
+{
+    uint64_t be_sect;
+    memcpy(hash->data, &block[offset], HASH_LENGTH);
+    memcpy(&be_sect, &block[offset + HASH_LENGTH], sizeof(be_sect));
+    return be64_to_cpu(be_sect);
+}
+
+/* Read/write a given hash and cluster_sect from/to the dedup table
+ *
+ * This function doesn't flush the dedup cache to disk
+ *
+ * @hash:                     the hash to read or store
+ * @first_logical_sect:       logical sector of the QCOW_OFLAG_COPIED cluster
+ * @physical_sect:            sector of the cluster in QCOW2 file (in sectors)
+ * @write:                    true to write, false to read
+ * @ret:                      0 on success, negative errno on error
+ */
+static int qcow2_dedup_read_write_hash(BlockDriverState *bs,
+                                       QCowHash *hash,
+                                       uint64_t *first_logical_sect,
+                                       uint64_t physical_sect,
+                                       bool write)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint8_t *block = NULL;
+    int ret = 0;
+    int64_t cluster_number;
+    uint32_t index_in_dedup_table;
+    int offset_in_block;
+    int nb_hash_in_block = s->hash_block_size / (HASH_LENGTH + 8);
+
+    cluster_number = physical_sect / s->cluster_sectors;
+    index_in_dedup_table = cluster_number / nb_hash_in_block;
+
+    if (s->dedup_table_size <= index_in_dedup_table) {
+        return -ENOSPC;
+    }
+
+    /* if we must read and there is nothing to read return a null hash */
+    if (!qcow2_has_dedup_block(bs, index_in_dedup_table) && !write) {
+        memset(hash->data, 0, HASH_LENGTH);
+        *first_logical_sect = 0;
+        return 0;
+    }
+
+    if (qcow2_has_dedup_block(bs, index_in_dedup_table)) {
+        ret = qcow2_cache_get(bs,
+                              s->dedup_cluster_cache,
+                              s->dedup_table[index_in_dedup_table],
+                              (void **) &block);
+    } else {
+        ret = qcow2_create_and_get_block(bs,
+                                         index_in_dedup_table,
+                                         &block);
+    }
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* an entry is the hash followed by a 64 bit sector number */
+    offset_in_block = (cluster_number % nb_hash_in_block) *
+                      (HASH_LENGTH + 8);
+
+    if (write)  {
+        qcow2_write_hash_to_block_and_dirty(bs,
+                                            block,
+                                            hash,
+                                            offset_in_block,
+                                            first_logical_sect);
+    } else  {
+        *first_logical_sect = qcow2_read_hash_from_block(block,
+                                                         hash,
+                                                         offset_in_block);
+    }
+
+    /* release the cache entry and report a failure to do so */
+    return qcow2_cache_put(bs, s->dedup_cluster_cache, (void **) &block);
+}
+
+static inline bool is_hash_node_empty(QCowHashNode *hash_node)
+{
+    return (hash_node->physical_sect & QCOW_FLAG_EMPTY) != 0; /* unfilled */
+}
+
+/* Drop the hash node registered for a physical sector from both trees
+ *
+ * @physical_sect: The physical sector of the cluster corresponding to the hash
+ */
+static void qcow_remove_hash_node_by_sector(BlockDriverState *bs,
+                                            uint64_t physical_sect)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowHashNode *stale = g_tree_lookup(s->dedup_tree_by_sect, &physical_sect);
+
+    /* nothing is registered for this sector */
+    if (stale == NULL) {
+        return;
+    }
+
+    /* by-sector tree first, then by-hash tree, both keyed on node fields */
+    g_tree_remove(s->dedup_tree_by_sect, &stale->physical_sect);
+    g_tree_remove(s->dedup_tree_by_hash, &stale->hash);
+}
+
+/* Complete an empty hash node with its sectors and persist the hash
+ *
+ * @dedup_hash:     the QCowHashElement to process
+ * @logical_sect:   the logical sector of the cluster seen by the guest
+ * @physical_sect:  the physical sector of the stored cluster
+ * @ret:            0 on success, negative on error
+ */
+static int qcow2_store_dedup_hash(BlockDriverState *bs,
+                                  QCowHashElement *dedup_hash,
+                                  uint64_t logical_sect,
+                                  uint64_t physical_sect)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowHashNode *node;
+    uint64_t flagged_sect;
+
+    node = g_tree_lookup(s->dedup_tree_by_hash, &dedup_hash->hash);
+
+    /* Nothing to do when the hash is unknown or when its node has already
+     * been completed.
+     */
+    if (node == NULL || !is_hash_node_empty(node)) {
+        return 0;
+    }
+
+    /* Remember that this QCowHashNode represents the first occurrence of the
+     * cluster so we will be able to clear QCOW_OFLAG_COPIED from the L2 table
+     * entry when refcount will go > 1.
+     */
+    flagged_sect = logical_sect | QCOW_FLAG_FIRST;
+
+    /* a stale node may still be registered for this physical sector: drop it
+     * from the trees before reusing the sector as a key
+     */
+    qcow_remove_hash_node_by_sector(bs, physical_sect);
+
+    /* fill the fields that were missing from the node */
+    node->physical_sect = physical_sect;
+    node->first_logical_sect = flagged_sect;
+
+    /* the node already lives in the by-hash tree: index it by sector too */
+    g_tree_insert(s->dedup_tree_by_sect, &node->physical_sect, node);
+
+    /* write the hash to disk */
+    return qcow2_dedup_read_write_hash(bs,
+                                       &dedup_hash->hash,
+                                       &flagged_sect,
+                                       physical_sect,
+                                       true);
+}
+
+/* This function stores the hashes of the clusters which are not duplicated
+ *
+ * @ds:            The deduplication state
+ * @count:         the number of dedup hashes to process (none when <= 0)
+ * @logical_sect:  logical offset of the first cluster (in sectors)
+ * @physical_sect: offset of the first cluster (in sectors)
+ * @ret:           0 on success, negative on error
+ */
+int qcow2_dedup_store_new_hashes(BlockDriverState *bs,
+                                 QCowDedupState *ds,
+                                 int count,
+                                 uint64_t logical_sect,
+                                 uint64_t physical_sect)
+{
+    int ret = 0;
+    int i = 0;
+    BDRVQcowState *s = bs->opaque;
+    QCowHashElement *dedup_hash, *next_dedup_hash;
+
+    QTAILQ_FOREACH_SAFE(dedup_hash, &ds->undedupables, next, next_dedup_hash) {
+        /* stop before touching the list once count hashes are done */
+        if (i >= count) {
+            break;
+        }
+
+        ret = qcow2_store_dedup_hash(bs,
+                                     dedup_hash,
+                                     logical_sect + i * s->cluster_sectors,
+                                     physical_sect + i * s->cluster_sectors);
+
+        /* the element is consumed whether or not the store succeeded */
+        QTAILQ_REMOVE(&ds->undedupables, dedup_hash, next);
+        g_free(dedup_hash);
+
+        if (ret < 0) {
+            break;
+        }
+
+        i++;
+    }
+
+    return ret;
+}
diff --git a/block/qcow2.h b/block/qcow2.h
index a61e004..2b23dc3 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -456,5 +456,10 @@ int qcow2_dedup(BlockDriverState *bs,
                 uint64_t sector_num,
                 uint8_t *data,
                 int data_nr);
+int qcow2_dedup_store_new_hashes(BlockDriverState *bs,
+                                 QCowDedupState *ds,
+                                 int count,
+                                 uint64_t logical_sect,
+                                 uint64_t physical_sect);
 
 #endif
-- 
1.7.10.4

  parent reply	other threads:[~2013-01-02 16:17 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-01-02 16:16 [Qemu-devel] [RFC V4 00/30] QCOW2 deduplication Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 01/30] qcow2: Add deduplication to the qcow2 specification Benoît Canet
2013-01-03 18:18   ` Eric Blake
2013-01-04 14:49     ` Benoît Canet
2013-01-16 14:50     ` Benoît Canet
2013-01-16 15:58       ` Eric Blake
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 02/30] qcow2: Add deduplication structures and fields Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 03/30] qcow2: Add qcow2_dedup_read_missing_and_concatenate Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 04/30] qcow2: Make update_refcount public Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 05/30] qcow2: Create a way to link to l2 tables when deduplicating Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 06/30] qcow2: Add qcow2_dedup and related functions Benoît Canet
2013-01-02 16:16 ` Benoît Canet [this message]
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 08/30] qcow2: Implement qcow2_compute_cluster_hash Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 09/30] qcow2: Extract qcow2_dedup_grow_table Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 10/30] qcow2: Add qcow2_dedup_grow_table and use it Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 11/30] qcow2: create function to load deduplication hashes at startup Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 12/30] qcow2: Load and save deduplication table header extension Benoît Canet
2013-01-05  0:02   ` Eric Blake
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 13/30] qcow2: Extract qcow2_do_table_init Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 14/30] qcow2-cache: Allow to choose table size at creation Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 15/30] qcow2: Add qcow2_dedup_init and qcow2_dedup_close Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 16/30] qcow2: Extract qcow2_add_feature and qcow2_remove_feature Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 17/30] block: Add qemu-img dedup create option Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 18/30] qcow2: Behave correctly when refcount reach 0 or 2^16 Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 19/30] qcow2: Integrate deduplication in qcow2_co_writev loop Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 20/30] qcow2: Serialize write requests when deduplication is activated Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 21/30] qcow2: Add verification of dedup table Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 22/30] qcow2: Adapt checking of QCOW_OFLAG_COPIED for dedup Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 23/30] qcow2: Add check_dedup_l2 in order to check l2 of dedup table Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 24/30] qcow2: Do not overwrite existing entries with QCOW_OFLAG_COPIED Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 25/30] qcow2: Integrate SKEIN hash algorithm in deduplication Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 26/30] qcow2: Add lazy refcounts to deduplication to prevent qcow2_cache_set_dependency loops Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 27/30] qcow2: Use large L2 table for deduplication Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 28/30] qcow: Set dedup cluster block size to 64KB Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 29/30] qcow2: init and cleanup deduplication Benoît Canet
2013-01-02 16:16 ` [Qemu-devel] [RFC V4 30/30] qemu-iotests: Filter dedup=on/off so existing tests don't break Benoît Canet
2013-01-02 16:42   ` Eric Blake
2013-01-02 16:50     ` Benoît Canet
2013-01-02 17:10 ` [Qemu-devel] [RFC V4 00/30] QCOW2 deduplication Troy Benjegerdes
2013-01-02 17:33   ` Benoît Canet
2013-01-02 18:01     ` Eric Blake
2013-01-02 18:16       ` Benoît Canet
2013-01-02 18:26     ` Troy Benjegerdes
2013-01-02 18:40       ` Benoît Canet
2013-01-02 18:47         ` ronnie sahlberg
2013-01-02 18:55           ` Benoît Canet
2013-01-02 19:18           ` Troy Benjegerdes
2013-01-03  2:16             ` ronnie sahlberg
2013-01-03 12:39       ` Stefan Hajnoczi
2013-01-03 19:51         ` Troy Benjegerdes
2013-01-04  7:09           ` Dietmar Maurer
2013-01-04  9:49           ` Stefan Hajnoczi
2013-01-03 17:18 ` Benoît Canet

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1357143393-29832-8-git-send-email-benoit@irqsave.net \
    --to=benoit@irqsave.net \
    --cc=kwolf@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).