qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Jun Li <junmuzi@gmail.com>
To: qemu-devel@nongnu.org
Cc: kwolf@redhat.com, juli@redhat.com, famz@redhat.com,
	Jun Li <junmuzi@gmail.com>,
	stefanha@redhat.com
Subject: [Qemu-devel] [PATCH v5 1/3] qcow2: Add qcow2_shrink_l1_and_l2_table for qcow2 shrinking
Date: Sun, 26 Oct 2014 23:20:47 +0800	[thread overview]
Message-ID: <1414336849-21179-2-git-send-email-junmuzi@gmail.com> (raw)
In-Reply-To: <1414336849-21179-1-git-send-email-junmuzi@gmail.com>

This patch implements the new function qcow2_shrink_l1_and_l2_table().
This function shrinks/discards the L1 and L2 tables when a qcow2 image is shrunk.

Signed-off-by: Jun Li <junmuzi@gmail.com>
---
v5:
  Made some modifications based on Max's suggestions. Thanks to Max.
  In v5, qcow2_shrink_l1_and_l2_table does the L2 shrinking first and then the L1 shrinking. Because L1 shrinking needs to allocate some clusters for the new L1 table, in v5 it can re-use the clusters freed by the L2 shrinking.

v4:
  Handle COW clusters in the L2 table. When COW is used, some values of
(l2_entry >> s->cluster_bits) can be larger than s->refcount_table_size, so
such an l2_entry needs to be discarded.

v3:
  Fixed host cluster leak.
---
 block/qcow2-cluster.c | 182 ++++++++++++++++++++++++++++++++++++++++++++++++++
 block/qcow2.c         |  37 +++++++++-
 block/qcow2.h         |   2 +
 3 files changed, 218 insertions(+), 3 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 4d888c7..28d2d62 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -29,6 +29,9 @@
 #include "block/qcow2.h"
 #include "trace.h"
 
+static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
+                   uint64_t **l2_table);
+
 int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
                         bool exact_size)
 {
@@ -135,6 +138,185 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
     return ret;
 }
 
+int qcow2_shrink_l1_and_l2_table(BlockDriverState *bs, uint64_t new_l1_size,
+                                 int new_l2_index, int64_t boundary_size)
+{
+    BDRVQcowState *s = bs->opaque;
+    int new_l1_size2, ret, i;
+    uint64_t *new_l1_table;
+    int64_t new_l1_table_offset;
+    int64_t old_l1_table_offset, old_l1_size;
+    uint8_t data[12];
+    uint64_t l2_offset;
+    uint64_t *l2_table, l2_entry;
+    int64_t l2_free_entry; /* L2 entries from this index up get freed */
+    uint64_t *old_l1_table = s->l1_table;
+    int num = s->l1_size - new_l1_size;
+    /* NOTE(review): the L1 index below is >= s->l1_size when num >= 1 */
+    assert(new_l1_size <= s->l1_size);
+    while ((num >= -1) && (s->l1_size + num - 1 >= 0)) {
+        l2_free_entry = 0;
+        l2_offset = old_l1_table[s->l1_size + num - 1] & L1E_OFFSET_MASK;
+
+        if (l2_offset == 0) {
+            goto retry; /* skip: move on to the next L1 entry */
+        }
+        /* num == 0: free from new_l2_index up; index 0 skips this table */
+        if (num == 0) {
+            if (new_l2_index == 0) {
+                goto retry;
+            }
+            l2_free_entry = new_l2_index;
+        }
+
+        /* load l2_table into cache */
+        ret = l2_load(bs, l2_offset, &l2_table);
+
+        if (ret < 0) {
+            return ret;
+        }
+
+        for (i = s->l2_size - 1; i >= 0; i--) {
+            l2_entry = be64_to_cpu(l2_table[i]);
+
+            /* Due to COW, the clusters referenced by an L2 table are
+             * not necessarily in sequential order, so some entries may
+             * have l2_entry >= boundary_size when shrinking.
+             */
+            if (num == -1) {
+                if (l2_entry >= boundary_size) {
+                    goto free_cluster;
+                } else {
+                    continue;
+                }
+            }
+            /* NOTE(review): l2_entry is compared unmasked -- confirm */
+            /* Below l2_free_entry: free only if l2_entry >= boundary_size */
+            if (i <= l2_free_entry - 1) {
+                if (l2_entry >= boundary_size) {
+                    goto free_cluster;
+                }
+                continue;
+            }
+
+            switch (qcow2_get_cluster_type(l2_entry)) {
+            case QCOW2_CLUSTER_UNALLOCATED:
+                if (!bs->backing_hd) {
+                    continue;
+                }
+                break;
+
+            case QCOW2_CLUSTER_ZERO:
+                continue;
+
+            case QCOW2_CLUSTER_NORMAL:
+            case QCOW2_CLUSTER_COMPRESSED:
+                break;
+
+            default:
+                abort();
+            }
+
+        free_cluster:
+            qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+            /* Replace the entry: zero flag for version >= 3, else plain 0 */
+            if (s->qcow_version >= 3) {
+                l2_table[i] = cpu_to_be64(QCOW_OFLAG_ZERO);
+            } else {
+                l2_table[i] = cpu_to_be64(0);
+            }
+
+            /* Then decrease the refcount */
+            qcow2_free_any_clusters(bs, l2_entry, 1, QCOW2_DISCARD_MAX);
+        }
+
+        ret = qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+        if (ret < 0) {
+            return ret;
+        }
+        if (l2_free_entry == 0 && num != -1) {
+            qemu_vfree(l2_table); /* NOTE(review): after cache_put? */
+            qcow2_free_clusters(bs, l2_offset, s->cluster_size - 1,
+                                QCOW2_DISCARD_OTHER);
+        }
+    retry:
+        num--;
+    }
+
+    new_l1_size2 = sizeof(uint64_t) * new_l1_size;
+    new_l1_table = qemu_try_blockalign(bs->file,
+                                       align_offset(new_l1_size2, 512));
+    if (new_l1_table == NULL) {
+        return -ENOMEM;
+    }
+    memset(new_l1_table, 0, align_offset(new_l1_size2, 512));
+
+    /* Keep only the first new_l1_size entries of the current L1 table */
+    memcpy(new_l1_table, s->l1_table, new_l1_size2);
+
+    /* write new table (align to cluster) */
+    new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
+    /* NOTE(review): no error check on the allocation result -- confirm */
+    if ((new_l1_table_offset) >= boundary_size) {
+        goto fail; /* NOTE(review): ret is not set on this path */
+    }
+
+    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    /* the L1 position has not yet been updated, so these clusters must
+     * indeed be completely free */
+    ret = qcow2_pre_write_overlap_check(bs, 0, new_l1_table_offset,
+                                        new_l1_size2);
+
+    if (ret < 0) {
+        goto fail;
+    }
+
+    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
+    /* Convert to big-endian before writing the table to disk */
+    for (i = 0; i < new_l1_size; i++) {
+        new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
+    }
+
+    ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset,
+                           new_l1_table, new_l1_size2);
+    if (ret < 0) {
+        goto fail;
+    }
+    /* Convert back to host byte order for in-memory use */
+    for (i = 0; i < new_l1_size; i++) {
+        new_l1_table[i] = be64_to_cpu(new_l1_table[i]);
+    }
+
+    /* set new table: write l1_size and table offset into the header */
+    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
+    cpu_to_be32w((uint32_t *)data, new_l1_size);
+    stq_be_p(data + 4, new_l1_table_offset);
+    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size),
+                           data, sizeof(data));
+    if (ret < 0) {
+        goto fail;
+    }
+    /* Switch in-memory state, then release the old table's clusters */
+    qemu_vfree(s->l1_table);
+    old_l1_table_offset = s->l1_table_offset;
+    s->l1_table_offset = new_l1_table_offset;
+    s->l1_table = new_l1_table;
+    old_l1_size = s->l1_size;
+    s->l1_size = new_l1_size;
+    qcow2_free_clusters(bs, old_l1_table_offset, old_l1_size * sizeof(uint64_t),
+                        QCOW2_DISCARD_OTHER);
+    return 0;
+ fail:
+    qemu_vfree(new_l1_table);
+    qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2,
+                        QCOW2_DISCARD_OTHER);
+    return ret;
+}
+
 /*
  * l2_load
  *
diff --git a/block/qcow2.c b/block/qcow2.c
index d031515..d2b0dfe 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2111,10 +2111,41 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
         return -ENOTSUP;
     }
 
-    /* shrinking is currently not supported */
+    /* shrinking image */
     if (offset < bs->total_sectors * 512) {
-        error_report("qcow2 doesn't support shrinking images yet");
-        return -ENOTSUP;
+        /* As l1 table, l2 table, refcount table, refcount block table
+         * and file header of the qcow2 image need to use some clusters,
+         * so should subtract these metadata from offset.
+         */
+        int64_t nb_l1 = DIV_ROUND_UP((uint64_t)s->l1_size * sizeof(uint64_t),
+                                     s->cluster_size);
+        int64_t nb_l2 = DIV_ROUND_UP(offset, (uint64_t)s->l2_size <<
+                                     s->cluster_bits);
+        int64_t nb_refcount_block_table = DIV_ROUND_UP(offset, (uint64_t)
+                                                       s->cluster_size <<
+                                                       s->refcount_block_bits);
+        int64_t nb_refcount_table = DIV_ROUND_UP(nb_refcount_block_table << 3,
+                                                 s->cluster_size);
+        int64_t total_nb = 2 * nb_l2 + nb_l1 + nb_refcount_block_table +
+                           nb_refcount_table + 1;
+        int64_t offset_for_shrink = offset - (total_nb << s->cluster_bits);
+        int new_l2_index = offset_to_l2_index(s, offset_for_shrink);
+
+        new_l1_size = size_to_l1(s, offset_for_shrink);
+        ret = qcow2_shrink_l1_and_l2_table(bs, new_l1_size, new_l2_index,
+                                           offset);
+        if (ret < 0) {
+            return ret;
+        }
+
+        int64_t actual_size = bdrv_get_allocated_file_size(bs);
+
+        if (offset < actual_size) {
+            ret = bdrv_truncate(bs->file, offset);
+            if (ret < 0) {
+                return ret;
+            }
+        }
     }
 
     new_l1_size = size_to_l1(s, offset);
diff --git a/block/qcow2.h b/block/qcow2.h
index 577ccd1..be1237d 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -516,6 +516,8 @@ int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
 /* qcow2-cluster.c functions */
 int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
                         bool exact_size);
+int qcow2_shrink_l1_and_l2_table(BlockDriverState *bs, uint64_t new_l1_size,
+                                 int new_l2_index, int64_t boundary_size);
 int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
 void qcow2_l2_cache_reset(BlockDriverState *bs);
 int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
-- 
1.9.3

  reply	other threads:[~2014-10-26 15:21 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-10-26 15:20 [Qemu-devel] [PATCH v5 0/3] qcow2: Patch for shrinking qcow2 disk image Jun Li
2014-10-26 15:20 ` Jun Li [this message]
2014-11-21 10:56   ` [Qemu-devel] [PATCH v5 1/3] qcow2: Add qcow2_shrink_l1_and_l2_table for qcow2 shrinking Max Reitz
2014-11-24 17:49     ` Eric Blake
2015-01-03 12:23     ` Jun Li
2015-01-15 18:47       ` Max Reitz
2015-01-19 13:16         ` Jun Li
2015-01-22 19:14           ` Max Reitz
2015-01-27 14:06             ` Jun Li
2014-10-26 15:20 ` [Qemu-devel] [PATCH v5 2/3] qcow2: add update refcount table realization for update_refcount Jun Li
2014-11-21 12:41   ` Max Reitz
2014-11-24 18:11     ` Eric Blake
2014-10-26 15:20 ` [Qemu-devel] [PATCH v5 3/3] qcow2: Add qemu-iotests for qcow2 shrinking Jun Li
2014-11-21 13:01   ` Max Reitz
2014-11-10  8:36 ` [Qemu-devel] [PATCH v5 0/3] qcow2: Patch for shrinking qcow2 disk image Jun Li
2014-11-10  9:17   ` Kevin Wolf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1414336849-21179-2-git-send-email-junmuzi@gmail.com \
    --to=junmuzi@gmail.com \
    --cc=famz@redhat.com \
    --cc=juli@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).