[Qemu-devel] [PATCH] CPU consumption optimization of 'qemu-img convert' using bdrv_is_allocated()

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: Dmitry Konishchev <konishchev@gmail.com>
To: qemu-devel@nongnu.org
Cc: Kevin Wolf <kwolf@redhat.com>, Dmitry Konishchev <konishchev@gmail.com>
Subject: [Qemu-devel] [PATCH] CPU consumption optimization of 'qemu-img convert' using bdrv_is_allocated()
Date: Wed,  8 Jun 2011 18:50:25 +0400	[thread overview]
Message-ID: <1307544625-22907-1-git-send-email-konishchev@gmail.com> (raw)

This patch optimizes the 'qemu-img convert' operation for volumes which are
almost fully unallocated. Here are the results of some simple tests:

We have a snapshot of a volume:
$ qemu-img info snapshot.qcow2
image: snapshot.qcow2
file format: qcow2
virtual size: 5.0G (5372805120 bytes)
disk size: 4.0G
cluster_size: 65536

Create a volume from the snapshot and use it a little:
$ qemu-img create -f qcow2 -o backing_file=snapshot.qcow2 volume.qcow2

For volumes which are almost fully allocated, there is a slight regression:
$ time qemu-img convert -O qcow2 volume.qcow2 volume_snapshot.qcow2
real  2m43.864s
user  0m9.257s
sys   0m40.559s
$ time qemu-img-patched convert -O qcow2 volume.qcow2 volume_snapshot.qcow2
real  2m46.899s
user  0m9.749s
sys   0m40.471s

But now create a volume which is almost fully unallocated:
$ qemu-img create -f qcow2 -o backing_file=snapshot.qcow2 volume.qcow2 1T

And now CPU consumption is reduced by more than half:
$ time qemu-img convert -O qcow2 volume.qcow2 volume_snapshot.qcow2
real  6m40.985s
user  4m13.832s
sys   0m33.738s
$ time qemu-img-patched convert -O qcow2 volume.qcow2 volume_snapshot.qcow2
real  4m28.448s
user  1m43.882s
sys   0m33.894s

Signed-off-by: Dmitry Konishchev <konishchev@gmail.com>
---
 qemu-img.c |  168 +++++++++++++++++++++++++++++++++++++++++++++---------------
 1 files changed, 127 insertions(+), 41 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index 4f162d1..9d905ed 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -38,6 +38,8 @@ typedef struct img_cmd_t {
     int (*handler)(int argc, char **argv);
 } img_cmd_t;
 
+static const int SECTOR_SIZE = 512;
+
 /* Default to cache=writeback as data integrity is not important for qemu-tcg. */
 #define BDRV_O_FLAGS BDRV_O_CACHE_WB
 
@@ -531,7 +533,7 @@ static int is_not_zero(const uint8_t *sector, int len)
 }
 
 /*
- * Returns true iff the first sector pointed to by 'buf' contains at least
+ * Returns true if the first sector pointed to by 'buf' contains at least
  * a non-NUL byte.
  *
  * 'pnum' is set to the number of sectors (including and immediately following
@@ -590,15 +592,15 @@ static int compare_sectors(const uint8_t *buf1, const uint8_t *buf2, int n,
 
 static int img_convert(int argc, char **argv)
 {
-    int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size, cluster_sectors;
+    int c, ret = 0, n, cur_n, bs_n, bs_i, compress, cluster_size, cluster_sectors;
     int progress = 0;
     const char *fmt, *out_fmt, *out_baseimg, *out_filename;
     BlockDriver *drv, *proto_drv;
     BlockDriverState **bs = NULL, *out_bs = NULL;
     int64_t total_sectors, nb_sectors, sector_num, bs_offset;
     uint64_t bs_sectors;
+    uint64_t *bs_geometry = NULL;
     uint8_t * buf = NULL;
-    const uint8_t *buf1;
     BlockDriverInfo bdi;
     QEMUOptionParameter *param = NULL, *create_options = NULL;
     QEMUOptionParameter *out_baseimg_param;
@@ -874,14 +876,21 @@ static int img_convert(int argc, char **argv)
         /* signal EOF to align */
         bdrv_write_compressed(out_bs, 0, NULL, 0);
     } else {
+        int backing_depth;
+        int bs_i_prev = -1;
+        float progress = 100;
+        BlockDriverState *cur_bs;
         int has_zero_init = bdrv_has_zero_init(out_bs);
 
         sector_num = 0; // total number of sectors converted so far
         nb_sectors = total_sectors - sector_num;
-        local_progress = (float)100 /
-            (nb_sectors / MIN(nb_sectors, IO_BUF_SIZE / 512));
 
         for(;;) {
+            if (total_sectors) {
+                progress = (long double) sector_num / total_sectors * 100;
+            }
+            qemu_progress_print(progress, 0);
+
             nb_sectors = total_sectors - sector_num;
             if (nb_sectors <= 0) {
                 break;
@@ -893,15 +902,38 @@ static int img_convert(int argc, char **argv)
             }
 
             while (sector_num - bs_offset >= bs_sectors) {
-                bs_i ++;
-                assert (bs_i < bs_n);
+                bs_i++;
+                assert(bs_i < bs_n);
                 bs_offset += bs_sectors;
                 bdrv_get_geometry(bs[bs_i], &bs_sectors);
+
                 /* printf("changing part: sector_num=%" PRId64 ", bs_i=%d, "
                   "bs_offset=%" PRId64 ", bs_sectors=%" PRId64 "\n",
                    sector_num, bs_i, bs_offset, bs_sectors); */
             }
 
+            if (bs_i != bs_i_prev) {
+                /* Getting geometry of the image and all its backing images */
+
+                backing_depth = 1;
+                cur_bs = bs[bs_i];
+                while (( cur_bs = cur_bs->backing_hd )) {
+                    backing_depth++;
+                }
+
+                bs_geometry = (uint64_t *) qemu_realloc(
+                    bs_geometry, backing_depth * sizeof(uint64_t));
+
+                backing_depth = 1;
+                cur_bs = bs[bs_i];
+                *bs_geometry = bs_sectors;
+                while (( cur_bs = cur_bs->backing_hd )) {
+                    bdrv_get_geometry(cur_bs, bs_geometry + backing_depth++);
+                }
+
+                bs_i_prev = bs_i;
+            }
+
             if (n > bs_offset + bs_sectors - sector_num) {
                 n = bs_offset + bs_sectors - sector_num;
             }
@@ -912,55 +944,109 @@ static int img_convert(int argc, char **argv)
                    are present in both the output's and input's base images (no
                    need to copy them). */
                 if (out_baseimg) {
-                    if (!bdrv_is_allocated(bs[bs_i], sector_num - bs_offset,
-                                           n, &n1)) {
-                        sector_num += n1;
+                    if (!bdrv_is_allocated(bs[bs_i], sector_num - bs_offset, n, &cur_n)) {
+                        sector_num += cur_n;
                         continue;
                     }
-                    /* The next 'n1' sectors are allocated in the input image. Copy
+                    /* The next 'cur_n' sectors are allocated in the input image. Copy
                        only those as they may be followed by unallocated sectors. */
-                    n = n1;
+                    n = cur_n;
                 }
-            } else {
-                n1 = n;
             }
 
-            ret = bdrv_read(bs[bs_i], sector_num - bs_offset, buf, n);
-            if (ret < 0) {
-                error_report("error while reading");
-                goto out;
-            }
-            /* NOTE: at the same time we convert, we do not write zero
-               sectors to have a chance to compress the image. Ideally, we
-               should add a specific call to have the info to go faster */
-            buf1 = buf;
-            while (n > 0) {
-                /* If the output image is being created as a copy on write image,
-                   copy all sectors even the ones containing only NUL bytes,
-                   because they may differ from the sectors in the base image.
-
-                   If the output is to a host device, we also write out
-                   sectors that are entirely 0, since whatever data was
-                   already there is garbage, not 0s. */
-                if (!has_zero_init || out_baseimg ||
-                    is_allocated_sectors(buf1, n, &n1)) {
-                    ret = bdrv_write(out_bs, sector_num, buf1, n1);
-                    if (ret < 0) {
-                        error_report("error while writing");
-                        goto out;
+            /* If the output image is being created as a copy on write image,
+               copy all sectors even the ones containing only zero bytes,
+               because they may differ from the sectors in the base image.
+
+               If the output is to a host device, we also write out
+               sectors that are entirely 0, since whatever data was
+               already there is garbage, not 0s. */
+            if (!has_zero_init || out_baseimg) {
+                ret = bdrv_read(bs[bs_i], sector_num - bs_offset, buf, n);
+                if (ret < 0) {
+                    error_report("error while reading");
+                    goto out;
+                }
+
+                ret = bdrv_write(out_bs, sector_num, buf, n);
+                if (ret < 0) {
+                    error_report("error while writing");
+                    goto out;
+                }
+
+                sector_num += n;
+            } else {
+                /* Look for the sectors in the image and if they are not
+                   allocated - sequentially in all its backing images.
+
+                   Write only non-zero bytes to the output image. */
+
+                uint64_t cur_sectors;
+                uint64_t bs_sector;
+                int allocated_num;
+                int sector_found;
+
+                while (n > 0) {
+                    cur_bs = bs[bs_i];
+                    bs_sector = sector_num - bs_offset;
+                    backing_depth = 0;
+                    sector_found = 0;
+
+                    do {
+                        cur_sectors = bs_geometry[backing_depth++];
+
+                        if (bs_sector >= cur_sectors) {
+                            continue;
+                        }
+
+                        if (bs_sector + n <= cur_sectors) {
+                            cur_n = n;
+                        } else {
+                            cur_n = cur_sectors - bs_sector;
+                        }
+
+                        if (bdrv_is_allocated(cur_bs, bs_sector, cur_n, &allocated_num)) {
+                            const uint8_t *cur_buf = buf;
+                            sector_found = 1;
+
+                            ret = bdrv_read(cur_bs, bs_sector, buf, allocated_num);
+                            if (ret < 0) {
+                                error_report("error while reading");
+                                goto out;
+                            }
+
+                            while (allocated_num > 0) {
+                                if (is_allocated_sectors(cur_buf, allocated_num, &cur_n)) {
+                                    ret = bdrv_write(out_bs, sector_num, cur_buf, cur_n);
+                                    if (ret < 0) {
+                                        error_report("error while writing");
+                                        goto out;
+                                    }
+                                }
+
+                                n -= cur_n;
+                                sector_num += cur_n;
+                                allocated_num -= cur_n;
+                                cur_buf += cur_n * SECTOR_SIZE;
+                            }
+
+                            break;
+                        }
+                    } while(( cur_bs = cur_bs->backing_hd ));
+
+                    if (!sector_found) {
+                        sector_num++;
+                        n--;
                     }
                 }
-                sector_num += n1;
-                n -= n1;
-                buf1 += n1 * 512;
             }
-            qemu_progress_print(local_progress, 100);
         }
     }
 out:
     qemu_progress_end();
     free_option_parameters(create_options);
     free_option_parameters(param);
+    qemu_free(bs_geometry);
     qemu_free(buf);
     if (out_bs) {
         bdrv_delete(out_bs);
-- 
1.7.4.1

next             reply	other threads:[~2011-06-08 14:51 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-06-08 14:50 Dmitry Konishchev [this message]
2011-06-13  8:26 ` [Qemu-devel] [PATCH] CPU consumption optimization of 'qemu-img convert' using bdrv_is_allocated() Stefan Hajnoczi
2011-06-13  9:13   ` Dmitry Konishchev
2011-06-14  7:43     ` Dmitry Konishchev
2011-06-14 15:58       ` Stefan Hajnoczi
2011-06-15  7:38         ` Dmitry Konishchev
2011-06-15  8:39           ` Stefan Hajnoczi
2011-06-15  9:50             ` Dmitry Konishchev
2011-06-15 12:02               ` Stefan Hajnoczi
2011-06-15 13:14                 ` Dmitry Konishchev
2011-06-15 13:33                   ` Stefan Hajnoczi
2011-06-15 13:37                     ` Dmitry Konishchev
2011-06-15 13:57                       ` Stefan Hajnoczi
2011-06-16  8:10                         ` Dmitry Konishchev
2011-06-17  7:25                           ` Stefan Hajnoczi
2011-06-15 10:14   ` Dmitry Konishchev

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:4f162d1 dfblob:9d905ed )
 OR (
bs:"[Qemu-devel] [PATCH] CPU consumption optimization of 'qemu-img convert' using bdrv_is_allocated()" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1307544625-22907-1-git-send-email-konishchev@gmail.com \
    --to=konishchev@gmail.com \
    --cc=kwolf@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).