From mboxrd@z Thu Jan  1 00:00:00 1970
Received: from eggs.gnu.org ([2001:4830:134:3::10]:58660)
	by lists.gnu.org with esmtp (Exim 4.71) (envelope-from <pl@kamp.de>)
	id 1VlIsc-00024u-Cb
	for qemu-devel@nongnu.org; Tue, 26 Nov 2013 08:40:47 -0500
Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71)
	(envelope-from <pl@kamp.de>) id 1VlIsW-0000yX-H0
	for qemu-devel@nongnu.org; Tue, 26 Nov 2013 08:40:42 -0500
Received: from mx.ipv6.kamp.de ([2a02:248:0:51::16]:55137 helo=mx01.kamp.de)
	by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from <pl@kamp.de>)
	id 1VlIsW-0000yC-1r
	for qemu-devel@nongnu.org; Tue, 26 Nov 2013 08:40:36 -0500
Message-ID: <5294A4E1.9000108@kamp.de>
Date: Tue, 26 Nov 2013 14:40:49 +0100
From: Peter Lieven <pl@kamp.de>
MIME-Version: 1.0
References: <1385456183-24840-1-git-send-email-pl@kamp.de>
	<1385456183-24840-2-git-send-email-pl@kamp.de>
	<529471CE.9010407@redhat.com>
In-Reply-To: <529471CE.9010407@redhat.com>
Content-Type: text/plain; charset=ISO-8859-15; format=flowed
Content-Transfer-Encoding: 8bit
Subject: Re: [Qemu-devel] [PATCHv2 1.8 1/9] qemu-img: add support for
 skipping zeroes in input during convert
List-Id: <qemu-devel.nongnu.org>
List-Unsubscribe: <https://lists.nongnu.org/mailman/options/qemu-devel>,
	<mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>
List-Archive: <http://lists.nongnu.org/archive/html/qemu-devel>
List-Post: <mailto:qemu-devel@nongnu.org>
List-Help: <mailto:qemu-devel-request@nongnu.org?subject=help>
List-Subscribe: <https://lists.nongnu.org/mailman/listinfo/qemu-devel>,
	<mailto:qemu-devel-request@nongnu.org?subject=subscribe>
To: Paolo Bonzini <pbonzini@redhat.com>
Cc: kwolf@redhat.com, qemu-devel@nongnu.org, stefanha@redhat.com

On 26.11.2013 11:02, Paolo Bonzini wrote:
> Il 26/11/2013 09:56, Peter Lieven ha scritto:
>> we currently do not check if a sector is allocated during convert.
>> This means if a sector is unallocated that we allocate a bounce
>> buffer of zeroes, find out its zero later and do not write it
>> in the best case. In the worst case this can lead to reading
>> blocks from a raw device (like iSCSI) altough we could easily
>> know via get_block_status that they are zero and simply skip them.
>>
>> This patch also fixes the progress output not being at 100% after
>> a successful conversion.
>>
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>>   qemu-img.c |   85 ++++++++++++++++++++++++++++++++++--------------------------
>>   1 file changed, 48 insertions(+), 37 deletions(-)
>>
>> diff --git a/qemu-img.c b/qemu-img.c
>> index dc0c2f0..efb744c 100644
>> --- a/qemu-img.c
>> +++ b/qemu-img.c
>> @@ -1125,13 +1125,15 @@ out3:
>>   
>>   static int img_convert(int argc, char **argv)
>>   {
>> -    int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size,
>> +    int c, n, n1, bs_n, bs_i, compress, cluster_size,
>>           cluster_sectors, skip_create;
>> +    int64_t ret = 0;
>>       int progress = 0, flags;
>>       const char *fmt, *out_fmt, *cache, *out_baseimg, *out_filename;
>>       BlockDriver *drv, *proto_drv;
>>       BlockDriverState **bs = NULL, *out_bs = NULL;
>> -    int64_t total_sectors, nb_sectors, sector_num, bs_offset;
>> +    int64_t total_sectors, nb_sectors, sector_num, bs_offset,
>> +            sector_num_next_status = 0;
>>       uint64_t bs_sectors;
>>       uint8_t * buf = NULL;
>>       const uint8_t *buf1;
>> @@ -1140,7 +1142,6 @@ static int img_convert(int argc, char **argv)
>>       QEMUOptionParameter *out_baseimg_param;
>>       char *options = NULL;
>>       const char *snapshot_name = NULL;
>> -    float local_progress = 0;
>>       int min_sparse = 8; /* Need at least 4k of zeros for sparse detection */
>>       bool quiet = false;
>>       Error *local_err = NULL;
>> @@ -1403,10 +1404,6 @@ static int img_convert(int argc, char **argv)
>>           sector_num = 0;
>>   
>>           nb_sectors = total_sectors;
>> -        if (nb_sectors != 0) {
>> -            local_progress = (float)100 /
>> -                (nb_sectors / MIN(nb_sectors, cluster_sectors));
>> -        }
>>   
>>           for(;;) {
>>               int64_t bs_num;
>> @@ -1464,7 +1461,7 @@ static int img_convert(int argc, char **argv)
>>                   }
>>               }
>>               sector_num += n;
>> -            qemu_progress_print(local_progress, 100);
>> +            qemu_progress_print(100.0 * sector_num / total_sectors, 0);
>>           }
>>           /* signal EOF to align */
>>           bdrv_write_compressed(out_bs, 0, NULL, 0);
>> @@ -1481,21 +1478,13 @@ static int img_convert(int argc, char **argv)
>>   
>>           sector_num = 0; // total number of sectors converted so far
>>           nb_sectors = total_sectors - sector_num;
>> -        if (nb_sectors != 0) {
>> -            local_progress = (float)100 /
>> -                (nb_sectors / MIN(nb_sectors, IO_BUF_SIZE / 512));
>> -        }
>>   
>>           for(;;) {
>>               nb_sectors = total_sectors - sector_num;
>>               if (nb_sectors <= 0) {
>> +                ret = 0;
>>                   break;
>>               }
>> -            if (nb_sectors >= (IO_BUF_SIZE / 512)) {
>> -                n = (IO_BUF_SIZE / 512);
>> -            } else {
>> -                n = nb_sectors;
>> -            }
>>   
>>               while (sector_num - bs_offset >= bs_sectors) {
>>                   bs_i ++;
>> @@ -1507,34 +1496,53 @@ static int img_convert(int argc, char **argv)
>>                      sector_num, bs_i, bs_offset, bs_sectors); */
>>               }
>>   
>> -            if (n > bs_offset + bs_sectors - sector_num) {
>> -                n = bs_offset + bs_sectors - sector_num;
>> -            }
>> -
>> -            /* If the output image is being created as a copy on write image,
>> -               assume that sectors which are unallocated in the input image
>> -               are present in both the output's and input's base images (no
>> -               need to copy them). */
>> -            if (out_baseimg) {
>> -                ret = bdrv_is_allocated(bs[bs_i], sector_num - bs_offset,
>> -                                        n, &n1);
>> +            if ((out_baseimg || has_zero_init) &&
>> +                sector_num >= sector_num_next_status) {
>> +                n = nb_sectors > INT_MAX ? INT_MAX : nb_sectors;
>> +                ret = bdrv_get_block_status(bs[bs_i], sector_num - bs_offset,
>> +                                            n, &n1);
>>                   if (ret < 0) {
>> -                    error_report("error while reading metadata for sector "
>> -                                 "%" PRId64 ": %s",
>> -                                 sector_num - bs_offset, strerror(-ret));
>> +                    error_report("error while reading block status of sector %"
>> +                                 PRId64 ": %s", sector_num - bs_offset,
>> +                                 strerror(-ret));
>>                       goto out;
>>                   }
>> -                if (!ret) {
>> +                /* If the output image is zero initialized, we are not working
>> +                 * on a shared base and the input is zero we can skip the next
>> +                 * n1 sectors */
>> +                if (has_zero_init && !out_baseimg && ret & BDRV_BLOCK_ZERO) {
> has_zero_init will imply !out_baseimg if skip_create == false.  Should
> we care and check out_baseimg separately if skip_create == true?  If
> not, you can remove "&& !out_baseimg".
I would leave it in it doesn't hurt.
>
> Also, please add parentheses around "ret & BDRV_BLOCK_ZERO".
Ok.
>
>>                       sector_num += n1;
>>                       continue;
>>                   }
>> -                /* The next 'n1' sectors are allocated in the input image. Copy
>> -                   only those as they may be followed by unallocated sectors. */
>> -                n = n1;
>> +                /* If the output image is being created as a copy on write
>> +                 * image, assume that sectors which are unallocated in the
>> +                 * input image are present in both the output's and input's
>> +                 * base images (no need to copy them). */
>> +                if (out_baseimg) {
>> +                    if (!(ret & BDRV_BLOCK_DATA)) {
>> +                        sector_num += n1;
>> +                        continue;
>> +                    }
>> +                    /* The next 'n1' sectors are allocated in the input image.
>> +                     * Copy only those as they may be followed by unallocated
>> +                     * sectors. */
>> +                    nb_sectors = n1;
>> +                }
>> +                /* avoid redundant callouts to get_block_status */
>> +                sector_num_next_status = sector_num + n1;
>> +            }
>> +
>> +            if (nb_sectors >= (IO_BUF_SIZE / 512)) {
>> +                n = (IO_BUF_SIZE / 512);
>>               } else {
>> -                n1 = n;
>> +                n = nb_sectors;
>>               }
>>   
>> +            if (n > bs_offset + bs_sectors - sector_num) {
>> +                n = bs_offset + bs_sectors - sector_num;
>> +            }
>> +            n1 = n;
> Please change this to:
>
> n = MIN(nb_sectors, IO_BUF_SIZE / 512);
> n = MIN(n, bs_sectors - (sector_num - bs_offset));
> n1 = n;

Thats just copy and paste, I will change it.

I was thinking if it would be a good improvement (separate patch) to
scan the whole source image for holes and account only allocated sectors
in the progress display. It would be much more accurate then.

Peter
>
> Otherwise looks good.
>
> Thanks,
>
> Paolo
>
>>               ret = bdrv_read(bs[bs_i], sector_num - bs_offset, buf, n);
>>               if (ret < 0) {
>>                   error_report("error while reading sector %" PRId64 ": %s",
>> @@ -1559,10 +1567,13 @@ static int img_convert(int argc, char **argv)
>>                   n -= n1;
>>                   buf1 += n1 * 512;
>>               }
>> -            qemu_progress_print(local_progress, 100);
>> +            qemu_progress_print(100.0 * sector_num / total_sectors, 0);
>>           }
>>       }
>>   out:
>> +    if (!ret) {
>> +        qemu_progress_print(100, 0);
>> +    }
>>       qemu_progress_end();
>>       free_option_parameters(create_options);
>>       free_option_parameters(param);
>>


-- 

Mit freundlichen Grüßen

Peter Lieven

...........................................................

   KAMP Netzwerkdienste GmbH
   Vestische Str. 89-91 | 46117 Oberhausen
   Tel: +49 (0) 208.89 402-50 | Fax: +49 (0) 208.89 402-40
   pl@kamp.de | http://www.kamp.de

   Geschäftsführer: Heiner Lante | Michael Lante
   Amtsgericht Duisburg | HRB Nr. 12154
   USt-Id-Nr.: DE 120607556

...........................................................