qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: John Snow <jsnow@redhat.com>
To: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>,
	qemu-devel@nongnu.org
Cc: kwolf@redhat.com, famz@redhat.com, qemu-block@nongnu.org,
	mreitz@redhat.com, stefanha@redhat.com, den@openvz.org,
	pbonzini@redhat.com
Subject: Re: [Qemu-devel] [PATCH 06/22] hbitmap: load/store
Date: Mon, 28 Mar 2016 16:20:16 -0400	[thread overview]
Message-ID: <56F99200.1040706@redhat.com> (raw)
In-Reply-To: <56F2525E.3000509@virtuozzo.com>



On 03/23/2016 04:22 AM, Vladimir Sementsov-Ogievskiy wrote:
> On 23.03.2016 00:49, John Snow wrote:
>>
>> On 03/15/2016 04:04 PM, Vladimir Sementsov-Ogievskiy wrote:
>>> Add functions for load/store HBitmap to BDS, using clusters table:
>>> Last level of the bitmap is splitted into chunks of 'cluster_size'
>>> size. Each cell of the table contains offset in bds, to load/store
>>> corresponding chunk.
>>>
>>> Also,
>>>      0 in cell means all-zeroes-chunk (should not be saved)
>>>      1 in cell means all-ones-chunk (should not be saved)
>>>      hbitmap_prepare_store() fills table with
>>>        0 for all-zeroes chunks
>>>        1 for all-ones chunks
>>>        2 for others
>>>
>>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>>> ---
>>>   block/dirty-bitmap.c         |  23 +++++
>>>   include/block/dirty-bitmap.h |  11 +++
>>>   include/qemu/hbitmap.h       |  12 +++
>>>   util/hbitmap.c               | 209
>>> +++++++++++++++++++++++++++++++++++++++++++
>>>   4 files changed, 255 insertions(+)
>>>
>>> diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
>>> index e68c177..816c6ee 100644
>>> --- a/block/dirty-bitmap.c
>>> +++ b/block/dirty-bitmap.c
>>> @@ -396,3 +396,26 @@ int64_t bdrv_get_dirty_count(BdrvDirtyBitmap
>>> *bitmap)
>>>   {
>>>       return hbitmap_count(bitmap->bitmap);
>>>   }
>>> +
>>> +int bdrv_dirty_bitmap_load(BdrvDirtyBitmap *bitmap, BlockDriverState
>>> *bs,
>>> +                           const uint64_t *table, uint32_t table_size,
>>> +                           uint32_t cluster_size)
>>> +{
>>> +    return hbitmap_load(bitmap->bitmap, bs, table, table_size,
>>> cluster_size);
>>> +}
>>> +
>>> +int bdrv_dirty_bitmap_prepare_store(const BdrvDirtyBitmap *bitmap,
>>> +                                    uint32_t cluster_size,
>>> +                                    uint64_t *table,
>>> +                                    uint32_t *table_size)
>>> +{
>>> +    return hbitmap_prepare_store(bitmap->bitmap, cluster_size,
>>> +                                 table, table_size);
>>> +}
>>> +
>>> +int bdrv_dirty_bitmap_store(const BdrvDirtyBitmap *bitmap,
>>> BlockDriverState *bs,
>>> +                            const uint64_t *table, uint32_t table_size,
>>> +                            uint32_t cluster_size)
>>> +{
>>> +    return hbitmap_store(bitmap->bitmap, bs, table, table_size,
>>> cluster_size);
>>> +}
>>> diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
>>> index 27515af..20cb540 100644
>>> --- a/include/block/dirty-bitmap.h
>>> +++ b/include/block/dirty-bitmap.h
>>> @@ -43,4 +43,15 @@ void bdrv_set_dirty_iter(struct HBitmapIter *hbi,
>>> int64_t offset);
>>>   int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
>>>   void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
>>>   +int bdrv_dirty_bitmap_load(BdrvDirtyBitmap *bitmap,
>>> BlockDriverState *bs,
>>> +                           const uint64_t *table, uint32_t table_size,
>>> +                           uint32_t cluster_size);
>>> +int bdrv_dirty_bitmap_prepare_store(const BdrvDirtyBitmap *bitmap,
>>> +                                    uint32_t cluster_size,
>>> +                                    uint64_t *table,
>>> +                                    uint32_t *table_size);
>>> +int bdrv_dirty_bitmap_store(const BdrvDirtyBitmap *bitmap,
>>> BlockDriverState *bs,
>>> +                            const uint64_t *table, uint32_t table_size,
>>> +                            uint32_t cluster_size);
>>> +
>>>   #endif
>>> diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
>>> index 6d1da4d..d83bb79 100644
>>> --- a/include/qemu/hbitmap.h
>>> +++ b/include/qemu/hbitmap.h
>>> @@ -241,5 +241,17 @@ static inline size_t
>>> hbitmap_iter_next_word(HBitmapIter *hbi, unsigned long *p_c
>>>       return hbi->pos;
>>>   }
>>>   +typedef struct BlockDriverState BlockDriverState;
>>> +
>>> +int hbitmap_load(HBitmap *bitmap, BlockDriverState *bs,
>>> +                 const uint64_t *table, uint32_t table_size,
>>> +                 uint32_t cluster_size);
>>> +
>>> +int hbitmap_prepare_store(const HBitmap *bitmap, uint32_t cluster_size,
>>> +                          uint64_t *table, uint32_t *table_size);
>>> +
>>> +int hbitmap_store(HBitmap *bitmap, BlockDriverState *bs,
>>> +                  const uint64_t *table, uint32_t table_size,
>>> +                  uint32_t cluster_size);
>>>     #endif
>>> diff --git a/util/hbitmap.c b/util/hbitmap.c
>>> index 28595fb..1960e4f 100644
>>> --- a/util/hbitmap.c
>>> +++ b/util/hbitmap.c
>>> @@ -15,6 +15,8 @@
>>>   #include "qemu/host-utils.h"
>>>   #include "trace.h"
>>>   +#include "block/block.h"
>>> +
>>>   /* HBitmaps provides an array of bits.  The bits are stored as
>>> usual in an
>>>    * array of unsigned longs, but HBitmap is also optimized to
>>> provide fast
>>>    * iteration over set bits; going from one bit to the next is
>>> O(logB n)
>>> @@ -499,3 +501,210 @@ char *hbitmap_md5(const HBitmap *bitmap)
>>>       const guchar *data = (const guchar
>>> *)bitmap->levels[HBITMAP_LEVELS - 1];
>>>       return g_compute_checksum_for_data(G_CHECKSUM_MD5, data, size);
>>>   }
>>> +
>>> +/* hb_restore_levels()
>>> + * Using the last level restore all other levels
>>> + */
>>> +static void hb_restore_levels(HBitmap *bitmap)
>>> +{
>>> +    int64_t i, size, prev_size;
>>> +    int lev;
>>> +
>>> +    /* restore levels starting from penultimate to zero level, assuming
>>> +     * that the last level is ok */
>>> +    size = MAX((bitmap->size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL,
>>> 1);
>>> +    for (lev = HBITMAP_LEVELS - 1; lev-- > 0; ) {
>>> +        prev_size = size;
>>> +        size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
>>> +        memset(bitmap->levels[lev], 0, size * sizeof(unsigned long));
>>> +
>>> +        for (i = 0; i < prev_size; ++i) {
>>> +            if (bitmap->levels[lev + 1][i]) {
>>> +                bitmap->levels[lev][i >> BITS_PER_LEVEL] |=
>>> +                    1UL << (i & (BITS_PER_LONG - 1));
>>> +            }
>>> +        }
>>> +    }
>>> +
>>> +    bitmap->levels[0][0] |= 1UL << (BITS_PER_LONG - 1);
>>> +}
>>> +
>>> +/* load_bitmap()
>>> + * Load dirty bitmap from file, using table with cluster offsets.
>>> + * Table entries are assumed to be in little endian format.
>>> + */
>>> +int hbitmap_load(HBitmap *bitmap, BlockDriverState *bs,
>>> +                 const uint64_t *table, uint32_t table_size,
>>> +                 uint32_t cluster_size)
>>> +{
>>> +    uint32_t i;
>>> +    uint8_t *cur = (uint8_t *)bitmap->levels[HBITMAP_LEVELS - 1];
>>> +    uint8_t *end = cur + ((bitmap->size + 7) >> 3);
>>> +
>>> +    hbitmap_reset_all(bitmap);
>>> +
>>> +    for (i = 0; i < table_size && cur < end; ++i) {
>>> +        uint64_t offset = table[i];
>>> +        uint64_t count = MIN(cluster_size, end - cur);
>>> +
>>> +        /* Zero offset means zero region, offset = 1 means filled
>>> region.
>>> +         * Cluster is not allocated in both cases. */
>>> +        if (offset == 1) {
>>> +            memset(cur, 0xff, count);
>>> +        } else if (offset) {
>>> +            int ret = bdrv_pread(bs, offset, cur, count);
>>> +            if (ret < 0) {
>>> +                return ret;
>>> +            }
>>> +        }
>>> +
>>> +        cur += cluster_size;
>>> +    }
>>> +
>>> +    cur = (uint8_t *)bitmap->levels[HBITMAP_LEVELS - 1];
>>> +    while (cur < end) {
>>> +        if (BITS_PER_LONG == 32) {
>>> +            le32_to_cpus((uint32_t *)cur);
>>> +        } else {
>>> +            le64_to_cpus((uint64_t *)cur);
>>> +        }
>>> +
>>> +        cur += sizeof(unsigned long);
>>> +    }
>>> +
>>> +    hb_restore_levels(bitmap);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static bool buffer_is_all_ones(void *buf, size_t len)
>>> +{
>>> +    /* FIXME */
>>> +    return false;
>>> +}
>>> +
>>> +/* hbitmap_prepare_store()
>>> + * Devide bitmap data into clusters, and then,
>>> + * for zero cluster: table[i] = 0
>>> + * for all-ones cluster: table[i] = 1
>>> + * for other clusters: table[i] = 2
>>> + */
>>> +int hbitmap_prepare_store(const HBitmap *bitmap,
>>> +                          uint32_t cluster_size,
>>> +                          uint64_t *table,
>>> +                          uint32_t *table_size)
>>> +{
>>> +    HBitmapIter hbi;
>>> +    hbitmap_iter_init(&hbi, bitmap, 0);
>>> +    uint64_t nb_bits_in_cl = (uint64_t)cluster_size << 3;
>>> +    uint32_t need_table_size =
>>> +            (bitmap->size + nb_bits_in_cl - 1) / nb_bits_in_cl;
>>> +
>>> +    if (table == NULL && *table_size == 0) {
>>> +        *table_size = need_table_size;
>>> +        return 0;
>>> +    }
>>> +
>>> +    if (*table_size < need_table_size) {
>>> +        return -ENOMEM;
>>> +    }
>>> +
>>> +    memset(table, 0, *table_size * sizeof(table[0]));
>>> +
>>> +    for (;;) {
>>> +        unsigned long cur;
>>> +        size_t pos = hbitmap_iter_next_word(&hbi, &cur);
>>> +        size_t byte = pos * sizeof(unsigned long);
>>> +        uint64_t bit = byte << 3;
>>> +        uint64_t nbits = MIN(cluster_size << 3, bitmap->size - bit),
>>> next_bit;
>>> +        size_t i = byte / cluster_size;
>>> +
>>> +        if (pos == -1) {
>>> +            break;
>>> +        }
>>> +
>>> +        if (pos % cluster_size != 0) {
>>> +            table[i] = 2;
>>> +        } else if (buffer_is_all_ones(&bitmap->levels[HBITMAP_LEVELS
>>> - 1][pos],
>>> +                               nbits >> 3)) {
>>> +            table[i] = 1;
>>> +            if (nbits & 7) {
>>> +                uint8_t last_byte =
>>> +                        *(((uint8_t *)&bitmap->levels[HBITMAP_LEVELS
>>> - 1][pos])
>>> +                                + (nbits >> 3));
>>> +                if (last_byte != ((1 << (nbits & 7)) - 1)) {
>>> +                    table[i] = 2;
>>> +                }
>>> +            }
>>> +        } else {
>>> +            table[i] = 2;
>>> +        }
>>> +
>>> +        next_bit = (i + 1) * cluster_size << 3;
>>> +
>>> +        if (next_bit >= bitmap->size) {
>>> +            break;
>>> +        }
>>> +
>>> +        hbitmap_iter_init(&hbi, bitmap, next_bit <<
>>> bitmap->granularity);
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static void longs_to_le(unsigned long *longs, size_t count)
>>> +{
>>> +    unsigned long *end = longs + count;
>>> +    while (longs < end) {
>>> +        if (BITS_PER_LONG == 32) {
>>> +            cpu_to_le32s((uint32_t *)longs);
>>> +        } else {
>>> +            cpu_to_le64s((uint64_t *)longs);
>>> +        }
>>> +
>>> +        longs++;
>>> +    }
>>> +}
>>> +
>>> +/* store_bitmap()
>>> + * update bitmap table by storing bitmap to it.
>>> + * bitmap table entries are assumed to be in big endian format
>>> + * On the error, the resulting bitmap table is valid for clearing, but
>>> + * may contain invalid bitmap */
>>> +int hbitmap_store(HBitmap *bitmap, BlockDriverState *bs,
>>> +                  const uint64_t *table, uint32_t table_size,
>>> +                  uint32_t cluster_size)
>>> +{
>>> +    int i;
>>> +    uint8_t *cur = (uint8_t *)bitmap->levels[HBITMAP_LEVELS - 1];
>>> +    uint8_t *end = cur +
>>> +        ((bitmap->size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL) *
>>> (sizeof(long));
>>> +
>>> +    for (i = 0; i < table_size && cur < end; ++i) {
>>> +        uint64_t offset = table[i];
>>> +        uint64_t count = MIN(cluster_size, end - cur);
>>> +
>>> +        /* Zero offset means zero region, offset = 1 means filled
>>> region.
>>> +         * Cluster is not allocated in both cases. */
>>> +        if (offset > 1) {
>>> +            int ret;
>>> +            if (cpu_to_le16(1) == 1) {
>>> +                ret = bdrv_pwrite(bs, offset, cur, count);
>> I suspect that the reason you're using bdrv_pwrite down in here is to
>> avoid a buffered copy where hbitmap prepares a buffer and then the file
>> format layer decides what to do with it, opting instead to allow the
>> hbitmap layer itself to do a direct write.
>>
>> I don't think this is going to work, though -- hbitmaps are a generic
>> utility and shouldn't have block layer access.
>>
>>   I think the standard, naive design is the right one:
>>
>> (1) qcow2-bitmap.c calls
>> bdrv_dirty_bitmap_serialize(bdrvdirtybitmap *bitmap, void *out);
>> and serialization primitives for hbitmap are used to construct a buffer
>> placed in *out.
>>
>> (2) qcow2-bitmap.c writes this buffer itself where it needs it, using
>> bdrv_pwrite.
>>
>> If this proves to be too slow, we can optimize it later.
> 
> May be just pass writing function to hbitmap_store, like int
> (*pwrite_func)(void *opaque, int64_t offset, const void *buf, int bytes)
> ? Serialization is superfluous here. It is needed for migration, where
> we forced to work with data chunks protocol layer. But why to split here
> the bitmap, dance with granularity and block layer wrappers, and make an
> additional copy of the data?
> 

I hate to say it, but I think this is premature optimization. If you can
prove there is a meaningful difference between a buffered serialization
and a zero-copy, I'll go along with it -- but I think until we are
bench-marking quite large bitmaps, we won't see much of a difference.

This should be easy enough to improve later if we need to.

>>
>>> +            } else {
>>> +                void *tmp = g_memdup(cur, count);
>>> +                longs_to_le((unsigned long *)cur,
>>> +                            count / sizeof(unsigned long));
>>> +                ret = bdrv_pwrite(bs, offset, tmp, count);
>>> +                g_free(tmp);
>>> +            }
>>> +
>>> +            if (ret < 0) {
>>> +                return ret;
>>> +            }
>>> +        }
>>> +
>>> +        cur += cluster_size;
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>>>
> 
> 

  reply	other threads:[~2016-03-28 20:20 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-15 20:04 [Qemu-devel] [PATCH v5 00/22] qcow2: persistent dirty bitmaps Vladimir Sementsov-Ogievskiy
2016-03-15 20:04 ` [Qemu-devel] [PATCH 01/22] block: Add two dirty bitmap getters Vladimir Sementsov-Ogievskiy
2016-03-15 20:08   ` Vladimir Sementsov-Ogievskiy
2016-03-22 16:37     ` Eric Blake
2016-03-22 18:06     ` John Snow
2016-03-15 20:04 ` [Qemu-devel] [PATCH 02/22] block: fix bdrv_dirty_bitmap_granularity signature Vladimir Sementsov-Ogievskiy
2016-03-15 20:04 ` [Qemu-devel] [PATCH 03/22] iotests: maintain several vms in test Vladimir Sementsov-Ogievskiy
2016-03-15 20:04 ` [Qemu-devel] [PATCH 04/22] iotests: add default node-name Vladimir Sementsov-Ogievskiy
2016-03-22 18:08   ` John Snow
2016-03-15 20:04 ` [Qemu-devel] [PATCH 05/22] qapi: add md5 checksum of last dirty bitmap level to query-block Vladimir Sementsov-Ogievskiy
2016-03-15 20:04 ` [Qemu-devel] [PATCH 06/22] hbitmap: load/store Vladimir Sementsov-Ogievskiy
2016-03-21 22:42   ` John Snow
2016-03-22 10:47     ` Vladimir Sementsov-Ogievskiy
2016-03-22 21:49   ` John Snow
2016-03-23  8:22     ` Vladimir Sementsov-Ogievskiy
2016-03-28 20:20       ` John Snow [this message]
2016-03-15 20:04 ` [Qemu-devel] [PATCH 07/22] qcow2: Bitmaps extension: structs and consts Vladimir Sementsov-Ogievskiy
2016-03-15 20:04 ` [Qemu-devel] [PATCH 08/22] qcow2-dirty-bitmap: read dirty bitmap directory Vladimir Sementsov-Ogievskiy
2016-03-15 20:04 ` [Qemu-devel] [PATCH 09/22] qcow2-dirty-bitmap: add qcow2_bitmap_load() Vladimir Sementsov-Ogievskiy
2016-03-15 20:04 ` [Qemu-devel] [PATCH 10/22] qcow2-dirty-bitmap: add qcow2_bitmap_store() Vladimir Sementsov-Ogievskiy
2016-03-22 18:49   ` Eric Blake
2016-03-23  8:25     ` Vladimir Sementsov-Ogievskiy
2016-03-15 20:04 ` [Qemu-devel] [PATCH 11/22] qcow2: add dirty bitmaps extension Vladimir Sementsov-Ogievskiy
2016-03-15 20:04 ` [Qemu-devel] [PATCH 12/22] qcow2-dirty-bitmap: add qcow2_bitmap_load_check() Vladimir Sementsov-Ogievskiy
2016-03-22 18:49   ` Eric Blake
2016-03-15 20:04 ` [Qemu-devel] [PATCH 13/22] block: store persistent dirty bitmaps Vladimir Sementsov-Ogievskiy
2016-03-15 20:04 ` [Qemu-devel] [PATCH 14/22] block: add bdrv_load_dirty_bitmap() Vladimir Sementsov-Ogievskiy
2016-03-15 20:04 ` [Qemu-devel] [PATCH 15/22] qcow2-dirty-bitmap: add autoclear bit Vladimir Sementsov-Ogievskiy
2016-03-15 20:04 ` [Qemu-devel] [PATCH 16/22] qemu: command line option for dirty bitmaps Vladimir Sementsov-Ogievskiy
2016-03-15 20:04 ` [Qemu-devel] [PATCH 17/22] qcow2-dirty-bitmap: add IN_USE flag Vladimir Sementsov-Ogievskiy
2016-03-15 20:04 ` [Qemu-devel] [PATCH 18/22] qcow2-dirty-bitmaps: disallow stroing bitmap to other bs Vladimir Sementsov-Ogievskiy
2016-03-22 18:51   ` Eric Blake
2016-03-15 20:04 ` [Qemu-devel] [PATCH 19/22] iotests: add VM.test_launcn() Vladimir Sementsov-Ogievskiy
2016-03-22 18:25   ` Eric Blake
2016-03-15 20:04 ` [Qemu-devel] [PATCH 20/22] iotests: test internal persistent dirty bitmap Vladimir Sementsov-Ogievskiy
2016-03-22 18:27   ` Eric Blake
2016-03-23  8:28     ` Vladimir Sementsov-Ogievskiy
2016-03-15 20:04 ` [Qemu-devel] [PATCH 21/22] qcow2-dirty-bitmap: add AUTO flag Vladimir Sementsov-Ogievskiy
2016-03-15 20:04 ` [Qemu-devel] [PATCH 22/22] qcow2-dirty-bitmap: add EXTRA_DATA_COMPATIBLE flag Vladimir Sementsov-Ogievskiy
2016-03-22 18:51   ` Eric Blake

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=56F99200.1040706@redhat.com \
    --to=jsnow@redhat.com \
    --cc=den@openvz.org \
    --cc=famz@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    --cc=vsementsov@virtuozzo.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).