From: John Snow
Date: Mon, 12 Jan 2015 12:31:05 -0500
Subject: Re: [Qemu-devel] [PATCH 9/9] block-migration: add named dirty bitmaps migration
To: Paolo Bonzini, Vladimir Sementsov-Ogievskiy, qemu-devel@nongnu.org
Cc: kwolf@redhat.com, den@openvz.org, stefanha@redhat.com
Message-ID: <54B404D9.6060203@redhat.com>
In-Reply-To: <54B3FACA.7060800@redhat.com>
References: <1418307457-25996-1-git-send-email-vsementsov@parallels.com>
 <1418307457-25996-10-git-send-email-vsementsov@parallels.com>
 <54B3FACA.7060800@redhat.com>

On 01/12/2015 11:48 AM, Paolo Bonzini wrote:
>
>
> On 11/12/2014 15:17, Vladimir Sementsov-Ogievskiy wrote:
>> Just migrate the parts of the dirty bitmaps corresponding to the block
>> being migrated. Also, skip block migration if it is disabled (the blk
>> parameter of the migrate command is false).
>
> So I have misread the patch. blk here:
>
> +static void blk_store_bitmaps(BlkMigBlock *blk)
> +{
> +    int i;
> +    for (i = 0; i < blk->nr_bitmaps; ++i) {
> +        bdrv_dbm_store_data(blk->dirty_bitmaps[i].bitmap,
> +                            blk->dirty_bitmaps[i].buf,
> +                            blk->sector, blk->nr_sectors);
> +    }
>
> refers to a BlkMigBlock, not a BlockBackend.
>
> Still, I need an explanation of the operation of this patch.
>
> During the bulk phase you migrate all of the dirty bitmap. This is okay.
>
> Then, during the iterative phase you migrate the parts of the dirty
> bitmap corresponding to sectors that are dirty for block migration.
> This means the parts of the dirty bitmap that have become 1.
>
> How do you track which parts of the disk have been acted upon (e.g.
> mirrored in the case of the drive-mirror command), so that they have
> become 0?

Hm... so your question here is, "What happens if the named bitmap you
are transferring gets reset to 0 during migration? Will anybody track
this change?" (A standalone sketch of this scenario follows the quoted
patch at the end of this mail.)

As far as I know, the only "consumer" of _named_ BdrvDirtyBitmaps is
block/backup.c, and we do not support it for mirror (yet?).

So can a block-backup job be running WHILE we migrate? Otherwise, I
don't think this situation (untracked bitmap resets) will arise here.

> This, strictly speaking, is conservative, so it is not incorrect, but
> it basically throws away all the work done by the block job during the
> block migration---which might take several minutes. Is a non-live
> solution really that bad? If it is, and it is not feasible to just
> migrate the bitmaps non-live, can we do better to migrate the bits of
> the dirty bitmap that have become 0?
>
> Paolo
>
>> Skipping shared sectors: bitmaps are migrated independently of this;
>> just send blk without the block data.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy
>> ---
>>  block-migration.c | 173 +++++++++++++++++++++++++++++++++++++++++++++++-------
>>  savevm.c          |   1 +
>>  2 files changed, 154 insertions(+), 20 deletions(-)
>>
>> diff --git a/block-migration.c b/block-migration.c
>> index 908a66d..95d54a1 100644
>> --- a/block-migration.c
>> +++ b/block-migration.c
>> @@ -32,6 +32,8 @@
>>  #define BLK_MIG_FLAG_EOS                0x02
>>  #define BLK_MIG_FLAG_PROGRESS           0x04
>>  #define BLK_MIG_FLAG_ZERO_BLOCK         0x08
>> +#define BLK_MIG_FLAG_HAS_BLOCK          0x10
>> +#define BLK_MIG_FLAG_HAS_BITMAPS        0x20
>>
>>  #define MAX_IS_ALLOCATED_SEARCH 65536
>>
>> @@ -51,6 +53,8 @@ typedef struct BlkMigDevState {
>>      int shared_base;
>>      int64_t total_sectors;
>>      QSIMPLEQ_ENTRY(BlkMigDevState) entry;
>> +    int nr_bitmaps;
>> +    BdrvDirtyBitmap **dirty_bitmaps;
>>
>>      /* Only used by migration thread. Does not need a lock. */
>>      int bulk_completed;
>> @@ -64,6 +68,11 @@ typedef struct BlkMigDevState {
>>      Error *blocker;
>>  } BlkMigDevState;
>>
>> +typedef struct BlkMigDirtyBitmap {
>> +    BdrvDirtyBitmap *bitmap;
>> +    uint8_t *buf;
>> +} BlkMigDirtyBitmap;
>> +
>>  typedef struct BlkMigBlock {
>>      /* Only used by migration thread. */
>>      uint8_t *buf;
>> @@ -74,6 +83,9 @@ typedef struct BlkMigBlock {
>>      QEMUIOVector qiov;
>>      BlockAIOCB *aiocb;
>>
>> +    int nr_bitmaps;
>> +    BlkMigDirtyBitmap *dirty_bitmaps;
>> +
>>      /* Protected by block migration lock. */
>>      int ret;
>>      QSIMPLEQ_ENTRY(BlkMigBlock) entry;
>> @@ -83,6 +95,7 @@ typedef struct BlkMigState {
>>      /* Written during setup phase. Can be read without a lock. */
>>      int blk_enable;
>>      int shared_base;
>> +    int dbm_enable;
>>      QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
>>      int64_t total_sector_sum;
>>      bool zero_blocks;
>> @@ -116,27 +129,63 @@ static void blk_mig_unlock(void)
>>  /* Only allocating and initializing structure fields, not copying any data. */
>>
>>  static BlkMigBlock *blk_create(BlkMigDevState *bmds, int64_t sector,
>> -                               int nr_sectors)
>> +                               int nr_sectors, bool only_bitmaps)
>>  {
>> +    int i;
>>      BlkMigBlock *blk = g_new(BlkMigBlock, 1);
>> -    blk->buf = g_malloc(BLOCK_SIZE);
>> +    blk->buf = only_bitmaps ? NULL : g_malloc(BLOCK_SIZE);
>>      blk->bmds = bmds;
>>      blk->sector = sector;
>>      blk->nr_sectors = nr_sectors;
>> +    blk->dirty_bitmaps = NULL;
>> +    blk->nr_bitmaps = 0;
>>
>>      blk->iov.iov_base = blk->buf;
>>      blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
>>      qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
>>
>> +    if (block_mig_state.dbm_enable) {
>> +        BlkMigDirtyBitmap *mig_bm;
>> +
>> +        blk->nr_bitmaps = bmds->nr_bitmaps;
>> +        mig_bm = blk->dirty_bitmaps = g_new(BlkMigDirtyBitmap,
>> +                                            bmds->nr_bitmaps);
>> +        for (i = 0; i < bmds->nr_bitmaps; ++i) {
>> +            BdrvDirtyBitmap *bm = bmds->dirty_bitmaps[i];
>> +            mig_bm->buf = g_malloc(bdrv_dbm_data_size(bm, nr_sectors));
>> +            mig_bm->bitmap = bm;
>> +            mig_bm++;
>> +        }
>> +    }
>> +
>>      return blk;
>>  }
>>
>>  static void blk_free(BlkMigBlock *blk)
>>  {
>> +    int i;
>>      g_free(blk->buf);
>> +
>> +    if (blk->dirty_bitmaps) {
>> +        for (i = 0; i < blk->nr_bitmaps; ++i) {
>> +            g_free(blk->dirty_bitmaps[i].buf);
>> +        }
>> +        g_free(blk->dirty_bitmaps);
>> +    }
>> +
>>      g_free(blk);
>>  }
>>
>> +static void blk_store_bitmaps(BlkMigBlock *blk)
>> +{
>> +    int i;
>> +    for (i = 0; i < blk->nr_bitmaps; ++i) {
>> +        bdrv_dbm_store_data(blk->dirty_bitmaps[i].bitmap,
>> +                            blk->dirty_bitmaps[i].buf,
>> +                            blk->sector, blk->nr_sectors);
>> +    }
>> +}
>> +
>>  /* Must run outside of the iothread lock during the bulk phase,
>>   * or the VM will stall.
>>   */
>> @@ -146,9 +195,15 @@ static void blk_send(QEMUFile *f, BlkMigBlock * blk)
>>      int len;
>>      uint64_t flags = BLK_MIG_FLAG_DEVICE_BLOCK;
>>
>> -    if (block_mig_state.zero_blocks &&
>> +    if (block_mig_state.zero_blocks && blk->buf &&
>>          buffer_is_zero(blk->buf, BLOCK_SIZE)) {
>>          flags |= BLK_MIG_FLAG_ZERO_BLOCK;
>> +    } else if (blk->buf) {
>> +        flags |= BLK_MIG_FLAG_HAS_BLOCK;
>> +    }
>> +
>> +    if (block_mig_state.dbm_enable) {
>> +        flags |= BLK_MIG_FLAG_HAS_BITMAPS;
>>      }
>>
>>      /* sector number and flags */
>> @@ -160,10 +215,27 @@ static void blk_send(QEMUFile *f, BlkMigBlock * blk)
>>      qemu_put_byte(f, len);
>>      qemu_put_buffer(f, (uint8_t *)bdrv_get_device_name(blk->bmds->bs), len);
>>
>> +    /* dirty bitmaps */
>> +    if (flags & BLK_MIG_FLAG_HAS_BITMAPS) {
>> +        int i;
>> +        qemu_put_be32(f, blk->nr_bitmaps);
>> +        for (i = 0; i < blk->nr_bitmaps; ++i) {
>> +            BdrvDirtyBitmap *bm = blk->dirty_bitmaps[i].bitmap;
>> +            int buf_size = bdrv_dbm_data_size(bm, blk->nr_sectors);
>> +            const char *name = bdrv_dirty_bitmap_name(bm);
>> +            int len = strlen(name);
>> +
>> +            qemu_put_byte(f, len);
>> +            qemu_put_buffer(f, (const uint8_t *)name, len);
>> +            qemu_put_buffer(f, blk->dirty_bitmaps[i].buf, buf_size);
>> +        }
>> +    }
>> +
>>      /* if a block is zero we need to flush here since the network
>>       * bandwidth is now a lot higher than the storage device bandwidth.
>> -     * thus if we queue zero blocks we slow down the migration */
>> -    if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
>> +     * thus if we queue zero blocks we slow down the migration.
>> +     * also, skip writing block when migrate only dirty bitmaps. */
>> +    if (!(flags & BLK_MIG_FLAG_HAS_BLOCK)) {
>>          qemu_fflush(f);
>>          return;
>>      }
>> @@ -282,13 +354,20 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
>>      BlockDriverState *bs = bmds->bs;
>>      BlkMigBlock *blk;
>>      int nr_sectors;
>> +    bool skip_chunk = false;
>>
>>      if (bmds->shared_base) {
>>          qemu_mutex_lock_iothread();
>> -        while (cur_sector < total_sectors &&
>> -               !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
>> -                                  &nr_sectors)) {
>> -            cur_sector += nr_sectors;
>> +        if (block_mig_state.dbm_enable) {
>> +            bdrv_is_allocated(bs, cur_sector, BDRV_SECTORS_PER_DIRTY_CHUNK,
>> +                              &nr_sectors);
>> +            skip_chunk = nr_sectors >= BDRV_SECTORS_PER_DIRTY_CHUNK;
>> +        } else {
>> +            while (cur_sector < total_sectors &&
>> +                   !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
>> +                                      &nr_sectors)) {
>> +                cur_sector += nr_sectors;
>> +            }
>>          }
>>          qemu_mutex_unlock_iothread();
>>      }
>> @@ -309,7 +388,8 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
>>          nr_sectors = total_sectors - cur_sector;
>>      }
>>
>> -    blk = blk_create(bmds, cur_sector, nr_sectors);
>> +    blk = blk_create(bmds, cur_sector, nr_sectors,
>> +                     skip_chunk || !block_mig_state.blk_enable);
>>
>>      blk_mig_lock();
>>      block_mig_state.submitted++;
>> @@ -317,8 +397,16 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
>>
>>      bdrv_reset_dirty_bitmap(bs, bmds->dirty_bitmap, cur_sector, nr_sectors);
>>
>> -    blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
>> -                                nr_sectors, blk_mig_read_cb, blk);
>> +    if (block_mig_state.dbm_enable) {
>> +        blk_store_bitmaps(blk);
>> +    }
>> +
>> +    if (block_mig_state.blk_enable && !skip_chunk) {
>> +        blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
>> +                                    nr_sectors, blk_mig_read_cb, blk);
>> +    } else if (block_mig_state.dbm_enable) {
>> +        blk_mig_read_cb(blk, 0);
>> +    }
>>
>>      bmds->cur_sector = cur_sector + nr_sectors;
>>      return (bmds->cur_sector >= total_sectors);
>> @@ -403,6 +491,8 @@ static void init_blk_migration(QEMUFile *f)
>>              DPRINTF("Start full migration for %s\n", bdrv_get_device_name(bs));
>>          }
>>
>> +        bmds->dirty_bitmaps = bdrv_dbm_find_all_named(bs, &bmds->nr_bitmaps);
>> +
>>          QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
>>      }
>>  }
>> @@ -481,20 +571,32 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
>>              } else {
>>                  nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
>>              }
>> -            blk = blk_create(bmds, sector, nr_sectors);
>> +            blk = blk_create(bmds, sector, nr_sectors,
>> +                             !block_mig_state.blk_enable);
>> +
>> +            if (block_mig_state.dbm_enable) {
>> +                blk_store_bitmaps(blk);
>> +            }
>>
>>              if (is_async) {
>> -                blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
>> -                                            nr_sectors, blk_mig_read_cb, blk);
>> +                if (block_mig_state.blk_enable) {
>> +                    blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
>> +                                                nr_sectors, blk_mig_read_cb,
>> +                                                blk);
>> +                } else {
>> +                    blk_mig_read_cb(blk, 0);
>> +                }
>>
>>                  blk_mig_lock();
>>                  block_mig_state.submitted++;
>>                  bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
>>                  blk_mig_unlock();
>>              } else {
>> -                ret = bdrv_read(bmds->bs, sector, blk->buf, nr_sectors);
>> -                if (ret < 0) {
>> -                    goto error;
>> +                if (block_mig_state.blk_enable) {
>> +                    ret = bdrv_read(bmds->bs, sector, blk->buf, nr_sectors);
>> +                    if (ret < 0) {
>> +                        goto error;
>> +                    }
>>                  }
>>                  blk_send(f, blk);
>>
>> @@ -817,10 +919,39 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
>>                  nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
>>              }
>>
>> +            /* load dirty bitmaps */
>> +            if (flags & BLK_MIG_FLAG_HAS_BITMAPS) {
>> +                int i, nr_bitmaps = qemu_get_be32(f);
>> +
>> +                for (i = 0; i < nr_bitmaps; ++i) {
>> +                    char bitmap_name[256];
>> +                    BdrvDirtyBitmap *bitmap;
>> +                    int buf_size, len;
>> +
>> +                    len = qemu_get_byte(f);
>> +                    qemu_get_buffer(f, (uint8_t *)bitmap_name, len);
>> +                    bitmap_name[len] = '\0';
>> +
>> +                    bitmap = bdrv_find_dirty_bitmap(bs, bitmap_name);
>> +                    if (!bitmap) {
>> +                        fprintf(stderr, "Error: unknown dirty bitmap "
>> +                                "%s for block device %s\n",
>> +                                bitmap_name, device_name);
>> +                        return -EINVAL;
>> +                    }
>> +
>> +                    buf_size = bdrv_dbm_data_size(bitmap, nr_sectors);
>> +                    buf = g_malloc(buf_size);
>> +                    qemu_get_buffer(f, buf, buf_size);
>> +                    bdrv_dbm_restore_data(bitmap, buf, addr, nr_sectors);
>> +                    g_free(buf);
>> +                }
>> +            }
>> +
>>              if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
>>                  ret = bdrv_write_zeroes(bs, addr, nr_sectors,
>>                                          BDRV_REQ_MAY_UNMAP);
>> -            } else {
>> +            } else if (flags & BLK_MIG_FLAG_HAS_BLOCK) {
>>                  buf = g_malloc(BLOCK_SIZE);
>>                  qemu_get_buffer(f, buf, BLOCK_SIZE);
>>                  ret = bdrv_write(bs, addr, buf, nr_sectors);
>> @@ -855,6 +986,7 @@ static void block_set_params(const MigrationParams *params, void *opaque)
>>  {
>>      block_mig_state.blk_enable = params->blk;
>>      block_mig_state.shared_base = params->shared;
>> +    block_mig_state.dbm_enable = params->dirty;
>>
>>      /* shared base means that blk_enable = 1 */
>>      block_mig_state.blk_enable |= params->shared;
>> @@ -862,7 +994,8 @@ static void block_set_params(const MigrationParams *params, void *opaque)
>>
>>  static bool block_is_active(void *opaque)
>>  {
>> -    return block_mig_state.blk_enable == 1;
>> +    return block_mig_state.blk_enable == 1 ||
>> +           block_mig_state.dbm_enable == 1;
>>  }
>>
>>  static SaveVMHandlers savevm_block_handlers = {
>> diff --git a/savevm.c b/savevm.c
>> index a598d1d..1299faa 100644
>> --- a/savevm.c
>> +++ b/savevm.c
>> @@ -983,6 +983,7 @@ int qemu_loadvm_state(QEMUFile *f)
>>          }
>>      }
>>
>> +    bdrv_dbm_restore_finish();
>>      cpu_synchronize_all_post_init();
>>
>>      ret = 0;
>>
>
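
For reference, here is a minimal standalone sketch of the scenario in
question. This is NOT QEMU code; every name in it is invented for
illustration. It models why this scheme only propagates bits that become
1: a chunk's named-bitmap state is resent only when the block-migration
dirty bitmap marks that chunk, so a 1 -> 0 transition caused by a block
job (with no accompanying guest write) never reaches the destination.

/* Toy model of bulk + iterative bitmap migration (invented names). */
#include <stdbool.h>
#include <stdio.h>

#define CHUNKS 4

int main(void)
{
    bool named_src[CHUNKS] = { true, true, true, true };     /* source's named bitmap */
    bool named_dst[CHUNKS] = { false, false, false, false }; /* destination's copy */
    bool mig_dirty[CHUNKS] = { false, false, false, false }; /* block-migration bitmap */
    int i;

    /* Bulk phase: the whole named bitmap is sent once. */
    for (i = 0; i < CHUNKS; i++) {
        named_dst[i] = named_src[i];
    }

    /* A block job (e.g. backup) finishes with chunk 2 and clears its
     * bit; no guest write happens, so mig_dirty[2] stays false. */
    named_src[2] = false;

    /* A guest write dirties chunk 0 in both bitmaps. */
    named_src[0] = true;
    mig_dirty[0] = true;

    /* Iterative phase: only chunks dirty for block migration are resent. */
    for (i = 0; i < CHUNKS; i++) {
        if (mig_dirty[i]) {
            named_dst[i] = named_src[i];
        }
    }

    /* Chunk 2 ends up stale on the destination: still 1, although the
     * source cleared it. Conservative, but the job's work is redone. */
    for (i = 0; i < CHUNKS; i++) {
        printf("chunk %d: src=%d dst=%d%s\n", i, named_src[i], named_dst[i],
               named_src[i] != named_dst[i] ? "  <-- stale" : "");
    }
    return 0;
}

Running it prints "chunk 2: src=0 dst=1  <-- stale", which is exactly
the conservative-but-wasteful behavior described above.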