From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:59651) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1YLebO-00046Q-Al for qemu-devel@nongnu.org; Wed, 11 Feb 2015 16:13:45 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1YLebL-0005co-1G for qemu-devel@nongnu.org; Wed, 11 Feb 2015 16:13:42 -0500 Received: from mx1.redhat.com ([209.132.183.28]:59941) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1YLebK-0005ce-N7 for qemu-devel@nongnu.org; Wed, 11 Feb 2015 16:13:38 -0500 Message-ID: <54DBC600.9090406@redhat.com> Date: Wed, 11 Feb 2015 16:13:36 -0500 From: John Snow MIME-Version: 1.0 References: <1423532117-14490-1-git-send-email-jsnow@redhat.com> <1423532117-14490-8-git-send-email-jsnow@redhat.com> <54DB95CC.5050205@redhat.com> In-Reply-To: <54DB95CC.5050205@redhat.com> Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [PATCH v12 07/17] qmp: Add support of "dirty-bitmap" sync mode for drive-backup List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Max Reitz , qemu-devel@nongnu.org Cc: kwolf@redhat.com, famz@redhat.com, armbru@redhat.com, vsementsov@parallels.com, stefanha@redhat.com On 02/11/2015 12:47 PM, Max Reitz wrote: > On 2015-02-09 at 20:35, John Snow wrote: >> For "dirty-bitmap" sync mode, the block job will iterate through the >> given dirty bitmap to decide if a sector needs backup (backup all the >> dirty clusters and skip clean ones), just as allocation conditions of >> "top" sync mode. >> >> Signed-off-by: Fam Zheng >> Signed-off-by: John Snow >> --- >> block.c | 9 +++ >> block/backup.c | 149 >> +++++++++++++++++++++++++++++++++++++++------- >> block/mirror.c | 4 ++ >> blockdev.c | 18 +++++- >> hmp.c | 3 +- >> include/block/block.h | 1 + >> include/block/block_int.h | 2 + >> qapi/block-core.json | 13 ++-- >> qmp-commands.hx | 7 ++- >> 9 files changed, 172 insertions(+), 34 deletions(-) >> >> diff --git a/block.c b/block.c >> index 8d84ace..e93fceb 100644 >> --- a/block.c >> +++ b/block.c >> @@ -5648,6 +5648,15 @@ static void bdrv_reset_dirty(BlockDriverState >> *bs, int64_t cur_sector, >> } >> } >> +/** >> + * Advance an HBitmapIter to an arbitrary offset. >> + */ >> +void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset) >> +{ >> + assert(hbi->hb); >> + hbitmap_iter_init(hbi, hbi->hb, offset); >> +} >> + >> int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap >> *bitmap) >> { >> return hbitmap_count(bitmap->bitmap); >> diff --git a/block/backup.c b/block/backup.c >> index 1c535b1..bf8c0e7 100644 >> --- a/block/backup.c >> +++ b/block/backup.c >> @@ -37,6 +37,8 @@ typedef struct CowRequest { >> typedef struct BackupBlockJob { >> BlockJob common; >> BlockDriverState *target; >> + /* bitmap for sync=3Ddirty-bitmap */ >> + BdrvDirtyBitmap *sync_bitmap; >> MirrorSyncMode sync_mode; >> RateLimit limit; >> BlockdevOnError on_source_error; >> @@ -242,6 +244,31 @@ static void backup_complete(BlockJob *job, void >> *opaque) >> g_free(data); >> } >> +static bool coroutine_fn yield_and_check(BackupBlockJob *job) >> +{ >> + if (block_job_is_cancelled(&job->common)) { >> + return true; >> + } >> + >> + /* we need to yield so that qemu_aio_flush() returns. >> + * (without, VM does not reboot) >> + */ >> + if (job->common.speed) { >> + uint64_t delay_ns =3D ratelimit_calculate_delay(&job->limit, >> + >> job->sectors_read); >> + job->sectors_read =3D 0; >> + block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_n= s); >> + } else { >> + block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0); >> + } >> + >> + if (block_job_is_cancelled(&job->common)) { >> + return true; >> + } >> + >> + return false; >> +} >> + >> static void coroutine_fn backup_run(void *opaque) >> { >> BackupBlockJob *job =3D opaque; >> @@ -254,13 +281,13 @@ static void coroutine_fn backup_run(void *opaque= ) >> }; >> int64_t start, end; >> int ret =3D 0; >> + bool error_is_read; >> QLIST_INIT(&job->inflight_reqs); >> qemu_co_rwlock_init(&job->flush_rwlock); >> start =3D 0; >> - end =3D DIV_ROUND_UP(job->common.len / BDRV_SECTOR_SIZE, >> - BACKUP_SECTORS_PER_CLUSTER); >> + end =3D DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE); >> job->bitmap =3D hbitmap_alloc(end, 0); >> @@ -278,28 +305,61 @@ static void coroutine_fn backup_run(void *opaque= ) >> qemu_coroutine_yield(); >> job->common.busy =3D true; >> } >> + } else if (job->sync_mode =3D=3D MIRROR_SYNC_MODE_DIRTY_BITMAP) { >> + /* Dirty Bitmap sync has a slightly different iteration >> method */ >> + HBitmapIter hbi; >> + int64_t sector; >> + int64_t cluster; >> + int64_t last_cluster =3D -1; >> + bool polyrhythmic; >> + >> + bdrv_dirty_iter_init(bs, job->sync_bitmap, &hbi); >> + /* Does the granularity happen to match our backup cluster >> size? */ >> + polyrhythmic =3D >> (bdrv_dirty_bitmap_granularity(job->sync_bitmap) !=3D >> + BACKUP_CLUSTER_SIZE); >> + >> + /* Find the next dirty /sector/ and copy that /cluster/ */ >> + while ((sector =3D hbitmap_iter_next(&hbi)) !=3D -1) { >> + if (yield_and_check(job)) { >> + goto leave; >> + } >> + cluster =3D sector / BACKUP_SECTORS_PER_CLUSTER; >> + >> + /* Play some catchup with the progress meter */ >> + if (cluster !=3D last_cluster + 1) { >> + job->common.offset +=3D ((cluster - last_cluster - 1)= * >> + BACKUP_CLUSTER_SIZE); >> + } > > I guess the "- 1" in the calculation comes from backup_do_cow() > modifying job->common.offset, too. How about simply overwriting it like > "job->common.offset =3D cluster * BACKUP_CLUSTER_SIZE"? And it may be a > good idea to move this update above yield_and_check() because that's th= e > progress we have actually done up to that point (and I imagine > yield_and_check() is the point where the caller might check the progres= s). > Yes: If the last cluster we wrote out was 2, and we're on 4 now, we only=20 need to make up for #3, because #4 will be handled in the usual ways. I don't know how wise this was, but I wanted to only "fake" progress=20 updates for clusters I *know* I skipped in incremental backups, so I=20 wanted to make sure the math worked out. Just overwriting the progress=20 seemed like cheating a little, I guess ... It was kind of just a "testing thing" to just do the additions and see=20 if the math checked out in the end. You may notice that unlike Fam's=20 iotests, I leave the progress checks enabled for my tests. Putting it above the cancellation check does make sense, though. I=20 didn't test cancellations as well as I ought have. >> + >> + do { >> + ret =3D backup_do_cow(bs, cluster * >> BACKUP_SECTORS_PER_CLUSTER, >> + BACKUP_SECTORS_PER_CLUSTER, >> &error_is_read); >> + if ((ret < 0) && >> + backup_error_action(job, error_is_read, -ret) =3D= =3D >> + BLOCK_ERROR_ACTION_REPORT) { >> + goto leave; >> + } >> + } while (ret < 0); >> + >> + /* Advance (or rewind) our iterator if we need to. */ >> + if (polyrhythmic) { >> + bdrv_set_dirty_iter(&hbi, >> + (cluster + 1) * >> BACKUP_SECTORS_PER_CLUSTER); >> + } >> + >> + last_cluster =3D cluster; >> + } >> + >> + /* Play some final catchup with the progress meter */ >> + if (last_cluster + 1 < end) { >> + job->common.offset +=3D ((end - last_cluster - 1) * >> + BACKUP_CLUSTER_SIZE); >> + } >> + > > Superfluous empty line. > >> } else { >> /* Both FULL and TOP SYNC_MODE's require copying.. */ >> for (; start < end; start++) { >> - bool error_is_read; >> - >> - if (block_job_is_cancelled(&job->common)) { >> - break; >> - } >> - >> - /* we need to yield so that qemu_aio_flush() returns. >> - * (without, VM does not reboot) >> - */ >> - if (job->common.speed) { >> - uint64_t delay_ns =3D ratelimit_calculate_delay( >> - &job->limit, job->sectors_read); >> - job->sectors_read =3D 0; >> - block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, >> delay_ns); >> - } else { >> - block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, >> 0); >> - } >> - >> - if (block_job_is_cancelled(&job->common)) { >> + if (yield_and_check(job)) { >> break; >> } >> @@ -351,12 +411,26 @@ static void coroutine_fn backup_run(void *opaque= ) >> } >> } >> +leave: >> notifier_with_return_remove(&before_write); >> /* wait until pending backup_do_cow() calls have completed */ >> qemu_co_rwlock_wrlock(&job->flush_rwlock); >> qemu_co_rwlock_unlock(&job->flush_rwlock); >> + if (job->sync_bitmap) { >> + BdrvDirtyBitmap *bm; >> + if (ret < 0) { >> + /* Merge the successor back into the parent, delete >> nothing. */ >> + bm =3D bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NU= LL); >> + assert(bm); >> + bdrv_enable_dirty_bitmap(job->sync_bitmap); >> + } else { >> + /* Everything is fine, delete this bitmap and install the >> backup. */ >> + bm =3D bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, N= ULL); >> + assert(bm); >> + } >> + } >> hbitmap_free(job->bitmap); >> bdrv_iostatus_disable(target); >> @@ -369,6 +443,7 @@ static void coroutine_fn backup_run(void *opaque) >> void backup_start(BlockDriverState *bs, BlockDriverState *target, >> int64_t speed, MirrorSyncMode sync_mode, >> + BdrvDirtyBitmap *sync_bitmap, >> BlockdevOnError on_source_error, >> BlockdevOnError on_target_error, >> BlockCompletionFunc *cb, void *opaque, >> @@ -412,17 +487,37 @@ void backup_start(BlockDriverState *bs, >> BlockDriverState *target, >> return; >> } >> + if (sync_mode =3D=3D MIRROR_SYNC_MODE_DIRTY_BITMAP) { >> + if (!sync_bitmap) { >> + error_setg(errp, "must provide a valid bitmap name for " >> + "\"dirty-bitmap\" sync mode"); >> + return; >> + } >> + >> + /* Create a new bitmap, and freeze/disable this one. */ >> + if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) >> < 0) { >> + return; >> + } >> + > > Another superfluous empty line. > >> + } else if (sync_bitmap) { >> + error_setg(errp, >> + "a sync_bitmap was provided to backup_run, " >> + "but received an incompatible sync_mode (%s)", >> + MirrorSyncMode_lookup[sync_mode]); >> + return; >> + } >> + >> len =3D bdrv_getlength(bs); >> if (len < 0) { >> error_setg_errno(errp, -len, "unable to get length for '%s'"= , >> bdrv_get_device_name(bs)); >> - return; >> + goto error; >> } >> BackupBlockJob *job =3D block_job_create(&backup_job_driver, bs, >> speed, >> cb, opaque, errp); >> if (!job) { >> - return; >> + goto error; >> } >> bdrv_op_block_all(target, job->common.blocker); >> @@ -431,7 +526,15 @@ void backup_start(BlockDriverState *bs, >> BlockDriverState *target, >> job->on_target_error =3D on_target_error; >> job->target =3D target; >> job->sync_mode =3D sync_mode; >> + job->sync_bitmap =3D sync_mode =3D=3D MIRROR_SYNC_MODE_DIRTY_BITM= AP ? >> + sync_bitmap : NULL; >> job->common.len =3D len; >> job->common.co =3D qemu_coroutine_create(backup_run); >> qemu_coroutine_enter(job->common.co, job); >> + return; >> + >> + error: >> + if (sync_bitmap) { >> + bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL); >> + } >> } >> diff --git a/block/mirror.c b/block/mirror.c >> index 77bd1ed..271dbf3 100644 >> --- a/block/mirror.c >> +++ b/block/mirror.c >> @@ -718,6 +718,10 @@ void mirror_start(BlockDriverState *bs, >> BlockDriverState *target, >> bool is_none_mode; >> BlockDriverState *base; >> + if (mode =3D=3D MIRROR_SYNC_MODE_DIRTY_BITMAP) { >> + error_setg(errp, "Sync mode 'dirty-bitmap' not supported"); >> + return; >> + } >> is_none_mode =3D mode =3D=3D MIRROR_SYNC_MODE_NONE; >> base =3D mode =3D=3D MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NUL= L; >> mirror_start_job(bs, target, replaces, >> diff --git a/blockdev.c b/blockdev.c >> index 826d0c1..3f41e82 100644 >> --- a/blockdev.c >> +++ b/blockdev.c >> @@ -1569,6 +1569,7 @@ static void >> drive_backup_prepare(BlkTransactionState *common, Error **errp) >> backup->sync, >> backup->has_mode, backup->mode, >> backup->has_speed, backup->speed, >> + backup->has_bitmap, backup->bitmap, >> backup->has_on_source_error, >> backup->on_source_error, >> backup->has_on_target_error, >> backup->on_target_error, >> &local_err); >> @@ -2429,6 +2430,7 @@ void qmp_drive_backup(const char *device, const >> char *target, >> enum MirrorSyncMode sync, >> bool has_mode, enum NewImageMode mode, >> bool has_speed, int64_t speed, >> + bool has_bitmap, const char *bitmap, >> bool has_on_source_error, BlockdevOnError >> on_source_error, >> bool has_on_target_error, BlockdevOnError >> on_target_error, >> Error **errp) >> @@ -2436,6 +2438,7 @@ void qmp_drive_backup(const char *device, const >> char *target, >> BlockDriverState *bs; >> BlockDriverState *target_bs; >> BlockDriverState *source =3D NULL; >> + BdrvDirtyBitmap *bmap =3D NULL; >> AioContext *aio_context; >> BlockDriver *drv =3D NULL; >> Error *local_err =3D NULL; >> @@ -2534,7 +2537,16 @@ void qmp_drive_backup(const char *device, const >> char *target, >> bdrv_set_aio_context(target_bs, aio_context); >> - backup_start(bs, target_bs, speed, sync, on_source_error, >> on_target_error, >> + if (has_bitmap) { >> + bmap =3D bdrv_find_dirty_bitmap(bs, bitmap); >> + if (!bmap) { >> + error_setg(errp, "Bitmap '%s' could not be found", bitmap= ); >> + goto out; >> + } >> + } >> + >> + backup_start(bs, target_bs, speed, sync, bmap, >> + on_source_error, on_target_error, >> block_job_cb, bs, &local_err); >> if (local_err !=3D NULL) { >> bdrv_unref(target_bs); >> @@ -2592,8 +2604,8 @@ void qmp_blockdev_backup(const char *device, >> const char *target, >> bdrv_ref(target_bs); >> bdrv_set_aio_context(target_bs, aio_context); >> - backup_start(bs, target_bs, speed, sync, on_source_error, >> on_target_error, >> - block_job_cb, bs, &local_err); >> + backup_start(bs, target_bs, speed, sync, NULL, on_source_error, >> + on_target_error, block_job_cb, bs, &local_err); >> if (local_err !=3D NULL) { >> bdrv_unref(target_bs); >> error_propagate(errp, local_err); >> diff --git a/hmp.c b/hmp.c >> index b47f331..015499f 100644 >> --- a/hmp.c >> +++ b/hmp.c >> @@ -1027,7 +1027,8 @@ void hmp_drive_backup(Monitor *mon, const QDict >> *qdict) >> qmp_drive_backup(device, filename, !!format, format, >> full ? MIRROR_SYNC_MODE_FULL : >> MIRROR_SYNC_MODE_TOP, >> - true, mode, false, 0, false, 0, false, 0, &err); >> + true, mode, false, 0, false, NULL, >> + false, 0, false, 0, &err); >> hmp_handle_error(mon, &err); >> } >> diff --git a/include/block/block.h b/include/block/block.h >> index b2d84d6..8589e77 100644 >> --- a/include/block/block.h >> +++ b/include/block/block.h >> @@ -469,6 +469,7 @@ void bdrv_reset_dirty_bitmap(BlockDriverState *bs, >> BdrvDirtyBitmap *bitmap, >> int64_t cur_sector, int nr_sectors); >> void bdrv_dirty_iter_init(BlockDriverState *bs, >> BdrvDirtyBitmap *bitmap, struct >> HBitmapIter *hbi); >> +void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset); >> int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap >> *bitmap); >> void bdrv_enable_copy_on_read(BlockDriverState *bs); >> diff --git a/include/block/block_int.h b/include/block/block_int.h >> index 7ad1950..2233790 100644 >> --- a/include/block/block_int.h >> +++ b/include/block/block_int.h >> @@ -588,6 +588,7 @@ void mirror_start(BlockDriverState *bs, >> BlockDriverState *target, >> * @target: Block device to write to. >> * @speed: The maximum speed, in bytes per second, or 0 for unlimite= d. >> * @sync_mode: What parts of the disk image should be copied to the >> destination. >> + * @sync_bitmap: The dirty bitmap if sync_mode is >> MIRROR_SYNC_MODE_DIRTY_BITMAP. >> * @on_source_error: The action to take upon error reading from the >> source. >> * @on_target_error: The action to take upon error writing to the >> target. >> * @cb: Completion function for the job. >> @@ -598,6 +599,7 @@ void mirror_start(BlockDriverState *bs, >> BlockDriverState *target, >> */ >> void backup_start(BlockDriverState *bs, BlockDriverState *target, >> int64_t speed, MirrorSyncMode sync_mode, >> + BdrvDirtyBitmap *sync_bitmap, >> BlockdevOnError on_source_error, >> BlockdevOnError on_target_error, >> BlockCompletionFunc *cb, void *opaque, >> diff --git a/qapi/block-core.json b/qapi/block-core.json >> index 940eff7..9c5a99c 100644 >> --- a/qapi/block-core.json >> +++ b/qapi/block-core.json >> @@ -508,10 +508,12 @@ >> # >> # @none: only copy data written from now on >> # >> +# @dirty-bitmap: only copy data described by the dirty bitmap. Since: >> 2.3 >> +# >> # Since: 1.3 >> ## >> { 'enum': 'MirrorSyncMode', >> - 'data': ['top', 'full', 'none'] } >> + 'data': ['top', 'full', 'none', 'dirty-bitmap'] } >> ## >> # @BlockJobType: >> @@ -686,14 +688,17 @@ >> # probe if @mode is 'existing', else the format of the sour= ce >> # >> # @sync: what parts of the disk image should be copied to the >> destination >> -# (all the disk, only the sectors allocated in the topmost >> image, or >> -# only new I/O). >> +# (all the disk, only the sectors allocated in the topmost >> image, from a >> +# dirty bitmap, or only new I/O). >> # >> # @mode: #optional whether and how QEMU should create a new image, >> default is >> # 'absolute-paths'. >> # >> # @speed: #optional the maximum speed, in bytes per second >> # >> +# @bitmap: #optional the name of dirty bitmap if sync is "dirty-bitma= p" >> +# (Since 2.3) >> +# >> # @on-source-error: #optional the action to take on an error on the >> source, >> # default 'report'. 'stop' and 'enospc' can only >> be used >> # if the block device supports io-status (see >> BlockInfo). >> @@ -711,7 +716,7 @@ >> { 'type': 'DriveBackup', >> 'data': { 'device': 'str', 'target': 'str', '*format': 'str', >> 'sync': 'MirrorSyncMode', '*mode': 'NewImageMode', >> - '*speed': 'int', >> + '*speed': 'int', '*bitmap': 'str', >> '*on-source-error': 'BlockdevOnError', >> '*on-target-error': 'BlockdevOnError' } } >> diff --git a/qmp-commands.hx b/qmp-commands.hx >> index ce7782f..5aa3845 100644 >> --- a/qmp-commands.hx >> +++ b/qmp-commands.hx >> @@ -1048,7 +1048,7 @@ EQMP >> { >> .name =3D "drive-backup", >> .args_type =3D >> "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?," >> - "on-source-error:s?,on-target-error:s?", >> + "bitmap:s?,on-source-error:s?,on-target-error:s= ?", >> .mhandler.cmd_new =3D qmp_marshal_input_drive_backup, >> }, >> @@ -1075,8 +1075,9 @@ Arguments: >> (json-string, optional) >> - "sync": what parts of the disk image should be copied to the >> destination; >> possibilities include "full" for all the disk, "top" for only the >> sectors >> - allocated in the topmost image, or "none" to only replicate new I/O >> - (MirrorSyncMode). >> + allocated in the topmost image, "dirty-bitmap" for only the dirty >> sectors in >> + the bitmap, or "none" to only replicate new I/O (MirrorSyncMode). >> +- "bitmap": dirty bitmap name for sync=3D=3Ddirty-bitmap >> - "mode": whether and how QEMU should create a new image >> (NewImageMode, optional, default 'absolute-paths') >> - "speed": the maximum speed, in bytes per second (json-int, optiona= l) > > Looks good to me in general, now I need to find out what the successor > bitmap is used for; but I guess I'll find that out by reviewing the res= t > of this series. > > Max --=20 =E2=80=94js