From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
To: zhanghailiang <zhang.zhanghailiang@huawei.com>
Cc: danielcho@qnap.com, qemu-devel@nongnu.org, quintela@redhat.com
Subject: Re: [PATCH V2 7/8] COLO: Migrate dirty pages during the gap of checkpointing
Date: Thu, 12 Mar 2020 19:50:52 +0000 [thread overview]
Message-ID: <20200312195052.GP3211@work-vm> (raw)
In-Reply-To: <20200224065414.36524-8-zhang.zhanghailiang@huawei.com>
* zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
> We can migrate some dirty pages during the gap between checkpoints;
> this reduces the amount of RAM that must be migrated during checkpointing.
>
> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
> ---
> migration/colo.c | 73 ++++++++++++++++++++++++++++++++++++++++--
> migration/migration.h | 1 +
> migration/trace-events | 1 +
> qapi/migration.json | 4 ++-
> 4 files changed, 75 insertions(+), 4 deletions(-)
>
> diff --git a/migration/colo.c b/migration/colo.c
> index 44942c4e23..c36d94072f 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -47,6 +47,13 @@ static COLOMode last_colo_mode;
>
> #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
>
> +#define DEFAULT_RAM_PENDING_CHECK 1000
> +
> +/* should be calculated by bandwidth and max downtime ? */
> +#define THRESHOLD_PENDING_SIZE (100 * 1024 * 1024UL)
In the last version I asked to change these two values to parameters.
Dave
> +static int checkpoint_request;
> +
> bool migration_in_colo_state(void)
> {
> MigrationState *s = migrate_get_current();
> @@ -517,6 +524,20 @@ static void colo_compare_notify_checkpoint(Notifier *notifier, void *data)
> colo_checkpoint_notify(data);
> }
>
> +static bool colo_need_migrate_ram_background(MigrationState *s)
> +{
> + uint64_t pending_size, pend_pre, pend_compat, pend_post;
> + int64_t max_size = THRESHOLD_PENDING_SIZE;
> +
> + qemu_savevm_state_pending(s->to_dst_file, max_size, &pend_pre,
> + &pend_compat, &pend_post);
> + pending_size = pend_pre + pend_compat + pend_post;
> +
> + trace_colo_need_migrate_ram_background(pending_size);
> + return (pending_size >= max_size);
> +}
> +
> +
> static void colo_process_checkpoint(MigrationState *s)
> {
> QIOChannelBuffer *bioc;
> @@ -572,6 +593,8 @@ static void colo_process_checkpoint(MigrationState *s)
>
> timer_mod(s->colo_delay_timer,
> current_time + s->parameters.x_checkpoint_delay);
> + timer_mod(s->pending_ram_check_timer,
> + current_time + DEFAULT_RAM_PENDING_CHECK);
>
> while (s->state == MIGRATION_STATUS_COLO) {
> if (failover_get_state() != FAILOVER_STATUS_NONE) {
> @@ -584,9 +607,30 @@ static void colo_process_checkpoint(MigrationState *s)
> if (s->state != MIGRATION_STATUS_COLO) {
> goto out;
> }
> - ret = colo_do_checkpoint_transaction(s, bioc, fb);
> - if (ret < 0) {
> - goto out;
> + if (atomic_xchg(&checkpoint_request, 0)) {
> + /* start a colo checkpoint */
> + ret = colo_do_checkpoint_transaction(s, bioc, fb);
> + if (ret < 0) {
> + goto out;
> + }
> + } else {
> + if (colo_need_migrate_ram_background(s)) {
> + colo_send_message(s->to_dst_file,
> + COLO_MESSAGE_MIGRATE_RAM_BACKGROUND,
> + &local_err);
> + if (local_err) {
> + goto out;
> + }
> +
> + qemu_savevm_state_iterate(s->to_dst_file, false);
> + qemu_put_byte(s->to_dst_file, QEMU_VM_EOF);
> + ret = qemu_file_get_error(s->to_dst_file);
> + if (ret < 0) {
> + error_setg_errno(&local_err, -ret,
> + "Failed to send dirty pages backgroud");
> + goto out;
> + }
> + }
> }
> }
>
> @@ -627,6 +671,8 @@ out:
> colo_compare_unregister_notifier(&packets_compare_notifier);
> timer_del(s->colo_delay_timer);
> timer_free(s->colo_delay_timer);
> + timer_del(s->pending_ram_check_timer);
> + timer_free(s->pending_ram_check_timer);
> qemu_sem_destroy(&s->colo_checkpoint_sem);
>
> /*
> @@ -644,6 +690,7 @@ void colo_checkpoint_notify(void *opaque)
> MigrationState *s = opaque;
> int64_t next_notify_time;
>
> + atomic_inc(&checkpoint_request);
> qemu_sem_post(&s->colo_checkpoint_sem);
> s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
> next_notify_time = s->colo_checkpoint_time +
> @@ -651,6 +698,19 @@ void colo_checkpoint_notify(void *opaque)
> timer_mod(s->colo_delay_timer, next_notify_time);
> }
>
> +static void colo_pending_ram_check_notify(void *opaque)
> +{
> + int64_t next_notify_time;
> + MigrationState *s = opaque;
> +
> + if (migration_in_colo_state()) {
> + next_notify_time = DEFAULT_RAM_PENDING_CHECK +
> + qemu_clock_get_ms(QEMU_CLOCK_HOST);
> + timer_mod(s->pending_ram_check_timer, next_notify_time);
> + qemu_sem_post(&s->colo_checkpoint_sem);
> + }
> +}
> +
> void migrate_start_colo_process(MigrationState *s)
> {
> qemu_mutex_unlock_iothread();
> @@ -658,6 +718,8 @@ void migrate_start_colo_process(MigrationState *s)
> s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST,
> colo_checkpoint_notify, s);
>
> + s->pending_ram_check_timer = timer_new_ms(QEMU_CLOCK_HOST,
> + colo_pending_ram_check_notify, s);
> qemu_sem_init(&s->colo_exit_sem, 0);
> migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
> MIGRATION_STATUS_COLO);
> @@ -806,6 +868,11 @@ static void colo_wait_handle_message(MigrationIncomingState *mis,
> case COLO_MESSAGE_CHECKPOINT_REQUEST:
> colo_incoming_process_checkpoint(mis, fb, bioc, errp);
> break;
> + case COLO_MESSAGE_MIGRATE_RAM_BACKGROUND:
> + if (qemu_loadvm_state_main(mis->from_src_file, mis) < 0) {
> + error_setg(errp, "Load ram background failed");
> + }
> + break;
> default:
> error_setg(errp, "Got unknown COLO message: %d", msg);
> break;
> diff --git a/migration/migration.h b/migration/migration.h
> index 8473ddfc88..5355259789 100644
> --- a/migration/migration.h
> +++ b/migration/migration.h
> @@ -219,6 +219,7 @@ struct MigrationState
> QemuSemaphore colo_checkpoint_sem;
> int64_t colo_checkpoint_time;
> QEMUTimer *colo_delay_timer;
> + QEMUTimer *pending_ram_check_timer;
>
> /* The first error that has occurred.
> We used the mutex to be able to return the 1st error message */
> diff --git a/migration/trace-events b/migration/trace-events
> index 4ab0a503d2..f2ed0c8645 100644
> --- a/migration/trace-events
> +++ b/migration/trace-events
> @@ -295,6 +295,7 @@ migration_tls_incoming_handshake_complete(void) ""
> colo_vm_state_change(const char *old, const char *new) "Change '%s' => '%s'"
> colo_send_message(const char *msg) "Send '%s' message"
> colo_receive_message(const char *msg) "Receive '%s' message"
> +colo_need_migrate_ram_background(uint64_t pending_size) "Pending 0x%" PRIx64 " dirty ram"
>
> # colo-failover.c
> colo_failover_set_state(const char *new_state) "new state %s"
> diff --git a/qapi/migration.json b/qapi/migration.json
> index 52f3429969..73445f1978 100644
> --- a/qapi/migration.json
> +++ b/qapi/migration.json
> @@ -977,12 +977,14 @@
> #
> # @vmstate-loaded: VM's state has been loaded by SVM.
> #
> +# @migrate-ram-background: Send some dirty pages during the gap of COLO checkpoint
> +#
> # Since: 2.8
> ##
> { 'enum': 'COLOMessage',
> 'data': [ 'checkpoint-ready', 'checkpoint-request', 'checkpoint-reply',
> 'vmstate-send', 'vmstate-size', 'vmstate-received',
> - 'vmstate-loaded' ] }
> + 'vmstate-loaded', 'migrate-ram-background' ] }
>
> ##
> # @COLOMode:
> --
> 2.21.0
>
>
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
next prev parent reply other threads:[~2020-03-12 20:04 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-02-24 6:54 [PATCH V2 0/8] Optimize VM's downtime while do checkpoint in COLO zhanghailiang
2020-02-24 6:54 ` [PATCH V2 1/8] migration: fix COLO broken caused by a previous commit zhanghailiang
2020-02-27 18:36 ` Juan Quintela
2020-02-24 6:54 ` [PATCH V2 2/8] migration/colo: wrap incoming checkpoint process into new helper zhanghailiang
2020-02-24 6:54 ` [PATCH V2 3/8] savevm: Don't call colo_init_ram_cache twice zhanghailiang
2020-02-27 18:37 ` Juan Quintela
2020-02-24 6:54 ` [PATCH V2 4/8] COLO: Optimize memory back-up process zhanghailiang
2020-02-25 2:52 ` Daniel Cho
2020-02-25 3:56 ` Zhanghailiang
2020-03-12 18:44 ` Dr. David Alan Gilbert
2020-02-24 6:54 ` [PATCH V2 5/8] ram/colo: only record bitmap of dirty pages in COLO stage zhanghailiang
2020-03-12 18:55 ` Dr. David Alan Gilbert
2020-02-24 6:54 ` [PATCH V2 6/8] migration: recognize COLO as part of activating process zhanghailiang
2020-03-12 19:42 ` Dr. David Alan Gilbert
2020-02-24 6:54 ` [PATCH V2 7/8] COLO: Migrate dirty pages during the gap of checkpointing zhanghailiang
2020-02-24 15:18 ` Eric Blake
2020-02-25 1:07 ` Zhanghailiang
2020-03-12 19:50 ` Dr. David Alan Gilbert [this message]
2020-02-24 6:54 ` [PATCH V2 8/8] migration/colo: Only flush ram cache while do checkpoint zhanghailiang
2020-03-12 19:51 ` Dr. David Alan Gilbert
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200312195052.GP3211@work-vm \
--to=dgilbert@redhat.com \
--cc=danielcho@qnap.com \
--cc=qemu-devel@nongnu.org \
--cc=quintela@redhat.com \
--cc=zhang.zhanghailiang@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.