* [PATCH v1] migration: refactor migration_completion
@ 2023-07-14 12:48 Wei Wang
2023-07-18 5:44 ` Isaku Yamahata
0 siblings, 1 reply; 7+ messages in thread
From: Wei Wang @ 2023-07-14 12:48 UTC (permalink / raw)
To: quintela, peterx; +Cc: qemu-devel, Wei Wang
The current migration_completion() function is a bit long. Refactor the long
implementation into different subfunctions:
- migration_completion_precopy: completion code related to precopy
- migration_completion_postcopy: completion code related to postcopy
- close_return_path_on_source: return path (rp) thread cleanup on migration
completion. It is named to match open_return_path_on_source.
This improves readability and makes future updates easier (e.g. adding new
subfunctions when completion code for new features is needed). No
functional changes intended.
Signed-off-by: Wei Wang <wei.w.wang@intel.com>
---
migration/migration.c | 181 +++++++++++++++++++++++++-----------------
1 file changed, 106 insertions(+), 75 deletions(-)
diff --git a/migration/migration.c b/migration/migration.c
index 91bba630a8..83f1c74f99 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2058,6 +2058,21 @@ static int await_return_path_close_on_source(MigrationState *ms)
return ms->rp_state.error;
}
+static int close_return_path_on_source(MigrationState *ms)
+{
+ int ret;
+
+ if (!ms->rp_state.rp_thread_created) {
+ return 0;
+ }
+
+ trace_migration_return_path_end_before();
+ ret = await_return_path_close_on_source(ms);
+ trace_migration_return_path_end_after(ret);
+
+ return ret;
+}
+
static inline void
migration_wait_main_channel(MigrationState *ms)
{
@@ -2288,66 +2303,107 @@ static int migration_maybe_pause(MigrationState *s,
return s->state == new_state ? 0 : -EINVAL;
}
-/**
- * migration_completion: Used by migration_thread when there's not much left.
- * The caller 'breaks' the loop when this returns.
- *
- * @s: Current migration state
- */
-static void migration_completion(MigrationState *s)
+static int migration_completion_precopy(MigrationState *s,
+ int *current_active_state)
{
int ret;
- int current_active_state = s->state;
- if (s->state == MIGRATION_STATUS_ACTIVE) {
- qemu_mutex_lock_iothread();
- s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
- qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
+ qemu_mutex_lock_iothread();
+ s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+ qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
- s->vm_old_state = runstate_get();
- global_state_store();
+ s->vm_old_state = runstate_get();
+ global_state_store();
- ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
- trace_migration_completion_vm_stop(ret);
- if (ret >= 0) {
- ret = migration_maybe_pause(s, &current_active_state,
- MIGRATION_STATUS_DEVICE);
- }
- if (ret >= 0) {
- /*
- * Inactivate disks except in COLO, and track that we
- * have done so in order to remember to reactivate
- * them if migration fails or is cancelled.
- */
- s->block_inactive = !migrate_colo();
- migration_rate_set(RATE_LIMIT_DISABLED);
- ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
- s->block_inactive);
- }
+ ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+ trace_migration_completion_vm_stop(ret);
+ if (ret < 0) {
+ goto out_unlock;
+ }
- qemu_mutex_unlock_iothread();
+ ret = migration_maybe_pause(s, current_active_state,
+ MIGRATION_STATUS_DEVICE);
+ if (ret < 0) {
+ goto out_unlock;
+ }
- if (ret < 0) {
- goto fail;
- }
- } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
- trace_migration_completion_postcopy_end();
+ /*
+ * Inactivate disks except in COLO, and track that we have done so in order
+ * to remember to reactivate them if migration fails or is cancelled.
+ */
+ s->block_inactive = !migrate_colo();
+ migration_rate_set(RATE_LIMIT_DISABLED);
+ ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
+ s->block_inactive);
+out_unlock:
+ qemu_mutex_unlock_iothread();
+ return ret;
+}
- qemu_mutex_lock_iothread();
- qemu_savevm_state_complete_postcopy(s->to_dst_file);
- qemu_mutex_unlock_iothread();
+static int migration_completion_postcopy(MigrationState *s)
+{
+ trace_migration_completion_postcopy_end();
+
+ qemu_mutex_lock_iothread();
+ qemu_savevm_state_complete_postcopy(s->to_dst_file);
+ qemu_mutex_unlock_iothread();
+
+ /*
+ * Shutdown the postcopy fast path thread. This is only needed when dest
+ * QEMU binary is old (7.1/7.2). QEMU 8.0+ doesn't need this.
+ */
+ if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
+ postcopy_preempt_shutdown_file(s);
+ }
+
+ trace_migration_completion_postcopy_end_after_complete();
+
+ return 0;
+}
+static void migration_completion_failed(MigrationState *s,
+ int current_active_state)
+{
+ if (s->block_inactive && (s->state == MIGRATION_STATUS_ACTIVE ||
+ s->state == MIGRATION_STATUS_DEVICE)) {
/*
- * Shutdown the postcopy fast path thread. This is only needed
- * when dest QEMU binary is old (7.1/7.2). QEMU 8.0+ doesn't need
- * this.
+ * If not doing postcopy, vm_start() will be called: let's
+ * regain control on images.
*/
- if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
- postcopy_preempt_shutdown_file(s);
+ Error *local_err = NULL;
+
+ qemu_mutex_lock_iothread();
+ bdrv_activate_all(&local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ } else {
+ s->block_inactive = false;
}
+ qemu_mutex_unlock_iothread();
+ }
- trace_migration_completion_postcopy_end_after_complete();
- } else {
+ migrate_set_state(&s->state, current_active_state,
+ MIGRATION_STATUS_FAILED);
+}
+
+/**
+ * migration_completion: Used by migration_thread when there's not much left.
+ * The caller 'breaks' the loop when this returns.
+ *
+ * @s: Current migration state
+ */
+static void migration_completion(MigrationState *s)
+{
+ int ret = -1;
+ int current_active_state = s->state;
+
+ if (s->state == MIGRATION_STATUS_ACTIVE) {
+ ret = migration_completion_precopy(s, &current_active_state);
+ } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
+ ret = migration_completion_postcopy(s);
+ }
+
+ if (ret < 0) {
goto fail;
}
@@ -2357,14 +2413,8 @@ static void migration_completion(MigrationState *s)
* it will wait for the destination to send it's status in
* a SHUT command).
*/
- if (s->rp_state.rp_thread_created) {
- int rp_error;
- trace_migration_return_path_end_before();
- rp_error = await_return_path_close_on_source(s);
- trace_migration_return_path_end_after(rp_error);
- if (rp_error) {
- goto fail;
- }
+ if (close_return_path_on_source(s) < 0) {
+ goto fail;
}
if (qemu_file_get_error(s->to_dst_file)) {
@@ -2384,26 +2434,7 @@ static void migration_completion(MigrationState *s)
return;
fail:
- if (s->block_inactive && (s->state == MIGRATION_STATUS_ACTIVE ||
- s->state == MIGRATION_STATUS_DEVICE)) {
- /*
- * If not doing postcopy, vm_start() will be called: let's
- * regain control on images.
- */
- Error *local_err = NULL;
-
- qemu_mutex_lock_iothread();
- bdrv_activate_all(&local_err);
- if (local_err) {
- error_report_err(local_err);
- } else {
- s->block_inactive = false;
- }
- qemu_mutex_unlock_iothread();
- }
-
- migrate_set_state(&s->state, current_active_state,
- MIGRATION_STATUS_FAILED);
+ migration_completion_failed(s, current_active_state);
}
/**
--
2.27.0
* Re: [PATCH v1] migration: refactor migration_completion
2023-07-14 12:48 [PATCH v1] migration: refactor migration_completion Wei Wang
@ 2023-07-18 5:44 ` Isaku Yamahata
2023-07-18 13:25 ` Wang, Wei W
0 siblings, 1 reply; 7+ messages in thread
From: Isaku Yamahata @ 2023-07-18 5:44 UTC (permalink / raw)
To: Wei Wang; +Cc: quintela, peterx, qemu-devel, isaku.yamahata
On Fri, Jul 14, 2023 at 08:48:23PM +0800,
Wei Wang <wei.w.wang@intel.com> wrote:
> The current migration_completion() function is a bit long. Refactor the long
> implementation into different subfunctions:
> - migration_completion_precopy: completion code related to precopy
> - migration_completion_postcopy: completion code related to postcopy
> - close_return_path_on_source: return path (rp) thread cleanup on migration
> completion. It is named to match open_return_path_on_source.
>
> This improves readability and makes future updates easier (e.g. adding new
> subfunctions when completion code for new features is needed). No
> functional changes intended.
>
> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> ---
> migration/migration.c | 181 +++++++++++++++++++++++++-----------------
> 1 file changed, 106 insertions(+), 75 deletions(-)
>
> diff --git a/migration/migration.c b/migration/migration.c
> index 91bba630a8..83f1c74f99 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -2058,6 +2058,21 @@ static int await_return_path_close_on_source(MigrationState *ms)
> return ms->rp_state.error;
> }
>
> +static int close_return_path_on_source(MigrationState *ms)
> +{
> + int ret;
> +
> + if (!ms->rp_state.rp_thread_created) {
> + return 0;
> + }
> +
> + trace_migration_return_path_end_before();
> + ret = await_return_path_close_on_source(ms);
> + trace_migration_return_path_end_after(ret);
> +
> + return ret;
> +}
> +
There is only one caller, migration_completion(). We can consolidate
two functions, await_return_path_close_on_source() and
close_return_path_on_source(), into a single function.
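A rough sketch of what that consolidation could look like (illustration
only, not a tested patch; the elided part is the current body of
await_return_path_close_on_source(), which already returns
ms->rp_state.error):

    static int close_return_path_on_source(MigrationState *ms)
    {
        int ret;

        if (!ms->rp_state.rp_thread_created) {
            return 0;
        }

        trace_migration_return_path_end_before();

        /* ... existing await_return_path_close_on_source() body ... */
        ret = ms->rp_state.error;

        trace_migration_return_path_end_after(ret);

        return ret;
    }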
> static inline void
> migration_wait_main_channel(MigrationState *ms)
> {
> @@ -2288,66 +2303,107 @@ static int migration_maybe_pause(MigrationState *s,
> return s->state == new_state ? 0 : -EINVAL;
> }
>
> -/**
> - * migration_completion: Used by migration_thread when there's not much left.
> - * The caller 'breaks' the loop when this returns.
> - *
> - * @s: Current migration state
> - */
> -static void migration_completion(MigrationState *s)
> +static int migration_completion_precopy(MigrationState *s,
> + int *current_active_state)
> {
> int ret;
> - int current_active_state = s->state;
>
> - if (s->state == MIGRATION_STATUS_ACTIVE) {
> - qemu_mutex_lock_iothread();
> - s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
> - qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
> + qemu_mutex_lock_iothread();
> + s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
> + qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
>
> - s->vm_old_state = runstate_get();
> - global_state_store();
> + s->vm_old_state = runstate_get();
> + global_state_store();
>
> - ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
> - trace_migration_completion_vm_stop(ret);
> - if (ret >= 0) {
> - ret = migration_maybe_pause(s, &current_active_state,
> - MIGRATION_STATUS_DEVICE);
> - }
> - if (ret >= 0) {
> - /*
> - * Inactivate disks except in COLO, and track that we
> - * have done so in order to remember to reactivate
> - * them if migration fails or is cancelled.
> - */
> - s->block_inactive = !migrate_colo();
> - migration_rate_set(RATE_LIMIT_DISABLED);
> - ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
> - s->block_inactive);
> - }
> + ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
> + trace_migration_completion_vm_stop(ret);
> + if (ret < 0) {
> + goto out_unlock;
> + }
>
> - qemu_mutex_unlock_iothread();
> + ret = migration_maybe_pause(s, current_active_state,
> + MIGRATION_STATUS_DEVICE);
> + if (ret < 0) {
> + goto out_unlock;
> + }
>
> - if (ret < 0) {
> - goto fail;
> - }
> - } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
> - trace_migration_completion_postcopy_end();
> + /*
> + * Inactivate disks except in COLO, and track that we have done so in order
> + * to remember to reactivate them if migration fails or is cancelled.
> + */
> + s->block_inactive = !migrate_colo();
> + migration_rate_set(RATE_LIMIT_DISABLED);
> + ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
> + s->block_inactive);
> +out_unlock:
> + qemu_mutex_unlock_iothread();
> + return ret;
> +}
>
> - qemu_mutex_lock_iothread();
> - qemu_savevm_state_complete_postcopy(s->to_dst_file);
> - qemu_mutex_unlock_iothread();
> +static int migration_completion_postcopy(MigrationState *s)
> +{
> + trace_migration_completion_postcopy_end();
> +
> + qemu_mutex_lock_iothread();
> + qemu_savevm_state_complete_postcopy(s->to_dst_file);
> + qemu_mutex_unlock_iothread();
> +
> + /*
> + * Shutdown the postcopy fast path thread. This is only needed when dest
> + * QEMU binary is old (7.1/7.2). QEMU 8.0+ doesn't need this.
> + */
> + if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
> + postcopy_preempt_shutdown_file(s);
> + }
> +
> + trace_migration_completion_postcopy_end_after_complete();
> +
> + return 0;
Always return 0? Make it void.
> +}
>
> +static void migration_completion_failed(MigrationState *s,
> + int current_active_state)
> +{
> + if (s->block_inactive && (s->state == MIGRATION_STATUS_ACTIVE ||
> + s->state == MIGRATION_STATUS_DEVICE)) {
> /*
> - * Shutdown the postcopy fast path thread. This is only needed
> - * when dest QEMU binary is old (7.1/7.2). QEMU 8.0+ doesn't need
> - * this.
> + * If not doing postcopy, vm_start() will be called: let's
> + * regain control on images.
> */
> - if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
> - postcopy_preempt_shutdown_file(s);
> + Error *local_err = NULL;
> +
> + qemu_mutex_lock_iothread();
> + bdrv_activate_all(&local_err);
> + if (local_err) {
> + error_report_err(local_err);
> + } else {
> + s->block_inactive = false;
> }
> + qemu_mutex_unlock_iothread();
> + }
>
> - trace_migration_completion_postcopy_end_after_complete();
> - } else {
> + migrate_set_state(&s->state, current_active_state,
> + MIGRATION_STATUS_FAILED);
> +}
> +
> +/**
> + * migration_completion: Used by migration_thread when there's not much left.
> + * The caller 'breaks' the loop when this returns.
> + *
> + * @s: Current migration state
> + */
> +static void migration_completion(MigrationState *s)
> +{
> + int ret = -1;
> + int current_active_state = s->state;
> +
> + if (s->state == MIGRATION_STATUS_ACTIVE) {
> + ret = migration_completion_precopy(s, &current_active_state);
> + } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
> + ret = migration_completion_postcopy(s);
Here we can set ret = 0.
> + }
> +
> + if (ret < 0) {
> goto fail;
> }
>
> @@ -2357,14 +2413,8 @@ static void migration_completion(MigrationState *s)
> * it will wait for the destination to send it's status in
> * a SHUT command).
> */
> - if (s->rp_state.rp_thread_created) {
> - int rp_error;
> - trace_migration_return_path_end_before();
> - rp_error = await_return_path_close_on_source(s);
> - trace_migration_return_path_end_after(rp_error);
> - if (rp_error) {
> - goto fail;
> - }
> + if (close_return_path_on_source(s) < 0) {
> + goto fail;
> }
>
> if (qemu_file_get_error(s->to_dst_file)) {
> @@ -2384,26 +2434,7 @@ static void migration_completion(MigrationState *s)
> return;
>
> fail:
> - if (s->block_inactive && (s->state == MIGRATION_STATUS_ACTIVE ||
> - s->state == MIGRATION_STATUS_DEVICE)) {
> - /*
> - * If not doing postcopy, vm_start() will be called: let's
> - * regain control on images.
> - */
> - Error *local_err = NULL;
> -
> - qemu_mutex_lock_iothread();
> - bdrv_activate_all(&local_err);
> - if (local_err) {
> - error_report_err(local_err);
> - } else {
> - s->block_inactive = false;
> - }
> - qemu_mutex_unlock_iothread();
> - }
> -
> - migrate_set_state(&s->state, current_active_state,
> - MIGRATION_STATUS_FAILED);
> + migration_completion_failed(s, current_active_state);
> }
>
> /**
> --
> 2.27.0
>
>
--
Isaku Yamahata <isaku.yamahata@gmail.com>
* RE: [PATCH v1] migration: refactor migration_completion
2023-07-18 5:44 ` Isaku Yamahata
@ 2023-07-18 13:25 ` Wang, Wei W
2023-07-20 20:38 ` Peter Xu
0 siblings, 1 reply; 7+ messages in thread
From: Wang, Wei W @ 2023-07-18 13:25 UTC (permalink / raw)
To: Isaku Yamahata
Cc: quintela@redhat.com, peterx@redhat.com, qemu-devel@nongnu.org
On Tuesday, July 18, 2023 1:44 PM, Isaku Yamahata wrote:
> > --- a/migration/migration.c
> > +++ b/migration/migration.c
> > @@ -2058,6 +2058,21 @@ static int
> await_return_path_close_on_source(MigrationState *ms)
> > return ms->rp_state.error;
> > }
> >
> > +static int close_return_path_on_source(MigrationState *ms) {
> > + int ret;
> > +
> > + if (!ms->rp_state.rp_thread_created) {
> > + return 0;
> > + }
> > +
> > + trace_migration_return_path_end_before();
> > + ret = await_return_path_close_on_source(ms);
> > + trace_migration_return_path_end_after(ret);
> > +
> > + return ret;
> > +}
> > +
>
> There is only one caller, migration_completion(). We can consolidate two
> functions, await_return_path_close_on_source() and
> close_return_path_on_source(), into a single function.
Sounds good, thanks.
> > +static int migration_completion_postcopy(MigrationState *s) {
> > + trace_migration_completion_postcopy_end();
> > +
> > + qemu_mutex_lock_iothread();
> > + qemu_savevm_state_complete_postcopy(s->to_dst_file);
> > + qemu_mutex_unlock_iothread();
> > +
> > + /*
> > + * Shutdown the postcopy fast path thread. This is only needed when
> dest
> > + * QEMU binary is old (7.1/7.2). QEMU 8.0+ doesn't need this.
> > + */
> > + if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
> > + postcopy_preempt_shutdown_file(s);
> > + }
> > +
> > + trace_migration_completion_postcopy_end_after_complete();
> > +
> > + return 0;
>
> Always return 0? Make it void.
OK.
> > +static void migration_completion(MigrationState *s) {
> > + int ret = -1;
> > + int current_active_state = s->state;
> > +
> > + if (s->state == MIGRATION_STATUS_ACTIVE) {
> > > + ret = migration_completion_precopy(s, &current_active_state);
> > + } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
> > + ret = migration_completion_postcopy(s);
>
> Here we can set ret = 0.
Yes, after migration_completion_postcopy is made "void".
* Re: [PATCH v1] migration: refactor migration_completion
2023-07-18 13:25 ` Wang, Wei W
@ 2023-07-20 20:38 ` Peter Xu
2023-07-21 11:14 ` Wang, Wei W
0 siblings, 1 reply; 7+ messages in thread
From: Peter Xu @ 2023-07-20 20:38 UTC (permalink / raw)
To: Wang, Wei W; +Cc: Isaku Yamahata, quintela@redhat.com, qemu-devel@nongnu.org
On Tue, Jul 18, 2023 at 01:25:12PM +0000, Wang, Wei W wrote:
> On Tuesday, July 18, 2023 1:44 PM, Isaku Yamahata wrote:
> > > --- a/migration/migration.c
> > > +++ b/migration/migration.c
> > > @@ -2058,6 +2058,21 @@ static int
> > await_return_path_close_on_source(MigrationState *ms)
> > > return ms->rp_state.error;
> > > }
> > >
> > > +static int close_return_path_on_source(MigrationState *ms) {
> > > + int ret;
> > > +
> > > + if (!ms->rp_state.rp_thread_created) {
> > > + return 0;
> > > + }
> > > +
> > > + trace_migration_return_path_end_before();
> > > + ret = await_return_path_close_on_source(ms);
> > > + trace_migration_return_path_end_after(ret);
> > > +
> > > + return ret;
> > > +}
> > > +
> >
> > There is only one caller, migration_completion(). We can consolidate two
> > functions, await_return_path_close_on_source() and
> > close_return_path_on_source(), into a single function.
>
> Sounds good, thanks.
>
> > > +static int migration_completion_postcopy(MigrationState *s) {
> > > + trace_migration_completion_postcopy_end();
> > > +
> > > + qemu_mutex_lock_iothread();
> > > + qemu_savevm_state_complete_postcopy(s->to_dst_file);
> > > + qemu_mutex_unlock_iothread();
> > > +
> > > + /*
> > > + * Shutdown the postcopy fast path thread. This is only needed when
> > dest
> > > + * QEMU binary is old (7.1/7.2). QEMU 8.0+ doesn't need this.
> > > + */
> > > + if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
> > > + postcopy_preempt_shutdown_file(s);
> > > + }
> > > +
> > > + trace_migration_completion_postcopy_end_after_complete();
> > > +
> > > + return 0;
> >
> > Always return 0? Make it void.
>
> OK.
>
> > > +static void migration_completion(MigrationState *s) {
> > > + int ret = -1;
> > > + int current_active_state = s->state;
> > > +
> > > + if (s->state == MIGRATION_STATUS_ACTIVE) {
> > > + ret = migration_completion_precopy(s, &current_active_state);
> > > + } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
> > > + ret = migration_completion_postcopy(s);
> >
> > Here we can set ret = 0.
>
> Yes, after migration_completion_postcopy is made "void".
Looks good to me, after addressing Isaku's comments.
The current_active_state is very unfortunate, along with most of the calls
to migrate_set_state() - I bet most of the code will definitely go wrong if
that cmpxchg didn't succeed inside of migrate_set_state(), IOW in most
cases we simply always want:
migrate_set_state(&s->state, s->state, XXX);
Not sure whether one pre-requisite patch is good to have so we can rename
migrate_set_state() to something like __migrate_set_state(), then:
    migrate_set_state(s, XXX) {
        __migrate_set_state(&s->state, s->state, XXX);
    }
I don't even know whether there's any call site that will need
__migrate_set_state() for real..
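For illustration, that pre-requisite patch could be shaped like the sketch
below (just a sketch, not what would be merged; it assumes s->state stays a
plain int and that the current cmpxchg-based setter keeps its body
unchanged under the double-underscore name):

    /* today's migrate_set_state(), renamed; body left as it is now */
    static void __migrate_set_state(int *state, int old_state, int new_state)
    {
        /* existing cmpxchg-based update of *state goes here, unchanged */
    }

    /* convenience wrapper for the common "don't care about the old state" case */
    static void migrate_set_state(MigrationState *s, int new_state)
    {
        __migrate_set_state(&s->state, s->state, new_state);
    }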
Thanks,
--
Peter Xu
* RE: [PATCH v1] migration: refactor migration_completion
2023-07-20 20:38 ` Peter Xu
@ 2023-07-21 11:14 ` Wang, Wei W
2023-07-26 17:10 ` Peter Xu
0 siblings, 1 reply; 7+ messages in thread
From: Wang, Wei W @ 2023-07-21 11:14 UTC (permalink / raw)
To: Peter Xu; +Cc: Isaku Yamahata, quintela@redhat.com, qemu-devel@nongnu.org
On Friday, July 21, 2023 4:38 AM, Peter Xu wrote:
> Looks good to me, after addressing Isaku's comments.
>
> The current_active_state is very unfortunate, along with most of the calls to
> migrate_set_state() - I bet most of the code will definitely go wrong if that
> cmpxchg didn't succeed inside of migrate_set_state(), IOW in most cases we
> simply always want:
Can you share examples where it could be wrong?
(If it has bugs, we need to fix them.)
>
> migrate_set_state(&s->state, s->state, XXX);
>
> Not sure whether one pre-requisite patch is good to have so we can rename
> migrate_set_state() to something like __migrate_set_state(), then:
>
> migrate_set_state(s, XXX) {
> __migrate_set_state(&s->state, s->state, XXX);
> }
>
> I don't even know whether there's any call site that will need
> __migrate_set_state() for real..
>
It seems this would break the use of "MIGRATION_STATUS_CANCELLING".
For example,
- In migration_maybe_pause:
      migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
                        new_state);
  If the current s->state isn't MIGRATION_STATUS_PRE_SWITCHOVER (it could
  be MIGRATION_STATUS_CANCELLING), then s->state won't be updated to
  new_state.
- Then, in migration_completion, the following update to s->state won't succeed:
      migrate_set_state(&s->state, current_active_state,
                        MIGRATION_STATUS_COMPLETED);
- Finally, when reaching migration_iteration_finish(), s->state is
  MIGRATION_STATUS_CANCELLING instead of MIGRATION_STATUS_COMPLETED (a toy
  illustration of this pattern follows below).
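A toy standalone program (not QEMU code; the enum and helper below are made
up just to mirror the pattern) shows how the chain gets stuck once the
state has been moved to CANCELLING elsewhere:

    #include <stdatomic.h>
    #include <stdio.h>

    enum { ACTIVE, PRE_SWITCHOVER, DEVICE, CANCELLING, COMPLETED };

    /* same pattern as migrate_set_state(): update only if the old state matches */
    static void set_state(atomic_int *state, int expected_old, int new_state)
    {
        atomic_compare_exchange_strong(state, &expected_old, new_state);
    }

    int main(void)
    {
        atomic_int state = CANCELLING;  /* "migrate_cancel" already flipped it */

        set_state(&state, PRE_SWITCHOVER, DEVICE);  /* skipped: old state mismatch */
        set_state(&state, DEVICE, COMPLETED);       /* skipped as well */

        printf("final state: %d (CANCELLING is %d)\n",
               atomic_load(&state), CANCELLING);
        return 0;
    }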
* Re: [PATCH v1] migration: refactor migration_completion
2023-07-21 11:14 ` Wang, Wei W
@ 2023-07-26 17:10 ` Peter Xu
2023-07-27 14:52 ` Wang, Wei W
0 siblings, 1 reply; 7+ messages in thread
From: Peter Xu @ 2023-07-26 17:10 UTC (permalink / raw)
To: Wang, Wei W; +Cc: Isaku Yamahata, quintela@redhat.com, qemu-devel@nongnu.org
On Fri, Jul 21, 2023 at 11:14:55AM +0000, Wang, Wei W wrote:
> On Friday, July 21, 2023 4:38 AM, Peter Xu wrote:
> > Looks good to me, after addressing Isaku's comments.
> >
> > The current_active_state is very unfortunate, along with most of the calls to
> > migrate_set_state() - I bet most of the code will definitely go wrong if that
> > cmpxchg didn't succeed inside of migrate_set_state(), IOW in most cases we
> > simply always want:
>
> Can you share examples where it could be wrong?
> (If it has bugs, we need to fix)
Nope. What I meant is that in most cases we want to set the state without
caring much about the old state, so at least we could have a helper like
the one quoted below and simply call migrate_set_state(s, STATE) when we
don't care about the old state.
>
> >
> > migrate_set_state(&s->state, s->state, XXX);
> >
> > Not sure whether one pre-requisite patch is good to have so we can rename
> > migrate_set_state() to something like __migrate_set_state(), then:
> >
> > migrate_set_state(s, XXX) {
> > __migrate_set_state(&s->state, s->state, XXX);
> > }
> >
> > I don't even know whether there's any call site that will need
> > __migrate_set_state() for real..
> >
>
> Seems this would break the use of "MIGRATION_STATUS_CANCELLING".
> For example,
> - In migration_maybe_pause:
> migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
> new_state);
> If the current s->state isn't MIGRATION_STATUS_PRE_SWITCHOVER (could
> be MIGRATION_STATUS_CANCELLING), then s->state won’t be updated to
> new_state.
> - Then, in migration_completion, the following update to s->state won't succeed:
> migrate_set_state(&s->state, current_active_state,
> MIGRATION_STATUS_COMPLETED);
>
> - Finally, when reaching migration_iteration_finish(), s->state is
> MIGRATION_STATUS_CANCELLING, instead of MIGRATION_STATUS_COMPLETED.
The whole set of state changes just looks flaky to me in general, even with
the help of the old_state cmpxchg.
E.g., I'm wondering whether the race below can happen, assuming we're starting
in the ACTIVE state and are just about to complete migration:
  main thread                            migration thread
  -----------                            ----------------
                                         migration_maybe_pause(current_active_state==ACTIVE)
                                           if (s->state != MIGRATION_STATUS_CANCELLING)
                                             --> true, keep setting state
                                           qemu_mutex_unlock_iothread();
  qemu_mutex_lock_iothread();
  migrate_fd_cancel()
    if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER)
      --> false, not posting to pause_sem
    set state to MIGRATION_STATUS_CANCELLING
                                         migrate_set_state(&s->state, *current_active_state,
                                                           MIGRATION_STATUS_PRE_SWITCHOVER);
                                           --> false, cmpxchg fail
                                         qemu_sem_wait(&s->pause_sem);
                                           --> hang death?
Thanks,
--
Peter Xu
* RE: [PATCH v1] migration: refactor migration_completion
2023-07-26 17:10 ` Peter Xu
@ 2023-07-27 14:52 ` Wang, Wei W
0 siblings, 0 replies; 7+ messages in thread
From: Wang, Wei W @ 2023-07-27 14:52 UTC (permalink / raw)
To: Peter Xu; +Cc: Isaku Yamahata, quintela@redhat.com, qemu-devel@nongnu.org
On Thursday, July 27, 2023 1:10 AM, Peter Xu wrote:
> On Fri, Jul 21, 2023 at 11:14:55AM +0000, Wang, Wei W wrote:
> > On Friday, July 21, 2023 4:38 AM, Peter Xu wrote:
> > > Looks good to me, after addressing Isaku's comments.
> > >
> > > The current_active_state is very unfortunate, along with most of the
> > > calls to
> > > migrate_set_state() - I bet most of the code will definitely go
> > > wrong if that cmpxchg didn't succeed inside of migrate_set_state(),
> > > IOW in most cases we simply always want:
> >
> > Can you share examples where it could be wrong?
> > (If it has bugs, we need to fix)
>
> Nop. What I meant is most of the cases we want to set the state without
> caring much about the old state, so at least we can have a helper like below
> and simply call migrate_set_state(s, STATE) where we don't care old state.
>
> >
> > >
> > > migrate_set_state(&s->state, s->state, XXX);
> > >
> > > Not sure whether one pre-requisite patch is good to have so we can
> > > rename
> > > migrate_set_state() to something like __migrate_set_state(), then:
> > >
> > > migrate_set_state(s, XXX) {
> > > __migrate_set_state(&s->state, s->state, XXX);
> > > }
> > >
> > > I don't even know whether there's any call site that will need
> > > __migrate_set_state() for real..
> > >
> >
> > Seems this would break the use of "MIGRATION_STATUS_CANCELLING".
> > For example,
> > - In migration_maybe_pause:
> > migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
> > new_state); If the current
> > s->state isn't MIGRATION_STATUS_PRE_SWITCHOVER (could be
> > MIGRATION_STATUS_CANCELLING), then s->state won’t be updated to
> > new_state.
> > - Then, in migration_completion, the following update to s->state won't
> succeed:
> > migrate_set_state(&s->state, current_active_state,
> > MIGRATION_STATUS_COMPLETED);
> >
> > - Finally, when reaching migration_iteration_finish(), s->state is
> > MIGRATION_STATUS_CANCELLING, instead of
> MIGRATION_STATUS_COMPLETED.
>
> The whole state changes are just flaky to me in general, even with the help of
> old_state cmpxchg.
Yes, the design/implementation of the migration state transition can be
improved (it looks fragile to me). I think this should be done in a separate
patchset, though. For this patch, we could keep it to no functional changes.
>
> E.g., I'm wondering whether below race can happen, assuming we're starting
> with ACTIVE state and just about to complete migration:
>
> main thread migration thread
> ----------- ----------------
>
> migration_maybe_pause(current_active_state==ACTIVE)
> if (s->state != MIGRATION_STATUS_CANCELLING)
> --> true, keep setting state
> qemu_mutex_unlock_iothread();
> qemu_mutex_lock_iothread();
> migrate_fd_cancel()
> if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER)
> --> false, not posting to pause_sem
> set state to MIGRATION_STATUS_CANCELLING
> migrate_set_state(&s->state, *current_active_state,
> MIGRATION_STATUS_PRE_SWITCHOVER);
> --> false, cmpxchg fail
> qemu_sem_wait(&s->pause_sem);
> --> hang death?
Still need "migrate continue" to unblock the migration thread.
Probably we should document that PAUSE_BEFORE_SWITCHOVER always requires an
explicit "migrate continue" to be issued from user (even after migration is cancelled).