All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
To: Yury Kotov <yury-kotov@yandex-team.ru>
Cc: Peter Crosthwaite <crosthwaite.peter@gmail.com>,
	Stefan Weil <sw@weilnetz.de>, Juan Quintela <quintela@redhat.com>,
	"open list:Overall" <qemu-devel@nongnu.org>,
	"yc-core@yandex-team.ru" <yc-core@yandex-team.ru>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Richard Henderson <rth@twiddle.net>
Subject: Re: [Qemu-devel] [PATCH v3 3/3] tests/migration: Add a test for auto converge
Date: Tue, 23 Jul 2019 11:23:53 +0100	[thread overview]
Message-ID: <20190723102353.GG2719@work-vm> (raw)
In-Reply-To: <483241563872053@myt5-bd00a25f9194.qloud-c.yandex.net>

* Yury Kotov (yury-kotov@yandex-team.ru) wrote:
> 22.07.2019, 20:35, "Dr. David Alan Gilbert" <dgilbert@redhat.com>:
> > * Yury Kotov (yury-kotov@yandex-team.ru) wrote:
> >>  Signed-off-by: Yury Kotov <yury-kotov@yandex-team.ru>
> >
> > This looks OK to me, but have you tried it on a really really overloaded
> > host?
> > I worry that you might skip some of the percentage steps or not hit the
> > bandwidth on the small overloaded VMs we get in CI.
> 
> No, I haven't tried. I saw patchew's logs and you're absolutely right - such a
> problem exists. Now I'm looking for a way to fix it.

Yes, we've hit that type of thing before; make sure it's happy with 1
CPU that's already got 2 or 3 other things on it.

But, as much as possible, try to make it survive if it's really slow.

Dave

> >
> > Dave
> >
> >>  ---
> >>   tests/migration-test.c | 119 +++++++++++++++++++++++++++++++++++++----
> >>   1 file changed, 108 insertions(+), 11 deletions(-)
> >>
> >>  diff --git a/tests/migration-test.c b/tests/migration-test.c
> >>  index a4feb9545d..bb69517fc8 100644
> >>  --- a/tests/migration-test.c
> >>  +++ b/tests/migration-test.c
> >>  @@ -241,6 +241,17 @@ static int64_t read_ram_property_int(QTestState *who, const char *property)
> >>       return result;
> >>   }
> >>
> >>  +static int64_t read_migrate_property_int(QTestState *who, const char *property)
> >>  +{
> >>  + QDict *rsp_return;
> >>  + int64_t result;
> >>  +
> >>  + rsp_return = migrate_query(who);
> >>  + result = qdict_get_try_int(rsp_return, property, 0);
> >>  + qobject_unref(rsp_return);
> >>  + return result;
> >>  +}
> >>  +
> >>   static uint64_t get_migration_pass(QTestState *who)
> >>   {
> >>       return read_ram_property_int(who, "dirty-sync-count");
> >>  @@ -255,20 +266,22 @@ static void read_blocktime(QTestState *who)
> >>       qobject_unref(rsp_return);
> >>   }
> >>
> >>  +static bool check_migration_status(QTestState *who, const char *status)
> >>  +{
> >>  + bool completed;
> >>  + char *current_status;
> >>  +
> >>  + current_status = migrate_query_status(who);
> >>  + completed = strcmp(current_status, status) == 0;
> >>  + g_assert_cmpstr(current_status, !=, "failed");
> >>  + g_free(current_status);
> >>  + return completed;
> >>  +}
> >>  +
> >>   static void wait_for_migration_status(QTestState *who,
> >>                                         const char *goal)
> >>   {
> >>  - while (true) {
> >>  - bool completed;
> >>  - char *status;
> >>  -
> >>  - status = migrate_query_status(who);
> >>  - completed = strcmp(status, goal) == 0;
> >>  - g_assert_cmpstr(status, !=, "failed");
> >>  - g_free(status);
> >>  - if (completed) {
> >>  - return;
> >>  - }
> >>  + while (!check_migration_status(who, goal)) {
> >>           usleep(1000);
> >>       }
> >>   }
> >>  @@ -1121,6 +1134,89 @@ static void test_migrate_fd_proto(void)
> >>       test_migrate_end(from, to, true);
> >>   }
> >>
> >>  +static void test_migrate_auto_converge(void)
> >>  +{
> >>  + char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
> >>  + QTestState *from, *to;
> >>  + int i;
> >>  + int64_t remaining, downtime;
> >>  +
> >>  + /*
> >>  + * We want the test to be fast enough, but stable.
> >>  + * Throttle percentages are chosen to cover all cases (init, increment, max)
> >>  + */
> >>  + static const int64_t expected_pcts[] = { 0, 1, 51, 98 };
> >>  + const int64_t max_bandwidth = 200000000; /* ~200Mb/s */
> >>  + const int64_t downtime_limit = 50; /* 50ms */
> >>  + /*
> >>  + * We migrate through unix-socket (> 500Mb/s).
> >>  + * Thus, expected migration speed ~= bandwidth limit (< 500Mb/s).
> >>  + * So, we can predict expected_threshold
> >>  + */
> >>  + const int64_t expected_threshold = max_bandwidth * downtime_limit / 1000;
> >>  +
> >>  + if (test_migrate_start(&from, &to, uri, false, false)) {
> >>  + return;
> >>  + }
> >>  +
> >>  + migrate_set_capability(from, "auto-converge", true);
> >>  + migrate_set_parameter_int(from, "cpu-throttle-initial", expected_pcts[1]);
> >>  + migrate_set_parameter_int(from, "cpu-throttle-increment",
> >>  + expected_pcts[2] - expected_pcts[1]);
> >>  + migrate_set_parameter_int(from, "max-cpu-throttle", expected_pcts[3]);
> >>  +
> >>  + migrate_set_parameter_int(from, "max-bandwidth", max_bandwidth);
> >>  + migrate_set_parameter_int(from, "downtime-limit", downtime_limit);
> >>  +
> >>  + /* To check remaining size after precopy */
> >>  + migrate_set_capability(from, "pause-before-switchover", true);
> >>  +
> >>  + /* Wait for the first serial output from the source */
> >>  + wait_for_serial("src_serial");
> >>  +
> >>  + migrate(from, uri, "{}");
> >>  +
> >>  + for (i = 0; i < ARRAY_SIZE(expected_pcts); i++) {
> >>  + int64_t pct;
> >>  + pct = read_migrate_property_int(from, "cpu-throttle-percentage");
> >>  + g_assert_cmpint(pct, ==, expected_pcts[i]);
> >>  + while (pct == expected_pcts[i] && !got_stop) {
> >>  + usleep(1000);
> >>  + pct = read_migrate_property_int(from, "cpu-throttle-percentage");
> >>  + }
> >>  + /* We break out of this loop only in paused state */
> >>  + if (got_stop || i + 1 == ARRAY_SIZE(expected_pcts)) {
> >>  + /* Check unexpected throttle percentage change */
> >>  + g_assert_true(got_stop);
> >>  + /* Check unexpected converge */
> >>  + g_assert_cmpint(i + 1, ==, ARRAY_SIZE(expected_pcts));
> >>  + g_assert_true(check_migration_status(from, "pre-switchover"));
> >>  + }
> >>  + }
> >>  +
> >>  + remaining = read_ram_property_int(from, "remaining");
> >>  + g_assert_cmpint(remaining, <, expected_threshold);
> >>  +
> >>  + wait_command(from, "{ 'execute': 'migrate-continue' , 'arguments':"
> >>  + " { 'state': 'pre-switchover' } }");
> >>  +
> >>  + qtest_qmp_eventwait(to, "RESUME");
> >>  +
> >>  + wait_for_serial("dest_serial");
> >>  + wait_for_migration_complete(from);
> >>  +
> >>  + downtime = read_migrate_property_int(from, "downtime");
> >>  + /*
> >>  + * Actual downtime may be greater than downtime limit,
> >>  + * but the difference should be small enough (~20ms)
> >>  + */
> >>  + g_assert_cmpint(downtime, <, downtime_limit + 20);
> >>  +
> >>  + g_free(uri);
> >>  +
> >>  + test_migrate_end(from, to, true);
> >>  +}
> >>  +
> >>   int main(int argc, char **argv)
> >>   {
> >>       char template[] = "/tmp/migration-test-XXXXXX";
> >>  @@ -1176,6 +1272,7 @@ int main(int argc, char **argv)
> >>       /* qtest_add_func("/migration/ignore_shared", test_ignore_shared); */
> >>       qtest_add_func("/migration/xbzrle/unix", test_xbzrle_unix);
> >>       qtest_add_func("/migration/fd_proto", test_migrate_fd_proto);
> >>  + qtest_add_func("/migration/auto_converge", test_migrate_auto_converge);
> >>
> >>       ret = g_test_run();
> >>
> >>  --
> >>  2.22.0
> > --
> > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> 
> Regards,
> Yury
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK


  reply	other threads:[~2019-07-23 10:24 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-18  9:17 [Qemu-devel] [PATCH v3 0/3] High downtime with 95+ throttle pct Yury Kotov
2019-07-18  9:17 ` [Qemu-devel] [PATCH v3 1/3] qemu-thread: Add qemu_cond_timedwait Yury Kotov
2019-07-18  9:17 ` [Qemu-devel] [PATCH v3 2/3] cpus: Fix throttling during vm_stop Yury Kotov
2019-07-18  9:17 ` [Qemu-devel] [PATCH v3 3/3] tests/migration: Add a test for auto converge Yury Kotov
2019-07-22 17:35   ` Dr. David Alan Gilbert
2019-07-23  8:54     ` Yury Kotov
2019-07-23 10:23       ` Dr. David Alan Gilbert [this message]
2019-07-18 15:33 ` [Qemu-devel] [PATCH v3 0/3] High downtime with 95+ throttle pct no-reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190723102353.GG2719@work-vm \
    --to=dgilbert@redhat.com \
    --cc=crosthwaite.peter@gmail.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    --cc=rth@twiddle.net \
    --cc=sw@weilnetz.de \
    --cc=yc-core@yandex-team.ru \
    --cc=yury-kotov@yandex-team.ru \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.