From: Alexey Kardashevskiy <aik@ozlabs.ru>
To: Samuel Mendoza-Jonas <sam.mj@au1.ibm.com>,
qemu-ppc@nongnu.org, qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 2/2] spapr: Fix stale HTAB during live migration (TCG)
Date: Thu, 13 Nov 2014 18:12:22 +1100 [thread overview]
Message-ID: <546459D6.1040706@ozlabs.ru> (raw)
In-Reply-To: <1415168221-2324-3-git-send-email-sam.mj@au1.ibm.com>
On 11/05/2014 05:17 PM, Samuel Mendoza-Jonas wrote:
> If a TCG guest reboots during a running migration HTAB entries are not
> marked dirty, and the destination boots with an invalid HTAB.
>
> When a reboot occurs reset the state of HTAB migration, and explicitly
> inform the destination of invalid entries.
>
> Signed-off-by: Samuel Mendoza-Jonas <sam.mj@au1.ibm.com>
> ---
> hw/ppc/spapr.c | 59 +++++++++++++++++++++++++++++++++++---------------
> include/hw/ppc/spapr.h | 1 +
> 2 files changed, 42 insertions(+), 18 deletions(-)
>
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 1610c28..9f419e8 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -829,26 +829,30 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
>
> shift = kvmppc_reset_htab(spapr->htab_shift);
>
> + pthread_mutex_lock(&spapr->htab_mutex);
> if (shift > 0) {
> /* Kernel handles htab, we don't need to allocate one */
> spapr->htab_shift = shift;
> kvmppc_kern_htab = true;
>
> /* Tell readers to update their file descriptor */
> - pthread_mutex_lock(&spapr->htab_mutex);
> if (spapr->htab_fd > 0) {
> spapr->htab_fd_stale = true;
> }
> - pthread_mutex_unlock(&spapr->htab_mutex);
> } else {
> if (!spapr->htab) {
> /* Allocate an htab if we don't yet have one */
> spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
> + } else {
> + spapr->htab_mig_full = true;
> + spapr->htab_first_pass = true;
> + spapr->htab_save_index = 0;
> }
>
> /* And clear it */
> memset(spapr->htab, 0, HTAB_SIZE(spapr));
> }
> + pthread_mutex_unlock(&spapr->htab_mutex);
The pthread_mutex_lock()/pthread_mutex_unlock() changes in the hunk above
belong in the previous patch.
>
> /* Update the RMA size if necessary */
> if (spapr->vrma_adjust) {
> @@ -1019,6 +1023,7 @@ static int htab_save_setup(QEMUFile *f, void *opaque)
> pthread_mutex_lock(&spapr->htab_mutex);
> spapr->htab_fd = kvmppc_get_htab_fd(false);
> spapr->htab_fd_stale = false;
> + spapr->htab_mig_full = false;
> pthread_mutex_unlock(&spapr->htab_mutex);
> if (spapr->htab_fd < 0) {
> fprintf(stderr, "Unable to open fd for reading hash table from KVM: %s\n",
> @@ -1034,6 +1039,7 @@ static int htab_save_setup(QEMUFile *f, void *opaque)
> static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr,
> int64_t max_ns)
> {
> + bool final = max_ns < 0;
> int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
> int index = spapr->htab_save_index;
> int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
> @@ -1041,33 +1047,40 @@ static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr,
> assert(spapr->htab_first_pass);
>
> do {
> - int chunkstart;
> + int chunkstart, invalidstart;
>
> - /* Consume invalid HPTEs */
> - while ((index < htabslots)
> - && !HPTE_VALID(HPTE(spapr->htab, index))) {
Nit: in most places in QEMU, "&&" would be at the end of the previous line :)
> + chunkstart = index;
> + /* Consume valid HPTEs */
> + while ((index < htabslots && (index - chunkstart < USHRT_MAX))
> + && HPTE_VALID(HPTE(spapr->htab, index))) {
> index++;
> CLEAN_HPTE(HPTE(spapr->htab, index));
> }
>
> - /* Consume valid HPTEs */
> - chunkstart = index;
> - while ((index < htabslots)
> - && HPTE_VALID(HPTE(spapr->htab, index))) {
> + invalidstart = index;
> + /* Consume invalid HPTEs */
> + while ((index < htabslots && (index - invalidstart < USHRT_MAX))
> + && !HPTE_VALID(HPTE(spapr->htab, index))) {
> index++;
> CLEAN_HPTE(HPTE(spapr->htab, index));
> }
>
> - if (index > chunkstart) {
> - int n_valid = index - chunkstart;
> + /* Avoid writing an end marker (0,0,0) */
> + if (index > chunkstart
> + && !(chunkstart == invalidstart && !spapr->htab_mig_full)) {
> + int n_valid = invalidstart - chunkstart;
> + /* If a reset has occured we must explicitly overwrite the HTAB
> + * of the destination */
> + int n_invalid = spapr->htab_mig_full ? index - invalidstart : 0;
>
> qemu_put_be32(f, chunkstart);
> qemu_put_be16(f, n_valid);
> - qemu_put_be16(f, 0);
> + qemu_put_be16(f, n_invalid);
> qemu_put_buffer(f, HPTE(spapr->htab, chunkstart),
> HASH_PTE_SIZE_64 * n_valid);
>
> - if ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) {
> + if ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns
> + && !final) {
> break;
> }
> }
> @@ -1182,10 +1195,14 @@ static int htab_save_iterate(QEMUFile *f, void *opaque)
> if (rc < 0) {
> return rc;
> }
> - } else if (spapr->htab_first_pass) {
> - htab_save_first_pass(f, spapr, MAX_ITERATION_NS);
> } else {
> - rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS);
> + pthread_mutex_lock(&spapr->htab_mutex);
> + if (spapr->htab_first_pass) {
> + htab_save_first_pass(f, spapr, MAX_ITERATION_NS);
> + } else {
> + rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS);
> + }
> + pthread_mutex_unlock(&spapr->htab_mutex);
> }
>
> /* End marker */
> @@ -1220,7 +1237,13 @@ static int htab_save_complete(QEMUFile *f, void *opaque)
> close(spapr->htab_fd);
> spapr->htab_fd = -1;
> } else {
> - htab_save_later_pass(f, spapr, -1);
> + pthread_mutex_lock(&spapr->htab_mutex);
> + if (spapr->htab_first_pass) {
> + htab_save_first_pass(f, spapr, -1);
> + } else {
> + htab_save_later_pass(f, spapr, -1);
> + }
> + pthread_mutex_unlock(&spapr->htab_mutex);
> }
>
> /* End marker */
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 5e29bec..ee95459 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -38,6 +38,7 @@ typedef struct sPAPREnvironment {
> bool htab_first_pass;
> int htab_fd;
> bool htab_fd_stale;
> + bool htab_mig_full;
Not sure what @htab_mig_full stands for exactly. Please explain it in the
commit log.
> pthread_mutex_t htab_mutex;
> } sPAPREnvironment;
>
>
--
Alexey
prev parent reply other threads:[~2014-11-13 7:12 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-11-05 6:16 [Qemu-devel] [PATCH 0/2] spapr: Fix stale HTAB during live migration Samuel Mendoza-Jonas
2014-11-05 6:17 ` [Qemu-devel] [PATCH 1/2] spapr: Fix stale HTAB during live migration (KVM) Samuel Mendoza-Jonas
2014-11-05 7:57 ` [Qemu-devel] [Qemu-ppc] " Alexander Graf
2014-11-05 21:56 ` Samuel Mendoza-Jonas
2014-11-13 6:59 ` [Qemu-devel] " Alexey Kardashevskiy
2014-11-05 6:17 ` [Qemu-devel] [PATCH 2/2] spapr: Fix stale HTAB during live migration (TCG) Samuel Mendoza-Jonas
2014-11-05 8:05 ` [Qemu-devel] [Qemu-ppc] " Alexander Graf
2014-11-05 22:00 ` Samuel Mendoza-Jonas
2014-11-05 22:04 ` Alexander Graf
2014-11-13 7:12 ` Alexey Kardashevskiy [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=546459D6.1040706@ozlabs.ru \
--to=aik@ozlabs.ru \
--cc=qemu-devel@nongnu.org \
--cc=qemu-ppc@nongnu.org \
--cc=sam.mj@au1.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).