* [Qemu-devel] [PATCH 0/2] spapr: Fix stale HTAB during live migration @ 2014-11-05 6:16 Samuel Mendoza-Jonas 2014-11-05 6:17 ` [Qemu-devel] [PATCH 1/2] spapr: Fix stale HTAB during live migration (KVM) Samuel Mendoza-Jonas 2014-11-05 6:17 ` [Qemu-devel] [PATCH 2/2] spapr: Fix stale HTAB during live migration (TCG) Samuel Mendoza-Jonas 0 siblings, 2 replies; 10+ messages in thread From: Samuel Mendoza-Jonas @ 2014-11-05 6:16 UTC (permalink / raw) To: qemu-ppc, qemu-devel; +Cc: aik, Samuel Mendoza-Jonas If a spapr guest reboots during a live migration, the guest HTAB on the destination is not updated properly, usually resulting in a kernel panic. This is a (delayed!) follow up to my previous patch including a fix for TCG guests as well as KVM. Samuel Mendoza-Jonas (2): spapr: Fix stale HTAB during live migration (KVM) spapr: Fix stale HTAB during live migration (TCG) hw/ppc/spapr.c | 102 +++++++++++++++++++++++++++++++++++++++++-------- include/hw/ppc/spapr.h | 3 ++ 2 files changed, 89 insertions(+), 16 deletions(-) -- 1.9.3 ^ permalink raw reply [flat|nested] 10+ messages in thread
* [Qemu-devel] [PATCH 1/2] spapr: Fix stale HTAB during live migration (KVM) 2014-11-05 6:16 [Qemu-devel] [PATCH 0/2] spapr: Fix stale HTAB during live migration Samuel Mendoza-Jonas @ 2014-11-05 6:17 ` Samuel Mendoza-Jonas 2014-11-05 7:57 ` [Qemu-devel] [Qemu-ppc] " Alexander Graf 2014-11-13 6:59 ` [Qemu-devel] " Alexey Kardashevskiy 2014-11-05 6:17 ` [Qemu-devel] [PATCH 2/2] spapr: Fix stale HTAB during live migration (TCG) Samuel Mendoza-Jonas 1 sibling, 2 replies; 10+ messages in thread From: Samuel Mendoza-Jonas @ 2014-11-05 6:17 UTC (permalink / raw) To: qemu-ppc, qemu-devel; +Cc: aik, Samuel Mendoza-Jonas If a guest reboots during a running migration, changes to the hash page table are not necessarily updated on the destination. Opening a new file descriptor to the HTAB forces the migration handler to resend the entire table. Signed-off-by: Samuel Mendoza-Jonas <sam.mj@au1.ibm.com> --- hw/ppc/spapr.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ include/hw/ppc/spapr.h | 2 ++ 2 files changed, 49 insertions(+) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 0a2bfe6..1610c28 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -833,6 +833,13 @@ static void spapr_reset_htab(sPAPREnvironment *spapr) /* Kernel handles htab, we don't need to allocate one */ spapr->htab_shift = shift; kvmppc_kern_htab = true; + + /* Tell readers to update their file descriptor */ + pthread_mutex_lock(&spapr->htab_mutex); + if (spapr->htab_fd > 0) { + spapr->htab_fd_stale = true; + } + pthread_mutex_unlock(&spapr->htab_mutex); } else { if (!spapr->htab) { /* Allocate an htab if we don't yet have one */ @@ -850,6 +857,31 @@ static void spapr_reset_htab(sPAPREnvironment *spapr) } } +/* A guest reset will cause spapr->htab_fd to become stale if being used. + * Reopen the file descriptor to make sure the whole HTAB is properly read. 
+ */ +static int spapr_check_htab_fd(sPAPREnvironment *spapr) +{ + int rc = 0; + + pthread_mutex_lock(&spapr->htab_mutex); + + if (spapr->htab_fd_stale) { + close(spapr->htab_fd); + spapr->htab_fd = kvmppc_get_htab_fd(false); + if (spapr->htab_fd < 0) { + error_report("Unable to open fd for reading hash table from KVM: " + "%s", strerror(errno)); + rc = -1; + } + spapr->htab_fd_stale = false; + } + + pthread_mutex_unlock(&spapr->htab_mutex); + return rc; +} + + static void ppc_spapr_reset(void) { PowerPCCPU *first_ppc_cpu; @@ -984,7 +1016,10 @@ static int htab_save_setup(QEMUFile *f, void *opaque) } else { assert(kvm_enabled()); + pthread_mutex_lock(&spapr->htab_mutex); spapr->htab_fd = kvmppc_get_htab_fd(false); + spapr->htab_fd_stale = false; + pthread_mutex_unlock(&spapr->htab_mutex); if (spapr->htab_fd < 0) { fprintf(stderr, "Unable to open fd for reading hash table from KVM: %s\n", strerror(errno)); @@ -1137,6 +1172,11 @@ static int htab_save_iterate(QEMUFile *f, void *opaque) if (!spapr->htab) { assert(kvm_enabled()); + rc = spapr_check_htab_fd(spapr); + if (rc < 0) { + return rc; + } + rc = kvmppc_save_htab(f, spapr->htab_fd, MAX_KVM_BUF_SIZE, MAX_ITERATION_NS); if (rc < 0) { @@ -1168,6 +1208,11 @@ static int htab_save_complete(QEMUFile *f, void *opaque) assert(kvm_enabled()); + rc = spapr_check_htab_fd(spapr); + if (rc < 0) { + return rc; + } + rc = kvmppc_save_htab(f, spapr->htab_fd, MAX_KVM_BUF_SIZE, -1); if (rc < 0) { return rc; @@ -1355,6 +1400,8 @@ static void ppc_spapr_init(MachineState *machine) spapr->htab_shift++; } + pthread_mutex_init(&spapr->htab_mutex, NULL); + /* Set up Interrupt Controller before we create the VCPUs */ spapr->icp = xics_system_init(smp_cpus * kvmppc_smt_threads() / smp_threads, XICS_IRQS); diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 749daf4..5e29bec 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -37,6 +37,8 @@ typedef struct sPAPREnvironment { int htab_save_index; bool 
htab_first_pass; int htab_fd; + bool htab_fd_stale; + pthread_mutex_t htab_mutex; } sPAPREnvironment; #define H_SUCCESS 0 -- 1.9.3 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [Qemu-ppc] [PATCH 1/2] spapr: Fix stale HTAB during live migration (KVM) 2014-11-05 6:17 ` [Qemu-devel] [PATCH 1/2] spapr: Fix stale HTAB during live migration (KVM) Samuel Mendoza-Jonas @ 2014-11-05 7:57 ` Alexander Graf 2014-11-05 21:56 ` Samuel Mendoza-Jonas 2014-11-13 6:59 ` [Qemu-devel] " Alexey Kardashevskiy 1 sibling, 1 reply; 10+ messages in thread From: Alexander Graf @ 2014-11-05 7:57 UTC (permalink / raw) To: Samuel Mendoza-Jonas, qemu-ppc, qemu-devel On 05.11.14 07:17, Samuel Mendoza-Jonas wrote: > If a guest reboots during a running migration, changes to the > hash page table are not necessarily updated on the destination. > Opening a new file descriptor to the HTAB forces the migration > handler to resend the entire table. > > Signed-off-by: Samuel Mendoza-Jonas <sam.mj@au1.ibm.com> > --- > hw/ppc/spapr.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ > include/hw/ppc/spapr.h | 2 ++ > 2 files changed, 49 insertions(+) > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index 0a2bfe6..1610c28 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -833,6 +833,13 @@ static void spapr_reset_htab(sPAPREnvironment *spapr) > /* Kernel handles htab, we don't need to allocate one */ > spapr->htab_shift = shift; > kvmppc_kern_htab = true; > + > + /* Tell readers to update their file descriptor */ > + pthread_mutex_lock(&spapr->htab_mutex); I don't think you can directly use pthread functions in hw/. These files could be compiled on Windows which doesn't have pthread. Instead, please use the QEMU wrappers from util/qemu-thread-posix.c. Or maybe try and find out whether you actually do need the lock. Reboots can only happen when triggered via a HCALL which takes the BQL. I don't quite know how much the migration code became threaded, but I'd assume that at least device migration would happen under the BQL or after stopping the VM, but in a consistent place. 
So as long as we're guaranteed that the htab_fd_stale variable is set at the final "send all device contents" phase, we should automatically catch any reset that happened in between - even without a lock, no? Alex ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [Qemu-ppc] [PATCH 1/2] spapr: Fix stale HTAB during live migration (KVM) 2014-11-05 7:57 ` [Qemu-devel] [Qemu-ppc] " Alexander Graf @ 2014-11-05 21:56 ` Samuel Mendoza-Jonas 0 siblings, 0 replies; 10+ messages in thread From: Samuel Mendoza-Jonas @ 2014-11-05 21:56 UTC (permalink / raw) To: Alexander Graf, qemu-ppc, qemu-devel On 05/11/14 18:57, Alexander Graf wrote: > > > On 05.11.14 07:17, Samuel Mendoza-Jonas wrote: >> If a guest reboots during a running migration, changes to the >> hash page table are not necessarily updated on the destination. >> Opening a new file descriptor to the HTAB forces the migration >> handler to resend the entire table. >> >> Signed-off-by: Samuel Mendoza-Jonas <sam.mj@au1.ibm.com> >> --- >> hw/ppc/spapr.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ >> include/hw/ppc/spapr.h | 2 ++ >> 2 files changed, 49 insertions(+) >> >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c >> index 0a2bfe6..1610c28 100644 >> --- a/hw/ppc/spapr.c >> +++ b/hw/ppc/spapr.c >> @@ -833,6 +833,13 @@ static void spapr_reset_htab(sPAPREnvironment *spapr) >> /* Kernel handles htab, we don't need to allocate one */ >> spapr->htab_shift = shift; >> kvmppc_kern_htab = true; >> + >> + /* Tell readers to update their file descriptor */ >> + pthread_mutex_lock(&spapr->htab_mutex); > > I don't think you can directly use pthread functions in hw/. These files > could be compiled on Windows which doesn't have pthread. Instead, please > use the QEMU wrappers from util/qemu-thread-posix.c. Thanks for catching this! > > Or maybe try and find out whether you actually do need the lock. Reboots > can only happen when triggered via a HCALL which takes the BQL. I don't > quite know how much the migration code became threaded, but I'd assume > that at least device migration would happen under the BQL or after > stopping the VM, but in a consistent place. 
> > So as long as we're guaranteed that the htab_fd_stale variable is set at > the final "send all device contents" phase, we should automatically > catch any reset that happened in between - even without a lock, no? Good point, we only really *need* the flag to be set at least before the call to save_live_complete(). I'll double check but yes, that should work. > > > Alex > -- ----------- LTC Ozlabs IBM ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [PATCH 1/2] spapr: Fix stale HTAB during live migration (KVM) 2014-11-05 6:17 ` [Qemu-devel] [PATCH 1/2] spapr: Fix stale HTAB during live migration (KVM) Samuel Mendoza-Jonas 2014-11-05 7:57 ` [Qemu-devel] [Qemu-ppc] " Alexander Graf @ 2014-11-13 6:59 ` Alexey Kardashevskiy 1 sibling, 0 replies; 10+ messages in thread From: Alexey Kardashevskiy @ 2014-11-13 6:59 UTC (permalink / raw) To: Samuel Mendoza-Jonas, qemu-ppc, qemu-devel On 11/05/2014 05:17 PM, Samuel Mendoza-Jonas wrote: > If a guest reboots during a running migration, changes to the > hash page table are not necessarily updated on the destination. > Opening a new file descriptor to the HTAB forces the migration > handler to resend the entire table. > > Signed-off-by: Samuel Mendoza-Jonas <sam.mj@au1.ibm.com> > --- > hw/ppc/spapr.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ > include/hw/ppc/spapr.h | 2 ++ > 2 files changed, 49 insertions(+) > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index 0a2bfe6..1610c28 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -833,6 +833,13 @@ static void spapr_reset_htab(sPAPREnvironment *spapr) > /* Kernel handles htab, we don't need to allocate one */ > spapr->htab_shift = shift; > kvmppc_kern_htab = true; > + > + /* Tell readers to update their file descriptor */ > + pthread_mutex_lock(&spapr->htab_mutex); > + if (spapr->htab_fd > 0) { s/>/>=/ htab_fd == 0 is correct. > + spapr->htab_fd_stale = true; > + } > + pthread_mutex_unlock(&spapr->htab_mutex); > } else { > if (!spapr->htab) { > /* Allocate an htab if we don't yet have one */ > @@ -850,6 +857,31 @@ static void spapr_reset_htab(sPAPREnvironment *spapr) > } > } > > +/* A guest reset will cause spapr->htab_fd to become stale if being used. Multiline comment starts with /* and \n. > + * Reopen the file descriptor to make sure the whole HTAB is properly read. 
> + */ > +static int spapr_check_htab_fd(sPAPREnvironment *spapr) > +{ > + int rc = 0; > + > + pthread_mutex_lock(&spapr->htab_mutex); > + > + if (spapr->htab_fd_stale) { > + close(spapr->htab_fd); > + spapr->htab_fd = kvmppc_get_htab_fd(false); > + if (spapr->htab_fd < 0) { > + error_report("Unable to open fd for reading hash table from KVM: " > + "%s", strerror(errno)); > + rc = -1; > + } > + spapr->htab_fd_stale = false; > + } > + > + pthread_mutex_unlock(&spapr->htab_mutex); > + return rc; > +} > + > + 2 empty lines, should be one. > static void ppc_spapr_reset(void) > { > PowerPCCPU *first_ppc_cpu; > @@ -984,7 +1016,10 @@ static int htab_save_setup(QEMUFile *f, void *opaque) > } else { > assert(kvm_enabled()); > > + pthread_mutex_lock(&spapr->htab_mutex); > spapr->htab_fd = kvmppc_get_htab_fd(false); > + spapr->htab_fd_stale = false; > + pthread_mutex_unlock(&spapr->htab_mutex); > if (spapr->htab_fd < 0) { > fprintf(stderr, "Unable to open fd for reading hash table from KVM: %s\n", > strerror(errno)); > @@ -1137,6 +1172,11 @@ static int htab_save_iterate(QEMUFile *f, void *opaque) > if (!spapr->htab) { > assert(kvm_enabled()); > > + rc = spapr_check_htab_fd(spapr); > + if (rc < 0) { > + return rc; > + } > + > rc = kvmppc_save_htab(f, spapr->htab_fd, > MAX_KVM_BUF_SIZE, MAX_ITERATION_NS); > if (rc < 0) { > @@ -1168,6 +1208,11 @@ static int htab_save_complete(QEMUFile *f, void *opaque) > > assert(kvm_enabled()); > > + rc = spapr_check_htab_fd(spapr); > + if (rc < 0) { > + return rc; > + } > + > rc = kvmppc_save_htab(f, spapr->htab_fd, MAX_KVM_BUF_SIZE, -1); > if (rc < 0) { > return rc; > @@ -1355,6 +1400,8 @@ static void ppc_spapr_init(MachineState *machine) > spapr->htab_shift++; > } > > + pthread_mutex_init(&spapr->htab_mutex, NULL); > + > /* Set up Interrupt Controller before we create the VCPUs */ > spapr->icp = xics_system_init(smp_cpus * kvmppc_smt_threads() / smp_threads, > XICS_IRQS); > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > index 
749daf4..5e29bec 100644 > --- a/include/hw/ppc/spapr.h > +++ b/include/hw/ppc/spapr.h > @@ -37,6 +37,8 @@ typedef struct sPAPREnvironment { > int htab_save_index; > bool htab_first_pass; > int htab_fd; > + bool htab_fd_stale; > + pthread_mutex_t htab_mutex; > } sPAPREnvironment; > > #define H_SUCCESS 0 > -- Alexey ^ permalink raw reply [flat|nested] 10+ messages in thread
* [Qemu-devel] [PATCH 2/2] spapr: Fix stale HTAB during live migration (TCG) 2014-11-05 6:16 [Qemu-devel] [PATCH 0/2] spapr: Fix stale HTAB during live migration Samuel Mendoza-Jonas 2014-11-05 6:17 ` [Qemu-devel] [PATCH 1/2] spapr: Fix stale HTAB during live migration (KVM) Samuel Mendoza-Jonas @ 2014-11-05 6:17 ` Samuel Mendoza-Jonas 2014-11-05 8:05 ` [Qemu-devel] [Qemu-ppc] " Alexander Graf 2014-11-13 7:12 ` [Qemu-devel] " Alexey Kardashevskiy 1 sibling, 2 replies; 10+ messages in thread From: Samuel Mendoza-Jonas @ 2014-11-05 6:17 UTC (permalink / raw) To: qemu-ppc, qemu-devel; +Cc: aik, Samuel Mendoza-Jonas If a TCG guest reboots during a running migration HTAB entries are not marked dirty, and the destination boots with an invalid HTAB. When a reboot occurs reset the state of HTAB migration, and explicitly inform the destination of invalid entries. Signed-off-by: Samuel Mendoza-Jonas <sam.mj@au1.ibm.com> --- hw/ppc/spapr.c | 59 +++++++++++++++++++++++++++++++++++--------------- include/hw/ppc/spapr.h | 1 + 2 files changed, 42 insertions(+), 18 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 1610c28..9f419e8 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -829,26 +829,30 @@ static void spapr_reset_htab(sPAPREnvironment *spapr) shift = kvmppc_reset_htab(spapr->htab_shift); + pthread_mutex_lock(&spapr->htab_mutex); if (shift > 0) { /* Kernel handles htab, we don't need to allocate one */ spapr->htab_shift = shift; kvmppc_kern_htab = true; /* Tell readers to update their file descriptor */ - pthread_mutex_lock(&spapr->htab_mutex); if (spapr->htab_fd > 0) { spapr->htab_fd_stale = true; } - pthread_mutex_unlock(&spapr->htab_mutex); } else { if (!spapr->htab) { /* Allocate an htab if we don't yet have one */ spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr)); + } else { + spapr->htab_mig_full = true; + spapr->htab_first_pass = true; + spapr->htab_save_index = 0; } /* And clear it */ memset(spapr->htab, 0, HTAB_SIZE(spapr)); } + 
pthread_mutex_unlock(&spapr->htab_mutex); /* Update the RMA size if necessary */ if (spapr->vrma_adjust) { @@ -1019,6 +1023,7 @@ static int htab_save_setup(QEMUFile *f, void *opaque) pthread_mutex_lock(&spapr->htab_mutex); spapr->htab_fd = kvmppc_get_htab_fd(false); spapr->htab_fd_stale = false; + spapr->htab_mig_full = false; pthread_mutex_unlock(&spapr->htab_mutex); if (spapr->htab_fd < 0) { fprintf(stderr, "Unable to open fd for reading hash table from KVM: %s\n", @@ -1034,6 +1039,7 @@ static int htab_save_setup(QEMUFile *f, void *opaque) static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr, int64_t max_ns) { + bool final = max_ns < 0; int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; int index = spapr->htab_save_index; int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); @@ -1041,33 +1047,40 @@ static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr, assert(spapr->htab_first_pass); do { - int chunkstart; + int chunkstart, invalidstart; - /* Consume invalid HPTEs */ - while ((index < htabslots) - && !HPTE_VALID(HPTE(spapr->htab, index))) { + chunkstart = index; + /* Consume valid HPTEs */ + while ((index < htabslots && (index - chunkstart < USHRT_MAX)) + && HPTE_VALID(HPTE(spapr->htab, index))) { index++; CLEAN_HPTE(HPTE(spapr->htab, index)); } - /* Consume valid HPTEs */ - chunkstart = index; - while ((index < htabslots) - && HPTE_VALID(HPTE(spapr->htab, index))) { + invalidstart = index; + /* Consume invalid HPTEs */ + while ((index < htabslots && (index - invalidstart < USHRT_MAX)) + && !HPTE_VALID(HPTE(spapr->htab, index))) { index++; CLEAN_HPTE(HPTE(spapr->htab, index)); } - if (index > chunkstart) { - int n_valid = index - chunkstart; + /* Avoid writing an end marker (0,0,0) */ + if (index > chunkstart + && !(chunkstart == invalidstart && !spapr->htab_mig_full)) { + int n_valid = invalidstart - chunkstart; + /* If a reset has occured we must explicitly overwrite the HTAB + * of the destination */ + int n_invalid = 
spapr->htab_mig_full ? index - invalidstart : 0; qemu_put_be32(f, chunkstart); qemu_put_be16(f, n_valid); - qemu_put_be16(f, 0); + qemu_put_be16(f, n_invalid); qemu_put_buffer(f, HPTE(spapr->htab, chunkstart), HASH_PTE_SIZE_64 * n_valid); - if ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) { + if ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns + && !final) { break; } } @@ -1182,10 +1195,14 @@ static int htab_save_iterate(QEMUFile *f, void *opaque) if (rc < 0) { return rc; } - } else if (spapr->htab_first_pass) { - htab_save_first_pass(f, spapr, MAX_ITERATION_NS); } else { - rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS); + pthread_mutex_lock(&spapr->htab_mutex); + if (spapr->htab_first_pass) { + htab_save_first_pass(f, spapr, MAX_ITERATION_NS); + } else { + rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS); + } + pthread_mutex_unlock(&spapr->htab_mutex); } /* End marker */ @@ -1220,7 +1237,13 @@ static int htab_save_complete(QEMUFile *f, void *opaque) close(spapr->htab_fd); spapr->htab_fd = -1; } else { - htab_save_later_pass(f, spapr, -1); + pthread_mutex_lock(&spapr->htab_mutex); + if (spapr->htab_first_pass) { + htab_save_first_pass(f, spapr, -1); + } else { + htab_save_later_pass(f, spapr, -1); + } + pthread_mutex_unlock(&spapr->htab_mutex); } /* End marker */ diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 5e29bec..ee95459 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -38,6 +38,7 @@ typedef struct sPAPREnvironment { bool htab_first_pass; int htab_fd; bool htab_fd_stale; + bool htab_mig_full; pthread_mutex_t htab_mutex; } sPAPREnvironment; -- 1.9.3 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [Qemu-ppc] [PATCH 2/2] spapr: Fix stale HTAB during live migration (TCG) 2014-11-05 6:17 ` [Qemu-devel] [PATCH 2/2] spapr: Fix stale HTAB during live migration (TCG) Samuel Mendoza-Jonas @ 2014-11-05 8:05 ` Alexander Graf 2014-11-05 22:00 ` Samuel Mendoza-Jonas 2014-11-13 7:12 ` [Qemu-devel] " Alexey Kardashevskiy 1 sibling, 1 reply; 10+ messages in thread From: Alexander Graf @ 2014-11-05 8:05 UTC (permalink / raw) To: Samuel Mendoza-Jonas, qemu-ppc, qemu-devel On 05.11.14 07:17, Samuel Mendoza-Jonas wrote: > If a TCG guest reboots during a running migration HTAB entries are not > marked dirty, and the destination boots with an invalid HTAB. > > When a reboot occurs reset the state of HTAB migration, and explicitly > inform the destination of invalid entries. > > Signed-off-by: Samuel Mendoza-Jonas <sam.mj@au1.ibm.com> > --- > hw/ppc/spapr.c | 59 +++++++++++++++++++++++++++++++++++--------------- > include/hw/ppc/spapr.h | 1 + > 2 files changed, 42 insertions(+), 18 deletions(-) > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index 1610c28..9f419e8 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -829,26 +829,30 @@ static void spapr_reset_htab(sPAPREnvironment *spapr) > > shift = kvmppc_reset_htab(spapr->htab_shift); > > + pthread_mutex_lock(&spapr->htab_mutex); > if (shift > 0) { > /* Kernel handles htab, we don't need to allocate one */ > spapr->htab_shift = shift; > kvmppc_kern_htab = true; > > /* Tell readers to update their file descriptor */ > - pthread_mutex_lock(&spapr->htab_mutex); > if (spapr->htab_fd > 0) { > spapr->htab_fd_stale = true; > } > - pthread_mutex_unlock(&spapr->htab_mutex); > } else { > if (!spapr->htab) { > /* Allocate an htab if we don't yet have one */ > spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr)); > + } else { > + spapr->htab_mig_full = true; > + spapr->htab_first_pass = true; > + spapr->htab_save_index = 0; You could just set the dirty bitmap to "all dirty" here, no? 
Then you don't need all the changes below, I presume? > } > > /* And clear it */ > memset(spapr->htab, 0, HTAB_SIZE(spapr)); ... so instead of memset(0)ing it, you could just ppc_hash64_store_hpte(env, i, HPTE64_V_HPTE_DIRTY, 0); the HTAB in a loop. Alex ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [Qemu-ppc] [PATCH 2/2] spapr: Fix stale HTAB during live migration (TCG) 2014-11-05 8:05 ` [Qemu-devel] [Qemu-ppc] " Alexander Graf @ 2014-11-05 22:00 ` Samuel Mendoza-Jonas 2014-11-05 22:04 ` Alexander Graf 0 siblings, 1 reply; 10+ messages in thread From: Samuel Mendoza-Jonas @ 2014-11-05 22:00 UTC (permalink / raw) To: Alexander Graf, qemu-ppc, qemu-devel On 05/11/14 19:05, Alexander Graf wrote: > > > On 05.11.14 07:17, Samuel Mendoza-Jonas wrote: >> If a TCG guest reboots during a running migration HTAB entries are not >> marked dirty, and the destination boots with an invalid HTAB. >> >> When a reboot occurs reset the state of HTAB migration, and explicitly >> inform the destination of invalid entries. >> >> Signed-off-by: Samuel Mendoza-Jonas <sam.mj@au1.ibm.com> >> --- >> hw/ppc/spapr.c | 59 +++++++++++++++++++++++++++++++++++--------------- >> include/hw/ppc/spapr.h | 1 + >> 2 files changed, 42 insertions(+), 18 deletions(-) >> >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c >> index 1610c28..9f419e8 100644 >> --- a/hw/ppc/spapr.c >> +++ b/hw/ppc/spapr.c >> @@ -829,26 +829,30 @@ static void spapr_reset_htab(sPAPREnvironment *spapr) >> >> shift = kvmppc_reset_htab(spapr->htab_shift); >> >> + pthread_mutex_lock(&spapr->htab_mutex); >> if (shift > 0) { >> /* Kernel handles htab, we don't need to allocate one */ >> spapr->htab_shift = shift; >> kvmppc_kern_htab = true; >> >> /* Tell readers to update their file descriptor */ >> - pthread_mutex_lock(&spapr->htab_mutex); >> if (spapr->htab_fd > 0) { >> spapr->htab_fd_stale = true; >> } >> - pthread_mutex_unlock(&spapr->htab_mutex); >> } else { >> if (!spapr->htab) { >> /* Allocate an htab if we don't yet have one */ >> spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr)); >> + } else { >> + spapr->htab_mig_full = true; >> + spapr->htab_first_pass = true; >> + spapr->htab_save_index = 0; > > You could just set the dirty bitmap to "all dirty" here, no? 
Then you > don't need all the changes belong I presume? Yes, then you just need to reset htab_save_index to zero. The idea of this approach was to avoid walking the HTAB twice (once to dirty and once to read it). But it is a lot of changes for a fairly small benefit. If setting it dirty is preferred I'll test and send that version. Thanks! > >> } >> >> /* And clear it */ >> memset(spapr->htab, 0, HTAB_SIZE(spapr)); > > ... so instead of memset(0)ing it, you could just > > ppc_hash64_store_hpte(env, i, HPTE64_V_HPTE_DIRTY, 0); > > the HTAB in a loop. > > > Alex > -- ----------- LTC Ozlabs IBM ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [Qemu-ppc] [PATCH 2/2] spapr: Fix stale HTAB during live migration (TCG) 2014-11-05 22:00 ` Samuel Mendoza-Jonas @ 2014-11-05 22:04 ` Alexander Graf 0 siblings, 0 replies; 10+ messages in thread From: Alexander Graf @ 2014-11-05 22:04 UTC (permalink / raw) To: sam.mj, qemu-ppc, qemu-devel On 05.11.14 23:00, Samuel Mendoza-Jonas wrote: > On 05/11/14 19:05, Alexander Graf wrote: >> >> >> On 05.11.14 07:17, Samuel Mendoza-Jonas wrote: >>> If a TCG guest reboots during a running migration HTAB entries are not >>> marked dirty, and the destination boots with an invalid HTAB. >>> >>> When a reboot occurs reset the state of HTAB migration, and explicitly >>> inform the destination of invalid entries. >>> >>> Signed-off-by: Samuel Mendoza-Jonas <sam.mj@au1.ibm.com> >>> --- >>> hw/ppc/spapr.c | 59 +++++++++++++++++++++++++++++++++++--------------- >>> include/hw/ppc/spapr.h | 1 + >>> 2 files changed, 42 insertions(+), 18 deletions(-) >>> >>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c >>> index 1610c28..9f419e8 100644 >>> --- a/hw/ppc/spapr.c >>> +++ b/hw/ppc/spapr.c >>> @@ -829,26 +829,30 @@ static void spapr_reset_htab(sPAPREnvironment *spapr) >>> >>> shift = kvmppc_reset_htab(spapr->htab_shift); >>> >>> + pthread_mutex_lock(&spapr->htab_mutex); >>> if (shift > 0) { >>> /* Kernel handles htab, we don't need to allocate one */ >>> spapr->htab_shift = shift; >>> kvmppc_kern_htab = true; >>> >>> /* Tell readers to update their file descriptor */ >>> - pthread_mutex_lock(&spapr->htab_mutex); >>> if (spapr->htab_fd > 0) { >>> spapr->htab_fd_stale = true; >>> } >>> - pthread_mutex_unlock(&spapr->htab_mutex); >>> } else { >>> if (!spapr->htab) { >>> /* Allocate an htab if we don't yet have one */ >>> spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr)); >>> + } else { >>> + spapr->htab_mig_full = true; >>> + spapr->htab_first_pass = true; >>> + spapr->htab_save_index = 0; >> >> You could just set the dirty bitmap to "all dirty" here, no? 
Then you >> don't need all the changes belong I presume? > > Yes, then you just need to reset htab_save_index to zero. The idea of this approach > was to avoid walking the HTAB twice (once to dirty and once to read it). But it is > a lot of changes for a fairly small benefit. If setting it dirty is preferred I'll > test and send that version. Thanks! Yes, please. I would prefer to keep this code as simple as I can ;). And the less corner cases we need to watch out for, the easier it becomes. Alex ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [PATCH 2/2] spapr: Fix stale HTAB during live migration (TCG) 2014-11-05 6:17 ` [Qemu-devel] [PATCH 2/2] spapr: Fix stale HTAB during live migration (TCG) Samuel Mendoza-Jonas 2014-11-05 8:05 ` [Qemu-devel] [Qemu-ppc] " Alexander Graf @ 2014-11-13 7:12 ` Alexey Kardashevskiy 1 sibling, 0 replies; 10+ messages in thread From: Alexey Kardashevskiy @ 2014-11-13 7:12 UTC (permalink / raw) To: Samuel Mendoza-Jonas, qemu-ppc, qemu-devel On 11/05/2014 05:17 PM, Samuel Mendoza-Jonas wrote: > If a TCG guest reboots during a running migration HTAB entries are not > marked dirty, and the destination boots with an invalid HTAB. > > When a reboot occurs reset the state of HTAB migration, and explicitly > inform the destination of invalid entries. > > Signed-off-by: Samuel Mendoza-Jonas <sam.mj@au1.ibm.com> > --- > hw/ppc/spapr.c | 59 +++++++++++++++++++++++++++++++++++--------------- > include/hw/ppc/spapr.h | 1 + > 2 files changed, 42 insertions(+), 18 deletions(-) > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index 1610c28..9f419e8 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -829,26 +829,30 @@ static void spapr_reset_htab(sPAPREnvironment *spapr) > > shift = kvmppc_reset_htab(spapr->htab_shift); > > + pthread_mutex_lock(&spapr->htab_mutex); > if (shift > 0) { > /* Kernel handles htab, we don't need to allocate one */ > spapr->htab_shift = shift; > kvmppc_kern_htab = true; > > /* Tell readers to update their file descriptor */ > - pthread_mutex_lock(&spapr->htab_mutex); > if (spapr->htab_fd > 0) { > spapr->htab_fd_stale = true; > } > - pthread_mutex_unlock(&spapr->htab_mutex); > } else { > if (!spapr->htab) { > /* Allocate an htab if we don't yet have one */ > spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr)); > + } else { > + spapr->htab_mig_full = true; > + spapr->htab_first_pass = true; > + spapr->htab_save_index = 0; > } > > /* And clear it */ > memset(spapr->htab, 0, HTAB_SIZE(spapr)); > } > + 
pthread_mutex_unlock(&spapr->htab_mutex); The pthread_mutex_(un)lock things from the chunk above should go to the previous patch. > > /* Update the RMA size if necessary */ > if (spapr->vrma_adjust) { > @@ -1019,6 +1023,7 @@ static int htab_save_setup(QEMUFile *f, void *opaque) > pthread_mutex_lock(&spapr->htab_mutex); > spapr->htab_fd = kvmppc_get_htab_fd(false); > spapr->htab_fd_stale = false; > + spapr->htab_mig_full = false; > pthread_mutex_unlock(&spapr->htab_mutex); > if (spapr->htab_fd < 0) { > fprintf(stderr, "Unable to open fd for reading hash table from KVM: %s\n", > @@ -1034,6 +1039,7 @@ static int htab_save_setup(QEMUFile *f, void *opaque) > static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr, > int64_t max_ns) > { > + bool final = max_ns < 0; > int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; > int index = spapr->htab_save_index; > int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); > @@ -1041,33 +1047,40 @@ static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr, > assert(spapr->htab_first_pass); > > do { > - int chunkstart; > + int chunkstart, invalidstart; > > - /* Consume invalid HPTEs */ > - while ((index < htabslots) > - && !HPTE_VALID(HPTE(spapr->htab, index))) { Nit: in most places in QEMU, "&&" would be at the end of the previous line :) > + chunkstart = index; > + /* Consume valid HPTEs */ > + while ((index < htabslots && (index - chunkstart < USHRT_MAX)) > + && HPTE_VALID(HPTE(spapr->htab, index))) { > index++; > CLEAN_HPTE(HPTE(spapr->htab, index)); > } > > - /* Consume valid HPTEs */ > - chunkstart = index; > - while ((index < htabslots) > - && HPTE_VALID(HPTE(spapr->htab, index))) { > + invalidstart = index; > + /* Consume invalid HPTEs */ > + while ((index < htabslots && (index - invalidstart < USHRT_MAX)) > + && !HPTE_VALID(HPTE(spapr->htab, index))) { > index++; > CLEAN_HPTE(HPTE(spapr->htab, index)); > } > > - if (index > chunkstart) { > - int n_valid = index - chunkstart; > + /* Avoid writing 
an end marker (0,0,0) */ > + if (index > chunkstart > + && !(chunkstart == invalidstart && !spapr->htab_mig_full)) { > + int n_valid = invalidstart - chunkstart; > + /* If a reset has occured we must explicitly overwrite the HTAB > + * of the destination */ > + int n_invalid = spapr->htab_mig_full ? index - invalidstart : 0; > > qemu_put_be32(f, chunkstart); > qemu_put_be16(f, n_valid); > - qemu_put_be16(f, 0); > + qemu_put_be16(f, n_invalid); > qemu_put_buffer(f, HPTE(spapr->htab, chunkstart), > HASH_PTE_SIZE_64 * n_valid); > > - if ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) { > + if ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns > + && !final) { > break; > } > } > @@ -1182,10 +1195,14 @@ static int htab_save_iterate(QEMUFile *f, void *opaque) > if (rc < 0) { > return rc; > } > - } else if (spapr->htab_first_pass) { > - htab_save_first_pass(f, spapr, MAX_ITERATION_NS); > } else { > - rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS); > + pthread_mutex_lock(&spapr->htab_mutex); > + if (spapr->htab_first_pass) { > + htab_save_first_pass(f, spapr, MAX_ITERATION_NS); > + } else { > + rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS); > + } > + pthread_mutex_unlock(&spapr->htab_mutex); > } > > /* End marker */ > @@ -1220,7 +1237,13 @@ static int htab_save_complete(QEMUFile *f, void *opaque) > close(spapr->htab_fd); > spapr->htab_fd = -1; > } else { > - htab_save_later_pass(f, spapr, -1); > + pthread_mutex_lock(&spapr->htab_mutex); > + if (spapr->htab_first_pass) { > + htab_save_first_pass(f, spapr, -1); > + } else { > + htab_save_later_pass(f, spapr, -1); > + } > + pthread_mutex_unlock(&spapr->htab_mutex); > } > > /* End marker */ > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > index 5e29bec..ee95459 100644 > --- a/include/hw/ppc/spapr.h > +++ b/include/hw/ppc/spapr.h > @@ -38,6 +38,7 @@ typedef struct sPAPREnvironment { > bool htab_first_pass; > int htab_fd; > bool htab_fd_stale; > + bool htab_mig_full; Not 
sure what @htab_mig_full stands for exactly. Please explain it in the commit log. > pthread_mutex_t htab_mutex; > } sPAPREnvironment; > > -- Alexey ^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2014-11-13 7:12 UTC | newest] Thread overview: 10+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2014-11-05 6:16 [Qemu-devel] [PATCH 0/2] spapr: Fix stale HTAB during live migration Samuel Mendoza-Jonas 2014-11-05 6:17 ` [Qemu-devel] [PATCH 1/2] spapr: Fix stale HTAB during live migration (KVM) Samuel Mendoza-Jonas 2014-11-05 7:57 ` [Qemu-devel] [Qemu-ppc] " Alexander Graf 2014-11-05 21:56 ` Samuel Mendoza-Jonas 2014-11-13 6:59 ` [Qemu-devel] " Alexey Kardashevskiy 2014-11-05 6:17 ` [Qemu-devel] [PATCH 2/2] spapr: Fix stale HTAB during live migration (TCG) Samuel Mendoza-Jonas 2014-11-05 8:05 ` [Qemu-devel] [Qemu-ppc] " Alexander Graf 2014-11-05 22:00 ` Samuel Mendoza-Jonas 2014-11-05 22:04 ` Alexander Graf 2014-11-13 7:12 ` [Qemu-devel] " Alexey Kardashevskiy
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).