* [Qemu-devel] [PATCH 0/2] spapr: Fix stale HTAB during live migration
@ 2014-11-05 6:16 Samuel Mendoza-Jonas
2014-11-05 6:17 ` [Qemu-devel] [PATCH 1/2] spapr: Fix stale HTAB during live migration (KVM) Samuel Mendoza-Jonas
2014-11-05 6:17 ` [Qemu-devel] [PATCH 2/2] spapr: Fix stale HTAB during live migration (TCG) Samuel Mendoza-Jonas
0 siblings, 2 replies; 10+ messages in thread
From: Samuel Mendoza-Jonas @ 2014-11-05 6:16 UTC (permalink / raw)
To: qemu-ppc, qemu-devel; +Cc: aik, Samuel Mendoza-Jonas
If a spapr guest reboots during a live migration, the guest HTAB on the
destination is not updated properly, usually resulting in a kernel panic.
This is a (delayed!) follow-up to my previous patch, including a fix
for TCG guests as well as KVM.
Samuel Mendoza-Jonas (2):
spapr: Fix stale HTAB during live migration (KVM)
spapr: Fix stale HTAB during live migration (TCG)
hw/ppc/spapr.c | 102 +++++++++++++++++++++++++++++++++++++++++--------
include/hw/ppc/spapr.h | 3 ++
2 files changed, 89 insertions(+), 16 deletions(-)
--
1.9.3
^ permalink raw reply [flat|nested] 10+ messages in thread
* [Qemu-devel] [PATCH 1/2] spapr: Fix stale HTAB during live migration (KVM)
2014-11-05 6:16 [Qemu-devel] [PATCH 0/2] spapr: Fix stale HTAB during live migration Samuel Mendoza-Jonas
@ 2014-11-05 6:17 ` Samuel Mendoza-Jonas
2014-11-05 7:57 ` [Qemu-devel] [Qemu-ppc] " Alexander Graf
2014-11-13 6:59 ` [Qemu-devel] " Alexey Kardashevskiy
2014-11-05 6:17 ` [Qemu-devel] [PATCH 2/2] spapr: Fix stale HTAB during live migration (TCG) Samuel Mendoza-Jonas
1 sibling, 2 replies; 10+ messages in thread
From: Samuel Mendoza-Jonas @ 2014-11-05 6:17 UTC (permalink / raw)
To: qemu-ppc, qemu-devel; +Cc: aik, Samuel Mendoza-Jonas
If a guest reboots during a running migration, changes to the
hash page table are not necessarily updated on the destination.
Opening a new file descriptor to the HTAB forces the migration
handler to resend the entire table.
Signed-off-by: Samuel Mendoza-Jonas <sam.mj@au1.ibm.com>
---
hw/ppc/spapr.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
include/hw/ppc/spapr.h | 2 ++
2 files changed, 49 insertions(+)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 0a2bfe6..1610c28 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -833,6 +833,13 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
/* Kernel handles htab, we don't need to allocate one */
spapr->htab_shift = shift;
kvmppc_kern_htab = true;
+
+ /* Tell readers to update their file descriptor */
+ pthread_mutex_lock(&spapr->htab_mutex);
+ if (spapr->htab_fd > 0) {
+ spapr->htab_fd_stale = true;
+ }
+ pthread_mutex_unlock(&spapr->htab_mutex);
} else {
if (!spapr->htab) {
/* Allocate an htab if we don't yet have one */
@@ -850,6 +857,31 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
}
}
+/* A guest reset will cause spapr->htab_fd to become stale if being used.
+ * Reopen the file descriptor to make sure the whole HTAB is properly read.
+ */
+static int spapr_check_htab_fd(sPAPREnvironment *spapr)
+{
+ int rc = 0;
+
+ pthread_mutex_lock(&spapr->htab_mutex);
+
+ if (spapr->htab_fd_stale) {
+ close(spapr->htab_fd);
+ spapr->htab_fd = kvmppc_get_htab_fd(false);
+ if (spapr->htab_fd < 0) {
+ error_report("Unable to open fd for reading hash table from KVM: "
+ "%s", strerror(errno));
+ rc = -1;
+ }
+ spapr->htab_fd_stale = false;
+ }
+
+ pthread_mutex_unlock(&spapr->htab_mutex);
+ return rc;
+}
+
+
static void ppc_spapr_reset(void)
{
PowerPCCPU *first_ppc_cpu;
@@ -984,7 +1016,10 @@ static int htab_save_setup(QEMUFile *f, void *opaque)
} else {
assert(kvm_enabled());
+ pthread_mutex_lock(&spapr->htab_mutex);
spapr->htab_fd = kvmppc_get_htab_fd(false);
+ spapr->htab_fd_stale = false;
+ pthread_mutex_unlock(&spapr->htab_mutex);
if (spapr->htab_fd < 0) {
fprintf(stderr, "Unable to open fd for reading hash table from KVM: %s\n",
strerror(errno));
@@ -1137,6 +1172,11 @@ static int htab_save_iterate(QEMUFile *f, void *opaque)
if (!spapr->htab) {
assert(kvm_enabled());
+ rc = spapr_check_htab_fd(spapr);
+ if (rc < 0) {
+ return rc;
+ }
+
rc = kvmppc_save_htab(f, spapr->htab_fd,
MAX_KVM_BUF_SIZE, MAX_ITERATION_NS);
if (rc < 0) {
@@ -1168,6 +1208,11 @@ static int htab_save_complete(QEMUFile *f, void *opaque)
assert(kvm_enabled());
+ rc = spapr_check_htab_fd(spapr);
+ if (rc < 0) {
+ return rc;
+ }
+
rc = kvmppc_save_htab(f, spapr->htab_fd, MAX_KVM_BUF_SIZE, -1);
if (rc < 0) {
return rc;
@@ -1355,6 +1400,8 @@ static void ppc_spapr_init(MachineState *machine)
spapr->htab_shift++;
}
+ pthread_mutex_init(&spapr->htab_mutex, NULL);
+
/* Set up Interrupt Controller before we create the VCPUs */
spapr->icp = xics_system_init(smp_cpus * kvmppc_smt_threads() / smp_threads,
XICS_IRQS);
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 749daf4..5e29bec 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -37,6 +37,8 @@ typedef struct sPAPREnvironment {
int htab_save_index;
bool htab_first_pass;
int htab_fd;
+ bool htab_fd_stale;
+ pthread_mutex_t htab_mutex;
} sPAPREnvironment;
#define H_SUCCESS 0
--
1.9.3
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [Qemu-devel] [PATCH 2/2] spapr: Fix stale HTAB during live migration (TCG)
2014-11-05 6:16 [Qemu-devel] [PATCH 0/2] spapr: Fix stale HTAB during live migration Samuel Mendoza-Jonas
2014-11-05 6:17 ` [Qemu-devel] [PATCH 1/2] spapr: Fix stale HTAB during live migration (KVM) Samuel Mendoza-Jonas
@ 2014-11-05 6:17 ` Samuel Mendoza-Jonas
2014-11-05 8:05 ` [Qemu-devel] [Qemu-ppc] " Alexander Graf
2014-11-13 7:12 ` [Qemu-devel] " Alexey Kardashevskiy
1 sibling, 2 replies; 10+ messages in thread
From: Samuel Mendoza-Jonas @ 2014-11-05 6:17 UTC (permalink / raw)
To: qemu-ppc, qemu-devel; +Cc: aik, Samuel Mendoza-Jonas
If a TCG guest reboots during a running migration, HTAB entries are not
marked dirty, and the destination boots with an invalid HTAB.
When a reboot occurs, reset the state of HTAB migration and explicitly
inform the destination of invalid entries.
Signed-off-by: Samuel Mendoza-Jonas <sam.mj@au1.ibm.com>
---
hw/ppc/spapr.c | 59 +++++++++++++++++++++++++++++++++++---------------
include/hw/ppc/spapr.h | 1 +
2 files changed, 42 insertions(+), 18 deletions(-)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 1610c28..9f419e8 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -829,26 +829,30 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
shift = kvmppc_reset_htab(spapr->htab_shift);
+ pthread_mutex_lock(&spapr->htab_mutex);
if (shift > 0) {
/* Kernel handles htab, we don't need to allocate one */
spapr->htab_shift = shift;
kvmppc_kern_htab = true;
/* Tell readers to update their file descriptor */
- pthread_mutex_lock(&spapr->htab_mutex);
if (spapr->htab_fd > 0) {
spapr->htab_fd_stale = true;
}
- pthread_mutex_unlock(&spapr->htab_mutex);
} else {
if (!spapr->htab) {
/* Allocate an htab if we don't yet have one */
spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
+ } else {
+ spapr->htab_mig_full = true;
+ spapr->htab_first_pass = true;
+ spapr->htab_save_index = 0;
}
/* And clear it */
memset(spapr->htab, 0, HTAB_SIZE(spapr));
}
+ pthread_mutex_unlock(&spapr->htab_mutex);
/* Update the RMA size if necessary */
if (spapr->vrma_adjust) {
@@ -1019,6 +1023,7 @@ static int htab_save_setup(QEMUFile *f, void *opaque)
pthread_mutex_lock(&spapr->htab_mutex);
spapr->htab_fd = kvmppc_get_htab_fd(false);
spapr->htab_fd_stale = false;
+ spapr->htab_mig_full = false;
pthread_mutex_unlock(&spapr->htab_mutex);
if (spapr->htab_fd < 0) {
fprintf(stderr, "Unable to open fd for reading hash table from KVM: %s\n",
@@ -1034,6 +1039,7 @@ static int htab_save_setup(QEMUFile *f, void *opaque)
static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr,
int64_t max_ns)
{
+ bool final = max_ns < 0;
int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
int index = spapr->htab_save_index;
int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
@@ -1041,33 +1047,40 @@ static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr,
assert(spapr->htab_first_pass);
do {
- int chunkstart;
+ int chunkstart, invalidstart;
- /* Consume invalid HPTEs */
- while ((index < htabslots)
- && !HPTE_VALID(HPTE(spapr->htab, index))) {
+ chunkstart = index;
+ /* Consume valid HPTEs */
+ while ((index < htabslots && (index - chunkstart < USHRT_MAX))
+ && HPTE_VALID(HPTE(spapr->htab, index))) {
index++;
CLEAN_HPTE(HPTE(spapr->htab, index));
}
- /* Consume valid HPTEs */
- chunkstart = index;
- while ((index < htabslots)
- && HPTE_VALID(HPTE(spapr->htab, index))) {
+ invalidstart = index;
+ /* Consume invalid HPTEs */
+ while ((index < htabslots && (index - invalidstart < USHRT_MAX))
+ && !HPTE_VALID(HPTE(spapr->htab, index))) {
index++;
CLEAN_HPTE(HPTE(spapr->htab, index));
}
- if (index > chunkstart) {
- int n_valid = index - chunkstart;
+ /* Avoid writing an end marker (0,0,0) */
+ if (index > chunkstart
+ && !(chunkstart == invalidstart && !spapr->htab_mig_full)) {
+ int n_valid = invalidstart - chunkstart;
+ /* If a reset has occured we must explicitly overwrite the HTAB
+ * of the destination */
+ int n_invalid = spapr->htab_mig_full ? index - invalidstart : 0;
qemu_put_be32(f, chunkstart);
qemu_put_be16(f, n_valid);
- qemu_put_be16(f, 0);
+ qemu_put_be16(f, n_invalid);
qemu_put_buffer(f, HPTE(spapr->htab, chunkstart),
HASH_PTE_SIZE_64 * n_valid);
- if ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) {
+ if ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns
+ && !final) {
break;
}
}
@@ -1182,10 +1195,14 @@ static int htab_save_iterate(QEMUFile *f, void *opaque)
if (rc < 0) {
return rc;
}
- } else if (spapr->htab_first_pass) {
- htab_save_first_pass(f, spapr, MAX_ITERATION_NS);
} else {
- rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS);
+ pthread_mutex_lock(&spapr->htab_mutex);
+ if (spapr->htab_first_pass) {
+ htab_save_first_pass(f, spapr, MAX_ITERATION_NS);
+ } else {
+ rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS);
+ }
+ pthread_mutex_unlock(&spapr->htab_mutex);
}
/* End marker */
@@ -1220,7 +1237,13 @@ static int htab_save_complete(QEMUFile *f, void *opaque)
close(spapr->htab_fd);
spapr->htab_fd = -1;
} else {
- htab_save_later_pass(f, spapr, -1);
+ pthread_mutex_lock(&spapr->htab_mutex);
+ if (spapr->htab_first_pass) {
+ htab_save_first_pass(f, spapr, -1);
+ } else {
+ htab_save_later_pass(f, spapr, -1);
+ }
+ pthread_mutex_unlock(&spapr->htab_mutex);
}
/* End marker */
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 5e29bec..ee95459 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -38,6 +38,7 @@ typedef struct sPAPREnvironment {
bool htab_first_pass;
int htab_fd;
bool htab_fd_stale;
+ bool htab_mig_full;
pthread_mutex_t htab_mutex;
} sPAPREnvironment;
--
1.9.3
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [Qemu-ppc] [PATCH 1/2] spapr: Fix stale HTAB during live migration (KVM)
2014-11-05 6:17 ` [Qemu-devel] [PATCH 1/2] spapr: Fix stale HTAB during live migration (KVM) Samuel Mendoza-Jonas
@ 2014-11-05 7:57 ` Alexander Graf
2014-11-05 21:56 ` Samuel Mendoza-Jonas
2014-11-13 6:59 ` [Qemu-devel] " Alexey Kardashevskiy
1 sibling, 1 reply; 10+ messages in thread
From: Alexander Graf @ 2014-11-05 7:57 UTC (permalink / raw)
To: Samuel Mendoza-Jonas, qemu-ppc, qemu-devel
On 05.11.14 07:17, Samuel Mendoza-Jonas wrote:
> If a guest reboots during a running migration, changes to the
> hash page table are not necessarily updated on the destination.
> Opening a new file descriptor to the HTAB forces the migration
> handler to resend the entire table.
>
> Signed-off-by: Samuel Mendoza-Jonas <sam.mj@au1.ibm.com>
> ---
> hw/ppc/spapr.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
> include/hw/ppc/spapr.h | 2 ++
> 2 files changed, 49 insertions(+)
>
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 0a2bfe6..1610c28 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -833,6 +833,13 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
> /* Kernel handles htab, we don't need to allocate one */
> spapr->htab_shift = shift;
> kvmppc_kern_htab = true;
> +
> + /* Tell readers to update their file descriptor */
> + pthread_mutex_lock(&spapr->htab_mutex);
I don't think you can directly use pthread functions in hw/. These files
could be compiled on Windows which doesn't have pthread. Instead, please
use the QEMU wrappers from util/qemu-thread-posix.c.
Or maybe try and find out whether you actually do need the lock. Reboots
can only happen when triggered via a HCALL which takes the BQL. I don't
quite know how much the migration code became threaded, but I'd assume
that at least device migration would happen under the BQL or after
stopping the VM, but in a consistent place.
So as long as we're guaranteed that the htab_fd_stale variable is set at
the final "send all device contents" phase, we should automatically
catch any reset that happened in between - even without a lock, no?
Alex
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [Qemu-ppc] [PATCH 2/2] spapr: Fix stale HTAB during live migration (TCG)
2014-11-05 6:17 ` [Qemu-devel] [PATCH 2/2] spapr: Fix stale HTAB during live migration (TCG) Samuel Mendoza-Jonas
@ 2014-11-05 8:05 ` Alexander Graf
2014-11-05 22:00 ` Samuel Mendoza-Jonas
2014-11-13 7:12 ` [Qemu-devel] " Alexey Kardashevskiy
1 sibling, 1 reply; 10+ messages in thread
From: Alexander Graf @ 2014-11-05 8:05 UTC (permalink / raw)
To: Samuel Mendoza-Jonas, qemu-ppc, qemu-devel
On 05.11.14 07:17, Samuel Mendoza-Jonas wrote:
> If a TCG guest reboots during a running migration HTAB entries are not
> marked dirty, and the destination boots with an invalid HTAB.
>
> When a reboot occurs reset the state of HTAB migration, and explicitly
> inform the destination of invalid entries.
>
> Signed-off-by: Samuel Mendoza-Jonas <sam.mj@au1.ibm.com>
> ---
> hw/ppc/spapr.c | 59 +++++++++++++++++++++++++++++++++++---------------
> include/hw/ppc/spapr.h | 1 +
> 2 files changed, 42 insertions(+), 18 deletions(-)
>
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 1610c28..9f419e8 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -829,26 +829,30 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
>
> shift = kvmppc_reset_htab(spapr->htab_shift);
>
> + pthread_mutex_lock(&spapr->htab_mutex);
> if (shift > 0) {
> /* Kernel handles htab, we don't need to allocate one */
> spapr->htab_shift = shift;
> kvmppc_kern_htab = true;
>
> /* Tell readers to update their file descriptor */
> - pthread_mutex_lock(&spapr->htab_mutex);
> if (spapr->htab_fd > 0) {
> spapr->htab_fd_stale = true;
> }
> - pthread_mutex_unlock(&spapr->htab_mutex);
> } else {
> if (!spapr->htab) {
> /* Allocate an htab if we don't yet have one */
> spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
> + } else {
> + spapr->htab_mig_full = true;
> + spapr->htab_first_pass = true;
> + spapr->htab_save_index = 0;
You could just set the dirty bitmap to "all dirty" here, no? Then you
don't need all the changes below, I presume?
> }
>
> /* And clear it */
> memset(spapr->htab, 0, HTAB_SIZE(spapr));
... so instead of memset(0)ing it, you could just
ppc_hash64_store_hpte(env, i, HPTE64_V_HPTE_DIRTY, 0);
the HTAB in a loop.
Alex
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [Qemu-ppc] [PATCH 1/2] spapr: Fix stale HTAB during live migration (KVM)
2014-11-05 7:57 ` [Qemu-devel] [Qemu-ppc] " Alexander Graf
@ 2014-11-05 21:56 ` Samuel Mendoza-Jonas
0 siblings, 0 replies; 10+ messages in thread
From: Samuel Mendoza-Jonas @ 2014-11-05 21:56 UTC (permalink / raw)
To: Alexander Graf, qemu-ppc, qemu-devel
On 05/11/14 18:57, Alexander Graf wrote:
>
>
> On 05.11.14 07:17, Samuel Mendoza-Jonas wrote:
>> If a guest reboots during a running migration, changes to the
>> hash page table are not necessarily updated on the destination.
>> Opening a new file descriptor to the HTAB forces the migration
>> handler to resend the entire table.
>>
>> Signed-off-by: Samuel Mendoza-Jonas <sam.mj@au1.ibm.com>
>> ---
>> hw/ppc/spapr.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
>> include/hw/ppc/spapr.h | 2 ++
>> 2 files changed, 49 insertions(+)
>>
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 0a2bfe6..1610c28 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -833,6 +833,13 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
>> /* Kernel handles htab, we don't need to allocate one */
>> spapr->htab_shift = shift;
>> kvmppc_kern_htab = true;
>> +
>> + /* Tell readers to update their file descriptor */
>> + pthread_mutex_lock(&spapr->htab_mutex);
>
> I don't think you can directly use pthread functions in hw/. These files
> could be compiled on Windows which doesn't have pthread. Instead, please
> use the QEMU wrappers from util/qemu-thread-posix.c.
Thanks for catching this!
>
> Or maybe try and find out whether you actually do need the lock. Reboots
> can only happen when triggered via a HCALL which takes the BQL. I don't
> quite know how much the migration code became threaded, but I'd assume
> that at least device migration would happen under the BQL or after
> stopping the VM, but in a consistent place.
>
> So as long as we're guaranteed that the htab_fd_stale variable is set at
> the final "send all device contents" phase, we should automatically
> catch any reset that happened in between - even without a lock, no?
Good point, we only really *need* the flag to be set at least before the
call to save_live_complete(). I'll double check but yes, that should work.
>
>
> Alex
>
--
-----------
LTC Ozlabs
IBM
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [Qemu-ppc] [PATCH 2/2] spapr: Fix stale HTAB during live migration (TCG)
2014-11-05 8:05 ` [Qemu-devel] [Qemu-ppc] " Alexander Graf
@ 2014-11-05 22:00 ` Samuel Mendoza-Jonas
2014-11-05 22:04 ` Alexander Graf
0 siblings, 1 reply; 10+ messages in thread
From: Samuel Mendoza-Jonas @ 2014-11-05 22:00 UTC (permalink / raw)
To: Alexander Graf, qemu-ppc, qemu-devel
On 05/11/14 19:05, Alexander Graf wrote:
>
>
> On 05.11.14 07:17, Samuel Mendoza-Jonas wrote:
>> If a TCG guest reboots during a running migration HTAB entries are not
>> marked dirty, and the destination boots with an invalid HTAB.
>>
>> When a reboot occurs reset the state of HTAB migration, and explicitly
>> inform the destination of invalid entries.
>>
>> Signed-off-by: Samuel Mendoza-Jonas <sam.mj@au1.ibm.com>
>> ---
>> hw/ppc/spapr.c | 59 +++++++++++++++++++++++++++++++++++---------------
>> include/hw/ppc/spapr.h | 1 +
>> 2 files changed, 42 insertions(+), 18 deletions(-)
>>
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 1610c28..9f419e8 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -829,26 +829,30 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
>>
>> shift = kvmppc_reset_htab(spapr->htab_shift);
>>
>> + pthread_mutex_lock(&spapr->htab_mutex);
>> if (shift > 0) {
>> /* Kernel handles htab, we don't need to allocate one */
>> spapr->htab_shift = shift;
>> kvmppc_kern_htab = true;
>>
>> /* Tell readers to update their file descriptor */
>> - pthread_mutex_lock(&spapr->htab_mutex);
>> if (spapr->htab_fd > 0) {
>> spapr->htab_fd_stale = true;
>> }
>> - pthread_mutex_unlock(&spapr->htab_mutex);
>> } else {
>> if (!spapr->htab) {
>> /* Allocate an htab if we don't yet have one */
>> spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
>> + } else {
>> + spapr->htab_mig_full = true;
>> + spapr->htab_first_pass = true;
>> + spapr->htab_save_index = 0;
>
> You could just set the dirty bitmap to "all dirty" here, no? Then you
> don't need all the changes below, I presume?
Yes, then you just need to reset htab_save_index to zero. The idea of this approach
was to avoid walking the HTAB twice (once to dirty and once to read it). But it is
a lot of changes for a fairly small benefit. If setting it dirty is preferred I'll
test and send that version. Thanks!
>
>> }
>>
>> /* And clear it */
>> memset(spapr->htab, 0, HTAB_SIZE(spapr));
>
> ... so instead of memset(0)ing it, you could just
>
> ppc_hash64_store_hpte(env, i, HPTE64_V_HPTE_DIRTY, 0);
>
> the HTAB in a loop.
>
>
> Alex
>
--
-----------
LTC Ozlabs
IBM
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [Qemu-ppc] [PATCH 2/2] spapr: Fix stale HTAB during live migration (TCG)
2014-11-05 22:00 ` Samuel Mendoza-Jonas
@ 2014-11-05 22:04 ` Alexander Graf
0 siblings, 0 replies; 10+ messages in thread
From: Alexander Graf @ 2014-11-05 22:04 UTC (permalink / raw)
To: sam.mj, qemu-ppc, qemu-devel
On 05.11.14 23:00, Samuel Mendoza-Jonas wrote:
> On 05/11/14 19:05, Alexander Graf wrote:
>>
>>
>> On 05.11.14 07:17, Samuel Mendoza-Jonas wrote:
>>> If a TCG guest reboots during a running migration HTAB entries are not
>>> marked dirty, and the destination boots with an invalid HTAB.
>>>
>>> When a reboot occurs reset the state of HTAB migration, and explicitly
>>> inform the destination of invalid entries.
>>>
>>> Signed-off-by: Samuel Mendoza-Jonas <sam.mj@au1.ibm.com>
>>> ---
>>> hw/ppc/spapr.c | 59 +++++++++++++++++++++++++++++++++++---------------
>>> include/hw/ppc/spapr.h | 1 +
>>> 2 files changed, 42 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>> index 1610c28..9f419e8 100644
>>> --- a/hw/ppc/spapr.c
>>> +++ b/hw/ppc/spapr.c
>>> @@ -829,26 +829,30 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
>>>
>>> shift = kvmppc_reset_htab(spapr->htab_shift);
>>>
>>> + pthread_mutex_lock(&spapr->htab_mutex);
>>> if (shift > 0) {
>>> /* Kernel handles htab, we don't need to allocate one */
>>> spapr->htab_shift = shift;
>>> kvmppc_kern_htab = true;
>>>
>>> /* Tell readers to update their file descriptor */
>>> - pthread_mutex_lock(&spapr->htab_mutex);
>>> if (spapr->htab_fd > 0) {
>>> spapr->htab_fd_stale = true;
>>> }
>>> - pthread_mutex_unlock(&spapr->htab_mutex);
>>> } else {
>>> if (!spapr->htab) {
>>> /* Allocate an htab if we don't yet have one */
>>> spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
>>> + } else {
>>> + spapr->htab_mig_full = true;
>>> + spapr->htab_first_pass = true;
>>> + spapr->htab_save_index = 0;
>>
>> You could just set the dirty bitmap to "all dirty" here, no? Then you
>> don't need all the changes below, I presume?
>
> Yes, then you just need to reset htab_save_index to zero. The idea of this approach
> was to avoid walking the HTAB twice (once to dirty and once to read it). But it is
> a lot of changes for a fairly small benefit. If setting it dirty is preferred I'll
> test and send that version. Thanks!
Yes, please. I would prefer to keep this code as simple as I can ;). And
the fewer corner cases we need to watch out for, the easier it becomes.
Alex
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [PATCH 1/2] spapr: Fix stale HTAB during live migration (KVM)
2014-11-05 6:17 ` [Qemu-devel] [PATCH 1/2] spapr: Fix stale HTAB during live migration (KVM) Samuel Mendoza-Jonas
2014-11-05 7:57 ` [Qemu-devel] [Qemu-ppc] " Alexander Graf
@ 2014-11-13 6:59 ` Alexey Kardashevskiy
1 sibling, 0 replies; 10+ messages in thread
From: Alexey Kardashevskiy @ 2014-11-13 6:59 UTC (permalink / raw)
To: Samuel Mendoza-Jonas, qemu-ppc, qemu-devel
On 11/05/2014 05:17 PM, Samuel Mendoza-Jonas wrote:
> If a guest reboots during a running migration, changes to the
> hash page table are not necessarily updated on the destination.
> Opening a new file descriptor to the HTAB forces the migration
> handler to resend the entire table.
>
> Signed-off-by: Samuel Mendoza-Jonas <sam.mj@au1.ibm.com>
> ---
> hw/ppc/spapr.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
> include/hw/ppc/spapr.h | 2 ++
> 2 files changed, 49 insertions(+)
>
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 0a2bfe6..1610c28 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -833,6 +833,13 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
> /* Kernel handles htab, we don't need to allocate one */
> spapr->htab_shift = shift;
> kvmppc_kern_htab = true;
> +
> + /* Tell readers to update their file descriptor */
> + pthread_mutex_lock(&spapr->htab_mutex);
> + if (spapr->htab_fd > 0) {
s/>/>=/
htab_fd == 0 is a valid file descriptor.
> + spapr->htab_fd_stale = true;
> + }
> + pthread_mutex_unlock(&spapr->htab_mutex);
> } else {
> if (!spapr->htab) {
> /* Allocate an htab if we don't yet have one */
> @@ -850,6 +857,31 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
> }
> }
>
> +/* A guest reset will cause spapr->htab_fd to become stale if being used.
Multiline comment starts with /* and \n.
> + * Reopen the file descriptor to make sure the whole HTAB is properly read.
> + */
> +static int spapr_check_htab_fd(sPAPREnvironment *spapr)
> +{
> + int rc = 0;
> +
> + pthread_mutex_lock(&spapr->htab_mutex);
> +
> + if (spapr->htab_fd_stale) {
> + close(spapr->htab_fd);
> + spapr->htab_fd = kvmppc_get_htab_fd(false);
> + if (spapr->htab_fd < 0) {
> + error_report("Unable to open fd for reading hash table from KVM: "
> + "%s", strerror(errno));
> + rc = -1;
> + }
> + spapr->htab_fd_stale = false;
> + }
> +
> + pthread_mutex_unlock(&spapr->htab_mutex);
> + return rc;
> +}
> +
> +
2 empty lines, should be one.
> static void ppc_spapr_reset(void)
> {
> PowerPCCPU *first_ppc_cpu;
> @@ -984,7 +1016,10 @@ static int htab_save_setup(QEMUFile *f, void *opaque)
> } else {
> assert(kvm_enabled());
>
> + pthread_mutex_lock(&spapr->htab_mutex);
> spapr->htab_fd = kvmppc_get_htab_fd(false);
> + spapr->htab_fd_stale = false;
> + pthread_mutex_unlock(&spapr->htab_mutex);
> if (spapr->htab_fd < 0) {
> fprintf(stderr, "Unable to open fd for reading hash table from KVM: %s\n",
> strerror(errno));
> @@ -1137,6 +1172,11 @@ static int htab_save_iterate(QEMUFile *f, void *opaque)
> if (!spapr->htab) {
> assert(kvm_enabled());
>
> + rc = spapr_check_htab_fd(spapr);
> + if (rc < 0) {
> + return rc;
> + }
> +
> rc = kvmppc_save_htab(f, spapr->htab_fd,
> MAX_KVM_BUF_SIZE, MAX_ITERATION_NS);
> if (rc < 0) {
> @@ -1168,6 +1208,11 @@ static int htab_save_complete(QEMUFile *f, void *opaque)
>
> assert(kvm_enabled());
>
> + rc = spapr_check_htab_fd(spapr);
> + if (rc < 0) {
> + return rc;
> + }
> +
> rc = kvmppc_save_htab(f, spapr->htab_fd, MAX_KVM_BUF_SIZE, -1);
> if (rc < 0) {
> return rc;
> @@ -1355,6 +1400,8 @@ static void ppc_spapr_init(MachineState *machine)
> spapr->htab_shift++;
> }
>
> + pthread_mutex_init(&spapr->htab_mutex, NULL);
> +
> /* Set up Interrupt Controller before we create the VCPUs */
> spapr->icp = xics_system_init(smp_cpus * kvmppc_smt_threads() / smp_threads,
> XICS_IRQS);
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 749daf4..5e29bec 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -37,6 +37,8 @@ typedef struct sPAPREnvironment {
> int htab_save_index;
> bool htab_first_pass;
> int htab_fd;
> + bool htab_fd_stale;
> + pthread_mutex_t htab_mutex;
> } sPAPREnvironment;
>
> #define H_SUCCESS 0
>
--
Alexey
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [PATCH 2/2] spapr: Fix stale HTAB during live migration (TCG)
2014-11-05 6:17 ` [Qemu-devel] [PATCH 2/2] spapr: Fix stale HTAB during live migration (TCG) Samuel Mendoza-Jonas
2014-11-05 8:05 ` [Qemu-devel] [Qemu-ppc] " Alexander Graf
@ 2014-11-13 7:12 ` Alexey Kardashevskiy
1 sibling, 0 replies; 10+ messages in thread
From: Alexey Kardashevskiy @ 2014-11-13 7:12 UTC (permalink / raw)
To: Samuel Mendoza-Jonas, qemu-ppc, qemu-devel
On 11/05/2014 05:17 PM, Samuel Mendoza-Jonas wrote:
> If a TCG guest reboots during a running migration HTAB entries are not
> marked dirty, and the destination boots with an invalid HTAB.
>
> When a reboot occurs reset the state of HTAB migration, and explicitly
> inform the destination of invalid entries.
>
> Signed-off-by: Samuel Mendoza-Jonas <sam.mj@au1.ibm.com>
> ---
> hw/ppc/spapr.c | 59 +++++++++++++++++++++++++++++++++++---------------
> include/hw/ppc/spapr.h | 1 +
> 2 files changed, 42 insertions(+), 18 deletions(-)
>
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 1610c28..9f419e8 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -829,26 +829,30 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
>
> shift = kvmppc_reset_htab(spapr->htab_shift);
>
> + pthread_mutex_lock(&spapr->htab_mutex);
> if (shift > 0) {
> /* Kernel handles htab, we don't need to allocate one */
> spapr->htab_shift = shift;
> kvmppc_kern_htab = true;
>
> /* Tell readers to update their file descriptor */
> - pthread_mutex_lock(&spapr->htab_mutex);
> if (spapr->htab_fd > 0) {
> spapr->htab_fd_stale = true;
> }
> - pthread_mutex_unlock(&spapr->htab_mutex);
> } else {
> if (!spapr->htab) {
> /* Allocate an htab if we don't yet have one */
> spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
> + } else {
> + spapr->htab_mig_full = true;
> + spapr->htab_first_pass = true;
> + spapr->htab_save_index = 0;
> }
>
> /* And clear it */
> memset(spapr->htab, 0, HTAB_SIZE(spapr));
> }
> + pthread_mutex_unlock(&spapr->htab_mutex);
The pthread_mutex_(un)lock things from the chunk above should go to the
previous patch.
>
> /* Update the RMA size if necessary */
> if (spapr->vrma_adjust) {
> @@ -1019,6 +1023,7 @@ static int htab_save_setup(QEMUFile *f, void *opaque)
> pthread_mutex_lock(&spapr->htab_mutex);
> spapr->htab_fd = kvmppc_get_htab_fd(false);
> spapr->htab_fd_stale = false;
> + spapr->htab_mig_full = false;
> pthread_mutex_unlock(&spapr->htab_mutex);
> if (spapr->htab_fd < 0) {
> fprintf(stderr, "Unable to open fd for reading hash table from KVM: %s\n",
> @@ -1034,6 +1039,7 @@ static int htab_save_setup(QEMUFile *f, void *opaque)
> static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr,
> int64_t max_ns)
> {
> + bool final = max_ns < 0;
> int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
> int index = spapr->htab_save_index;
> int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
> @@ -1041,33 +1047,40 @@ static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr,
> assert(spapr->htab_first_pass);
>
> do {
> - int chunkstart;
> + int chunkstart, invalidstart;
>
> - /* Consume invalid HPTEs */
> - while ((index < htabslots)
> - && !HPTE_VALID(HPTE(spapr->htab, index))) {
Nit: in most places in QEMU, "&&" would be at the end of the previous line :)
> + chunkstart = index;
> + /* Consume valid HPTEs */
> + while ((index < htabslots && (index - chunkstart < USHRT_MAX))
> + && HPTE_VALID(HPTE(spapr->htab, index))) {
> index++;
> CLEAN_HPTE(HPTE(spapr->htab, index));
> }
>
> - /* Consume valid HPTEs */
> - chunkstart = index;
> - while ((index < htabslots)
> - && HPTE_VALID(HPTE(spapr->htab, index))) {
> + invalidstart = index;
> + /* Consume invalid HPTEs */
> + while ((index < htabslots && (index - invalidstart < USHRT_MAX))
> + && !HPTE_VALID(HPTE(spapr->htab, index))) {
> index++;
> CLEAN_HPTE(HPTE(spapr->htab, index));
> }
>
> - if (index > chunkstart) {
> - int n_valid = index - chunkstart;
> + /* Avoid writing an end marker (0,0,0) */
> + if (index > chunkstart
> + && !(chunkstart == invalidstart && !spapr->htab_mig_full)) {
> + int n_valid = invalidstart - chunkstart;
> + /* If a reset has occured we must explicitly overwrite the HTAB
> + * of the destination */
> + int n_invalid = spapr->htab_mig_full ? index - invalidstart : 0;
>
> qemu_put_be32(f, chunkstart);
> qemu_put_be16(f, n_valid);
> - qemu_put_be16(f, 0);
> + qemu_put_be16(f, n_invalid);
> qemu_put_buffer(f, HPTE(spapr->htab, chunkstart),
> HASH_PTE_SIZE_64 * n_valid);
>
> - if ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) {
> + if ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns
> + && !final) {
> break;
> }
> }
> @@ -1182,10 +1195,14 @@ static int htab_save_iterate(QEMUFile *f, void *opaque)
> if (rc < 0) {
> return rc;
> }
> - } else if (spapr->htab_first_pass) {
> - htab_save_first_pass(f, spapr, MAX_ITERATION_NS);
> } else {
> - rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS);
> + pthread_mutex_lock(&spapr->htab_mutex);
> + if (spapr->htab_first_pass) {
> + htab_save_first_pass(f, spapr, MAX_ITERATION_NS);
> + } else {
> + rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS);
> + }
> + pthread_mutex_unlock(&spapr->htab_mutex);
> }
>
> /* End marker */
> @@ -1220,7 +1237,13 @@ static int htab_save_complete(QEMUFile *f, void *opaque)
> close(spapr->htab_fd);
> spapr->htab_fd = -1;
> } else {
> - htab_save_later_pass(f, spapr, -1);
> + pthread_mutex_lock(&spapr->htab_mutex);
> + if (spapr->htab_first_pass) {
> + htab_save_first_pass(f, spapr, -1);
> + } else {
> + htab_save_later_pass(f, spapr, -1);
> + }
> + pthread_mutex_unlock(&spapr->htab_mutex);
> }
>
> /* End marker */
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 5e29bec..ee95459 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -38,6 +38,7 @@ typedef struct sPAPREnvironment {
> bool htab_first_pass;
> int htab_fd;
> bool htab_fd_stale;
> + bool htab_mig_full;
Not sure what @htab_mig_full stands for exactly. Please explain it in the
commit log.
> pthread_mutex_t htab_mutex;
> } sPAPREnvironment;
>
>
--
Alexey
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2014-11-13 7:12 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-11-05 6:16 [Qemu-devel] [PATCH 0/2] spapr: Fix stale HTAB during live migration Samuel Mendoza-Jonas
2014-11-05 6:17 ` [Qemu-devel] [PATCH 1/2] spapr: Fix stale HTAB during live migration (KVM) Samuel Mendoza-Jonas
2014-11-05 7:57 ` [Qemu-devel] [Qemu-ppc] " Alexander Graf
2014-11-05 21:56 ` Samuel Mendoza-Jonas
2014-11-13 6:59 ` [Qemu-devel] " Alexey Kardashevskiy
2014-11-05 6:17 ` [Qemu-devel] [PATCH 2/2] spapr: Fix stale HTAB during live migration (TCG) Samuel Mendoza-Jonas
2014-11-05 8:05 ` [Qemu-devel] [Qemu-ppc] " Alexander Graf
2014-11-05 22:00 ` Samuel Mendoza-Jonas
2014-11-05 22:04 ` Alexander Graf
2014-11-13 7:12 ` [Qemu-devel] " Alexey Kardashevskiy
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).