* [Qemu-devel] [RFC PATCH v2 1/3] separate thread for VM migration
2011-07-29 20:57 [Qemu-devel] [RFC PATCH v2 0/3] separate thread for VM migration Umesh Deshpande
@ 2011-07-29 20:57 ` Umesh Deshpande
2011-08-01 9:37 ` Paolo Bonzini
2011-07-29 20:57 ` [Qemu-devel] [RFC PATCH v2 2/3] fine grained qemu_mutex locking for migration Umesh Deshpande
` (2 subsequent siblings)
3 siblings, 1 reply; 11+ messages in thread
From: Umesh Deshpande @ 2011-07-29 20:57 UTC (permalink / raw)
To: kvm, qemu-devel; +Cc: pbonzini, mtosatti, Umesh Deshpande, Juan Quintela
This patch creates a separate thread for the guest migration on the source side.
Signed-off-by: Umesh Deshpande <udeshpan@redhat.com>
---
buffered_file.c | 28 ++++++++++++++++---------
buffered_file.h | 4 +++
migration.c | 59 +++++++++++++++++++++++++++++++++++++++++++-----------
migration.h | 3 ++
savevm.c | 22 +-------------------
savevm.h | 29 +++++++++++++++++++++++++++
6 files changed, 102 insertions(+), 43 deletions(-)
create mode 100644 savevm.h
diff --git a/buffered_file.c b/buffered_file.c
index 41b42c3..d4146bf 100644
--- a/buffered_file.c
+++ b/buffered_file.c
@@ -16,12 +16,16 @@
#include "qemu-timer.h"
#include "qemu-char.h"
#include "buffered_file.h"
+#include "migration.h"
+#include "savevm.h"
+#include "qemu-thread.h"
//#define DEBUG_BUFFERED_FILE
typedef struct QEMUFileBuffered
{
BufferedPutFunc *put_buffer;
+ BufferedBeginFunc *begin;
BufferedPutReadyFunc *put_ready;
BufferedWaitForUnfreezeFunc *wait_for_unfreeze;
BufferedCloseFunc *close;
@@ -35,6 +39,7 @@ typedef struct QEMUFileBuffered
size_t buffer_size;
size_t buffer_capacity;
QEMUTimer *timer;
+ QemuThread thread;
} QEMUFileBuffered;
#ifdef DEBUG_BUFFERED_FILE
@@ -181,8 +186,6 @@ static int buffered_close(void *opaque)
ret = s->close(s->opaque);
- qemu_del_timer(s->timer);
- qemu_free_timer(s->timer);
qemu_free(s->buffer);
qemu_free(s);
@@ -228,17 +231,15 @@ static int64_t buffered_get_rate_limit(void *opaque)
return s->xfer_limit;
}
-static void buffered_rate_tick(void *opaque)
+void buffered_rate_tick(QEMUFile *file)
{
- QEMUFileBuffered *s = opaque;
+ QEMUFileBuffered *s = file->opaque;
if (s->has_error) {
buffered_close(s);
return;
}
- qemu_mod_timer(s->timer, qemu_get_clock_ms(rt_clock) + 100);
-
if (s->freeze_output)
return;
@@ -250,9 +251,17 @@ static void buffered_rate_tick(void *opaque)
s->put_ready(s->opaque);
}
+static void *migrate_vm(void *opaque)
+{
+ QEMUFileBuffered *s = opaque;
+ s->begin(s->opaque);
+ return NULL;
+}
+
QEMUFile *qemu_fopen_ops_buffered(void *opaque,
size_t bytes_per_sec,
BufferedPutFunc *put_buffer,
+ BufferedBeginFunc *begin,
BufferedPutReadyFunc *put_ready,
BufferedWaitForUnfreezeFunc *wait_for_unfreeze,
BufferedCloseFunc *close)
@@ -264,6 +273,7 @@ QEMUFile *qemu_fopen_ops_buffered(void *opaque,
s->opaque = opaque;
s->xfer_limit = bytes_per_sec / 10;
s->put_buffer = put_buffer;
+ s->begin = begin;
s->put_ready = put_ready;
s->wait_for_unfreeze = wait_for_unfreeze;
s->close = close;
@@ -271,11 +281,9 @@ QEMUFile *qemu_fopen_ops_buffered(void *opaque,
s->file = qemu_fopen_ops(s, buffered_put_buffer, NULL,
buffered_close, buffered_rate_limit,
buffered_set_rate_limit,
- buffered_get_rate_limit);
-
- s->timer = qemu_new_timer_ms(rt_clock, buffered_rate_tick, s);
+ buffered_get_rate_limit);
- qemu_mod_timer(s->timer, qemu_get_clock_ms(rt_clock) + 100);
+ qemu_thread_create(&s->thread, migrate_vm, s);
return s->file;
}
diff --git a/buffered_file.h b/buffered_file.h
index 98d358b..cfe2833 100644
--- a/buffered_file.h
+++ b/buffered_file.h
@@ -17,12 +17,16 @@
#include "hw/hw.h"
typedef ssize_t (BufferedPutFunc)(void *opaque, const void *data, size_t size);
+typedef void (BufferedBeginFunc)(void *opaque);
typedef void (BufferedPutReadyFunc)(void *opaque);
typedef void (BufferedWaitForUnfreezeFunc)(void *opaque);
typedef int (BufferedCloseFunc)(void *opaque);
+void buffered_rate_tick(QEMUFile *file);
+
QEMUFile *qemu_fopen_ops_buffered(void *opaque, size_t xfer_limit,
BufferedPutFunc *put_buffer,
+ BufferedBeginFunc *begin,
BufferedPutReadyFunc *put_ready,
BufferedWaitForUnfreezeFunc *wait_for_unfreeze,
BufferedCloseFunc *close);
diff --git a/migration.c b/migration.c
index af3a1f2..bf86067 100644
--- a/migration.c
+++ b/migration.c
@@ -31,6 +31,8 @@
do { } while (0)
#endif
+static int64_t expire_time;
+
/* Migration speed throttling */
static int64_t max_throttle = (32 << 20);
@@ -284,8 +286,6 @@ int migrate_fd_cleanup(FdMigrationState *s)
{
int ret = 0;
- qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
-
if (s->file) {
DPRINTF("closing file\n");
if (qemu_fclose(s->file) != 0) {
@@ -310,8 +310,7 @@ int migrate_fd_cleanup(FdMigrationState *s)
void migrate_fd_put_notify(void *opaque)
{
FdMigrationState *s = opaque;
-
- qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
+ s->callback = NULL;
qemu_file_put_notify(s->file);
}
@@ -328,7 +327,7 @@ ssize_t migrate_fd_put_buffer(void *opaque, const void *data, size_t size)
ret = -(s->get_error(s));
if (ret == -EAGAIN) {
- qemu_set_fd_handler2(s->fd, NULL, NULL, migrate_fd_put_notify, s);
+ s->callback = migrate_fd_put_notify;
} else if (ret < 0) {
if (s->mon) {
monitor_resume(s->mon);
@@ -342,27 +341,66 @@ ssize_t migrate_fd_put_buffer(void *opaque, const void *data, size_t size)
void migrate_fd_connect(FdMigrationState *s)
{
- int ret;
-
+ s->callback = NULL;
s->file = qemu_fopen_ops_buffered(s,
s->bandwidth_limit,
migrate_fd_put_buffer,
+ migrate_fd_begin,
migrate_fd_put_ready,
migrate_fd_wait_for_unfreeze,
migrate_fd_close);
+}
+
+static int migrate_fd_check_expire(void)
+{
+ int64_t current_time = qemu_get_clock_ms(rt_clock);
+
+ if (expire_time > current_time) {
+ return 0;
+ } else {
+ expire_time = qemu_get_clock_ms(rt_clock) + 100;
+ return 1;
+ }
+}
+
+void migrate_fd_begin(void *arg)
+{
+ FdMigrationState *s = arg;
+ int ret;
+ qemu_mutex_lock_iothread();
DPRINTF("beginning savevm\n");
ret = qemu_savevm_state_begin(s->mon, s->file, s->mig_state.blk,
s->mig_state.shared);
if (ret < 0) {
DPRINTF("failed, %d\n", ret);
migrate_fd_error(s);
- return;
+ goto out;
}
-
+
+ expire_time = qemu_get_clock_ms(rt_clock) + 100;
migrate_fd_put_ready(s);
+
+ while (s->state == MIG_STATE_ACTIVE) {
+ if (migrate_fd_check_expire()) {
+ buffered_rate_tick(s->file);
+ }
+
+ if (s->state != MIG_STATE_ACTIVE) {
+ break;
+ }
+
+ if (s->callback) {
+ migrate_fd_wait_for_unfreeze(s);
+ s->callback(s);
+ }
+ }
+
+out:
+ qemu_mutex_unlock_iothread();
}
+
void migrate_fd_put_ready(void *opaque)
{
FdMigrationState *s = opaque;
@@ -376,8 +414,6 @@ void migrate_fd_put_ready(void *opaque)
if (qemu_savevm_state_iterate(s->mon, s->file) == 1) {
int state;
int old_vm_running = vm_running;
-
- DPRINTF("done iterating\n");
vm_stop(VMSTOP_MIGRATE);
if ((qemu_savevm_state_complete(s->mon, s->file)) < 0) {
@@ -458,7 +494,6 @@ int migrate_fd_close(void *opaque)
{
FdMigrationState *s = opaque;
- qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
return s->close(s);
}
diff --git a/migration.h b/migration.h
index 050c56c..8ed34ab 100644
--- a/migration.h
+++ b/migration.h
@@ -48,6 +48,7 @@ struct FdMigrationState
int (*get_error)(struct FdMigrationState*);
int (*close)(struct FdMigrationState*);
int (*write)(struct FdMigrationState*, const void *, size_t);
+ void (*callback)(void *);
void *opaque;
};
@@ -118,6 +119,8 @@ ssize_t migrate_fd_put_buffer(void *opaque, const void *data, size_t size);
void migrate_fd_connect(FdMigrationState *s);
+void migrate_fd_begin(void *opaque);
+
void migrate_fd_put_ready(void *opaque);
int migrate_fd_get_status(MigrationState *mig_state);
diff --git a/savevm.c b/savevm.c
index 8139bc7..4859a34 100644
--- a/savevm.c
+++ b/savevm.c
@@ -82,6 +82,7 @@
#include "qemu_socket.h"
#include "qemu-queue.h"
#include "cpus.h"
+#include "savevm.h"
#define SELF_ANNOUNCE_ROUNDS 5
@@ -155,27 +156,6 @@ void qemu_announce_self(void)
/***********************************************************/
/* savevm/loadvm support */
-#define IO_BUF_SIZE 32768
-
-struct QEMUFile {
- QEMUFilePutBufferFunc *put_buffer;
- QEMUFileGetBufferFunc *get_buffer;
- QEMUFileCloseFunc *close;
- QEMUFileRateLimit *rate_limit;
- QEMUFileSetRateLimit *set_rate_limit;
- QEMUFileGetRateLimit *get_rate_limit;
- void *opaque;
- int is_write;
-
- int64_t buf_offset; /* start of buffer when writing, end of buffer
- when reading */
- int buf_index;
- int buf_size; /* 0 when writing */
- uint8_t buf[IO_BUF_SIZE];
-
- int has_error;
-};
-
typedef struct QEMUFileStdio
{
FILE *stdio_file;
diff --git a/savevm.h b/savevm.h
new file mode 100644
index 0000000..954eded
--- /dev/null
+++ b/savevm.h
@@ -0,0 +1,29 @@
+#ifndef QEMU_SAVEVM_H
+#define QEMU_SAVEVM_H
+
+#include "hw/hw.h"
+
+#define IO_BUF_SIZE 32768
+
+struct QEMUFile {
+ QEMUFilePutBufferFunc *put_buffer;
+ QEMUFileGetBufferFunc *get_buffer;
+ QEMUFileCloseFunc *close;
+ QEMUFileRateLimit *rate_limit;
+ QEMUFileSetRateLimit *set_rate_limit;
+ QEMUFileGetRateLimit *get_rate_limit;
+ void *opaque;
+ int is_write;
+
+ int64_t buf_offset; /* start of buffer when writing, end of buffer
+ when reading */
+ int buf_index;
+ int buf_size; /* 0 when
+ * writing
+ * */
+ uint8_t buf[IO_BUF_SIZE];
+
+ int has_error;
+};
+
+#endif
--
1.7.4.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 1/3] separate thread for VM migration
2011-07-29 20:57 ` [Qemu-devel] [RFC PATCH v2 1/3] " Umesh Deshpande
@ 2011-08-01 9:37 ` Paolo Bonzini
2011-08-01 21:00 ` Umesh Deshpande
0 siblings, 1 reply; 11+ messages in thread
From: Paolo Bonzini @ 2011-08-01 9:37 UTC (permalink / raw)
To: Umesh Deshpande; +Cc: mtosatti, qemu-devel, kvm, Juan Quintela
On 07/29/2011 10:57 PM, Umesh Deshpande wrote:
> This patch creates a separate thread for the guest migration on the source side.
>
> Signed-off-by: Umesh Deshpande<udeshpan@redhat.com>
Looks pretty good!
One thing that shows, is that the interface separation between
buffered_file.c is migration.c is pretty weird. Your patch makes it
somewhat worse, but it was like this before so it's not your fault. The
good thing is that if buffered_file.c uses threads, you can fix a large
part of this and get even simpler code:
1) there is really just one way to implement migrate_fd_put_notify, and
with your simplifications it does not belong anymore in migration.c.
2) s->callback is actually not NULL exactly if s->file->frozen_output is
true, you can remove it as well;
3) buffered_close is messy because it can be called from both the
iothread (monitor->migrate_fd_cancel->migrate_fd_cleanup->qemu_fclose)
or the migration thread (after qemu_savevm_state_complete). But
buffered_close is actually very similar to your thread function (it does
flush+wait_for_unfreeze, basically)! So buffered_close can be simply:
s->closed = 1;
ret = qemu_thread_join(s->thread); /* doesn't exist yet :) */
qemu_free(...);
return ret;
Another nit is that here:
> + if (migrate_fd_check_expire()) {
> + buffered_rate_tick(s->file);
> + }
> +
> + if (s->state != MIG_STATE_ACTIVE) {
> + break;
> + }
> +
> + if (s->callback) {
> + migrate_fd_wait_for_unfreeze(s);
> + s->callback(s);
> + }
you can still have a busy wait.
Putting it all together, you can move the thread function back to
buffered_file.c like:
while (!s->closed || (!s->has_error && s->buffer_size)) {
if (s->freeze_output) {
qemu_mutex_unlock_iothread();
s->wait_for_unfreeze(s);
qemu_mutex_lock_iothread();
/* This comes from qemu_file_put_notify (via
buffered_put_buffer---can be simplified a lot too?).
s->freeze_output = 0;
/* Test again for cancellation. */
continue;
}
int64_t current_time = qemu_get_clock_ms(rt_clock);
if (s->expire_time > current_time) {
struct timeval tv = { .tv_sec = 0, .tv_usec = ... };
qemu_mutex_unlock_iothread();
select (0, NULL, NULL, NULL, &tv);
qemu_mutex_lock_iothread();
s->expire_time = qemu_get_clock_ms(rt_clock) + 100;
continue;
}
/* This comes from buffered_rate_tick. */
s->bytes_xfer = 0;
buffered_flush(s);
if (!s->closed) {
s->put_ready(s->opaque);
}
}
ret = s->close(s->opaque);
...
Does it look sane?
Paolo
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 1/3] separate thread for VM migration
2011-08-01 9:37 ` Paolo Bonzini
@ 2011-08-01 21:00 ` Umesh Deshpande
2011-08-02 7:44 ` Paolo Bonzini
0 siblings, 1 reply; 11+ messages in thread
From: Umesh Deshpande @ 2011-08-01 21:00 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: mtosatti, qemu-devel, kvm, Juan Quintela
On 08/01/2011 05:37 AM, Paolo Bonzini wrote:
> On 07/29/2011 10:57 PM, Umesh Deshpande wrote:
>> This patch creates a separate thread for the guest migration on the
>> source side.
>>
>> Signed-off-by: Umesh Deshpande<udeshpan@redhat.com>
>
> Looks pretty good!
>
> One thing that shows, is that the interface separation between
> buffered_file.c is migration.c is pretty weird. Your patch makes it
> somewhat worse, but it was like this before so it's not your fault.
> The good thing is that if buffered_file.c uses threads, you can fix a
> large part of this and get even simpler code:
>
> 1) there is really just one way to implement migrate_fd_put_notify,
> and with your simplifications it does not belong anymore in migration.c.
>
> 2) s->callback is actually not NULL exactly if s->file->frozen_output
> is true, you can remove it as well;
>
> 3) buffered_close is messy because it can be called from both the
> iothread (monitor->migrate_fd_cancel->migrate_fd_cleanup->qemu_fclose)
> or the migration thread (after qemu_savevm_state_complete). But
> buffered_close is actually very similar to your thread function (it
> does flush+wait_for_unfreeze, basically)! So buffered_close can be
> simply:
>
> s->closed = 1;
> ret = qemu_thread_join(s->thread); /* doesn't exist yet :) */
> qemu_free(...);
> return ret;
>
> Another nit is that here:
>
>> + if (migrate_fd_check_expire()) {
>> + buffered_rate_tick(s->file);
>> + }
>> +
>> + if (s->state != MIG_STATE_ACTIVE) {
>> + break;
>> + }
>> +
>> + if (s->callback) {
>> + migrate_fd_wait_for_unfreeze(s);
>> + s->callback(s);
>> + }
>
> you can still have a busy wait.
>
> Putting it all together, you can move the thread function back to
> buffered_file.c like:
>
> while (!s->closed || (!s->has_error && s->buffer_size)) {
> if (s->freeze_output) {
> qemu_mutex_unlock_iothread();
> s->wait_for_unfreeze(s);
> qemu_mutex_lock_iothread();
> /* This comes from qemu_file_put_notify (via
> buffered_put_buffer---can be simplified a lot too?).
> s->freeze_output = 0;
> /* Test again for cancellation. */
> continue;
> }
>
> int64_t current_time = qemu_get_clock_ms(rt_clock);
> if (s->expire_time > current_time) {
> struct timeval tv = { .tv_sec = 0, .tv_usec = ... };
> qemu_mutex_unlock_iothread();
> select (0, NULL, NULL, NULL, &tv);
> qemu_mutex_lock_iothread();
> s->expire_time = qemu_get_clock_ms(rt_clock) + 100;
> continue;
> }
>
> /* This comes from buffered_rate_tick. */
> s->bytes_xfer = 0;
> buffered_flush(s);
> if (!s->closed) {
> s->put_ready(s->opaque);
> }
> }
>
> ret = s->close(s->opaque);
> ...
>
> Does it look sane?
>
I kept this in migration.c to call qemu_savevm_state_begin. (The way it
is done currently. i.e. to keep access to FdMigrationState in migration.c)
Calling it from buffered_file.c would be inconsistent in that sense. or
we will have to call it from the iothread before spawning the migration
thread.
Also why is the separation between FdMigrationState and QEMUFileBuffered
is required. Is QEMUFileBuffered designed to use also for things other
than migration?
Thanks
Umesh
>
> Paolo
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 1/3] separate thread for VM migration
2011-08-01 21:00 ` Umesh Deshpande
@ 2011-08-02 7:44 ` Paolo Bonzini
0 siblings, 0 replies; 11+ messages in thread
From: Paolo Bonzini @ 2011-08-02 7:44 UTC (permalink / raw)
To: Umesh Deshpande; +Cc: mtosatti, qemu-devel, kvm, Juan Quintela
On 08/01/2011 11:00 PM, Umesh Deshpande wrote:
>>
> I kept this in migration.c to call qemu_savevm_state_begin. (The way it
> is done currently. i.e. to keep access to FdMigrationState in migration.c)
> Calling it from buffered_file.c would be inconsistent in that sense. or
> we will have to call it from the iothread before spawning the migration
> thread.
Right, I missed that. Perhaps you can call it the first time put_ready
is called.
> Also why is the separation between FdMigrationState and QEMUFileBuffered
> is required. Is QEMUFileBuffered designed to use also for things other
> than migration?
No, but let's keep it this way for now. It may be an annoyance, but it
also helps making a reusable architecture, and it can probably be
cleaned up substantially with thread support.
Paolo
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Qemu-devel] [RFC PATCH v2 2/3] fine grained qemu_mutex locking for migration
2011-07-29 20:57 [Qemu-devel] [RFC PATCH v2 0/3] separate thread for VM migration Umesh Deshpande
2011-07-29 20:57 ` [Qemu-devel] [RFC PATCH v2 1/3] " Umesh Deshpande
@ 2011-07-29 20:57 ` Umesh Deshpande
2011-08-01 9:39 ` Paolo Bonzini
2011-08-02 16:30 ` Marcelo Tosatti
2011-07-29 20:57 ` [Qemu-devel] [RFC PATCH v2 3/3] Per memslot dirty bitmap Umesh Deshpande
2011-08-01 9:41 ` [Qemu-devel] [RFC PATCH v2 0/3] separate thread for VM migration shawn che
3 siblings, 2 replies; 11+ messages in thread
From: Umesh Deshpande @ 2011-07-29 20:57 UTC (permalink / raw)
To: kvm, qemu-devel; +Cc: pbonzini, mtosatti, Umesh Deshpande, Juan Quintela
In the migration thread, qemu_mutex is released during the most time consuming
part. i.e. during is_dup_page which identifies the uniform data pages and during
the put_buffer. qemu_mutex is also released while blocking on select to wait for
the descriptor to become ready for writes.
Signed-off-by: Umesh Deshpande <udeshpan@redhat.com>
---
arch_init.c | 14 +++++++++++---
migration.c | 11 +++++++----
2 files changed, 18 insertions(+), 7 deletions(-)
diff --git a/arch_init.c b/arch_init.c
index 484b39d..cd545bc 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -110,7 +110,7 @@ static int is_dup_page(uint8_t *page, uint8_t ch)
static RAMBlock *last_block;
static ram_addr_t last_offset;
-static int ram_save_block(QEMUFile *f)
+static int ram_save_block(QEMUFile *f, int stage)
{
RAMBlock *block = last_block;
ram_addr_t offset = last_offset;
@@ -131,6 +131,10 @@ static int ram_save_block(QEMUFile *f)
current_addr + TARGET_PAGE_SIZE,
MIGRATION_DIRTY_FLAG);
+ if (stage != 3) {
+ qemu_mutex_unlock_iothread();
+ }
+
p = block->host + offset;
if (is_dup_page(p, *p)) {
@@ -153,6 +157,10 @@ static int ram_save_block(QEMUFile *f)
bytes_sent = TARGET_PAGE_SIZE;
}
+ if (stage != 3) {
+ qemu_mutex_lock_iothread();
+ }
+
break;
}
@@ -301,7 +309,7 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
while (!qemu_file_rate_limit(f)) {
int bytes_sent;
- bytes_sent = ram_save_block(f);
+ bytes_sent = ram_save_block(f, stage);
bytes_transferred += bytes_sent;
if (bytes_sent == 0) { /* no more blocks */
break;
@@ -322,7 +330,7 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
int bytes_sent;
/* flush all remaining blocks regardless of rate limiting */
- while ((bytes_sent = ram_save_block(f)) != 0) {
+ while ((bytes_sent = ram_save_block(f, stage)) != 0) {
bytes_transferred += bytes_sent;
}
cpu_physical_memory_set_dirty_tracking(0);
diff --git a/migration.c b/migration.c
index bf86067..992fef5 100644
--- a/migration.c
+++ b/migration.c
@@ -375,15 +375,19 @@ void migrate_fd_begin(void *arg)
if (ret < 0) {
DPRINTF("failed, %d\n", ret);
migrate_fd_error(s);
- goto out;
+ qemu_mutex_unlock_iothread();
+ return;
}
expire_time = qemu_get_clock_ms(rt_clock) + 100;
migrate_fd_put_ready(s);
+ qemu_mutex_unlock_iothread();
while (s->state == MIG_STATE_ACTIVE) {
if (migrate_fd_check_expire()) {
+ qemu_mutex_lock_iothread();
buffered_rate_tick(s->file);
+ qemu_mutex_unlock_iothread();
}
if (s->state != MIG_STATE_ACTIVE) {
@@ -392,12 +396,11 @@ void migrate_fd_begin(void *arg)
if (s->callback) {
migrate_fd_wait_for_unfreeze(s);
+ qemu_mutex_lock_iothread();
s->callback(s);
+ qemu_mutex_unlock_iothread();
}
}
-
-out:
- qemu_mutex_unlock_iothread();
}
--
1.7.4.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 2/3] fine grained qemu_mutex locking for migration
2011-07-29 20:57 ` [Qemu-devel] [RFC PATCH v2 2/3] fine grained qemu_mutex locking for migration Umesh Deshpande
@ 2011-08-01 9:39 ` Paolo Bonzini
2011-08-02 16:30 ` Marcelo Tosatti
1 sibling, 0 replies; 11+ messages in thread
From: Paolo Bonzini @ 2011-08-01 9:39 UTC (permalink / raw)
To: Umesh Deshpande; +Cc: mtosatti, qemu-devel, kvm, Juan Quintela
On 07/29/2011 10:57 PM, Umesh Deshpande wrote:
> + qemu_mutex_unlock_iothread();
>
> while (s->state == MIG_STATE_ACTIVE) {
> if (migrate_fd_check_expire()) {
> + qemu_mutex_lock_iothread();
> buffered_rate_tick(s->file);
> + qemu_mutex_unlock_iothread();
> }
>
> if (s->state != MIG_STATE_ACTIVE) {
> @@ -392,12 +396,11 @@ void migrate_fd_begin(void *arg)
>
> if (s->callback) {
> migrate_fd_wait_for_unfreeze(s);
> + qemu_mutex_lock_iothread();
> s->callback(s);
> + qemu_mutex_unlock_iothread();
> }
> }
> -
> -out:
> - qemu_mutex_unlock_iothread();
I think it's clearer to unlock explicitly around the waiting points (see
review of 1/3). In fact, I think you're working around the busy wait by
accessing s->state outside the lock, right? I don't think this is
provably safe; moving the knowledge of the thread entirely within
buffered_file.c also fixes this, because then the lifetimes of the
thread and the QEMUFile are much clearer.
Thanks,
Paolo
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 2/3] fine grained qemu_mutex locking for migration
2011-07-29 20:57 ` [Qemu-devel] [RFC PATCH v2 2/3] fine grained qemu_mutex locking for migration Umesh Deshpande
2011-08-01 9:39 ` Paolo Bonzini
@ 2011-08-02 16:30 ` Marcelo Tosatti
1 sibling, 0 replies; 11+ messages in thread
From: Marcelo Tosatti @ 2011-08-02 16:30 UTC (permalink / raw)
To: Umesh Deshpande; +Cc: pbonzini, qemu-devel, kvm, Juan Quintela
On Fri, Jul 29, 2011 at 04:57:25PM -0400, Umesh Deshpande wrote:
> In the migration thread, qemu_mutex is released during the most time consuming
> part. i.e. during is_dup_page which identifies the uniform data pages and during
> the put_buffer. qemu_mutex is also released while blocking on select to wait for
> the descriptor to become ready for writes.
>
> Signed-off-by: Umesh Deshpande <udeshpan@redhat.com>
> ---
> arch_init.c | 14 +++++++++++---
> migration.c | 11 +++++++----
> 2 files changed, 18 insertions(+), 7 deletions(-)
>
> diff --git a/arch_init.c b/arch_init.c
> index 484b39d..cd545bc 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -110,7 +110,7 @@ static int is_dup_page(uint8_t *page, uint8_t ch)
> static RAMBlock *last_block;
> static ram_addr_t last_offset;
>
> -static int ram_save_block(QEMUFile *f)
> +static int ram_save_block(QEMUFile *f, int stage)
> {
> RAMBlock *block = last_block;
> ram_addr_t offset = last_offset;
> @@ -131,6 +131,10 @@ static int ram_save_block(QEMUFile *f)
> current_addr + TARGET_PAGE_SIZE,
> MIGRATION_DIRTY_FLAG);
>
> + if (stage != 3) {
> + qemu_mutex_unlock_iothread();
> + }
> +
> p = block->host + offset;
>
> if (is_dup_page(p, *p)) {
> @@ -153,6 +157,10 @@ static int ram_save_block(QEMUFile *f)
> bytes_sent = TARGET_PAGE_SIZE;
> }
>
> + if (stage != 3) {
> + qemu_mutex_lock_iothread();
> + }
> +
Batching multiple pages (instead of a single page per lock/unlock cycle)
is probably worthwhile.
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Qemu-devel] [RFC PATCH v2 3/3] Per memslot dirty bitmap
2011-07-29 20:57 [Qemu-devel] [RFC PATCH v2 0/3] separate thread for VM migration Umesh Deshpande
2011-07-29 20:57 ` [Qemu-devel] [RFC PATCH v2 1/3] " Umesh Deshpande
2011-07-29 20:57 ` [Qemu-devel] [RFC PATCH v2 2/3] fine grained qemu_mutex locking for migration Umesh Deshpande
@ 2011-07-29 20:57 ` Umesh Deshpande
2011-08-02 16:29 ` Marcelo Tosatti
2011-08-01 9:41 ` [Qemu-devel] [RFC PATCH v2 0/3] separate thread for VM migration shawn che
3 siblings, 1 reply; 11+ messages in thread
From: Umesh Deshpande @ 2011-07-29 20:57 UTC (permalink / raw)
To: kvm, qemu-devel; +Cc: pbonzini, mtosatti, Umesh Deshpande, Juan Quintela
This patch creates a separate dirty bitmap for each slot. Currently dirty bitmap
is created for addresses ranging from 0 to the end address of the last memory
slot. Since the memslots are not necessarily contiguous, current bitmap might
contain empty region or holes that doesn't represent any VM pages. This patch
reduces the size of the dirty bitmap by allocating per memslot dirty bitmaps.
Signed-off-by: Umesh Deshpande <udeshpan@redhat.com>
---
cpu-all.h | 40 +++++++++++++++++++++++++++++++++-------
exec.c | 38 +++++++++++++++++++++++---------------
xen-all.c | 6 ++----
3 files changed, 58 insertions(+), 26 deletions(-)
diff --git a/cpu-all.h b/cpu-all.h
index e839100..9517a9b 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -920,6 +920,7 @@ extern ram_addr_t ram_size;
typedef struct RAMBlock {
uint8_t *host;
+ uint8_t *phys_dirty;
ram_addr_t offset;
ram_addr_t length;
uint32_t flags;
@@ -931,7 +932,6 @@ typedef struct RAMBlock {
} RAMBlock;
typedef struct RAMList {
- uint8_t *phys_dirty;
QLIST_HEAD(ram, RAMBlock) blocks;
} RAMList;
extern RAMList ram_list;
@@ -961,32 +961,55 @@ extern int mem_prealloc;
#define CODE_DIRTY_FLAG 0x02
#define MIGRATION_DIRTY_FLAG 0x08
+RAMBlock *qemu_addr_to_ramblock(ram_addr_t);
+
+static inline int get_page_nr(ram_addr_t addr, RAMBlock **block)
+{
+ int page_nr;
+ *block = qemu_addr_to_ramblock(addr);
+
+ page_nr = addr - (*block)->offset;
+ page_nr = page_nr >> TARGET_PAGE_BITS;
+
+ return page_nr;
+}
+
/* read dirty bit (return 0 or 1) */
static inline int cpu_physical_memory_is_dirty(ram_addr_t addr)
{
- return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] == 0xff;
+ RAMBlock *block;
+ int page_nr = get_page_nr(addr, &block);
+ return block->phys_dirty[page_nr] == 0xff;
}
static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr)
{
- return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS];
+ RAMBlock *block;
+ int page_nr = get_page_nr(addr, &block);
+ return block->phys_dirty[page_nr];
}
static inline int cpu_physical_memory_get_dirty(ram_addr_t addr,
int dirty_flags)
{
- return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] & dirty_flags;
+ RAMBlock *block;
+ int page_nr = get_page_nr(addr, &block);
+ return block->phys_dirty[page_nr] & dirty_flags;
}
static inline void cpu_physical_memory_set_dirty(ram_addr_t addr)
{
- ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] = 0xff;
+ RAMBlock *block;
+ int page_nr = get_page_nr(addr, &block);
+ block->phys_dirty[page_nr] = 0xff;
}
static inline int cpu_physical_memory_set_dirty_flags(ram_addr_t addr,
int dirty_flags)
{
- return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] |= dirty_flags;
+ RAMBlock *block;
+ int page_nr = get_page_nr(addr, &block);
+ return block->phys_dirty[page_nr] |= dirty_flags;
}
static inline void cpu_physical_memory_mask_dirty_range(ram_addr_t start,
@@ -995,10 +1018,13 @@ static inline void cpu_physical_memory_mask_dirty_range(ram_addr_t start,
{
int i, mask, len;
uint8_t *p;
+ RAMBlock *block;
+ int page_nr = get_page_nr(start, &block);
len = length >> TARGET_PAGE_BITS;
mask = ~dirty_flags;
- p = ram_list.phys_dirty + (start >> TARGET_PAGE_BITS);
+
+ p = block->phys_dirty + page_nr;
for (i = 0; i < len; i++) {
p[i] &= mask;
}
diff --git a/exec.c b/exec.c
index 0e2ce57..6312550 100644
--- a/exec.c
+++ b/exec.c
@@ -2106,6 +2106,10 @@ void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
abort();
}
+ if (kvm_enabled()) {
+ return;
+ }
+
for(env = first_cpu; env != NULL; env = env->next_cpu) {
int mmu_idx;
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
@@ -2894,17 +2898,6 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
return offset;
}
-static ram_addr_t last_ram_offset(void)
-{
- RAMBlock *block;
- ram_addr_t last = 0;
-
- QLIST_FOREACH(block, &ram_list.blocks, next)
- last = MAX(last, block->offset + block->length);
-
- return last;
-}
-
ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
ram_addr_t size, void *host)
{
@@ -2974,10 +2967,8 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
- ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
- last_ram_offset() >> TARGET_PAGE_BITS);
- memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
- 0xff, size >> TARGET_PAGE_BITS);
+ new_block->phys_dirty = qemu_mallocz(new_block->length >> TARGET_PAGE_BITS);
+ memset(new_block->phys_dirty, 0xff, new_block->length >> TARGET_PAGE_BITS);
if (kvm_enabled())
kvm_setup_guest_memory(new_block->host, size);
@@ -3141,6 +3132,23 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
return NULL;
}
+RAMBlock *qemu_addr_to_ramblock(ram_addr_t addr)
+{
+ RAMBlock *block;
+
+ QLIST_FOREACH(block, &ram_list.blocks, next) {
+ if (addr - block->offset < block->length) {
+ return block;
+ }
+ }
+
+ fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
+ abort();
+
+ return NULL;
+}
+
+
/* Return a host pointer to ram allocated with qemu_ram_alloc.
* Same as qemu_get_ram_ptr but avoid reordering ramblocks.
*/
diff --git a/xen-all.c b/xen-all.c
index fcb106f..6782e68 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -147,10 +147,8 @@ static void xen_ram_init(ram_addr_t ram_size)
QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
- ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
- new_block->length >> TARGET_PAGE_BITS);
- memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
- 0xff, new_block->length >> TARGET_PAGE_BITS);
+ new_block->phys_dirty = qemu_mallocz(new_block->length >> TARGET_PAGE_BITS);
+ memset(new_block->phys_dirty, 0xff, new_block->length >> TARGET_PAGE_BITS);
if (ram_size >= 0xe0000000 ) {
above_4g_mem_size = ram_size - 0xe0000000;
--
1.7.4.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 3/3] Per memslot dirty bitmap
2011-07-29 20:57 ` [Qemu-devel] [RFC PATCH v2 3/3] Per memslot dirty bitmap Umesh Deshpande
@ 2011-08-02 16:29 ` Marcelo Tosatti
0 siblings, 0 replies; 11+ messages in thread
From: Marcelo Tosatti @ 2011-08-02 16:29 UTC (permalink / raw)
To: Umesh Deshpande; +Cc: pbonzini, qemu-devel, kvm, Juan Quintela
On Fri, Jul 29, 2011 at 04:57:26PM -0400, Umesh Deshpande wrote:
> This patch creates a separate dirty bitmap for each slot. Currently dirty bitmap
> is created for addresses ranging from 0 to the end address of the last memory
> slot. Since the memslots are not necessarily contiguous, current bitmap might
> contain empty region or holes that doesn't represent any VM pages. This patch
> reduces the size of the dirty bitmap by allocating per memslot dirty bitmaps.
>
> Signed-off-by: Umesh Deshpande <udeshpan@redhat.com>
> ---
> cpu-all.h | 40 +++++++++++++++++++++++++++++++++-------
> exec.c | 38 +++++++++++++++++++++++---------------
> xen-all.c | 6 ++----
> 3 files changed, 58 insertions(+), 26 deletions(-)
>
> diff --git a/cpu-all.h b/cpu-all.h
> index e839100..9517a9b 100644
> --- a/cpu-all.h
> +++ b/cpu-all.h
> @@ -920,6 +920,7 @@ extern ram_addr_t ram_size;
>
> typedef struct RAMBlock {
> uint8_t *host;
> + uint8_t *phys_dirty;
> ram_addr_t offset;
> ram_addr_t length;
> uint32_t flags;
> @@ -931,7 +932,6 @@ typedef struct RAMBlock {
> } RAMBlock;
>
> typedef struct RAMList {
> - uint8_t *phys_dirty;
> QLIST_HEAD(ram, RAMBlock) blocks;
> } RAMList;
> extern RAMList ram_list;
> @@ -961,32 +961,55 @@ extern int mem_prealloc;
> #define CODE_DIRTY_FLAG 0x02
> #define MIGRATION_DIRTY_FLAG 0x08
>
> +RAMBlock *qemu_addr_to_ramblock(ram_addr_t);
> +
> +static inline int get_page_nr(ram_addr_t addr, RAMBlock **block)
> +{
> + int page_nr;
> + *block = qemu_addr_to_ramblock(addr);
> +
> + page_nr = addr - (*block)->offset;
> + page_nr = page_nr >> TARGET_PAGE_BITS;
> +
> + return page_nr;
> +}
> +
> /* read dirty bit (return 0 or 1) */
> static inline int cpu_physical_memory_is_dirty(ram_addr_t addr)
> {
> - return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] == 0xff;
> + RAMBlock *block;
> + int page_nr = get_page_nr(addr, &block);
> + return block->phys_dirty[page_nr] == 0xff;
> }
>
> static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr)
> {
> - return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS];
> + RAMBlock *block;
> + int page_nr = get_page_nr(addr, &block);
> + return block->phys_dirty[page_nr];
> }
>
> static inline int cpu_physical_memory_get_dirty(ram_addr_t addr,
> int dirty_flags)
> {
> - return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] & dirty_flags;
> + RAMBlock *block;
> + int page_nr = get_page_nr(addr, &block);
> + return block->phys_dirty[page_nr] & dirty_flags;
> }
>
> static inline void cpu_physical_memory_set_dirty(ram_addr_t addr)
> {
> - ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] = 0xff;
> + RAMBlock *block;
> + int page_nr = get_page_nr(addr, &block);
> + block->phys_dirty[page_nr] = 0xff;
> }
>
> static inline int cpu_physical_memory_set_dirty_flags(ram_addr_t addr,
> int dirty_flags)
> {
> - return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] |= dirty_flags;
> + RAMBlock *block;
> + int page_nr = get_page_nr(addr, &block);
> + return block->phys_dirty[page_nr] |= dirty_flags;
> }
>
> static inline void cpu_physical_memory_mask_dirty_range(ram_addr_t start,
> @@ -995,10 +1018,13 @@ static inline void cpu_physical_memory_mask_dirty_range(ram_addr_t start,
> {
> int i, mask, len;
> uint8_t *p;
> + RAMBlock *block;
> + int page_nr = get_page_nr(start, &block);
>
> len = length >> TARGET_PAGE_BITS;
> mask = ~dirty_flags;
> - p = ram_list.phys_dirty + (start >> TARGET_PAGE_BITS);
> +
> + p = block->phys_dirty + page_nr;
> p[i] &= mask;
> }
> diff --git a/exec.c b/exec.c
> index 0e2ce57..6312550 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -2106,6 +2106,10 @@ void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
> abort();
> }
>
> + if (kvm_enabled()) {
> + return;
> + }
> +
This belongs to a separate patch.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 0/3] separate thread for VM migration
2011-07-29 20:57 [Qemu-devel] [RFC PATCH v2 0/3] separate thread for VM migration Umesh Deshpande
` (2 preceding siblings ...)
2011-07-29 20:57 ` [Qemu-devel] [RFC PATCH v2 3/3] Per memslot dirty bitmap Umesh Deshpande
@ 2011-08-01 9:41 ` shawn che
3 siblings, 0 replies; 11+ messages in thread
From: shawn che @ 2011-08-01 9:41 UTC (permalink / raw)
To: Umesh Deshpande; +Cc: pbonzini, mtosatti, qemu-devel, kvm, Juan Quintela
Hi ,
I am studying KVM, but I have a question to ask.I wish someone can spare
time to help me.
What does the file "Assigned-dev.c" use for? I add some printk in the
funcitons in this file,but it seldom work.
Is it used for VT-d or others? can you give me some suggestions?
Thank you so much for your feedback...
^ permalink raw reply [flat|nested] 11+ messages in thread