* [Qemu-devel] migration: adding migration to/from a file (v2)
@ 2009-02-19 11:45 Uri Lublin
2009-02-19 13:57 ` Anthony Liguori
0 siblings, 1 reply; 14+ messages in thread
From: Uri Lublin @ 2009-02-19 11:45 UTC (permalink / raw)
To: qemu-devel; +Cc: Uri Lublin
[-- Attachment #1: Type: text/plain, Size: 583 bytes --]
Migration to file, uses migration-to-fd (supports live migration).
Migration from file, uses qemu-fopen directly.
pre-save:
(qemu) stop # unless you really want live migration
(qemu) migrate_set_speed 3.5G # not needed, speed things up
save:
(qemu) migrate [-d] file:/path/to/state-file
load:
[shell] <qemu> <params> -incoming file:/path/to/state-file
changes from v1:
- making sure write would not block (using select)
- pass max_throttle as is (can be set by user)
- not checking qemu_mallocz
- compile for non-windows as select is being used.
[-- Attachment #2: 0001-migration-adding-migration-to-from-a-file-v2.patch --]
[-- Type: text/x-patch, Size: 6454 bytes --]
>From adfdf0c43b39b3bd3a52612e3748cf7929af9c8c Mon Sep 17 00:00:00 2001
From: Uri Lublin <uril@redhat.com>
Date: Sun, 18 Jan 2009 18:54:14 +0200
Subject: [PATCH] migration: adding migration to/from a file (v2)
Migration to file, uses migration-to-fd.
Migration from file, uses qemu-fopen directly.
The file should be used only once and removed (or used with -snapshot,
or the disk-image should be copied), as the disk image is not
saved, only the VM state.
This version uses select to make sure write() will not block.
Thus this solution is limited to non-Windows hosts.
Signed-off-by: Uri Lublin <uril@redhat.com>
---
Makefile | 2 +-
migration-file.c | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
migration.c | 4 ++
migration.h | 5 ++
4 files changed, 162 insertions(+), 1 deletions(-)
create mode 100644 migration-file.c
diff --git a/Makefile b/Makefile
index 4f7a55a..6d4a8c8 100644
--- a/Makefile
+++ b/Makefile
@@ -94,7 +94,7 @@ endif
ifdef CONFIG_WIN32
OBJS+=tap-win32.o
else
-OBJS+=migration-exec.o
+OBJS+=migration-exec.o migration-file.o
endif
AUDIO_OBJS = audio.o noaudio.o wavaudio.o mixeng.o
diff --git a/migration-file.c b/migration-file.c
new file mode 100644
index 0000000..cc86ef6
--- /dev/null
+++ b/migration-file.c
@@ -0,0 +1,152 @@
+/*
+ * QEMU live migration
+ *
+ * Copyright IBM, Corp. 2008
+ * Red Hat, Inc. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ * Uri Lublin <uril@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include "qemu-common.h"
+#include "migration.h"
+#include "hw/hw.h"
+#include "sysemu.h"
+#include "console.h"
+#include "block.h"
+
+//#define DEBUG_MIGRATION_FILE
+
+#ifdef DEBUG_MIGRATION_FILE
+#define dprintf(fmt, ...) \
+ do { printf("migration-file: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define dprintf(fmt, ...) \
+ do { } while (0)
+#endif
+
+static int file_close(FdMigrationState *s)
+{
+ return close(s->fd);
+}
+
+static int file_errno(FdMigrationState *s)
+{
+ return errno;
+}
+
+/* returns 0 if write will not block
+ * returns -1 and sets errno to EAGAIN if write will block
+ * returns -1 upon error (and sets errno)
+ */
+static int will_write_block(int fd)
+{
+ int n;
+ fd_set wfds;
+ struct timeval tv;
+
+ FD_ZERO(&wfds);
+ FD_SET(fd, &wfds);
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+
+ n = select(fd + 1, NULL, &wfds, NULL, &tv);
+ if (n<0)
+ return n;
+
+ if (!FD_ISSET(fd, &wfds)) {
+ /* Don't block, let migration freeze for a while */
+ dprintf("will block -- setting EAGAIN\n");
+ errno = EAGAIN;
+ return -1;
+ }
+
+ return 0;
+}
+
+static int file_write(FdMigrationState *s, const void * buf, size_t size)
+{
+ int ret;
+
+ ret = will_write_block(s->fd);
+ if (ret == 0)
+ ret = write(s->fd, buf, size);
+ return ret;
+}
+
+MigrationState *file_start_outgoing_migration(const char *filename,
+ int64_t bandwidth_limit,
+ int async)
+{
+ FdMigrationState *s;
+ int fd;
+
+ s = qemu_mallocz(sizeof(*s));
+
+ fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0600);
+ if (fd < 0) {
+ perror("file_migration: failed to open filename");
+ term_printf("file_migration: failed to open filename %s\n", filename);
+ return NULL;
+ }
+
+ s->fd = fd;
+ s->close = file_close;
+ s->get_error = file_errno;
+ s->write = file_write;
+ s->mig_state.cancel = migrate_fd_cancel;
+ s->mig_state.get_status = migrate_fd_get_status;
+ s->mig_state.release = migrate_fd_release;
+
+ s->state = MIG_STATE_ACTIVE;
+ s->detach = !async;
+ s->bandwidth_limit = bandwidth_limit;
+
+ dprintf("start migration to file: name=%s limit=%ld detach=%d\n", filename, bandwidth_limit, s->detach);
+ if (s->detach == 1) {
+ dprintf("detaching from monitor\n");
+ monitor_suspend();
+ s->detach = 2;
+ }
+
+ migrate_fd_connect(s);
+ return &s->mig_state;
+}
+
+int file_start_incoming_migration(const char *filename)
+{
+ int ret;
+ QEMUFile *f;
+
+ dprintf("Starting incoming file migration from '%s'\n", filename);
+ f = qemu_fopen(filename, "rb");
+ if(f == NULL) {
+ perror("failed to open file");
+ term_printf("failed to open file %s\n", filename);
+ return -errno;
+ }
+
+ vm_stop(0); /* just in case */
+ ret = qemu_loadvm_state(f);
+ if (ret < 0) {
+ fprintf(stderr, "in_file_mig: load of migration failed\n");
+ goto err;
+ }
+ qemu_announce_self();
+ dprintf("successfully loaded vm state\n");
+ vm_start();
+ qemu_fclose(f);
+ return 0;
+
+err:
+ qemu_fclose(f);
+ return -errno;
+}
diff --git a/migration.c b/migration.c
index 0ef777a..31c1c2b 100644
--- a/migration.c
+++ b/migration.c
@@ -43,6 +43,8 @@ void qemu_start_incoming_migration(const char *uri)
#if !defined(WIN32)
else if (strstart(uri, "exec:", &p))
exec_start_incoming_migration(p);
+ else if (strstart(uri, "file:", &p))
+ file_start_incoming_migration(p);
#endif
else
fprintf(stderr, "unknown migration protocol: %s\n", uri);
@@ -58,6 +60,8 @@ void do_migrate(int detach, const char *uri)
#if !defined(WIN32)
else if (strstart(uri, "exec:", &p))
s = exec_start_outgoing_migration(p, max_throttle, detach);
+ else if (strstart(uri, "file:", &p))
+ s = file_start_outgoing_migration(p, max_throttle, detach);
#endif
else
term_printf("unknown migration protocol: %s\n", uri);
diff --git a/migration.h b/migration.h
index d9771ad..c22beb9 100644
--- a/migration.h
+++ b/migration.h
@@ -67,6 +67,11 @@ MigrationState *tcp_start_outgoing_migration(const char *host_port,
int64_t bandwidth_limit,
int detach);
+MigrationState *file_start_outgoing_migration(const char *filename,
+ int64_t bandwidth_limit,
+ int detach);
+int file_start_incoming_migration(const char *filename);
+
void migrate_fd_error(FdMigrationState *s);
void migrate_fd_cleanup(FdMigrationState *s);
--
1.6.0.6
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] migration: adding migration to/from a file (v2)
2009-02-19 11:45 [Qemu-devel] migration: adding migration to/from a file (v2) Uri Lublin
@ 2009-02-19 13:57 ` Anthony Liguori
2009-02-19 16:14 ` Uri Lublin
0 siblings, 1 reply; 14+ messages in thread
From: Anthony Liguori @ 2009-02-19 13:57 UTC (permalink / raw)
To: qemu-devel; +Cc: Uri Lublin
Uri Lublin wrote:
>
> Migration to file, uses migration-to-fd (supports live migration).
> Migration from file, uses qemu-fopen directly.
Eh? Haven't we already talked about why this doesn't work? Maybe
there's a v3 that you meant to send?
Regards,
Anthony Liguori
> pre-save:
> (qemu) stop # unless you really want live migration
> (qemu) migrate_set_speed 3.5G # not needed, speed things up
>
> save:
> (qemu) migrate [-d] file:/path/to/state-file
>
> load:
> [shell] <qemu> <params> -incoming file:/path/to/state-file
>
>
> changes from v1:
> - making sure write would not block (using select)
> - pass max_throttle as is (can be set by user)
> - not checking qemu_mallocz
> - compile for non-windows as select is being used.
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] migration: adding migration to/from a file (v2)
2009-02-19 13:57 ` Anthony Liguori
@ 2009-02-19 16:14 ` Uri Lublin
2009-02-19 16:51 ` Anthony Liguori
2009-02-19 19:33 ` Jamie Lokier
0 siblings, 2 replies; 14+ messages in thread
From: Uri Lublin @ 2009-02-19 16:14 UTC (permalink / raw)
To: qemu-devel
Anthony Liguori wrote:
> Uri Lublin wrote:
>>
>> Migration to file, uses migration-to-fd (supports live migration).
>> Migration from file, uses qemu-fopen directly.
>
> Eh? Haven't we already talked about why this doesn't work? Maybe
> there's a v3 that you meant to send?
>
Actually I do have a v3 which uses posix-aio-compat.c
It's a much more complicated solution than just writing to a file though.
Also I am not sure if I need to use a signal or not as the migration (to-fd)
code is polling. And if I use signal should I use SIGUSR2 or a different one and
use a pipe similar to block-raw-posix.c ?
Your concern in the previous patch was that write() to a regular file might
block. That's why I'm calling select before calling write. Do you think select
will mark the fd as writeable but write would still block ?
Thanks,
Uri.
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] migration: adding migration to/from a file (v2)
2009-02-19 16:14 ` Uri Lublin
@ 2009-02-19 16:51 ` Anthony Liguori
2009-02-19 19:06 ` Uri Lublin
2009-02-19 19:37 ` Jamie Lokier
2009-02-19 19:33 ` Jamie Lokier
1 sibling, 2 replies; 14+ messages in thread
From: Anthony Liguori @ 2009-02-19 16:51 UTC (permalink / raw)
To: Uri Lublin; +Cc: qemu-devel
Uri Lublin wrote:
> Anthony Liguori wrote:
>> Uri Lublin wrote:
>>>
>>> Migration to file, uses migration-to-fd (supports live migration).
>>> Migration from file, uses qemu-fopen directly.
>>
>> Eh? Haven't we already talked about why this doesn't work? Maybe
>> there's a v3 that you meant to send?
>>
>
> Actually I do have a v3 which uses posix-aio-compat.c
> It's a much more complicated solution then just writing to a file though.
> Also I am not sure if I need to use a signal or not as the migration
> (to-fd) code is polling. And if I use signal should I use SIGUSR2 or a
> different one and use a pipe similar to block-raw-posix.c ?
How is the migration code polling? It will attempt to do writes until a
write returns EAGAIN. At this point, it will wait for notification that
more writes are available. Remember, migration is a streaming
protocol, not a random access, so it only makes sense to have one
outstanding request at a time.
Your code would look something like:
write() -> submit aio request
until aio completes, write returns EAGAIN
when aio completes, notify migration code that we are writable again
>
> Your concern in the previous patch was that write() to a regular file
> might block. That's why I'm calling select before calling write. Do
> you think select will mark the fd as writeable but write would still
> block ?
select() doesn't help. It will return that the file descriptor is
writable and then the subsequent write will block.
Regards,
Anthony Liguori
> Thanks,
> Uri.
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] migration: adding migration to/from a file (v2)
2009-02-19 16:51 ` Anthony Liguori
@ 2009-02-19 19:06 ` Uri Lublin
2009-02-19 20:05 ` Anthony Liguori
2009-02-19 19:37 ` Jamie Lokier
1 sibling, 1 reply; 14+ messages in thread
From: Uri Lublin @ 2009-02-19 19:06 UTC (permalink / raw)
To: Anthony Liguori; +Cc: qemu-devel
Anthony Liguori wrote:
> Uri Lublin wrote:
>> Anthony Liguori wrote:
>>> Uri Lublin wrote:
>>>>
>>>> Migration to file, uses migration-to-fd (supports live migration).
>>>> Migration from file, uses qemu-fopen directly.
>>>
>>> Eh? Haven't we already talked about why this doesn't work? Maybe
>>> there's a v3 that you meant to send?
>>>
>>
>> Actually I do have a v3 which uses posix-aio-compat.c
>> It's a much more complicated solution then just writing to a file though.
>> Also I am not sure if I need to use a signal or not as the migration
>> (to-fd) code is polling. And if I use signal should I use SIGUSR2 or a
>> different one and use a pipe similar to block-raw-posix.c ?
>
> How is the migration code polling? It will attempt to do writes until a
> write returns EAGAIN. At this point, it will wait for notification that
> the more writes are available. Remember, migration is a streaming
> protocol, not a random access, so it only makes sense to have one
> outstanding request at a time.
>
> Your code would look something like:
>
> write() -> submit aio request
> until aio completes, write returns EAGAIN
> when aio completes, notify migration code that we are writable again
>
Basically that's what I've done in my v3.
But since I fake EAGAIN, and the fd is writeable by select, the scenario is as
follows:
file_write (s->write of fd-migration) returns EAGAIN
migrate_fd_put_buffer calls qemu_set_fd_handler2 with migrate_fd_put_notify
upon the next main_loop_wait fd is found writable and migrate_fd_put_notify is
called.
qemu_file_put_notify is called.
buffered_put_buffer is called.
buffered_flush is called.
s->freeze_output=0
migrate_fd_put_buffer is called
file_write is called again -- here I can check if previous aio write finished.
>>
>> Your concern in the previous patch was that write() to a regular file
>> might block. That's why I'm calling select before calling write. Do
>> you think select will mark the fd as writeable but write would still
>> block ?
>
> select() doesn't help. It will return that the file descriptor is
> writable and then the subsequent write will block.
Sure looks like a bug.
I'll send my v3 using posix-aio-compat for review soon.
Thanks,
Uri.
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] migration: adding migration to/from a file (v2)
2009-02-19 16:14 ` Uri Lublin
2009-02-19 16:51 ` Anthony Liguori
@ 2009-02-19 19:33 ` Jamie Lokier
1 sibling, 0 replies; 14+ messages in thread
From: Jamie Lokier @ 2009-02-19 19:33 UTC (permalink / raw)
To: qemu-devel
Uri Lublin wrote:
> Your concern in the previous patch was that write() to a regular file might
> block. That's why I'm calling select before calling write. Do you think
> select will mark the fd as writeable but write would still block ?
select() on a regular file _always_ marks the fd as writable. That's
one of the rules of select(). For non-blocking file access you can
only use threads or AIO, or a helper process.
-- Jamie
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] migration: adding migration to/from a file (v2)
2009-02-19 16:51 ` Anthony Liguori
2009-02-19 19:06 ` Uri Lublin
@ 2009-02-19 19:37 ` Jamie Lokier
2009-02-19 20:06 ` Anthony Liguori
1 sibling, 1 reply; 14+ messages in thread
From: Jamie Lokier @ 2009-02-19 19:37 UTC (permalink / raw)
To: qemu-devel; +Cc: Uri Lublin
Anthony Liguori wrote:
> Uri Lublin wrote:
> >Anthony Liguori wrote:
> >>Uri Lublin wrote:
> >>>
> >>>Migration to file, uses migration-to-fd (supports live migration).
> >>>Migration from file, uses qemu-fopen directly.
> >>
> >>Eh? Haven't we already talked about why this doesn't work? Maybe
> >>there's a v3 that you meant to send?
> >>
> >
> >Actually I do have a v3 which uses posix-aio-compat.c
> >It's a much more complicated solution then just writing to a file though.
> >Also I am not sure if I need to use a signal or not as the migration
> >(to-fd) code is polling. And if I use signal should I use SIGUSR2 or a
> >different one and use a pipe similar to block-raw-posix.c ?
>
> How is the migration code polling? It will attempt to do writes until a
> write returns EAGAIN. At this point, it will wait for notification that
> the more writes are available.
> Remember, migration is a streaming
> protocol, not a random access, so it only makes sense to have one
> outstanding request at a time.
FWIW, often with streaming (in general) the highest performance comes
from having two or more outstanding requests at a time, so there are
no gaps between requests being processed at the OS and device level.
However, unless migrate-to-file uses O_DIRECT that will be hidden by
the OS's buffering.
Then the remaining overhead from AIOs (or fake AIOs using threads) is
lots of context switching, one per write completed.
-- Jamie
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] migration: adding migration to/from a file (v2)
2009-02-19 19:06 ` Uri Lublin
@ 2009-02-19 20:05 ` Anthony Liguori
2009-02-19 20:28 ` Jamie Lokier
2009-02-19 20:45 ` Uri Lublin
0 siblings, 2 replies; 14+ messages in thread
From: Anthony Liguori @ 2009-02-19 20:05 UTC (permalink / raw)
To: Uri Lublin; +Cc: qemu-devel
Uri Lublin wrote:
> Anthony Liguori wrote:
>> Uri Lublin wrote:
>>> Anthony Liguori wrote:
>>>> Uri Lublin wrote:
>>>>>
>>>>> Migration to file, uses migration-to-fd (supports live migration).
>>>>> Migration from file, uses qemu-fopen directly.
>>>>
>>>> Eh? Haven't we already talked about why this doesn't work? Maybe
>>>> there's a v3 that you meant to send?
>>>>
>>>
>>> Actually I do have a v3 which uses posix-aio-compat.c
>>> It's a much more complicated solution then just writing to a file
>>> though.
>>> Also I am not sure if I need to use a signal or not as the migration
>>> (to-fd) code is polling. And if I use signal should I use SIGUSR2 or
>>> a different one and use a pipe similar to block-raw-posix.c ?
>>
>> How is the migration code polling? It will attempt to do writes
>> until a write returns EAGAIN. At this point, it will wait for
>> notification that the more writes are available. Remember, migration
>> is a streaming protocol, not a random access, so it only makes sense
>> to have one outstanding request at a time.
>>
>> Your code would look something like:
>>
>> write() -> submit aio request
>> until aio completes, write returns EAGAIN
>> when aio completes, notify migration code that we are writable again
>>
>
> Basically that's what I've done in my v3.
> But since I fake EAGAIN, and the fd is writeable by select, the
> scenario is as follows:
Oh, yes, this is a problem. You cannot use the existing migrate_fd code
unfortunately :-(
Or maybe you can. If you change all instances of
qemu_set_fd_handler2() in migrate.c (in the migrate_fd_ routines) to
basically, s->set_fd_handler(), you can introduce a simple wrapper that
calls qemu_set_fd_handler2() for everything else, but for yourself, use
it as a mechanism to keep track of what the "writable" callback should
be. This is the callback you would invoke when the aio request completes.
> Sure looks like a bug.
I wish! It's Unix suckiness.
Regards,
Anthony Liguori
> I'll send my v3 using posix-aio-compat for review soon.
>
> Thanks,
> Uri.
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] migration: adding migration to/from a file (v2)
2009-02-19 19:37 ` Jamie Lokier
@ 2009-02-19 20:06 ` Anthony Liguori
0 siblings, 0 replies; 14+ messages in thread
From: Anthony Liguori @ 2009-02-19 20:06 UTC (permalink / raw)
To: qemu-devel; +Cc: Uri Lublin
Jamie Lokier wrote:
>
>> Remember, migration is a streaming
>> protocol, not a random access, so it only makes sense to have one
>> outstanding request at a time.
>>
>
> Fwow, often with streaming (in general) the highest performance comes
> from having two or more outstanding requests at a time, so there are
> no gaps between requests being processed at the OS and device level.
>
> However, unless migrate-to-file uses O_DIRECT that will be hidden by
> the OS's buffering.
>
It's already buffered FWIW by the savevm infrastructure. You won't
usually see small operations.
Regards,
Anthony Liguori
> Then the remaining overhead from AIOs (or fake AIOs using threads) is
> lots context switching, one per write completed.
>
> -- Jamie
>
>
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] migration: adding migration to/from a file (v2)
2009-02-19 20:05 ` Anthony Liguori
@ 2009-02-19 20:28 ` Jamie Lokier
2009-02-19 23:36 ` M. Warner Losh
2009-02-19 20:45 ` Uri Lublin
1 sibling, 1 reply; 14+ messages in thread
From: Jamie Lokier @ 2009-02-19 20:28 UTC (permalink / raw)
To: qemu-devel; +Cc: Uri Lublin
Anthony Liguori wrote:
> >Sure looks like a bug.
> I wish! It's Unix suckiness.
Windows is the same.
It's more of a conceptual problem than it looks, not merely an API bug.
It comes down to "what would 'readable' and 'writable' mean on a file?".
For a pipe or socket, readability depends on whether it's connected
and some data has been received from another process. That's
well-defined.
For a file, readability depends on the OS knowing in advance that you
_want_ to read the file so it can issue a request to the underlying
device - but it doesn't know you want to read the file until you call
read() at a specific offset and length, so readability is not
well-defined. Writability is similar but also depends on dynamic
memory availability.
That's why AIO is more fundamentally required for files than streams.
AIO is how you tell the OS "I'm going to want to read or write this bit".
-- Jamie
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] migration: adding migration to/from a file (v2)
2009-02-19 20:05 ` Anthony Liguori
2009-02-19 20:28 ` Jamie Lokier
@ 2009-02-19 20:45 ` Uri Lublin
1 sibling, 0 replies; 14+ messages in thread
From: Uri Lublin @ 2009-02-19 20:45 UTC (permalink / raw)
To: Anthony Liguori; +Cc: qemu-devel
Anthony Liguori wrote:
> Uri Lublin wrote:
>> Anthony Liguori wrote:
>>> Uri Lublin wrote:
>>>> Anthony Liguori wrote:
>>>>> Uri Lublin wrote:
>>>>>>
>>>>>> Migration to file, uses migration-to-fd (supports live migration).
>>>>>> Migration from file, uses qemu-fopen directly.
>>>>>
>>>>> Eh? Haven't we already talked about why this doesn't work? Maybe
>>>>> there's a v3 that you meant to send?
>>>>>
>>>>
>>>> Actually I do have a v3 which uses posix-aio-compat.c
>>>> It's a much more complicated solution then just writing to a file
>>>> though.
>>>> Also I am not sure if I need to use a signal or not as the migration
>>>> (to-fd) code is polling. And if I use signal should I use SIGUSR2 or
>>>> a different one and use a pipe similar to block-raw-posix.c ?
>>>
>>> How is the migration code polling? It will attempt to do writes
>>> until a write returns EAGAIN. At this point, it will wait for
>>> notification that the more writes are available. Remember, migration
>>> is a streaming protocol, not a random access, so it only makes sense
>>> to have one outstanding request at a time.
>>>
>>> Your code would look something like:
>>>
>>> write() -> submit aio request
>>> until aio completes, write returns EAGAIN
>>> when aio completes, notify migration code that we are writable again
>>>
>>
>> Basically that's what I've done in my v3.
>> But since I fake EAGAIN, and the fd is writeable by select, the
>> scenario is as follows:
>
> Oh, yes, this is a problem. You cannot use the existing migrate_fd code
> unfortunately :-(
>
> Or maybe you can. If you change all instances of
> qemu_set_fd_handler2() in migrate.c (in the migrate_fd_ routines) to
> basically, s->set_fd_handler(), you can introduce a simple wrapper that
> calls qemu_set_fd_handler2() for everything else, but for yourself, use
> it as a mechanism to keep track of what the "writable" callback should
> be. This is the callback you would invoke when the aio request completes.
>
So it looks simpler to just (v4 implementation) spawn a single writing thread,
and use a pipe to "connect" it to the migration code (as the pipe supports
non-blocking write).
Would that be acceptable, or would you prefer that I implement s->set_fd_handler() ?
In that case I will need another thread and pipe for compatfd (qemu_signalfd), and
allocate a buffer to hold the data until it's written (as the original buffer
may change).
Thanks,
Uri.
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] migration: adding migration to/from a file (v2)
2009-02-19 20:28 ` Jamie Lokier
@ 2009-02-19 23:36 ` M. Warner Losh
2009-02-19 23:59 ` Anthony Liguori
0 siblings, 1 reply; 14+ messages in thread
From: M. Warner Losh @ 2009-02-19 23:36 UTC (permalink / raw)
To: qemu-devel, jamie; +Cc: uril
In message: <20090219202849.GE22319@shareable.org>
Jamie Lokier <jamie@shareable.org> writes:
: Anthony Liguori wrote:
: > >Sure looks like a bug.
: > I wish! It's Unix suckiness.
:
: Windows is the same.
: It's a more of a conceptual problem than it looks, not merely an API bug.
:
: It comes down to "what would 'readable' and 'writable' mean on a file?".
"Would a read or write operation block?" is a better way to look at
the interface that select() or poll() provides. For a regular file,
the answer is "no" since the writes are so fast and often
asynchronous...
Warner
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] migration: adding migration to/from a file (v2)
2009-02-19 23:36 ` M. Warner Losh
@ 2009-02-19 23:59 ` Anthony Liguori
2009-02-20 0:36 ` M. Warner Losh
0 siblings, 1 reply; 14+ messages in thread
From: Anthony Liguori @ 2009-02-19 23:59 UTC (permalink / raw)
To: qemu-devel; +Cc: uril
M. Warner Losh wrote:
> In message: <20090219202849.GE22319@shareable.org>
> Jamie Lokier <jamie@shareable.org> writes:
> : Anthony Liguori wrote:
> : > >Sure looks like a bug.
> : > I wish! It's Unix suckiness.
> :
> : Windows is the same.
> : It's a more of a conceptual problem than it looks, not merely an API bug.
> :
> : It comes down to "what would 'readable' and 'writable' mean on a file?".
>
> "Would a read or write operation block?" is a better way to look at
> the interface that select() or poll() provides. For a regular file,
> the answer is "no" since the writes are so fast and often
> asynchronous...
>
Except when they aren't..
The real issue is that read/write offers streaming semantics, not random
access. You cannot guarantee that a read is going to complete unless
you do read ahead. So the semantics would be something like pread(fd,
buf, X) = EAGAIN (kernel starts the operation for X), later, pread(fd,
buf, X) = OK. Sort of a weird interface.
For write, it's even more bizarre because you can't "write-ahead". If
you're dealing with O_SYNC or O_DIRECT, there's simply no semantic that
makes sense.
So fundamentally, read/write is a bad interface for random IO.
Regards,
Anthony Liguori
> Warner
>
>
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] migration: adding migration to/from a file (v2)
2009-02-19 23:59 ` Anthony Liguori
@ 2009-02-20 0:36 ` M. Warner Losh
0 siblings, 0 replies; 14+ messages in thread
From: M. Warner Losh @ 2009-02-20 0:36 UTC (permalink / raw)
To: qemu-devel, anthony; +Cc: uril
In message: <499DF276.4080305@codemonkey.ws>
Anthony Liguori <anthony@codemonkey.ws> writes:
: M. Warner Losh wrote:
: > In message: <20090219202849.GE22319@shareable.org>
: > Jamie Lokier <jamie@shareable.org> writes:
: > : Anthony Liguori wrote:
: > : > >Sure looks like a bug.
: > : > I wish! It's Unix suckiness.
: > :
: > : Windows is the same.
: > : It's a more of a conceptual problem than it looks, not merely an API bug.
: > :
: > : It comes down to "what would 'readable' and 'writable' mean on a file?".
: >
: > "Would a read or write operation block?" is a better way to look at
: > the interface that select() or poll() provides. For a regular file,
: > the answer is "no" since the writes are so fast and often
: > asynchronous...
: >
:
: Except when they aren't..
Right. That bit was added later, I think... My note was more of an
explanation of how we got here, not that it was perfect and a good
thing..
: The real issue is that read/write offers streaming semantics, not random
: access. You cannot guarantee that a read is going to complete unless
: you do read ahead. So the semantics would be something like pread(fd,
: buf, X) = EAGAIN (kernel starts the operation for X), later, pread(fd,
: buf, X) = OK. Sort of a weird interface.
:
: For write, it's even more bizarre because you can't "write-ahead". If
: you're dealing with O_SYNC or O_DIRECT, there's simply no semantic that
: makes sense.
:
: So fundamentally, read/write is a bad interface for random IO.
Also agreed.
: Regards,
:
: Anthony Liguori
:
: > Warner
: >
: >
: >
:
:
:
:
^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2009-02-20 0:40 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-02-19 11:45 [Qemu-devel] migration: adding migration to/from a file (v2) Uri Lublin
2009-02-19 13:57 ` Anthony Liguori
2009-02-19 16:14 ` Uri Lublin
2009-02-19 16:51 ` Anthony Liguori
2009-02-19 19:06 ` Uri Lublin
2009-02-19 20:05 ` Anthony Liguori
2009-02-19 20:28 ` Jamie Lokier
2009-02-19 23:36 ` M. Warner Losh
2009-02-19 23:59 ` Anthony Liguori
2009-02-20 0:36 ` M. Warner Losh
2009-02-19 20:45 ` Uri Lublin
2009-02-19 19:37 ` Jamie Lokier
2009-02-19 20:06 ` Anthony Liguori
2009-02-19 19:33 ` Jamie Lokier
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).