From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1KdQpi-0000MD-HM for qemu-devel@nongnu.org; Wed, 10 Sep 2008 10:38:14 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1KdQpb-0000ES-64 for qemu-devel@nongnu.org; Wed, 10 Sep 2008 10:38:12 -0400 Received: from [199.232.76.173] (port=44576 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1KdQpa-0000Di-Hd for qemu-devel@nongnu.org; Wed, 10 Sep 2008 10:38:06 -0400 Received: from wa-out-1112.google.com ([209.85.146.179]:17550) by monty-python.gnu.org with esmtp (Exim 4.60) (envelope-from ) id 1KdQpZ-0002KZ-UW for qemu-devel@nongnu.org; Wed, 10 Sep 2008 10:38:06 -0400 Received: by wa-out-1112.google.com with SMTP id j5so1925698wah.18 for ; Wed, 10 Sep 2008 07:38:03 -0700 (PDT) Message-ID: <5d6222a80809100738i3e3ed54fs666ca6c3225a7c63@mail.gmail.com> Date: Wed, 10 Sep 2008 11:38:02 -0300 From: "Glauber Costa" In-Reply-To: <1220989802-13706-2-git-send-email-aliguori@us.ibm.com> MIME-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Content-Disposition: inline References: <1220989802-13706-1-git-send-email-aliguori@us.ibm.com> <1220989802-13706-2-git-send-email-aliguori@us.ibm.com> Subject: [Qemu-devel] Re: [PATCH 1/10] Refactor QEMUFile for live migration Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Anthony Liguori Cc: Chris Wright , Uri Lublin , qemu-devel@nongnu.org, kvm@vger.kernel.org On Tue, Sep 9, 2008 at 4:49 PM, Anthony Liguori wrote: > To support live migration, we override QEMUFile so that instead of writing to > disk, the save/restore state happens over a network connection. > > This patch makes QEMUFile read/write operations function pointers so that we > can override them for live migration. > > Signed-off-by: Anthony Liguori > > diff --git a/hw/hw.h b/hw/hw.h > index b84ace0..771dbd4 100644 > --- a/hw/hw.h > +++ b/hw/hw.h > @@ -7,9 +7,36 @@ > > /* VM Load/Save */ > > +/* This function writes a chunk of data to a file at the given position. > + * The pos argument can be ignored if the file is only being used for > + * streaming. The handler should try to write all of the data it can. > + */ > +typedef void (QEMUFilePutBufferFunc)(void *opaque, const uint8_t *buf, > + int64_t pos, int size); > + > +/* Read a chunk of data from a file at the given position. The pos argument > + * can be ignored if the file is only be used for streaming. The number of > + * bytes actually read should be returned. > + */ > +typedef int (QEMUFileGetBufferFunc)(void *opaque, uint8_t *buf, > + int64_t pos, int size); > + > +/* Close a file and return an error code */ > +typedef int (QEMUFileCloseFunc)(void *opaque); > + > +/* Called to determine if the file has exceeded it's bandwidth allocation. The > + * bandwidth capping is a soft limit, not a hard limit. > + */ > +typedef int (QEMUFileRateLimit)(void *opaque); > + > +QEMUFile *qemu_fopen_ops(void *opaque, QEMUFilePutBufferFunc *put_buffer, > + QEMUFileGetBufferFunc *get_buffer, > + QEMUFileCloseFunc *close, > + QEMUFileRateLimit *rate_limit); > QEMUFile *qemu_fopen(const char *filename, const char *mode); > +QEMUFile *qemu_fopen_fd(int fd); > void qemu_fflush(QEMUFile *f); > -void qemu_fclose(QEMUFile *f); > +int qemu_fclose(QEMUFile *f); > void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size); > void qemu_put_byte(QEMUFile *f, int v); > void qemu_put_be16(QEMUFile *f, unsigned int v); > @@ -20,6 +47,12 @@ int qemu_get_byte(QEMUFile *f); > unsigned int qemu_get_be16(QEMUFile *f); > unsigned int qemu_get_be32(QEMUFile *f); > uint64_t qemu_get_be64(QEMUFile *f); > +int qemu_file_rate_limit(QEMUFile *f); > + > +/* Try to send any outstanding data. This function is useful when output is > + * halted due to rate limiting or EAGAIN errors occur as it can be used to > + * resume output. */ > +void qemu_file_put_notify(QEMUFile *f); > > static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv) > { > diff --git a/vl.c b/vl.c > index 0a457a9..659fd95 100644 > --- a/vl.c > +++ b/vl.c > @@ -6152,11 +6152,12 @@ void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque) > #define IO_BUF_SIZE 32768 > > struct QEMUFile { > - FILE *outfile; > - BlockDriverState *bs; > - int is_file; > - int is_writable; > - int64_t base_offset; > + QEMUFilePutBufferFunc *put_buffer; > + QEMUFileGetBufferFunc *get_buffer; > + QEMUFileCloseFunc *close; > + QEMUFileRateLimit *rate_limit; > + void *opaque; > + > int64_t buf_offset; /* start of buffer when writing, end of buffer > when reading */ > int buf_index; > @@ -6164,58 +6165,194 @@ struct QEMUFile { > uint8_t buf[IO_BUF_SIZE]; > }; > > +typedef struct QEMUFileFD > +{ > + int fd; > + QEMUFile *file; > +} QEMUFileFD; > + > +static void fd_put_notify(void *opaque) > +{ > + QEMUFileFD *s = opaque; > + > + /* Remove writable callback and do a put notify */ > + qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL); > + qemu_file_put_notify(s->file); > +} > + > +static int fd_put_buffer(void *opaque, const uint8_t *buf, > + int64_t pos, int size) > +{ > + QEMUFileFD *s = opaque; > + ssize_t len; > + > + do { > + len = write(s->fd, buf, size); > + } while (len == -1 && errno == EINTR); What about the len == size case ? > +static int fd_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size) > +{ > + QEMUFileFD *s = opaque; > + ssize_t len; > + > + do { > + len = read(s->fd, buf, size); > + } while (len == -1 && errno == EINTR); ditto. > + > + if (len == -1) > + len = -errno; > + > + return len; > +} nitpicking, but... maybe "ret" is a better name here? > + > +static int fd_close(void *opaque) > +{ > + QEMUFileFD *s = opaque; > + qemu_free(s); > + return 0; > +} Why don't we need to do any specific callback for closing the file descriptor? In the case of a socket, I imagine we may want to shut the socket down, for ex. > + > +QEMUFile *qemu_fopen_fd(int fd) > +{ > + QEMUFileFD *s = qemu_mallocz(sizeof(QEMUFileFD)); can't it fail? > + s->fd = fd; > + s->file = qemu_fopen_ops(s, fd_put_buffer, fd_get_buffer, fd_close, NULL); > + return s->file; > +} > +typedef struct QEMUFileUnix > +{ > + FILE *outfile; > +} QEMUFileUnix; > + > +static void file_put_buffer(void *opaque, const uint8_t *buf, > + int64_t pos, int size) > +{ > + QEMUFileUnix *s = opaque; > + fseek(s->outfile, pos, SEEK_SET); > + fwrite(buf, 1, size, s->outfile); > +} > + > +static int file_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size) > +{ > + QEMUFileUnix *s = opaque; > + fseek(s->outfile, pos, SEEK_SET); > + return fread(buf, 1, size, s->outfile); > +} > + > +static int file_close(void *opaque) > +{ > + QEMUFileUnix *s = opaque; > + fclose(s->outfile); > + qemu_free(s); > + return 0; > +} > + > QEMUFile *qemu_fopen(const char *filename, const char *mode) > { > - QEMUFile *f; > + QEMUFileUnix *s; > > - f = qemu_mallocz(sizeof(QEMUFile)); > - if (!f) > + s = qemu_mallocz(sizeof(QEMUFileUnix)); > + if (!s) > return NULL; > - if (!strcmp(mode, "wb")) { > - f->is_writable = 1; > - } else if (!strcmp(mode, "rb")) { > - f->is_writable = 0; > - } else { > - goto fail; > - } > - f->outfile = fopen(filename, mode); > - if (!f->outfile) > + > + s->outfile = fopen(filename, mode); > + if (!s->outfile) > goto fail; > - f->is_file = 1; > - return f; > - fail: > - if (f->outfile) > - fclose(f->outfile); > - qemu_free(f); > + > + if (!strcmp(mode, "wb")) > + return qemu_fopen_ops(s, file_put_buffer, NULL, file_close, NULL); > + else if (!strcmp(mode, "rb")) > + return qemu_fopen_ops(s, NULL, file_get_buffer, file_close, NULL); > + > +fail: > + if (s->outfile) > + fclose(s->outfile); > + qemu_free(s); > return NULL; > } > > -static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int64_t offset, int is_writable) > +typedef struct QEMUFileBdrv > +{ > + BlockDriverState *bs; > + int64_t base_offset; > +} QEMUFileBdrv; Isn't it possible to abstract the differences between bdrv and file so to have a common implementation between them? Do you think it's worthwhile ? > + > +QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int64_t offset, int is_writable) > +{ > + QEMUFileBdrv *s; > + > + s = qemu_mallocz(sizeof(QEMUFileBdrv)); > + if (!s) > + return NULL; > + > + s->bs = bs; > + s->base_offset = offset; > + > + if (is_writable) > + return qemu_fopen_ops(s, bdrv_put_buffer, NULL, bdrv_fclose, NULL); > + > + return qemu_fopen_ops(s, NULL, bdrv_get_buffer, bdrv_fclose, NULL); > +} > + > +QEMUFile *qemu_fopen_ops(void *opaque, QEMUFilePutBufferFunc *put_buffer, > + QEMUFileGetBufferFunc *get_buffer, > + QEMUFileCloseFunc *close, > + QEMUFileRateLimit *rate_limit) > { > QEMUFile *f; > > f = qemu_mallocz(sizeof(QEMUFile)); > if (!f) > - return NULL; > - f->is_file = 0; > - f->bs = bs; > - f->is_writable = is_writable; > - f->base_offset = offset; > + return NULL; > + > + f->opaque = opaque; > + f->put_buffer = put_buffer; > + f->get_buffer = get_buffer; > + f->close = close; > + f->rate_limit = rate_limit; > + > return f; > } > > void qemu_fflush(QEMUFile *f) > { > - if (!f->is_writable) > + if (!f->put_buffer) > return; > + > if (f->buf_index > 0) { > - if (f->is_file) { > - fseek(f->outfile, f->buf_offset, SEEK_SET); > - fwrite(f->buf, 1, f->buf_index, f->outfile); > - } else { > - bdrv_pwrite(f->bs, f->base_offset + f->buf_offset, > - f->buf, f->buf_index); > - } > + f->put_buffer(f->opaque, f->buf, f->buf_offset, f->buf_index); > f->buf_offset += f->buf_index; > f->buf_index = 0; > } > @@ -6225,32 +6362,31 @@ static void qemu_fill_buffer(QEMUFile *f) > { > int len; > > - if (f->is_writable) > + if (!f->get_buffer) > return; > - if (f->is_file) { > - fseek(f->outfile, f->buf_offset, SEEK_SET); > - len = fread(f->buf, 1, IO_BUF_SIZE, f->outfile); > - if (len < 0) > - len = 0; > - } else { > - len = bdrv_pread(f->bs, f->base_offset + f->buf_offset, > - f->buf, IO_BUF_SIZE); > - if (len < 0) > - len = 0; > - } > + > + len = f->get_buffer(f->opaque, f->buf, f->buf_offset, IO_BUF_SIZE); > + if (len < 0) > + len = 0; > + > f->buf_index = 0; > f->buf_size = len; > f->buf_offset += len; > } > > -void qemu_fclose(QEMUFile *f) > +int qemu_fclose(QEMUFile *f) > { > - if (f->is_writable) > - qemu_fflush(f); > - if (f->is_file) { > - fclose(f->outfile); > - } > + int ret = 0; > + qemu_fflush(f); > + if (f->close) > + ret = f->close(f->opaque); > qemu_free(f); > + return ret; > +} > + > +void qemu_file_put_notify(QEMUFile *f) > +{ > + f->put_buffer(f->opaque, NULL, 0, 0); > } > > void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size) > @@ -6324,7 +6460,7 @@ int64_t qemu_fseek(QEMUFile *f, int64_t pos, int whence) > /* SEEK_END not supported */ > return -1; > } > - if (f->is_writable) { > + if (f->put_buffer) { > qemu_fflush(f); > f->buf_offset = pos; > } else { > @@ -6335,6 +6471,14 @@ int64_t qemu_fseek(QEMUFile *f, int64_t pos, int whence) > return pos; > } > > +int qemu_file_rate_limit(QEMUFile *f) > +{ > + if (f->rate_limit) > + return f->rate_limit(f->opaque); > + > + return 0; > +} > + > void qemu_put_be16(QEMUFile *f, unsigned int v) > { > qemu_put_byte(f, v >> 8); > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- Glauber Costa. "Free as in Freedom" http://glommer.net "The less confident you are, the more serious you have to act."