From: Paolo Bonzini <pbonzini@redhat.com>
To: mrhines@linux.vnet.ibm.com
Cc: aliguori@us.ibm.com, mst@redhat.com, qemu-devel@nongnu.org,
owasserm@redhat.com, abali@us.ibm.com, mrhines@us.ibm.com,
gokul@us.ibm.com
Subject: Re: [Qemu-devel] [RFC PATCH RDMA support v4: 08/10] introduce QEMUFileRDMA
Date: Mon, 18 Mar 2013 10:09:19 +0100 [thread overview]
Message-ID: <5146D9BF.3030407@redhat.com> (raw)
In-Reply-To: <1363576743-6146-9-git-send-email-mrhines@linux.vnet.ibm.com>
Il 18/03/2013 04:19, mrhines@linux.vnet.ibm.com ha scritto:
> From: "Michael R. Hines" <mrhines@us.ibm.com>
>
> This compiles with and without --enable-rdma.
>
> Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>
> ---
> include/migration/qemu-file.h | 10 +++
> savevm.c | 172 ++++++++++++++++++++++++++++++++++++++---
> 2 files changed, 172 insertions(+), 10 deletions(-)
>
> diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
> index df81261..9046751 100644
> --- a/include/migration/qemu-file.h
> +++ b/include/migration/qemu-file.h
> @@ -51,23 +51,33 @@ typedef int (QEMUFileCloseFunc)(void *opaque);
> */
> typedef int (QEMUFileGetFD)(void *opaque);
>
> +/*
> + * 'drain' from a QEMUFile perspective means
> + * to flush the outbound send buffer
> + * (if one exists). (Only used by RDMA right now)
> + */
> +typedef int (QEMUFileDrainFunc)(void *opaque);
> +
> typedef struct QEMUFileOps {
> QEMUFilePutBufferFunc *put_buffer;
> QEMUFileGetBufferFunc *get_buffer;
> QEMUFileCloseFunc *close;
> QEMUFileGetFD *get_fd;
> + QEMUFileDrainFunc *drain;
> } QEMUFileOps;
>
> QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops);
> QEMUFile *qemu_fopen(const char *filename, const char *mode);
> QEMUFile *qemu_fdopen(int fd, const char *mode);
> QEMUFile *qemu_fopen_socket(int fd, const char *mode);
> +QEMUFile *qemu_fopen_rdma(void *opaque, const char *mode);
> QEMUFile *qemu_popen_cmd(const char *command, const char *mode);
> int qemu_get_fd(QEMUFile *f);
> int qemu_fclose(QEMUFile *f);
> int64_t qemu_ftell(QEMUFile *f);
> void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size);
> void qemu_put_byte(QEMUFile *f, int v);
> +int qemu_drain(QEMUFile *f);
>
> static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v)
> {
> diff --git a/savevm.c b/savevm.c
> index 35c8d1e..9b90b7f 100644
> --- a/savevm.c
> +++ b/savevm.c
> @@ -32,6 +32,7 @@
> #include "qemu/timer.h"
> #include "audio/audio.h"
> #include "migration/migration.h"
> +#include "migration/rdma.h"
> #include "qemu/sockets.h"
> #include "qemu/queue.h"
> #include "sysemu/cpus.h"
> @@ -143,6 +144,13 @@ typedef struct QEMUFileSocket
> QEMUFile *file;
> } QEMUFileSocket;
>
> +typedef struct QEMUFileRDMA
> +{
> + void *rdma;
This is an RDMAData *. Please avoid using void * as much as possible.
> + size_t len;
> + QEMUFile *file;
> +} QEMUFileRDMA;
> +
> typedef struct {
> Coroutine *co;
> int fd;
> @@ -178,6 +186,66 @@ static int socket_get_fd(void *opaque)
> return s->fd;
> }
>
> +/*
> + * SEND messages for none-live state only.
> + * pc.ram is handled elsewhere...
> + */
> +static int qemu_rdma_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int size)
> +{
> + QEMUFileRDMA *r = opaque;
> + size_t remaining = size;
> + uint8_t * data = (void *) buf;
> +
> + /*
> + * Although we're sending non-live
> + * state here, push out any writes that
> + * we're queued up for pc.ram anyway.
> + */
> + if (qemu_rdma_write_flush(r->rdma) < 0)
> + return -EIO;
> +
> + while(remaining) {
> + r->len = MIN(remaining, RDMA_SEND_INCREMENT);
> + remaining -= r->len;
> +
> + if(qemu_rdma_exchange_send(r->rdma, data, r->len) < 0)
> + return -EINVAL;
> +
> + data += r->len;
> + }
> +
> + return size;
> +}
> +
> +/*
> + * RDMA links don't use bytestreams, so we have to
> + * return bytes to QEMUFile opportunistically.
> + */
> +static int qemu_rdma_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
> +{
> + QEMUFileRDMA *r = opaque;
> +
> + /*
> + * First, we hold on to the last SEND message we
> + * were given and dish out the bytes until we run
> + * out of bytes.
> + */
> + if((r->len = qemu_rdma_fill(r->rdma, buf, size)))
> + return r->len;
> +
> + /*
> + * Once we run out, we block and wait for another
> + * SEND message to arrive.
> + */
> + if(qemu_rdma_exchange_recv(r->rdma) < 0)
> + return -EINVAL;
> +
> + /*
> + * SEND was received with new bytes, now try again.
> + */
> + return qemu_rdma_fill(r->rdma, buf, size);
> +}
Please move these functions closer to qemu_fopen_rdma (or better, to an
RDMA-specific file altogether). Also, using qemu_rdma_fill introduces a
dependency of savevm.c on migration-rdma.c. There should be no such
dependency; migration-rdma.c should be used only by migration.c.
> static int socket_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
> {
> QEMUFileSocket *s = opaque;
> @@ -390,16 +458,24 @@ static const QEMUFileOps socket_write_ops = {
> .close = socket_close
> };
>
> -QEMUFile *qemu_fopen_socket(int fd, const char *mode)
> +static bool qemu_mode_is_not_valid(const char * mode)
> {
> - QEMUFileSocket *s = g_malloc0(sizeof(QEMUFileSocket));
> -
> if (mode == NULL ||
> (mode[0] != 'r' && mode[0] != 'w') ||
> mode[1] != 'b' || mode[2] != 0) {
> fprintf(stderr, "qemu_fopen: Argument validity check failed\n");
> - return NULL;
> + return true;
> }
> +
> + return false;
> +}
> +
> +QEMUFile *qemu_fopen_socket(int fd, const char *mode)
> +{
> + QEMUFileSocket *s = g_malloc0(sizeof(QEMUFileSocket));
> +
> + if(qemu_mode_is_not_valid(mode))
> + return NULL;
>
> s->fd = fd;
> if (mode[0] == 'w') {
> @@ -411,16 +487,66 @@ QEMUFile *qemu_fopen_socket(int fd, const char *mode)
> return s->file;
> }
>
> +static int qemu_rdma_close(void *opaque)
> +{
> + QEMUFileRDMA *r = opaque;
> + if(r->rdma) {
> + qemu_rdma_cleanup(r->rdma);
> + g_free(r->rdma);
> + }
> + g_free(r);
> + return 0;
> +}
> +
> +void * migrate_use_rdma(QEMUFile *f)
> +{
> + QEMUFileRDMA *r = f->opaque;
> +
> + return qemu_rdma_enabled(r->rdma) ? r->rdma : NULL;
You cannot be sure that f->opaque->rdma is a valid pointer. For
example, the first field in a socket QEMUFile's is a file descriptor.
Instead, you could use a qemu_file_ops_are(const QEMUFile *, const
QEMUFileOps *) function that checks if the file uses the given ops.
Then, migrate_use_rdma can simply check if the QEMUFile is using the
RDMA ops structure.
With this change, the "enabled" field of RDMAData should go.
> +}
> +
> +static int qemu_rdma_drain_completion(void *opaque)
> +{
> + QEMUFileRDMA *r = opaque;
> + r->len = 0;
> + return qemu_rdma_drain_cq(r->rdma);
> +}
> +
> +static const QEMUFileOps rdma_read_ops = {
> + .get_buffer = qemu_rdma_get_buffer,
> + .close = qemu_rdma_close,
> +};
> +
> +static const QEMUFileOps rdma_write_ops = {
> + .put_buffer = qemu_rdma_put_buffer,
> + .close = qemu_rdma_close,
> + .drain = qemu_rdma_drain_completion,
> +};
> +
> +QEMUFile *qemu_fopen_rdma(void *opaque, const char * mode)
> +{
> + QEMUFileRDMA *r = g_malloc0(sizeof(QEMUFileRDMA));
> +
> + if(qemu_mode_is_not_valid(mode))
> + return NULL;
> +
> + r->rdma = opaque;
> +
> + if (mode[0] == 'w') {
> + r->file = qemu_fopen_ops(r, &rdma_write_ops);
> + } else {
> + r->file = qemu_fopen_ops(r, &rdma_read_ops);
> + }
> +
> + return r->file;
> +}
> +
> QEMUFile *qemu_fopen(const char *filename, const char *mode)
> {
> QEMUFileStdio *s;
>
> - if (mode == NULL ||
> - (mode[0] != 'r' && mode[0] != 'w') ||
> - mode[1] != 'b' || mode[2] != 0) {
> - fprintf(stderr, "qemu_fopen: Argument validity check failed\n");
> - return NULL;
> - }
> + if(qemu_mode_is_not_valid(mode))
> + return NULL;
>
> s = g_malloc0(sizeof(QEMUFileStdio));
>
> @@ -497,6 +623,24 @@ static void qemu_file_set_error(QEMUFile *f, int ret)
> }
> }
>
> +/*
> + * Called only for RDMA right now at the end
> + * of each live iteration of memory.
> + *
> + * 'drain' from a QEMUFile perspective means
> + * to flush the outbound send buffer
> + * (if one exists).
> + *
> + * For RDMA, this means to make sure we've
> + * received completion queue (CQ) messages
> + * successfully for all of the RDMA writes
> + * that we requested.
> + */
> +int qemu_drain(QEMUFile *f)
> +{
> + return f->ops->drain ? f->ops->drain(f->opaque) : 0;
> +}
Hmm, this is very similar to qemu_fflush, but not quite. :/
Why exactly is this needed?
> /** Flushes QEMUFile buffer
> *
> */
> @@ -723,6 +867,8 @@ int qemu_get_byte(QEMUFile *f)
> int64_t qemu_ftell(QEMUFile *f)
> {
> qemu_fflush(f);
> + if(migrate_use_rdma(f))
> + return delta_norm_mig_bytes_transferred();
Not needed, and another undesirable dependency (savevm.c ->
arch_init.c). Just update f->pos in save_rdma_page.
This is taking shape. Thanks for persevering!
Paolo
> return f->pos;
> }
>
> @@ -1737,6 +1883,12 @@ void qemu_savevm_state_complete(QEMUFile *f)
> }
> }
>
> + if ((ret = qemu_drain(f)) < 0) {
> + fprintf(stderr, "failed to drain RDMA first!\n");
> + qemu_file_set_error(f, ret);
> + return;
> + }
> +
> QTAILQ_FOREACH(se, &savevm_handlers, entry) {
> int len;
>
>
next prev parent reply other threads:[~2013-03-18 9:09 UTC|newest]
Thread overview: 73+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-03-18 3:18 [Qemu-devel] [RFC PATCH RDMA support v4: 00/10] cleaner ramblocks and documentation mrhines
2013-03-18 3:18 ` [Qemu-devel] [RFC PATCH RDMA support v4: 01/10] ./configure --enable-rdma mrhines
2013-03-18 3:18 ` [Qemu-devel] [RFC PATCH RDMA support v4: 02/10] check for CONFIG_RDMA mrhines
2013-03-18 3:18 ` [Qemu-devel] [RFC PATCH RDMA support v4: 03/10] more verbose documentation of the RDMA transport mrhines
2013-03-18 10:40 ` Michael S. Tsirkin
2013-03-18 20:24 ` Michael R. Hines
2013-03-18 21:26 ` Michael S. Tsirkin
2013-03-18 23:23 ` Michael R. Hines
2013-03-19 8:19 ` Michael S. Tsirkin
2013-03-19 13:21 ` Michael R. Hines
2013-03-19 15:08 ` Michael R. Hines
2013-03-19 15:16 ` Michael S. Tsirkin
2013-03-19 15:32 ` Michael R. Hines
2013-03-19 15:36 ` Michael S. Tsirkin
2013-03-19 17:09 ` Michael R. Hines
2013-03-19 17:14 ` Paolo Bonzini
2013-03-19 17:23 ` Michael S. Tsirkin
2013-03-19 17:40 ` Michael R. Hines
2013-03-19 17:52 ` Paolo Bonzini
2013-03-19 18:04 ` Michael R. Hines
2013-03-20 13:07 ` Michael S. Tsirkin
2013-03-20 15:15 ` Michael R. Hines
2013-03-20 15:22 ` Michael R. Hines
2013-03-20 15:55 ` Michael S. Tsirkin
2013-03-20 16:08 ` Michael R. Hines
2013-03-20 19:06 ` Michael S. Tsirkin
2013-03-20 20:20 ` Michael R. Hines
2013-03-20 20:31 ` Michael S. Tsirkin
2013-03-20 20:39 ` Michael R. Hines
2013-03-20 20:46 ` Michael S. Tsirkin
2013-03-20 20:56 ` Michael R. Hines
2013-03-21 5:20 ` Michael S. Tsirkin
2013-03-20 20:24 ` Michael R. Hines
2013-03-20 20:37 ` Michael S. Tsirkin
2013-03-20 20:45 ` Michael R. Hines
2013-03-20 20:52 ` Michael S. Tsirkin
2013-03-19 17:49 ` Michael R. Hines
2013-03-21 6:11 ` Michael S. Tsirkin
2013-03-21 15:22 ` Michael R. Hines
2013-04-05 20:45 ` Michael R. Hines
2013-04-05 20:46 ` Michael R. Hines
2013-03-18 3:18 ` [Qemu-devel] [RFC PATCH RDMA support v4: 04/10] iterators for getting the RAMBlocks mrhines
2013-03-18 8:48 ` Paolo Bonzini
2013-03-18 20:25 ` Michael R. Hines
2013-03-18 3:18 ` [Qemu-devel] [RFC PATCH RDMA support v4: 05/10] reuse function for parsing the QMP 'migrate' string mrhines
2013-03-18 3:18 ` [Qemu-devel] [RFC PATCH RDMA support v4: 06/10] core RDMA migration code (rdma.c) mrhines
2013-03-18 3:19 ` [Qemu-devel] [RFC PATCH RDMA support v4: 07/10] connection-establishment for RDMA mrhines
2013-03-18 8:56 ` Paolo Bonzini
2013-03-18 20:26 ` Michael R. Hines
2013-03-18 3:19 ` [Qemu-devel] [RFC PATCH RDMA support v4: 08/10] introduce QEMUFileRDMA mrhines
2013-03-18 9:09 ` Paolo Bonzini [this message]
2013-03-18 20:33 ` Michael R. Hines
2013-03-19 9:18 ` Paolo Bonzini
2013-03-19 13:12 ` Michael R. Hines
2013-03-19 13:25 ` Paolo Bonzini
2013-03-19 13:40 ` Michael R. Hines
2013-03-19 13:45 ` Paolo Bonzini
2013-03-19 14:10 ` Michael R. Hines
2013-03-19 14:22 ` Paolo Bonzini
2013-03-19 15:02 ` [Qemu-devel] [Bug]? (RDMA-related) ballooned memory not consulted during migration? Michael R. Hines
2013-03-19 15:12 ` Michael R. Hines
2013-03-19 15:17 ` Michael S. Tsirkin
2013-03-19 18:27 ` [Qemu-devel] [RFC PATCH RDMA support v4: 08/10] introduce QEMUFileRDMA Michael R. Hines
2013-03-19 18:40 ` Paolo Bonzini
2013-03-20 15:20 ` Paolo Bonzini
2013-03-20 16:09 ` Michael R. Hines
2013-03-18 3:19 ` [Qemu-devel] [RFC PATCH RDMA support v4: 09/10] check for QMP string and bypass nonblock() calls mrhines
2013-03-18 8:47 ` Paolo Bonzini
2013-03-18 20:37 ` Michael R. Hines
2013-03-19 9:23 ` Paolo Bonzini
2013-03-19 13:08 ` Michael R. Hines
2013-03-19 13:20 ` Paolo Bonzini
2013-03-18 3:19 ` [Qemu-devel] [RFC PATCH RDMA support v4: 10/10] send pc.ram over RDMA mrhines
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5146D9BF.3030407@redhat.com \
--to=pbonzini@redhat.com \
--cc=abali@us.ibm.com \
--cc=aliguori@us.ibm.com \
--cc=gokul@us.ibm.com \
--cc=mrhines@linux.vnet.ibm.com \
--cc=mrhines@us.ibm.com \
--cc=mst@redhat.com \
--cc=owasserm@redhat.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).