From: Paolo Bonzini <pbonzini@redhat.com>
To: mrhines@linux.vnet.ibm.com
Cc: aliguori@us.ibm.com, mst@redhat.com, qemu-devel@nongnu.org,
owasserm@redhat.com, abali@us.ibm.com, mrhines@us.ibm.com,
gokul@us.ibm.com
Subject: Re: [Qemu-devel] [RFC PATCH RDMA support v4: 08/10] introduce QEMUFileRDMA
Date: Mon, 18 Mar 2013 10:09:19 +0100 [thread overview]
Message-ID: <5146D9BF.3030407@redhat.com> (raw)
In-Reply-To: <1363576743-6146-9-git-send-email-mrhines@linux.vnet.ibm.com>
Il 18/03/2013 04:19, mrhines@linux.vnet.ibm.com ha scritto:
> From: "Michael R. Hines" <mrhines@us.ibm.com>
>
> This compiles with and without --enable-rdma.
>
> Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>
> ---
> include/migration/qemu-file.h | 10 +++
> savevm.c | 172 ++++++++++++++++++++++++++++++++++++++---
> 2 files changed, 172 insertions(+), 10 deletions(-)
>
> diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
> index df81261..9046751 100644
> --- a/include/migration/qemu-file.h
> +++ b/include/migration/qemu-file.h
> @@ -51,23 +51,33 @@ typedef int (QEMUFileCloseFunc)(void *opaque);
> */
> typedef int (QEMUFileGetFD)(void *opaque);
>
> +/*
> + * 'drain' from a QEMUFile perspective means
> + * to flush the outbound send buffer
> + * (if one exists). (Only used by RDMA right now)
> + */
> +typedef int (QEMUFileDrainFunc)(void *opaque);
> +
> typedef struct QEMUFileOps {
> QEMUFilePutBufferFunc *put_buffer;
> QEMUFileGetBufferFunc *get_buffer;
> QEMUFileCloseFunc *close;
> QEMUFileGetFD *get_fd;
> + QEMUFileDrainFunc *drain;
> } QEMUFileOps;
>
> QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops);
> QEMUFile *qemu_fopen(const char *filename, const char *mode);
> QEMUFile *qemu_fdopen(int fd, const char *mode);
> QEMUFile *qemu_fopen_socket(int fd, const char *mode);
> +QEMUFile *qemu_fopen_rdma(void *opaque, const char *mode);
> QEMUFile *qemu_popen_cmd(const char *command, const char *mode);
> int qemu_get_fd(QEMUFile *f);
> int qemu_fclose(QEMUFile *f);
> int64_t qemu_ftell(QEMUFile *f);
> void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size);
> void qemu_put_byte(QEMUFile *f, int v);
> +int qemu_drain(QEMUFile *f);
>
> static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v)
> {
> diff --git a/savevm.c b/savevm.c
> index 35c8d1e..9b90b7f 100644
> --- a/savevm.c
> +++ b/savevm.c
> @@ -32,6 +32,7 @@
> #include "qemu/timer.h"
> #include "audio/audio.h"
> #include "migration/migration.h"
> +#include "migration/rdma.h"
> #include "qemu/sockets.h"
> #include "qemu/queue.h"
> #include "sysemu/cpus.h"
> @@ -143,6 +144,13 @@ typedef struct QEMUFileSocket
> QEMUFile *file;
> } QEMUFileSocket;
>
> +typedef struct QEMUFileRDMA
> +{
> + void *rdma;
This is an RDMAData *. Please avoid using void * as much as possible.
> + size_t len;
> + QEMUFile *file;
> +} QEMUFileRDMA;
> +
> typedef struct {
> Coroutine *co;
> int fd;
> @@ -178,6 +186,66 @@ static int socket_get_fd(void *opaque)
> return s->fd;
> }
>
> +/*
> + * SEND messages for none-live state only.
> + * pc.ram is handled elsewhere...
> + */
> +static int qemu_rdma_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int size)
> +{
> + QEMUFileRDMA *r = opaque;
> + size_t remaining = size;
> + uint8_t * data = (void *) buf;
> +
> + /*
> + * Although we're sending non-live
> + * state here, push out any writes that
> + * we're queued up for pc.ram anyway.
> + */
> + if (qemu_rdma_write_flush(r->rdma) < 0)
> + return -EIO;
> +
> + while(remaining) {
> + r->len = MIN(remaining, RDMA_SEND_INCREMENT);
> + remaining -= r->len;
> +
> + if(qemu_rdma_exchange_send(r->rdma, data, r->len) < 0)
> + return -EINVAL;
> +
> + data += r->len;
> + }
> +
> + return size;
> +}
> +
> +/*
> + * RDMA links don't use bytestreams, so we have to
> + * return bytes to QEMUFile opportunistically.
> + */
> +static int qemu_rdma_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
> +{
> + QEMUFileRDMA *r = opaque;
> +
> + /*
> + * First, we hold on to the last SEND message we
> + * were given and dish out the bytes until we run
> + * out of bytes.
> + */
> + if((r->len = qemu_rdma_fill(r->rdma, buf, size)))
> + return r->len;
> +
> + /*
> + * Once we run out, we block and wait for another
> + * SEND message to arrive.
> + */
> + if(qemu_rdma_exchange_recv(r->rdma) < 0)
> + return -EINVAL;
> +
> + /*
> + * SEND was received with new bytes, now try again.
> + */
> + return qemu_rdma_fill(r->rdma, buf, size);
> +}
Please move these functions closer to qemu_fopen_rdma (or better, to an
RDMA-specific file altogether). Also, using qemu_rdma_fill introduces a
dependency of savevm.c on migration-rdma.c. There should be no such
dependency; migration-rdma.c should be used only by migration.c.
> static int socket_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
> {
> QEMUFileSocket *s = opaque;
> @@ -390,16 +458,24 @@ static const QEMUFileOps socket_write_ops = {
> .close = socket_close
> };
>
> -QEMUFile *qemu_fopen_socket(int fd, const char *mode)
> +static bool qemu_mode_is_not_valid(const char * mode)
> {
> - QEMUFileSocket *s = g_malloc0(sizeof(QEMUFileSocket));
> -
> if (mode == NULL ||
> (mode[0] != 'r' && mode[0] != 'w') ||
> mode[1] != 'b' || mode[2] != 0) {
> fprintf(stderr, "qemu_fopen: Argument validity check failed\n");
> - return NULL;
> + return true;
> }
> +
> + return false;
> +}
> +
> +QEMUFile *qemu_fopen_socket(int fd, const char *mode)
> +{
> + QEMUFileSocket *s = g_malloc0(sizeof(QEMUFileSocket));
> +
> + if(qemu_mode_is_not_valid(mode))
> + return NULL;
>
> s->fd = fd;
> if (mode[0] == 'w') {
> @@ -411,16 +487,66 @@ QEMUFile *qemu_fopen_socket(int fd, const char *mode)
> return s->file;
> }
>
> +static int qemu_rdma_close(void *opaque)
> +{
> + QEMUFileRDMA *r = opaque;
> + if(r->rdma) {
> + qemu_rdma_cleanup(r->rdma);
> + g_free(r->rdma);
> + }
> + g_free(r);
> + return 0;
> +}
> +
> +void * migrate_use_rdma(QEMUFile *f)
> +{
> + QEMUFileRDMA *r = f->opaque;
> +
> + return qemu_rdma_enabled(r->rdma) ? r->rdma : NULL;
You cannot be sure that f->opaque->rdma is a valid pointer. For
example, the first field in a socket QEMUFile's is a file descriptor.
Instead, you could use a qemu_file_ops_are(const QEMUFile *, const
QEMUFileOps *) function that checks if the file uses the given ops.
Then, migrate_use_rdma can simply check if the QEMUFile is using the
RDMA ops structure.
With this change, the "enabled" field of RDMAData should go.
> +}
> +
> +static int qemu_rdma_drain_completion(void *opaque)
> +{
> + QEMUFileRDMA *r = opaque;
> + r->len = 0;
> + return qemu_rdma_drain_cq(r->rdma);
> +}
> +
> +static const QEMUFileOps rdma_read_ops = {
> + .get_buffer = qemu_rdma_get_buffer,
> + .close = qemu_rdma_close,
> +};
> +
> +static const QEMUFileOps rdma_write_ops = {
> + .put_buffer = qemu_rdma_put_buffer,
> + .close = qemu_rdma_close,
> + .drain = qemu_rdma_drain_completion,
> +};
> +
> +QEMUFile *qemu_fopen_rdma(void *opaque, const char * mode)
> +{
> + QEMUFileRDMA *r = g_malloc0(sizeof(QEMUFileRDMA));
> +
> + if(qemu_mode_is_not_valid(mode))
> + return NULL;
> +
> + r->rdma = opaque;
> +
> + if (mode[0] == 'w') {
> + r->file = qemu_fopen_ops(r, &rdma_write_ops);
> + } else {
> + r->file = qemu_fopen_ops(r, &rdma_read_ops);
> + }
> +
> + return r->file;
> +}
> +
> QEMUFile *qemu_fopen(const char *filename, const char *mode)
> {
> QEMUFileStdio *s;
>
> - if (mode == NULL ||
> - (mode[0] != 'r' && mode[0] != 'w') ||
> - mode[1] != 'b' || mode[2] != 0) {
> - fprintf(stderr, "qemu_fopen: Argument validity check failed\n");
> - return NULL;
> - }
> + if(qemu_mode_is_not_valid(mode))
> + return NULL;
>
> s = g_malloc0(sizeof(QEMUFileStdio));
>
> @@ -497,6 +623,24 @@ static void qemu_file_set_error(QEMUFile *f, int ret)
> }
> }
>
> +/*
> + * Called only for RDMA right now at the end
> + * of each live iteration of memory.
> + *
> + * 'drain' from a QEMUFile perspective means
> + * to flush the outbound send buffer
> + * (if one exists).
> + *
> + * For RDMA, this means to make sure we've
> + * received completion queue (CQ) messages
> + * successfully for all of the RDMA writes
> + * that we requested.
> + */
> +int qemu_drain(QEMUFile *f)
> +{
> + return f->ops->drain ? f->ops->drain(f->opaque) : 0;
> +}
Hmm, this is very similar to qemu_fflush, but not quite. :/
Why exactly is this needed?
> /** Flushes QEMUFile buffer
> *
> */
> @@ -723,6 +867,8 @@ int qemu_get_byte(QEMUFile *f)
> int64_t qemu_ftell(QEMUFile *f)
> {
> qemu_fflush(f);
> + if(migrate_use_rdma(f))
> + return delta_norm_mig_bytes_transferred();
Not needed, and another undesirable dependency (savevm.c ->
arch_init.c). Just update f->pos in save_rdma_page.
This is taking shape. Thanks for persevering!
Paolo
> return f->pos;
> }
>
> @@ -1737,6 +1883,12 @@ void qemu_savevm_state_complete(QEMUFile *f)
> }
> }
>
> + if ((ret = qemu_drain(f)) < 0) {
> + fprintf(stderr, "failed to drain RDMA first!\n");
> + qemu_file_set_error(f, ret);
> + return;
> + }
> +
> QTAILQ_FOREACH(se, &savevm_handlers, entry) {
> int len;
>
>
next prev parent reply other threads:[~2013-03-18 9:09 UTC|newest]
Thread overview: 73+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-03-18 3:18 [Qemu-devel] [RFC PATCH RDMA support v4: 00/10] cleaner ramblocks and documentation mrhines
2013-03-18 3:18 ` [Qemu-devel] [RFC PATCH RDMA support v4: 01/10] ./configure --enable-rdma mrhines
2013-03-18 3:18 ` [Qemu-devel] [RFC PATCH RDMA support v4: 02/10] check for CONFIG_RDMA mrhines
2013-03-18 3:18 ` [Qemu-devel] [RFC PATCH RDMA support v4: 03/10] more verbose documentation of the RDMA transport mrhines
2013-03-18 10:40 ` Michael S. Tsirkin
2013-03-18 20:24 ` Michael R. Hines
2013-03-18 21:26 ` Michael S. Tsirkin
2013-03-18 23:23 ` Michael R. Hines
2013-03-19 8:19 ` Michael S. Tsirkin
2013-03-19 13:21 ` Michael R. Hines
2013-03-19 15:08 ` Michael R. Hines
2013-03-19 15:16 ` Michael S. Tsirkin
2013-03-19 15:32 ` Michael R. Hines
2013-03-19 15:36 ` Michael S. Tsirkin
2013-03-19 17:09 ` Michael R. Hines
2013-03-19 17:14 ` Paolo Bonzini
2013-03-19 17:23 ` Michael S. Tsirkin
2013-03-19 17:40 ` Michael R. Hines
2013-03-19 17:52 ` Paolo Bonzini
2013-03-19 18:04 ` Michael R. Hines
2013-03-20 13:07 ` Michael S. Tsirkin
2013-03-20 15:15 ` Michael R. Hines
2013-03-20 15:22 ` Michael R. Hines
2013-03-20 15:55 ` Michael S. Tsirkin
2013-03-20 16:08 ` Michael R. Hines
2013-03-20 19:06 ` Michael S. Tsirkin
2013-03-20 20:20 ` Michael R. Hines
2013-03-20 20:31 ` Michael S. Tsirkin
2013-03-20 20:39 ` Michael R. Hines
2013-03-20 20:46 ` Michael S. Tsirkin
2013-03-20 20:56 ` Michael R. Hines
2013-03-21 5:20 ` Michael S. Tsirkin
2013-03-20 20:24 ` Michael R. Hines
2013-03-20 20:37 ` Michael S. Tsirkin
2013-03-20 20:45 ` Michael R. Hines
2013-03-20 20:52 ` Michael S. Tsirkin
2013-03-19 17:49 ` Michael R. Hines
2013-03-21 6:11 ` Michael S. Tsirkin
2013-03-21 15:22 ` Michael R. Hines
2013-04-05 20:45 ` Michael R. Hines
2013-04-05 20:46 ` Michael R. Hines
2013-03-18 3:18 ` [Qemu-devel] [RFC PATCH RDMA support v4: 04/10] iterators for getting the RAMBlocks mrhines
2013-03-18 8:48 ` Paolo Bonzini
2013-03-18 20:25 ` Michael R. Hines
2013-03-18 3:18 ` [Qemu-devel] [RFC PATCH RDMA support v4: 05/10] reuse function for parsing the QMP 'migrate' string mrhines
2013-03-18 3:18 ` [Qemu-devel] [RFC PATCH RDMA support v4: 06/10] core RDMA migration code (rdma.c) mrhines
2013-03-18 3:19 ` [Qemu-devel] [RFC PATCH RDMA support v4: 07/10] connection-establishment for RDMA mrhines
2013-03-18 8:56 ` Paolo Bonzini
2013-03-18 20:26 ` Michael R. Hines
2013-03-18 3:19 ` [Qemu-devel] [RFC PATCH RDMA support v4: 08/10] introduce QEMUFileRDMA mrhines
2013-03-18 9:09 ` Paolo Bonzini [this message]
2013-03-18 20:33 ` Michael R. Hines
2013-03-19 9:18 ` Paolo Bonzini
2013-03-19 13:12 ` Michael R. Hines
2013-03-19 13:25 ` Paolo Bonzini
2013-03-19 13:40 ` Michael R. Hines
2013-03-19 13:45 ` Paolo Bonzini
2013-03-19 14:10 ` Michael R. Hines
2013-03-19 14:22 ` Paolo Bonzini
2013-03-19 15:02 ` [Qemu-devel] [Bug]? (RDMA-related) ballooned memory not consulted during migration? Michael R. Hines
2013-03-19 15:12 ` Michael R. Hines
2013-03-19 15:17 ` Michael S. Tsirkin
2013-03-19 18:27 ` [Qemu-devel] [RFC PATCH RDMA support v4: 08/10] introduce QEMUFileRDMA Michael R. Hines
2013-03-19 18:40 ` Paolo Bonzini
2013-03-20 15:20 ` Paolo Bonzini
2013-03-20 16:09 ` Michael R. Hines
2013-03-18 3:19 ` [Qemu-devel] [RFC PATCH RDMA support v4: 09/10] check for QMP string and bypass nonblock() calls mrhines
2013-03-18 8:47 ` Paolo Bonzini
2013-03-18 20:37 ` Michael R. Hines
2013-03-19 9:23 ` Paolo Bonzini
2013-03-19 13:08 ` Michael R. Hines
2013-03-19 13:20 ` Paolo Bonzini
2013-03-18 3:19 ` [Qemu-devel] [RFC PATCH RDMA support v4: 10/10] send pc.ram over RDMA mrhines
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5146D9BF.3030407@redhat.com \
--to=pbonzini@redhat.com \
--cc=abali@us.ibm.com \
--cc=aliguori@us.ibm.com \
--cc=gokul@us.ibm.com \
--cc=mrhines@linux.vnet.ibm.com \
--cc=mrhines@us.ibm.com \
--cc=mst@redhat.com \
--cc=owasserm@redhat.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.