qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Paolo Bonzini <pbonzini@redhat.com>
To: mrhines@linux.vnet.ibm.com
Cc: aliguori@us.ibm.com, mst@redhat.com, qemu-devel@nongnu.org,
	owasserm@redhat.com, abali@us.ibm.com, mrhines@us.ibm.com,
	gokul@us.ibm.com
Subject: Re: [Qemu-devel] [RFC PATCH RDMA support v4: 08/10] introduce QEMUFileRDMA
Date: Mon, 18 Mar 2013 10:09:19 +0100	[thread overview]
Message-ID: <5146D9BF.3030407@redhat.com> (raw)
In-Reply-To: <1363576743-6146-9-git-send-email-mrhines@linux.vnet.ibm.com>

Il 18/03/2013 04:19, mrhines@linux.vnet.ibm.com ha scritto:
> From: "Michael R. Hines" <mrhines@us.ibm.com>
> 
> This compiles with and without --enable-rdma.
> 
> Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>
> ---
>  include/migration/qemu-file.h |   10 +++
>  savevm.c                      |  172 ++++++++++++++++++++++++++++++++++++++---
>  2 files changed, 172 insertions(+), 10 deletions(-)
> 
> diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
> index df81261..9046751 100644
> --- a/include/migration/qemu-file.h
> +++ b/include/migration/qemu-file.h
> @@ -51,23 +51,33 @@ typedef int (QEMUFileCloseFunc)(void *opaque);
>   */
>  typedef int (QEMUFileGetFD)(void *opaque);
>  
> +/* 
> + * 'drain' from a QEMUFile perspective means
> + * to flush the outbound send buffer
> + * (if one exists). (Only used by RDMA right now)
> + */
> +typedef int (QEMUFileDrainFunc)(void *opaque);
> +
>  typedef struct QEMUFileOps {
>      QEMUFilePutBufferFunc *put_buffer;
>      QEMUFileGetBufferFunc *get_buffer;
>      QEMUFileCloseFunc *close;
>      QEMUFileGetFD *get_fd;
> +    QEMUFileDrainFunc *drain;
>  } QEMUFileOps;
>  
>  QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops);
>  QEMUFile *qemu_fopen(const char *filename, const char *mode);
>  QEMUFile *qemu_fdopen(int fd, const char *mode);
>  QEMUFile *qemu_fopen_socket(int fd, const char *mode);
> +QEMUFile *qemu_fopen_rdma(void *opaque, const char *mode);
>  QEMUFile *qemu_popen_cmd(const char *command, const char *mode);
>  int qemu_get_fd(QEMUFile *f);
>  int qemu_fclose(QEMUFile *f);
>  int64_t qemu_ftell(QEMUFile *f);
>  void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size);
>  void qemu_put_byte(QEMUFile *f, int v);
> +int qemu_drain(QEMUFile *f);
>  
>  static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v)
>  {
> diff --git a/savevm.c b/savevm.c
> index 35c8d1e..9b90b7f 100644
> --- a/savevm.c
> +++ b/savevm.c
> @@ -32,6 +32,7 @@
>  #include "qemu/timer.h"
>  #include "audio/audio.h"
>  #include "migration/migration.h"
> +#include "migration/rdma.h"
>  #include "qemu/sockets.h"
>  #include "qemu/queue.h"
>  #include "sysemu/cpus.h"
> @@ -143,6 +144,13 @@ typedef struct QEMUFileSocket
>      QEMUFile *file;
>  } QEMUFileSocket;
>  
> +typedef struct QEMUFileRDMA
> +{
> +    void *rdma;

This is an RDMAData *.  Please avoid using void * as much as possible.

> +    size_t len;
> +    QEMUFile *file;
> +} QEMUFileRDMA;
> +
>  typedef struct {
>      Coroutine *co;
>      int fd;
> @@ -178,6 +186,66 @@ static int socket_get_fd(void *opaque)
>      return s->fd;
>  }
>  
> +/*
> + * SEND messages for non-live state only.
> + * pc.ram is handled elsewhere...
> + */
> +static int qemu_rdma_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int size)
> +{
> +    QEMUFileRDMA *r = opaque;
> +    size_t remaining = size;
> +    uint8_t * data = (void *) buf;
> +
> +    /*
> +     * Although we're sending non-live
> +     * state here, push out any writes that
> +     * we've queued up for pc.ram anyway.
> +     */
> +    if (qemu_rdma_write_flush(r->rdma) < 0)
> +        return -EIO;
> +
> +    while(remaining) {
> +        r->len = MIN(remaining, RDMA_SEND_INCREMENT);
> +        remaining -= r->len;
> +
> +        if(qemu_rdma_exchange_send(r->rdma, data, r->len) < 0)
> +                return -EINVAL;
> +
> +        data += r->len;
> +    }
> +
> +    return size;
> +} 
> +
> +/*
> + * RDMA links don't use bytestreams, so we have to
> + * return bytes to QEMUFile opportunistically.
> + */
> +static int qemu_rdma_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
> +{
> +    QEMUFileRDMA *r = opaque;
> +
> +    /*
> +     * First, we hold on to the last SEND message we 
> +     * were given and dish out the bytes until we run 
> +     * out of bytes.
> +     */
> +    if((r->len = qemu_rdma_fill(r->rdma, buf, size)))
> +	return r->len; 
> +
> +     /*
> +      * Once we run out, we block and wait for another
> +      * SEND message to arrive.
> +      */
> +    if(qemu_rdma_exchange_recv(r->rdma) < 0)
> +	return -EINVAL;
> +
> +    /*
> +     * SEND was received with new bytes, now try again.
> +     */
> +    return qemu_rdma_fill(r->rdma, buf, size);
> +} 

Please move these functions closer to qemu_fopen_rdma (or better, to an
RDMA-specific file altogether).  Also, using qemu_rdma_fill introduces a
dependency of savevm.c on migration-rdma.c.  There should be no such
dependency; migration-rdma.c should be used only by migration.c.

>  static int socket_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
>  {
>      QEMUFileSocket *s = opaque;
> @@ -390,16 +458,24 @@ static const QEMUFileOps socket_write_ops = {
>      .close =      socket_close
>  };
>  
> -QEMUFile *qemu_fopen_socket(int fd, const char *mode)
> +static bool qemu_mode_is_not_valid(const char * mode)
>  {
> -    QEMUFileSocket *s = g_malloc0(sizeof(QEMUFileSocket));
> -
>      if (mode == NULL ||
>          (mode[0] != 'r' && mode[0] != 'w') ||
>          mode[1] != 'b' || mode[2] != 0) {
>          fprintf(stderr, "qemu_fopen: Argument validity check failed\n");
> -        return NULL;
> +        return true;
>      }
> +    
> +    return false;
> +}
> +
> +QEMUFile *qemu_fopen_socket(int fd, const char *mode)
> +{
> +    QEMUFileSocket *s = g_malloc0(sizeof(QEMUFileSocket));
> +
> +    if(qemu_mode_is_not_valid(mode))
> +	return NULL;
>  
>      s->fd = fd;
>      if (mode[0] == 'w') {
> @@ -411,16 +487,66 @@ QEMUFile *qemu_fopen_socket(int fd, const char *mode)
>      return s->file;
>  }
>  
> +static int qemu_rdma_close(void *opaque)
> +{
> +    QEMUFileRDMA *r = opaque;
> +    if(r->rdma) {
> +        qemu_rdma_cleanup(r->rdma);
> +        g_free(r->rdma);
> +    }
> +    g_free(r);
> +    return 0;
> +}
> +
> +void * migrate_use_rdma(QEMUFile *f)
> +{
> +    QEMUFileRDMA *r = f->opaque;
> +
> +    return qemu_rdma_enabled(r->rdma) ? r->rdma : NULL;

You cannot be sure that f->opaque->rdma is a valid pointer.  For
example, the first field in a socket QEMUFile's opaque structure is a
file descriptor.

Instead, you could use a qemu_file_ops_are(const QEMUFile *, const
QEMUFileOps *) function that checks if the file uses the given ops.
Then, migrate_use_rdma can simply check if the QEMUFile is using the
RDMA ops structure.

With this change, the "enabled" field of RDMAData should go.

> +}
> +
> +static int qemu_rdma_drain_completion(void *opaque)
> +{
> +    QEMUFileRDMA *r = opaque;
> +    r->len = 0;
> +    return qemu_rdma_drain_cq(r->rdma);
> +}
> +
> +static const QEMUFileOps rdma_read_ops = {
> +    .get_buffer = qemu_rdma_get_buffer,
> +    .close =      qemu_rdma_close,
> +};
> +
> +static const QEMUFileOps rdma_write_ops = {
> +    .put_buffer = qemu_rdma_put_buffer,
> +    .close =      qemu_rdma_close,
> +    .drain =	  qemu_rdma_drain_completion,
> +};
> +
> +QEMUFile *qemu_fopen_rdma(void *opaque, const char * mode)
> +{
> +    QEMUFileRDMA *r = g_malloc0(sizeof(QEMUFileRDMA));
> +
> +    if(qemu_mode_is_not_valid(mode))
> +	return NULL;
> +
> +    r->rdma = opaque;
> +
> +    if (mode[0] == 'w') {
> +        r->file = qemu_fopen_ops(r, &rdma_write_ops);
> +    } else {
> +        r->file = qemu_fopen_ops(r, &rdma_read_ops);
> +    }
> +
> +    return r->file;
> +}
> +
>  QEMUFile *qemu_fopen(const char *filename, const char *mode)
>  {
>      QEMUFileStdio *s;
>  
> -    if (mode == NULL ||
> -	(mode[0] != 'r' && mode[0] != 'w') ||
> -	mode[1] != 'b' || mode[2] != 0) {
> -        fprintf(stderr, "qemu_fopen: Argument validity check failed\n");
> -        return NULL;
> -    }
> +    if(qemu_mode_is_not_valid(mode))
> +	return NULL;
>  
>      s = g_malloc0(sizeof(QEMUFileStdio));
>  
> @@ -497,6 +623,24 @@ static void qemu_file_set_error(QEMUFile *f, int ret)
>      }
>  }
>  
> +/*
> + * Called only for RDMA right now at the end 
> + * of each live iteration of memory.
> + *
> + * 'drain' from a QEMUFile perspective means
> + * to flush the outbound send buffer
> + * (if one exists). 
> + *
> + * For RDMA, this means to make sure we've
> + * received completion queue (CQ) messages
> + * successfully for all of the RDMA writes
> + * that we requested.
> + */ 
> +int qemu_drain(QEMUFile *f)
> +{
> +    return f->ops->drain ? f->ops->drain(f->opaque) : 0;
> +}

Hmm, this is very similar to qemu_fflush, but not quite. :/

Why exactly is this needed?

>  /** Flushes QEMUFile buffer
>   *
>   */
> @@ -723,6 +867,8 @@ int qemu_get_byte(QEMUFile *f)
>  int64_t qemu_ftell(QEMUFile *f)
>  {
>      qemu_fflush(f);
> +    if(migrate_use_rdma(f))
> +	return delta_norm_mig_bytes_transferred();

Not needed, and another undesirable dependency (savevm.c ->
arch_init.c).  Just update f->pos in save_rdma_page.

This is taking shape.  Thanks for persevering!

Paolo

>      return f->pos;
>  }
>  
> @@ -1737,6 +1883,12 @@ void qemu_savevm_state_complete(QEMUFile *f)
>          }
>      }
>  
> +    if ((ret = qemu_drain(f)) < 0) {
> +	fprintf(stderr, "failed to drain RDMA first!\n");
> +        qemu_file_set_error(f, ret);
> +	return;
> +    }
> +
>      QTAILQ_FOREACH(se, &savevm_handlers, entry) {
>          int len;
>  
> 

  reply	other threads:[~2013-03-18  9:09 UTC|newest]

Thread overview: 73+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-03-18  3:18 [Qemu-devel] [RFC PATCH RDMA support v4: 00/10] cleaner ramblocks and documentation mrhines
2013-03-18  3:18 ` [Qemu-devel] [RFC PATCH RDMA support v4: 01/10] ./configure --enable-rdma mrhines
2013-03-18  3:18 ` [Qemu-devel] [RFC PATCH RDMA support v4: 02/10] check for CONFIG_RDMA mrhines
2013-03-18  3:18 ` [Qemu-devel] [RFC PATCH RDMA support v4: 03/10] more verbose documentation of the RDMA transport mrhines
2013-03-18 10:40   ` Michael S. Tsirkin
2013-03-18 20:24     ` Michael R. Hines
2013-03-18 21:26       ` Michael S. Tsirkin
2013-03-18 23:23         ` Michael R. Hines
2013-03-19  8:19           ` Michael S. Tsirkin
2013-03-19 13:21             ` Michael R. Hines
2013-03-19 15:08             ` Michael R. Hines
2013-03-19 15:16               ` Michael S. Tsirkin
2013-03-19 15:32                 ` Michael R. Hines
2013-03-19 15:36                   ` Michael S. Tsirkin
2013-03-19 17:09                     ` Michael R. Hines
2013-03-19 17:14                       ` Paolo Bonzini
2013-03-19 17:23                         ` Michael S. Tsirkin
2013-03-19 17:40                         ` Michael R. Hines
2013-03-19 17:52                           ` Paolo Bonzini
2013-03-19 18:04                             ` Michael R. Hines
2013-03-20 13:07                             ` Michael S. Tsirkin
2013-03-20 15:15                               ` Michael R. Hines
2013-03-20 15:22                                 ` Michael R. Hines
2013-03-20 15:55                                 ` Michael S. Tsirkin
2013-03-20 16:08                                   ` Michael R. Hines
2013-03-20 19:06                                     ` Michael S. Tsirkin
2013-03-20 20:20                                       ` Michael R. Hines
2013-03-20 20:31                                         ` Michael S. Tsirkin
2013-03-20 20:39                                           ` Michael R. Hines
2013-03-20 20:46                                             ` Michael S. Tsirkin
2013-03-20 20:56                                               ` Michael R. Hines
2013-03-21  5:20                                                 ` Michael S. Tsirkin
2013-03-20 20:24                                   ` Michael R. Hines
2013-03-20 20:37                                     ` Michael S. Tsirkin
2013-03-20 20:45                                       ` Michael R. Hines
2013-03-20 20:52                                         ` Michael S. Tsirkin
2013-03-19 17:49                         ` Michael R. Hines
2013-03-21  6:11                           ` Michael S. Tsirkin
2013-03-21 15:22                             ` Michael R. Hines
2013-04-05 20:45                             ` Michael R. Hines
2013-04-05 20:46                             ` Michael R. Hines
2013-03-18  3:18 ` [Qemu-devel] [RFC PATCH RDMA support v4: 04/10] iterators for getting the RAMBlocks mrhines
2013-03-18  8:48   ` Paolo Bonzini
2013-03-18 20:25     ` Michael R. Hines
2013-03-18  3:18 ` [Qemu-devel] [RFC PATCH RDMA support v4: 05/10] reuse function for parsing the QMP 'migrate' string mrhines
2013-03-18  3:18 ` [Qemu-devel] [RFC PATCH RDMA support v4: 06/10] core RDMA migration code (rdma.c) mrhines
2013-03-18  3:19 ` [Qemu-devel] [RFC PATCH RDMA support v4: 07/10] connection-establishment for RDMA mrhines
2013-03-18  8:56   ` Paolo Bonzini
2013-03-18 20:26     ` Michael R. Hines
2013-03-18  3:19 ` [Qemu-devel] [RFC PATCH RDMA support v4: 08/10] introduce QEMUFileRDMA mrhines
2013-03-18  9:09   ` Paolo Bonzini [this message]
2013-03-18 20:33     ` Michael R. Hines
2013-03-19  9:18       ` Paolo Bonzini
2013-03-19 13:12         ` Michael R. Hines
2013-03-19 13:25           ` Paolo Bonzini
2013-03-19 13:40             ` Michael R. Hines
2013-03-19 13:45               ` Paolo Bonzini
2013-03-19 14:10                 ` Michael R. Hines
2013-03-19 14:22                   ` Paolo Bonzini
2013-03-19 15:02                     ` [Qemu-devel] [Bug]? (RDMA-related) ballooned memory not consulted during migration? Michael R. Hines
2013-03-19 15:12                       ` Michael R. Hines
2013-03-19 15:17                         ` Michael S. Tsirkin
2013-03-19 18:27                     ` [Qemu-devel] [RFC PATCH RDMA support v4: 08/10] introduce QEMUFileRDMA Michael R. Hines
2013-03-19 18:40                       ` Paolo Bonzini
2013-03-20 15:20                         ` Paolo Bonzini
2013-03-20 16:09                           ` Michael R. Hines
2013-03-18  3:19 ` [Qemu-devel] [RFC PATCH RDMA support v4: 09/10] check for QMP string and bypass nonblock() calls mrhines
2013-03-18  8:47   ` Paolo Bonzini
2013-03-18 20:37     ` Michael R. Hines
2013-03-19  9:23       ` Paolo Bonzini
2013-03-19 13:08         ` Michael R. Hines
2013-03-19 13:20           ` Paolo Bonzini
2013-03-18  3:19 ` [Qemu-devel] [RFC PATCH RDMA support v4: 10/10] send pc.ram over RDMA mrhines

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5146D9BF.3030407@redhat.com \
    --to=pbonzini@redhat.com \
    --cc=abali@us.ibm.com \
    --cc=aliguori@us.ibm.com \
    --cc=gokul@us.ibm.com \
    --cc=mrhines@linux.vnet.ibm.com \
    --cc=mrhines@us.ibm.com \
    --cc=mst@redhat.com \
    --cc=owasserm@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).