qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Steven Sistare <steven.sistare@oracle.com>
To: Peter Xu <peterx@redhat.com>
Cc: qemu-devel@nongnu.org, Fabiano Rosas <farosas@suse.de>,
	David Hildenbrand <david@redhat.com>,
	Marcel Apfelbaum <marcel.apfelbaum@gmail.com>,
	Eduardo Habkost <eduardo@habkost.net>,
	Philippe Mathieu-Daude <philmd@linaro.org>,
	Paolo Bonzini <pbonzini@redhat.com>,
	"Daniel P. Berrange" <berrange@redhat.com>,
	Markus Armbruster <armbru@redhat.com>
Subject: Re: [PATCH V2 02/11] migration: cpr-state
Date: Sat, 20 Jul 2024 15:53:58 -0400	[thread overview]
Message-ID: <dbacdeeb-69ba-4b3f-89ff-0704a88339d5@oracle.com> (raw)
In-Reply-To: <ZpqAQ6L6XtIyLREq@x1n>

On 7/19/2024 11:03 AM, Peter Xu wrote:
> On Sun, Jun 30, 2024 at 12:40:25PM -0700, Steve Sistare wrote:
>> CPR must save state that is needed after QEMU is restarted, when devices
>> are realized.  Thus the extra state cannot be saved in the migration stream,
>> as objects must already exist before that stream can be loaded.  Instead,
>> define auxilliary state structures and vmstate descriptions, not associated
>> with any registered object, and serialize the aux state to a cpr-specific
>> stream in cpr_state_save.  Deserialize in cpr_state_load after QEMU
>> restarts, before devices are realized.
>>
>> Provide accessors for clients to register file descriptors for saving.
>> The mechanism for passing the fd's to the new process will be specific
>> to each migration mode, and added in subsequent patches.
>>
>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>> ---
>>   include/migration/cpr.h |  21 ++++++
>>   migration/cpr.c         | 188 ++++++++++++++++++++++++++++++++++++++++++++++++
>>   migration/meson.build   |   1 +
>>   migration/migration.c   |   6 ++
>>   migration/trace-events  |   5 ++
>>   system/vl.c             |   3 +
>>   6 files changed, 224 insertions(+)
>>   create mode 100644 include/migration/cpr.h
>>   create mode 100644 migration/cpr.c
>>
>> diff --git a/include/migration/cpr.h b/include/migration/cpr.h
>> new file mode 100644
>> index 0000000..8e7e705
>> --- /dev/null
>> +++ b/include/migration/cpr.h
>> @@ -0,0 +1,21 @@
>> +/*
>> + * Copyright (c) 2021, 2024 Oracle and/or its affiliates.
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + */
>> +
>> +#ifndef MIGRATION_CPR_H
>> +#define MIGRATION_CPR_H
>> +
>> +typedef int (*cpr_walk_fd_cb)(int fd);
>> +void cpr_save_fd(const char *name, int id, int fd);
>> +void cpr_delete_fd(const char *name, int id);
>> +int cpr_find_fd(const char *name, int id);
>> +int cpr_walk_fd(cpr_walk_fd_cb cb);
>> +void cpr_resave_fd(const char *name, int id, int fd);
>> +
>> +int cpr_state_save(Error **errp);
>> +int cpr_state_load(Error **errp);
>> +
>> +#endif
>> diff --git a/migration/cpr.c b/migration/cpr.c
>> new file mode 100644
>> index 0000000..313e74e
>> --- /dev/null
>> +++ b/migration/cpr.c
>> @@ -0,0 +1,188 @@
>> +/*
>> + * Copyright (c) 2021-2024 Oracle and/or its affiliates.
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qapi/error.h"
>> +#include "migration/cpr.h"
>> +#include "migration/misc.h"
>> +#include "migration/qemu-file.h"
>> +#include "migration/savevm.h"
>> +#include "migration/vmstate.h"
>> +#include "sysemu/runstate.h"
>> +#include "trace.h"
>> +
>> +/*************************************************************************/
>> +/* cpr state container for all information to be saved. */
>> +
>> +typedef QLIST_HEAD(CprFdList, CprFd) CprFdList;
>> +
>> +typedef struct CprState {
>> +    CprFdList fds;
>> +} CprState;
>> +
>> +static CprState cpr_state;
>> +
>> +/****************************************************************************/
>> +
>> +typedef struct CprFd {
>> +    char *name;
>> +    unsigned int namelen;
>> +    int id;
>> +    int fd;
> 
> [1]
> 
>> +    QLIST_ENTRY(CprFd) next;
>> +} CprFd;
>> +
>> +static const VMStateDescription vmstate_cpr_fd = {
>> +    .name = "cpr fd",
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_UINT32(namelen, CprFd),
>> +        VMSTATE_VBUFFER_ALLOC_UINT32(name, CprFd, 0, NULL, namelen),
>> +        VMSTATE_INT32(id, CprFd),
>> +        VMSTATE_INT32(fd, CprFd),
>> +        VMSTATE_END_OF_LIST()
>> +    }
>> +};
>> +
>> +void cpr_save_fd(const char *name, int id, int fd)
>> +{
>> +    CprFd *elem = g_new0(CprFd, 1);
>> +
>> +    trace_cpr_save_fd(name, id, fd);
>> +    elem->name = g_strdup(name);
>> +    elem->namelen = strlen(name) + 1;
>> +    elem->id = id;
>> +    elem->fd = fd;
>> +    QLIST_INSERT_HEAD(&cpr_state.fds, elem, next);
>> +}
>> +
>> +static CprFd *find_fd(CprFdList *head, const char *name, int id)
>> +{
>> +    CprFd *elem;
>> +
>> +    QLIST_FOREACH(elem, head, next) {
>> +        if (!strcmp(elem->name, name) && elem->id == id) {
>> +            return elem;
>> +        }
>> +    }
>> +    return NULL;
>> +}
>> +
>> +void cpr_delete_fd(const char *name, int id)
>> +{
>> +    CprFd *elem = find_fd(&cpr_state.fds, name, id);
>> +
>> +    if (elem) {
>> +        QLIST_REMOVE(elem, next);
>> +        g_free(elem->name);
>> +        g_free(elem);
>> +    }
>> +
>> +    trace_cpr_delete_fd(name, id);
>> +}
>> +
>> +int cpr_find_fd(const char *name, int id)
>> +{
>> +    CprFd *elem = find_fd(&cpr_state.fds, name, id);
>> +    int fd = elem ? elem->fd : -1;
>> +
>> +    trace_cpr_find_fd(name, id, fd);
>> +    return fd;
>> +}
>> +
>> +int cpr_walk_fd(cpr_walk_fd_cb cb)
>> +{
>> +    CprFd *elem;
>> +
>> +    QLIST_FOREACH(elem, &cpr_state.fds, next) {
>> +        if (elem->fd >= 0 && cb(elem->fd)) {
>> +            return 1;
>> +        }
>> +    }
>> +    return 0;
>> +}
>> +
>> +void cpr_resave_fd(const char *name, int id, int fd)
>> +{
>> +    CprFd *elem = find_fd(&cpr_state.fds, name, id);
>> +    int old_fd = elem ? elem->fd : -1;
>> +
>> +    if (old_fd < 0) {
>> +        cpr_save_fd(name, id, fd);
> 
> I don't think I know well on when old_fd<0 would happen yet, as this series
> doesn't look like to use this function at all.  From that POV, maybe nice
> to add a comment above [1] for "fd" field.
cpr_resave_fd can simplify client logic.  It allows the same name,fd,id triplet to
be (re) saved without creating a duplicate entry.  The vfio series uses it.  Yes,
I should add some brief API docs in the header file.

> Meanwhile, do we need to remove the old_fd<0 element here, or is it
> intended to keep that and the new CprFD?

old_fd < 0 is not an entry, it means no entry was found.

>> +    } else if (old_fd != fd) {
>> +        error_setg(&error_fatal,
>> +                   "internal error: cpr fd '%s' id %d value %d "
>> +                   "already saved with a different value %d",
>> +                   name, id, fd, old_fd);
>> +    }
>> +}
>> +/*************************************************************************/
>> +#define CPR_STATE "CprState"
>> +
>> +static const VMStateDescription vmstate_cpr_state = {
>> +    .name = CPR_STATE,
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_QLIST_V(fds, CprState, 1, vmstate_cpr_fd, CprFd, next),
>> +        VMSTATE_END_OF_LIST()
>> +    }
>> +};
>> +/*************************************************************************/
>> +
>> +int cpr_state_save(Error **errp)
>> +{
>> +    int ret;
>> +    QEMUFile *f;
>> +
>> +    /* set f based on mode in a later patch in this series */
>> +    return 0;
>> +
>> +    qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
>> +    qemu_put_be32(f, QEMU_VM_FILE_VERSION);
> 
> Having magic/version makes sense to me, though I'd suggest we use CPR new
> magic/versions, so that if we see an binary dump we know what it is, and we
> don't mixup a CPR image against a migration stream image.

Will do.

>> +
>> +    ret = vmstate_save_state(f, &vmstate_cpr_state, &cpr_state, 0);
> 
> s/0/NULL/

Will do.

>> +    if (ret) {
>> +        error_setg(errp, "vmstate_save_state error %d", ret);
>> +    }
> 
> Can consider using vmstate_save_state_with_err().

Cool, will do.

>> +
>> +    qemu_fclose(f);
>> +    return ret;
>> +}
>> +
>> +int cpr_state_load(Error **errp)
>> +{
>> +    int ret;
>> +    uint32_t v;
>> +    QEMUFile *f;
>> +
>> +    /* set f based on mode in a later patch in this series */
>> +    return 0;
>> +
>> +    v = qemu_get_be32(f);
>> +    if (v != QEMU_VM_FILE_MAGIC) {
>> +        error_setg(errp, "Not a migration stream (bad magic %x)", v);
>> +        qemu_fclose(f);
>> +        return -EINVAL;
>> +    }
>> +    v = qemu_get_be32(f);
>> +    if (v != QEMU_VM_FILE_VERSION) {
>> +        error_setg(errp, "Unsupported migration stream version %d", v);
>> +        qemu_fclose(f);
>> +        return -ENOTSUP;
>> +    }
>> +
>> +    ret = vmstate_load_state(f, &vmstate_cpr_state, &cpr_state, 1);
>> +    if (ret) {
>> +        error_setg(errp, "vmstate_load_state error %d", ret);
>> +    }
> 
> Simiarly, can use vmstate_save_state_with_err().

Hmm, vmstate_load_state_with_err does not exist.

>> +
>> +    qemu_fclose(f);
>> +    return ret;
>> +}
>> +
>> diff --git a/migration/meson.build b/migration/meson.build
>> index 5ce2acb4..87feb4c 100644
>> --- a/migration/meson.build
>> +++ b/migration/meson.build
>> @@ -13,6 +13,7 @@ system_ss.add(files(
>>     'block-dirty-bitmap.c',
>>     'channel.c',
>>     'channel-block.c',
>> +  'cpr.c',
>>     'dirtyrate.c',
>>     'exec.c',
>>     'fd.c',
>> diff --git a/migration/migration.c b/migration/migration.c
>> index 3dea06d..e394ad7 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -27,6 +27,7 @@
>>   #include "sysemu/cpu-throttle.h"
>>   #include "rdma.h"
>>   #include "ram.h"
>> +#include "migration/cpr.h"
>>   #include "migration/global_state.h"
>>   #include "migration/misc.h"
>>   #include "migration.h"
>> @@ -2118,6 +2119,10 @@ void qmp_migrate(const char *uri, bool has_channels,
>>           }
>>       }
>>   
>> +    if (cpr_state_save(&local_err)) {
>> +        goto out;
>> +    }
>> +
>>       if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
>>           SocketAddress *saddr = &addr->u.socket;
>>           if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
>> @@ -2142,6 +2147,7 @@ void qmp_migrate(const char *uri, bool has_channels,
>>                             MIGRATION_STATUS_FAILED);
>>       }
>>   
>> +out:
>>       if (local_err) {
>>           if (!resume_requested) {
>>               yank_unregister_instance(MIGRATION_YANK_INSTANCE);
>> diff --git a/migration/trace-events b/migration/trace-events
>> index 0b7c332..173f2c0 100644
>> --- a/migration/trace-events
>> +++ b/migration/trace-events
>> @@ -340,6 +340,11 @@ colo_receive_message(const char *msg) "Receive '%s' message"
>>   # colo-failover.c
>>   colo_failover_set_state(const char *new_state) "new state %s"
>>   
>> +# cpr.c
>> +cpr_save_fd(const char *name, int id, int fd) "%s, id %d, fd %d"
>> +cpr_delete_fd(const char *name, int id) "%s, id %d"
>> +cpr_find_fd(const char *name, int id, int fd) "%s, id %d returns %d"
>> +
>>   # block-dirty-bitmap.c
>>   send_bitmap_header_enter(void) ""
>>   send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, uint64_t data_size) "flags: 0x%x, start_sector: %" PRIu64 ", nr_sectors: %" PRIu32 ", data_size: %" PRIu64
>> diff --git a/system/vl.c b/system/vl.c
>> index 03951be..6521ee3 100644
>> --- a/system/vl.c
>> +++ b/system/vl.c
>> @@ -77,6 +77,7 @@
>>   #include "hw/block/block.h"
>>   #include "hw/i386/x86.h"
>>   #include "hw/i386/pc.h"
>> +#include "migration/cpr.h"
>>   #include "migration/misc.h"
>>   #include "migration/snapshot.h"
>>   #include "sysemu/tpm.h"
>> @@ -3713,6 +3714,8 @@ void qemu_init(int argc, char **argv)
>>   
>>       qemu_create_machine(machine_opts_dict);
>>   
>> +    cpr_state_load(&error_fatal);
> 
> Might be good to add a rich comment here explaining the decision on why
> loading here; I think most of the tricks lie here.  E.g., it needs to be
> before XXX and it needs to be after YYY.

Will do.

- Steve


  reply	other threads:[~2024-07-20 19:55 UTC|newest]

Thread overview: 77+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-30 19:40 [PATCH V2 00/11] Live update: cpr-exec Steve Sistare
2024-06-30 19:40 ` [PATCH V2 01/11] machine: alloc-anon option Steve Sistare
2024-07-15 17:52   ` Fabiano Rosas
2024-07-16  9:19   ` Igor Mammedov
2024-07-17 19:24     ` Peter Xu
2024-07-18 15:43       ` Steven Sistare
2024-07-18 16:22         ` Peter Xu
2024-07-20 20:35       ` Steven Sistare
2024-08-04 16:20         ` Peter Xu
2024-07-20 20:28     ` Steven Sistare
2024-07-22  9:10       ` David Hildenbrand
2024-07-29 12:29       ` Igor Mammedov
2024-08-08 18:32         ` Steven Sistare
2024-08-12 18:37           ` Steven Sistare
2024-08-13 15:35             ` Peter Xu
2024-08-13 17:00               ` Alex Williamson
2024-08-13 18:45                 ` Peter Xu
2024-08-13 18:56                   ` Steven Sistare
2024-08-13 18:46                 ` Steven Sistare
2024-08-13 18:49                   ` Steven Sistare
2024-08-13 17:34               ` Steven Sistare
2024-08-13 19:02                 ` Peter Xu
2024-06-30 19:40 ` [PATCH V2 02/11] migration: cpr-state Steve Sistare
2024-07-17 18:39   ` Fabiano Rosas
2024-07-19 15:03   ` Peter Xu
2024-07-20 19:53     ` Steven Sistare [this message]
2024-06-30 19:40 ` [PATCH V2 03/11] migration: save cpr mode Steve Sistare
2024-07-17 18:39   ` Fabiano Rosas
2024-07-18 15:47     ` Steven Sistare
2024-06-30 19:40 ` [PATCH V2 04/11] migration: stop vm earlier for cpr Steve Sistare
2024-07-17 18:59   ` Fabiano Rosas
2024-07-20 20:00     ` Steven Sistare
2024-07-22 13:42       ` Fabiano Rosas
2024-08-06 20:52         ` Steven Sistare
2024-06-30 19:40 ` [PATCH V2 05/11] physmem: preserve ram blocks " Steve Sistare
2024-06-30 19:40 ` [PATCH V2 06/11] migration: fix mismatched GPAs during cpr Steve Sistare
2024-07-19 16:28   ` Peter Xu
2024-07-20 21:28     ` Steven Sistare
2024-08-07 21:04     ` Steven Sistare
2024-08-13 20:43       ` Peter Xu
2024-08-15 20:54         ` Steven Sistare
2024-08-16 14:43           ` Peter Xu
2024-08-16 17:10             ` Steven Sistare
2024-08-21 16:57               ` Peter Xu
2024-06-30 19:40 ` [PATCH V2 07/11] oslib: qemu_clear_cloexec Steve Sistare
2024-06-30 19:40 ` [PATCH V2 08/11] vl: helper to request exec Steve Sistare
2024-06-30 19:40 ` [PATCH V2 09/11] migration: cpr-exec-command parameter Steve Sistare
2024-06-30 19:40 ` [PATCH V2 10/11] migration: cpr-exec save and load Steve Sistare
2024-06-30 19:40 ` [PATCH V2 11/11] migration: cpr-exec mode Steve Sistare
2024-07-18 15:56 ` [PATCH V2 00/11] Live update: cpr-exec Peter Xu
2024-07-20 21:26   ` Steven Sistare
2024-08-04 16:10     ` Peter Xu
2024-08-07 19:47       ` Steven Sistare
2024-08-13 20:12         ` Peter Xu
2024-08-20 16:28           ` [PATCH V2 00/11] Live update: cpr-exec (reconnections) Steven Sistare
2024-07-22  8:59   ` [PATCH V2 00/11] Live update: cpr-exec David Hildenbrand
2024-08-04 15:43     ` Peter Xu
2024-08-05  9:52       ` David Hildenbrand
2024-08-05 10:06         ` David Hildenbrand
2024-08-05 10:01   ` Daniel P. Berrangé
2024-08-06 20:56     ` Steven Sistare
2024-08-13 19:46       ` Peter Xu
2024-08-15 20:55         ` Steven Sistare
2024-08-16 15:06           ` Peter Xu
2024-08-16 15:16             ` Daniel P. Berrangé
2024-08-16 15:19               ` Steven Sistare
2024-08-16 15:34               ` Peter Xu
2024-08-16 16:00                 ` Daniel P. Berrangé
2024-08-16 16:17                   ` Peter Xu
2024-08-16 16:28                     ` Daniel P. Berrangé
2024-08-16 17:09                     ` Steven Sistare
2024-08-21 18:34                       ` Peter Xu
2024-09-04 20:58                         ` Steven Sistare
2024-09-04 22:23                           ` Peter Xu
2024-09-05  9:49                             ` Daniel P. Berrangé
2024-09-05  9:43                           ` Daniel P. Berrangé
2024-09-05  9:30                       ` Daniel P. Berrangé

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=dbacdeeb-69ba-4b3f-89ff-0704a88339d5@oracle.com \
    --to=steven.sistare@oracle.com \
    --cc=armbru@redhat.com \
    --cc=berrange@redhat.com \
    --cc=david@redhat.com \
    --cc=eduardo@habkost.net \
    --cc=farosas@suse.de \
    --cc=marcel.apfelbaum@gmail.com \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=philmd@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).