* [RFC V1 1/6] migration: SCM_RIGHTS for QEMUFile
2024-06-30 19:44 [RFC V1 0/6] Live update: cpr-transfer Steve Sistare
@ 2024-06-30 19:44 ` Steve Sistare
2024-08-02 8:20 ` Euan Turner
2024-08-15 20:58 ` Peter Xu
2024-06-30 19:44 ` [RFC V1 2/6] migration: VMSTATE_FD Steve Sistare
` (5 subsequent siblings)
6 siblings, 2 replies; 30+ messages in thread
From: Steve Sistare @ 2024-06-30 19:44 UTC (permalink / raw)
To: qemu-devel; +Cc: Peter Xu, Fabiano Rosas, Markus Armbruster, Steve Sistare
Define functions to put/get file descriptors to/from a QEMUFile, for qio
channels that support SCM_RIGHTS. Maintain ordering such that
put(A), put(fd), put(B)
followed by
get(A), get(fd), get(B)
always succeeds. Other get orderings may succeed but are not guaranteed.
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
migration/qemu-file.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++---
migration/qemu-file.h | 2 ++
migration/trace-events | 2 ++
3 files changed, 83 insertions(+), 4 deletions(-)
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index b6d2f58..424c27d 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -37,6 +37,11 @@
#define IO_BUF_SIZE 32768
#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
+typedef struct FdEntry {
+ QTAILQ_ENTRY(FdEntry) entry;
+ int fd;
+} FdEntry;
+
struct QEMUFile {
QIOChannel *ioc;
bool is_writable;
@@ -51,6 +56,9 @@ struct QEMUFile {
int last_error;
Error *last_error_obj;
+
+ bool fd_pass;
+ QTAILQ_HEAD(, FdEntry) fds;
};
/*
@@ -109,6 +117,8 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
object_ref(ioc);
f->ioc = ioc;
f->is_writable = is_writable;
+ f->fd_pass = qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS);
+ QTAILQ_INIT(&f->fds);
return f;
}
@@ -310,6 +320,10 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
int len;
int pending;
Error *local_error = NULL;
+ g_autofree int *fds = NULL;
+ size_t nfd = 0;
+ int **pfds = f->fd_pass ? &fds : NULL;
+ size_t *pnfd = f->fd_pass ? &nfd : NULL;
assert(!qemu_file_is_writable(f));
@@ -325,10 +339,9 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
}
do {
- len = qio_channel_read(f->ioc,
- (char *)f->buf + pending,
- IO_BUF_SIZE - pending,
- &local_error);
+ struct iovec iov = { f->buf + pending, IO_BUF_SIZE - pending };
+ len = qio_channel_readv_full(f->ioc, &iov, 1, pfds, pnfd, 0,
+ &local_error);
if (len == QIO_CHANNEL_ERR_BLOCK) {
if (qemu_in_coroutine()) {
qio_channel_yield(f->ioc, G_IO_IN);
@@ -348,9 +361,65 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
qemu_file_set_error_obj(f, len, local_error);
}
+ for (int i = 0; i < nfd; i++) {
+ FdEntry *fde = g_new0(FdEntry, 1);
+ fde->fd = fds[i];
+ QTAILQ_INSERT_TAIL(&f->fds, fde, entry);
+ }
+
return len;
}
+int qemu_file_put_fd(QEMUFile *f, int fd)
+{
+ int ret = 0;
+ QIOChannel *ioc = qemu_file_get_ioc(f);
+ Error *err = NULL;
+ struct iovec iov = { (void *)" ", 1 };
+
+ /*
+ * Send a dummy byte so qemu_fill_buffer on the receiving side does not
+ * fail with a len=0 error. Flush first to maintain ordering wrt other
+ * data.
+ */
+
+ qemu_fflush(f);
+ if (qio_channel_writev_full(ioc, &iov, 1, &fd, 1, 0, &err) < 1) {
+ error_report_err(error_copy(err));
+ qemu_file_set_error_obj(f, -EIO, err);
+ ret = -1;
+ }
+ trace_qemu_file_put_fd(f->ioc->name, fd, ret);
+ return 0;
+}
+
+int qemu_file_get_fd(QEMUFile *f)
+{
+ int fd = -1;
+ FdEntry *fde;
+
+ if (!f->fd_pass) {
+ Error *err = NULL;
+ error_setg(&err, "%s does not support fd passing", f->ioc->name);
+ error_report_err(error_copy(err));
+ qemu_file_set_error_obj(f, -EIO, err);
+ goto out;
+ }
+
+ /* Force the dummy byte and its fd passenger to appear. */
+ qemu_peek_byte(f, 0);
+
+ fde = QTAILQ_FIRST(&f->fds);
+ if (fde) {
+ qemu_get_byte(f); /* Drop the dummy byte */
+ fd = fde->fd;
+ QTAILQ_REMOVE(&f->fds, fde, entry);
+ }
+out:
+ trace_qemu_file_get_fd(f->ioc->name, fd);
+ return fd;
+}
+
/** Closes the file
*
* Returns negative error value if any error happened on previous operations or
@@ -361,11 +430,17 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
*/
int qemu_fclose(QEMUFile *f)
{
+ FdEntry *fde, *next;
int ret = qemu_fflush(f);
int ret2 = qio_channel_close(f->ioc, NULL);
if (ret >= 0) {
ret = ret2;
}
+ QTAILQ_FOREACH_SAFE(fde, &f->fds, entry, next) {
+ warn_report("qemu_fclose: received fd %d was never claimed", fde->fd);
+ close(fde->fd);
+ g_free(fde);
+ }
g_clear_pointer(&f->ioc, object_unref);
error_free(f->last_error_obj);
g_free(f);
diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index 11c2120..3e47a20 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -79,5 +79,7 @@ size_t qemu_get_buffer_at(QEMUFile *f, const uint8_t *buf, size_t buflen,
off_t pos);
QIOChannel *qemu_file_get_ioc(QEMUFile *file);
+int qemu_file_put_fd(QEMUFile *f, int fd);
+int qemu_file_get_fd(QEMUFile *f);
#endif
diff --git a/migration/trace-events b/migration/trace-events
index 173f2c0..064b22d 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -88,6 +88,8 @@ put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)"
# qemu-file.c
qemu_file_fclose(void) ""
+qemu_file_put_fd(const char *name, int fd, int ret) "ioc %s, fd %d -> status %d"
+qemu_file_get_fd(const char *name, int fd) "ioc %s -> fd %d"
# ram.c
get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx"
--
1.8.3.1
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [RFC V1 1/6] migration: SCM_RIGHTS for QEMUFile
2024-06-30 19:44 ` [RFC V1 1/6] migration: SCM_RIGHTS for QEMUFile Steve Sistare
@ 2024-08-02 8:20 ` Euan Turner
2024-08-05 19:06 ` Steven Sistare
2024-08-15 20:58 ` Peter Xu
1 sibling, 1 reply; 30+ messages in thread
From: Euan Turner @ 2024-08-02 8:20 UTC (permalink / raw)
To: Steve Sistare, qemu-devel; +Cc: Peter Xu, Fabiano Rosas, Markus Armbruster
On 30/06/2024 20:44, Steve Sistare wrote:
> Define functions to put/get file descriptors to/from a QEMUFile, for qio
> channels that support SCM_RIGHTS. Maintain ordering such that
> put(A), put(fd), put(B)
> followed by
> get(A), get(fd), get(B)
> always succeeds. Other get orderings may succeed but are not guaranteed.
>
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> ---
> migration/qemu-file.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++---
> migration/qemu-file.h | 2 ++
> migration/trace-events | 2 ++
> 3 files changed, 83 insertions(+), 4 deletions(-)
>
> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> index b6d2f58..424c27d 100644
> --- a/migration/qemu-file.c
> +++ b/migration/qemu-file.c
> @@ -37,6 +37,11 @@
> #define IO_BUF_SIZE 32768
> #define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
>
> +typedef struct FdEntry {
> + QTAILQ_ENTRY(FdEntry) entry;
> + int fd;
> +} FdEntry;
> +
> struct QEMUFile {
> QIOChannel *ioc;
> bool is_writable;
> @@ -51,6 +56,9 @@ struct QEMUFile {
>
> int last_error;
> Error *last_error_obj;
> +
> + bool fd_pass;
> + QTAILQ_HEAD(, FdEntry) fds;
> };
>
> /*
> @@ -109,6 +117,8 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
> object_ref(ioc);
> f->ioc = ioc;
> f->is_writable = is_writable;
> + f->fd_pass = qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS);
> + QTAILQ_INIT(&f->fds);
>
> return f;
> }
> @@ -310,6 +320,10 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
> int len;
> int pending;
> Error *local_error = NULL;
> + g_autofree int *fds = NULL;
> + size_t nfd = 0;
> + int **pfds = f->fd_pass ? &fds : NULL;
> + size_t *pnfd = f->fd_pass ? &nfd : NULL;
>
> assert(!qemu_file_is_writable(f));
>
> @@ -325,10 +339,9 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
> }
>
> do {
> - len = qio_channel_read(f->ioc,
> - (char *)f->buf + pending,
> - IO_BUF_SIZE - pending,
> - &local_error);
> + struct iovec iov = { f->buf + pending, IO_BUF_SIZE - pending };
> + len = qio_channel_readv_full(f->ioc, &iov, 1, pfds, pnfd, 0,
> + &local_error);
> if (len == QIO_CHANNEL_ERR_BLOCK) {
> if (qemu_in_coroutine()) {
> qio_channel_yield(f->ioc, G_IO_IN);
> @@ -348,9 +361,65 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
> qemu_file_set_error_obj(f, len, local_error);
> }
>
> + for (int i = 0; i < nfd; i++) {
> + FdEntry *fde = g_new0(FdEntry, 1);
> + fde->fd = fds[i];
> + QTAILQ_INSERT_TAIL(&f->fds, fde, entry);
> + }
> +
> return len;
> }
>
> +int qemu_file_put_fd(QEMUFile *f, int fd)
> +{
> + int ret = 0;
> + QIOChannel *ioc = qemu_file_get_ioc(f);
> + Error *err = NULL;
> + struct iovec iov = { (void *)" ", 1 };
> +
> + /*
> + * Send a dummy byte so qemu_fill_buffer on the receiving side does not
> + * fail with a len=0 error. Flush first to maintain ordering wrt other
> + * data.
> + */
> +
> + qemu_fflush(f);
> + if (qio_channel_writev_full(ioc, &iov, 1, &fd, 1, 0, &err) < 1) {
> + error_report_err(error_copy(err));
> + qemu_file_set_error_obj(f, -EIO, err);
> + ret = -1;
> + }
> + trace_qemu_file_put_fd(f->ioc->name, fd, ret);
> + return 0;
I think you intended return ret?
> +}
> +
> +int qemu_file_get_fd(QEMUFile *f)
> +{
> + int fd = -1;
> + FdEntry *fde;
> +
> + if (!f->fd_pass) {
> + Error *err = NULL;
> + error_setg(&err, "%s does not support fd passing", f->ioc->name);
> + error_report_err(error_copy(err));
> + qemu_file_set_error_obj(f, -EIO, err);
> + goto out;
> + }
> +
> + /* Force the dummy byte and its fd passenger to appear. */
> + qemu_peek_byte(f, 0);
> +
> + fde = QTAILQ_FIRST(&f->fds);
> + if (fde) {
> + qemu_get_byte(f); /* Drop the dummy byte */
> + fd = fde->fd;
> + QTAILQ_REMOVE(&f->fds, fde, entry);
> + }
> +out:
> + trace_qemu_file_get_fd(f->ioc->name, fd);
> + return fd;
> +}
> +
> /** Closes the file
> *
> * Returns negative error value if any error happened on previous operations or
> @@ -361,11 +430,17 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
> */
> int qemu_fclose(QEMUFile *f)
> {
> + FdEntry *fde, *next;
> int ret = qemu_fflush(f);
> int ret2 = qio_channel_close(f->ioc, NULL);
> if (ret >= 0) {
> ret = ret2;
> }
> + QTAILQ_FOREACH_SAFE(fde, &f->fds, entry, next) {
> + warn_report("qemu_fclose: received fd %d was never claimed", fde->fd);
> + close(fde->fd);
> + g_free(fde);
> + }
> g_clear_pointer(&f->ioc, object_unref);
> error_free(f->last_error_obj);
> g_free(f);
> diff --git a/migration/qemu-file.h b/migration/qemu-file.h
> index 11c2120..3e47a20 100644
> --- a/migration/qemu-file.h
> +++ b/migration/qemu-file.h
> @@ -79,5 +79,7 @@ size_t qemu_get_buffer_at(QEMUFile *f, const uint8_t *buf, size_t buflen,
> off_t pos);
>
> QIOChannel *qemu_file_get_ioc(QEMUFile *file);
> +int qemu_file_put_fd(QEMUFile *f, int fd);
> +int qemu_file_get_fd(QEMUFile *f);
>
> #endif
> diff --git a/migration/trace-events b/migration/trace-events
> index 173f2c0..064b22d 100644
> --- a/migration/trace-events
> +++ b/migration/trace-events
> @@ -88,6 +88,8 @@ put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)"
>
> # qemu-file.c
> qemu_file_fclose(void) ""
> +qemu_file_put_fd(const char *name, int fd, int ret) "ioc %s, fd %d -> status %d"
> +qemu_file_get_fd(const char *name, int fd) "ioc %s -> fd %d"
>
> # ram.c
> get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx"
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [RFC V1 1/6] migration: SCM_RIGHTS for QEMUFile
2024-08-02 8:20 ` Euan Turner
@ 2024-08-05 19:06 ` Steven Sistare
0 siblings, 0 replies; 30+ messages in thread
From: Steven Sistare @ 2024-08-05 19:06 UTC (permalink / raw)
To: Euan Turner, qemu-devel; +Cc: Peter Xu, Fabiano Rosas, Markus Armbruster
On 8/2/2024 4:20 AM, Euan Turner wrote:
> On 30/06/2024 20:44, Steve Sistare wrote:
>> Define functions to put/get file descriptors to/from a QEMUFile, for qio
>> channels that support SCM_RIGHTS. Maintain ordering such that
>> put(A), put(fd), put(B)
>> followed by
>> get(A), get(fd), get(B)
>> always succeeds. Other get orderings may succeed but are not guaranteed.
>>
>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>> ---
>> migration/qemu-file.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++---
>> migration/qemu-file.h | 2 ++
>> migration/trace-events | 2 ++
>> 3 files changed, 83 insertions(+), 4 deletions(-)
>>
>> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
>> index b6d2f58..424c27d 100644
>> --- a/migration/qemu-file.c
>> +++ b/migration/qemu-file.c
>> @@ -37,6 +37,11 @@
>> #define IO_BUF_SIZE 32768
>> #define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
>> +typedef struct FdEntry {
>> + QTAILQ_ENTRY(FdEntry) entry;
>> + int fd;
>> +} FdEntry;
>> +
>> struct QEMUFile {
>> QIOChannel *ioc;
>> bool is_writable;
>> @@ -51,6 +56,9 @@ struct QEMUFile {
>> int last_error;
>> Error *last_error_obj;
>> +
>> + bool fd_pass;
>> + QTAILQ_HEAD(, FdEntry) fds;
>> };
>> /*
>> @@ -109,6 +117,8 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
>> object_ref(ioc);
>> f->ioc = ioc;
>> f->is_writable = is_writable;
>> + f->fd_pass = qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS);
>> + QTAILQ_INIT(&f->fds);
>> return f;
>> }
>> @@ -310,6 +320,10 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
>> int len;
>> int pending;
>> Error *local_error = NULL;
>> + g_autofree int *fds = NULL;
>> + size_t nfd = 0;
>> + int **pfds = f->fd_pass ? &fds : NULL;
>> + size_t *pnfd = f->fd_pass ? &nfd : NULL;
>> assert(!qemu_file_is_writable(f));
>> @@ -325,10 +339,9 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
>> }
>> do {
>> - len = qio_channel_read(f->ioc,
>> - (char *)f->buf + pending,
>> - IO_BUF_SIZE - pending,
>> - &local_error);
>> + struct iovec iov = { f->buf + pending, IO_BUF_SIZE - pending };
>> + len = qio_channel_readv_full(f->ioc, &iov, 1, pfds, pnfd, 0,
>> + &local_error);
>> if (len == QIO_CHANNEL_ERR_BLOCK) {
>> if (qemu_in_coroutine()) {
>> qio_channel_yield(f->ioc, G_IO_IN);
>> @@ -348,9 +361,65 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
>> qemu_file_set_error_obj(f, len, local_error);
>> }
>> + for (int i = 0; i < nfd; i++) {
>> + FdEntry *fde = g_new0(FdEntry, 1);
>> + fde->fd = fds[i];
>> + QTAILQ_INSERT_TAIL(&f->fds, fde, entry);
>> + }
>> +
>> return len;
>> }
>> +int qemu_file_put_fd(QEMUFile *f, int fd)
>> +{
>> + int ret = 0;
>> + QIOChannel *ioc = qemu_file_get_ioc(f);
>> + Error *err = NULL;
>> + struct iovec iov = { (void *)" ", 1 };
>> +
>> + /*
>> + * Send a dummy byte so qemu_fill_buffer on the receiving side does not
>> + * fail with a len=0 error. Flush first to maintain ordering wrt other
>> + * data.
>> + */
>> +
>> + qemu_fflush(f);
>> + if (qio_channel_writev_full(ioc, &iov, 1, &fd, 1, 0, &err) < 1) {
>> + error_report_err(error_copy(err));
>> + qemu_file_set_error_obj(f, -EIO, err);
>> + ret = -1;
>> + }
>> + trace_qemu_file_put_fd(f->ioc->name, fd, ret);
>> + return 0;
> I think you intended return ret?
Yes, thanks - steve
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [RFC V1 1/6] migration: SCM_RIGHTS for QEMUFile
2024-06-30 19:44 ` [RFC V1 1/6] migration: SCM_RIGHTS for QEMUFile Steve Sistare
2024-08-02 8:20 ` Euan Turner
@ 2024-08-15 20:58 ` Peter Xu
2024-08-16 15:13 ` Steven Sistare
1 sibling, 1 reply; 30+ messages in thread
From: Peter Xu @ 2024-08-15 20:58 UTC (permalink / raw)
To: Steve Sistare; +Cc: qemu-devel, Fabiano Rosas, Markus Armbruster
On Sun, Jun 30, 2024 at 12:44:03PM -0700, Steve Sistare wrote:
> Define functions to put/get file descriptors to/from a QEMUFile, for qio
> channels that support SCM_RIGHTS. Maintain ordering such that
> put(A), put(fd), put(B)
> followed by
> get(A), get(fd), get(B)
> always succeeds. Other get orderings may succeed but are not guaranteed.
>
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
It's a slight pity that we'll extend fd to qemufiles, rather than changing
qemufiles back to iochannels.. for the long term we want to remove qemufile.
Would you think we can start to introduce iochannel-compatible vmstate
loader from cpr-[exec/transfer] here? The idea is that we'd want
vmstate_load_iochannel() then take that from an iochannel and start getting
rid of qemufile API. It'll already bring two benefits:
- We don't need this patch then I assume, but stick with iochannel API
- We can have Error** as last parameter of vmstate_load_iochannel(), then
as we discussed in the other thread cpr_state_load() can fail with
explicit errors without error_report()s (and as you pointed out, the
load side of Error** support is yet missing)
There's a 3rd potential benefit, and will come to play when we want to
allow multifd threads to load device states / VMSDs at some point, as
multifd doesn't maintain qemufiles, but only iochannels.
I'm not sure whether it adds too much to you yet, but I'm curious how you
think about it.
Thanks,
> ---
> migration/qemu-file.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++---
> migration/qemu-file.h | 2 ++
> migration/trace-events | 2 ++
> 3 files changed, 83 insertions(+), 4 deletions(-)
>
> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> index b6d2f58..424c27d 100644
> --- a/migration/qemu-file.c
> +++ b/migration/qemu-file.c
> @@ -37,6 +37,11 @@
> #define IO_BUF_SIZE 32768
> #define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
>
> +typedef struct FdEntry {
> + QTAILQ_ENTRY(FdEntry) entry;
> + int fd;
> +} FdEntry;
> +
> struct QEMUFile {
> QIOChannel *ioc;
> bool is_writable;
> @@ -51,6 +56,9 @@ struct QEMUFile {
>
> int last_error;
> Error *last_error_obj;
> +
> + bool fd_pass;
> + QTAILQ_HEAD(, FdEntry) fds;
> };
>
> /*
> @@ -109,6 +117,8 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
> object_ref(ioc);
> f->ioc = ioc;
> f->is_writable = is_writable;
> + f->fd_pass = qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS);
> + QTAILQ_INIT(&f->fds);
>
> return f;
> }
> @@ -310,6 +320,10 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
> int len;
> int pending;
> Error *local_error = NULL;
> + g_autofree int *fds = NULL;
> + size_t nfd = 0;
> + int **pfds = f->fd_pass ? &fds : NULL;
> + size_t *pnfd = f->fd_pass ? &nfd : NULL;
>
> assert(!qemu_file_is_writable(f));
>
> @@ -325,10 +339,9 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
> }
>
> do {
> - len = qio_channel_read(f->ioc,
> - (char *)f->buf + pending,
> - IO_BUF_SIZE - pending,
> - &local_error);
> + struct iovec iov = { f->buf + pending, IO_BUF_SIZE - pending };
> + len = qio_channel_readv_full(f->ioc, &iov, 1, pfds, pnfd, 0,
> + &local_error);
> if (len == QIO_CHANNEL_ERR_BLOCK) {
> if (qemu_in_coroutine()) {
> qio_channel_yield(f->ioc, G_IO_IN);
> @@ -348,9 +361,65 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
> qemu_file_set_error_obj(f, len, local_error);
> }
>
> + for (int i = 0; i < nfd; i++) {
> + FdEntry *fde = g_new0(FdEntry, 1);
> + fde->fd = fds[i];
> + QTAILQ_INSERT_TAIL(&f->fds, fde, entry);
> + }
> +
> return len;
> }
>
> +int qemu_file_put_fd(QEMUFile *f, int fd)
> +{
> + int ret = 0;
> + QIOChannel *ioc = qemu_file_get_ioc(f);
> + Error *err = NULL;
> + struct iovec iov = { (void *)" ", 1 };
> +
> + /*
> + * Send a dummy byte so qemu_fill_buffer on the receiving side does not
> + * fail with a len=0 error. Flush first to maintain ordering wrt other
> + * data.
> + */
> +
> + qemu_fflush(f);
> + if (qio_channel_writev_full(ioc, &iov, 1, &fd, 1, 0, &err) < 1) {
> + error_report_err(error_copy(err));
> + qemu_file_set_error_obj(f, -EIO, err);
> + ret = -1;
> + }
> + trace_qemu_file_put_fd(f->ioc->name, fd, ret);
> + return 0;
> +}
> +
> +int qemu_file_get_fd(QEMUFile *f)
> +{
> + int fd = -1;
> + FdEntry *fde;
> +
> + if (!f->fd_pass) {
> + Error *err = NULL;
> + error_setg(&err, "%s does not support fd passing", f->ioc->name);
> + error_report_err(error_copy(err));
> + qemu_file_set_error_obj(f, -EIO, err);
> + goto out;
> + }
> +
> + /* Force the dummy byte and its fd passenger to appear. */
> + qemu_peek_byte(f, 0);
> +
> + fde = QTAILQ_FIRST(&f->fds);
> + if (fde) {
> + qemu_get_byte(f); /* Drop the dummy byte */
> + fd = fde->fd;
> + QTAILQ_REMOVE(&f->fds, fde, entry);
> + }
> +out:
> + trace_qemu_file_get_fd(f->ioc->name, fd);
> + return fd;
> +}
> +
> /** Closes the file
> *
> * Returns negative error value if any error happened on previous operations or
> @@ -361,11 +430,17 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
> */
> int qemu_fclose(QEMUFile *f)
> {
> + FdEntry *fde, *next;
> int ret = qemu_fflush(f);
> int ret2 = qio_channel_close(f->ioc, NULL);
> if (ret >= 0) {
> ret = ret2;
> }
> + QTAILQ_FOREACH_SAFE(fde, &f->fds, entry, next) {
> + warn_report("qemu_fclose: received fd %d was never claimed", fde->fd);
> + close(fde->fd);
> + g_free(fde);
> + }
> g_clear_pointer(&f->ioc, object_unref);
> error_free(f->last_error_obj);
> g_free(f);
> diff --git a/migration/qemu-file.h b/migration/qemu-file.h
> index 11c2120..3e47a20 100644
> --- a/migration/qemu-file.h
> +++ b/migration/qemu-file.h
> @@ -79,5 +79,7 @@ size_t qemu_get_buffer_at(QEMUFile *f, const uint8_t *buf, size_t buflen,
> off_t pos);
>
> QIOChannel *qemu_file_get_ioc(QEMUFile *file);
> +int qemu_file_put_fd(QEMUFile *f, int fd);
> +int qemu_file_get_fd(QEMUFile *f);
>
> #endif
> diff --git a/migration/trace-events b/migration/trace-events
> index 173f2c0..064b22d 100644
> --- a/migration/trace-events
> +++ b/migration/trace-events
> @@ -88,6 +88,8 @@ put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)"
>
> # qemu-file.c
> qemu_file_fclose(void) ""
> +qemu_file_put_fd(const char *name, int fd, int ret) "ioc %s, fd %d -> status %d"
> +qemu_file_get_fd(const char *name, int fd) "ioc %s -> fd %d"
>
> # ram.c
> get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx"
> --
> 1.8.3.1
>
--
Peter Xu
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [RFC V1 1/6] migration: SCM_RIGHTS for QEMUFile
2024-08-15 20:58 ` Peter Xu
@ 2024-08-16 15:13 ` Steven Sistare
2024-08-16 15:51 ` Peter Xu
0 siblings, 1 reply; 30+ messages in thread
From: Steven Sistare @ 2024-08-16 15:13 UTC (permalink / raw)
To: Peter Xu; +Cc: qemu-devel, Fabiano Rosas, Markus Armbruster
On 8/15/2024 4:58 PM, Peter Xu wrote:
> On Sun, Jun 30, 2024 at 12:44:03PM -0700, Steve Sistare wrote:
>> Define functions to put/get file descriptors to/from a QEMUFile, for qio
>> channels that support SCM_RIGHTS. Maintain ordering such that
>> put(A), put(fd), put(B)
>> followed by
>> get(A), get(fd), get(B)
>> always succeeds. Other get orderings may succeed but are not guaranteed.
>>
>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>
> It's a slight pity that we'll extend fd to qemufiles, rather than changing
> qemufiles back to iochannels.. for the long term we want to remove qemufile.
Thanks, I did not know that removing QEMUFile is a goal.
Currently QEMUFile buffers I/O in userland to reduce system calls. Do you
plan to eliminate that buffering, or move it to QIOChannel, perhaps in a
new QIOChannelBuffered class?
If you eliminate the buffering, then migration might take longer.
If you keep the buffering, then this patch's logic will still be needed
in the QIOChannelBuffered code.
> Would you think we can start to introduce iochannel-compatible vmstate
> loader from cpr-[exec/transfer] here? The idea is that we'd want
> vmstate_load_iochannel() then take that from an iochannel and start getting
> rid of qemufile API. It'll already bring two benefits:
>
> - We don't need this patch then I assume, but stick with iochannel API
>
> - We can have Error** as last parameter of vmstate_load_iochannel(), then
> as we discussed in the other thread cpr_state_load() can fail with
> explicit errors without error_report()s (and as you pointed out, the
> load side of Error** support is yet missing)
>
> There's a 3rd potential benefit, and will come to play when we want to
> allow multifd threads to load device states / VMSDs at some point, as
> multifd doesn't maintain qemufiles, but only iochannels.
>
> I'm not sure whether it adds too much to you yet, but I'm curious how you
> think about it.
A decent idea, but the task quickly mushrooms. All of the VMSTATE macros used
in cpr.c would need to be converted, and that stack is deep. eg:
VMSTATE_INT32
vmstate_info_int32
put_int32
qemu_put_sbe32s
qemu_put_byte
add_buf_to_iovec
qemu_fflush
qio_channel_writev_all
- Steve
>> ---
>> migration/qemu-file.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++---
>> migration/qemu-file.h | 2 ++
>> migration/trace-events | 2 ++
>> 3 files changed, 83 insertions(+), 4 deletions(-)
>>
>> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
>> index b6d2f58..424c27d 100644
>> --- a/migration/qemu-file.c
>> +++ b/migration/qemu-file.c
>> @@ -37,6 +37,11 @@
>> #define IO_BUF_SIZE 32768
>> #define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
>>
>> +typedef struct FdEntry {
>> + QTAILQ_ENTRY(FdEntry) entry;
>> + int fd;
>> +} FdEntry;
>> +
>> struct QEMUFile {
>> QIOChannel *ioc;
>> bool is_writable;
>> @@ -51,6 +56,9 @@ struct QEMUFile {
>>
>> int last_error;
>> Error *last_error_obj;
>> +
>> + bool fd_pass;
>> + QTAILQ_HEAD(, FdEntry) fds;
>> };
>>
>> /*
>> @@ -109,6 +117,8 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
>> object_ref(ioc);
>> f->ioc = ioc;
>> f->is_writable = is_writable;
>> + f->fd_pass = qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS);
>> + QTAILQ_INIT(&f->fds);
>>
>> return f;
>> }
>> @@ -310,6 +320,10 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
>> int len;
>> int pending;
>> Error *local_error = NULL;
>> + g_autofree int *fds = NULL;
>> + size_t nfd = 0;
>> + int **pfds = f->fd_pass ? &fds : NULL;
>> + size_t *pnfd = f->fd_pass ? &nfd : NULL;
>>
>> assert(!qemu_file_is_writable(f));
>>
>> @@ -325,10 +339,9 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
>> }
>>
>> do {
>> - len = qio_channel_read(f->ioc,
>> - (char *)f->buf + pending,
>> - IO_BUF_SIZE - pending,
>> - &local_error);
>> + struct iovec iov = { f->buf + pending, IO_BUF_SIZE - pending };
>> + len = qio_channel_readv_full(f->ioc, &iov, 1, pfds, pnfd, 0,
>> + &local_error);
>> if (len == QIO_CHANNEL_ERR_BLOCK) {
>> if (qemu_in_coroutine()) {
>> qio_channel_yield(f->ioc, G_IO_IN);
>> @@ -348,9 +361,65 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
>> qemu_file_set_error_obj(f, len, local_error);
>> }
>>
>> + for (int i = 0; i < nfd; i++) {
>> + FdEntry *fde = g_new0(FdEntry, 1);
>> + fde->fd = fds[i];
>> + QTAILQ_INSERT_TAIL(&f->fds, fde, entry);
>> + }
>> +
>> return len;
>> }
>>
>> +int qemu_file_put_fd(QEMUFile *f, int fd)
>> +{
>> + int ret = 0;
>> + QIOChannel *ioc = qemu_file_get_ioc(f);
>> + Error *err = NULL;
>> + struct iovec iov = { (void *)" ", 1 };
>> +
>> + /*
>> + * Send a dummy byte so qemu_fill_buffer on the receiving side does not
>> + * fail with a len=0 error. Flush first to maintain ordering wrt other
>> + * data.
>> + */
>> +
>> + qemu_fflush(f);
>> + if (qio_channel_writev_full(ioc, &iov, 1, &fd, 1, 0, &err) < 1) {
>> + error_report_err(error_copy(err));
>> + qemu_file_set_error_obj(f, -EIO, err);
>> + ret = -1;
>> + }
>> + trace_qemu_file_put_fd(f->ioc->name, fd, ret);
>> + return 0;
>> +}
>> +
>> +int qemu_file_get_fd(QEMUFile *f)
>> +{
>> + int fd = -1;
>> + FdEntry *fde;
>> +
>> + if (!f->fd_pass) {
>> + Error *err = NULL;
>> + error_setg(&err, "%s does not support fd passing", f->ioc->name);
>> + error_report_err(error_copy(err));
>> + qemu_file_set_error_obj(f, -EIO, err);
>> + goto out;
>> + }
>> +
>> + /* Force the dummy byte and its fd passenger to appear. */
>> + qemu_peek_byte(f, 0);
>> +
>> + fde = QTAILQ_FIRST(&f->fds);
>> + if (fde) {
>> + qemu_get_byte(f); /* Drop the dummy byte */
>> + fd = fde->fd;
>> + QTAILQ_REMOVE(&f->fds, fde, entry);
>> + }
>> +out:
>> + trace_qemu_file_get_fd(f->ioc->name, fd);
>> + return fd;
>> +}
>> +
>> /** Closes the file
>> *
>> * Returns negative error value if any error happened on previous operations or
>> @@ -361,11 +430,17 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
>> */
>> int qemu_fclose(QEMUFile *f)
>> {
>> + FdEntry *fde, *next;
>> int ret = qemu_fflush(f);
>> int ret2 = qio_channel_close(f->ioc, NULL);
>> if (ret >= 0) {
>> ret = ret2;
>> }
>> + QTAILQ_FOREACH_SAFE(fde, &f->fds, entry, next) {
>> + warn_report("qemu_fclose: received fd %d was never claimed", fde->fd);
>> + close(fde->fd);
>> + g_free(fde);
>> + }
>> g_clear_pointer(&f->ioc, object_unref);
>> error_free(f->last_error_obj);
>> g_free(f);
>> diff --git a/migration/qemu-file.h b/migration/qemu-file.h
>> index 11c2120..3e47a20 100644
>> --- a/migration/qemu-file.h
>> +++ b/migration/qemu-file.h
>> @@ -79,5 +79,7 @@ size_t qemu_get_buffer_at(QEMUFile *f, const uint8_t *buf, size_t buflen,
>> off_t pos);
>>
>> QIOChannel *qemu_file_get_ioc(QEMUFile *file);
>> +int qemu_file_put_fd(QEMUFile *f, int fd);
>> +int qemu_file_get_fd(QEMUFile *f);
>>
>> #endif
>> diff --git a/migration/trace-events b/migration/trace-events
>> index 173f2c0..064b22d 100644
>> --- a/migration/trace-events
>> +++ b/migration/trace-events
>> @@ -88,6 +88,8 @@ put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)"
>>
>> # qemu-file.c
>> qemu_file_fclose(void) ""
>> +qemu_file_put_fd(const char *name, int fd, int ret) "ioc %s, fd %d -> status %d"
>> +qemu_file_get_fd(const char *name, int fd) "ioc %s -> fd %d"
>>
>> # ram.c
>> get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx"
>> --
>> 1.8.3.1
>>
>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [RFC V1 1/6] migration: SCM_RIGHTS for QEMUFile
2024-08-16 15:13 ` Steven Sistare
@ 2024-08-16 15:51 ` Peter Xu
0 siblings, 0 replies; 30+ messages in thread
From: Peter Xu @ 2024-08-16 15:51 UTC (permalink / raw)
To: Steven Sistare
Cc: qemu-devel, Fabiano Rosas, Markus Armbruster,
Daniel P. Berrangé
On Fri, Aug 16, 2024 at 11:13:20AM -0400, Steven Sistare wrote:
> On 8/15/2024 4:58 PM, Peter Xu wrote:
> > On Sun, Jun 30, 2024 at 12:44:03PM -0700, Steve Sistare wrote:
> > > Define functions to put/get file descriptors to/from a QEMUFile, for qio
> > > channels that support SCM_RIGHTS. Maintain ordering such that
> > > put(A), put(fd), put(B)
> > > followed by
> > > get(A), get(fd), get(B)
> > > always succeeds. Other get orderings may succeed but are not guaranteed.
> > >
> > > Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> >
> > It's a slight pity that we'll extend fd to qemufiles, rather than changing
> > qemufiles back to iochannels.. for the long term we want to remove qemufile.
>
> Thanks, I did not know that removing QEMUFile is a goal.
>
> Currently QEMUFile buffers I/O in userland to reduce system calls. Do you
> plan to eliminate that buffering, or move it to QIOChannel, perhaps in a
> new QIOChannelBuffered class?
>
> If you eliminate the buffering, then migration might take longer.
> If you keep the buffering, then this patch's logic will still be needed
> in the QIOChannelBuffered code.
+Dan.
Yes probably the buffering is still needed ultimately. It's just that it
currently involves qemufile which is (hopefully..) destined to be either
removed or updated.
For this cpr states, I'm not sure the buffered iochannel is a must, e.g. I
wonder what happens if we start with using iochannels directly without
buffering, then after replacing that with buffered iochannels, it'll simply
improve on top without changing much of the code - I mean, buffered
iochannel should still be able to be casted into an iochannel anyway.
>
> > Would you think we can start to introduce iochannel-compatible vmstate
> > loader from cpr-[exec/transfer] here? The idea is that we'd want
> > vmstate_load_iochannel() then take that from an iochannel and start getting
> > rid of qemufile API. It'll already bring two benefits:
> >
> > - We don't need this patch then I assume, but stick with iochannel API
> >
> > - We can have Error** as last parameter of vmstate_load_iochannel(), then
> > as we discussed in the other thread cpr_state_load() can fail with
> > explicit errors without error_report()s (and as you pointed out, the
> > load side of Error** support is yet missing)
> >
> > There's a 3rd potential benefit, and will come to play when we want to
> > allow multifd threads to load device states / VMSDs at some point, as
> > multifd doesn't maintain qemufiles, but only iochannels.
> >
> > I'm not sure whether it adds too much to you yet, but I'm curious how you
> > think about it.
>
> A decent idea, but the task quickly mushrooms. All of the VMSTATE macros used
> in cpr.c would need to be converted, and that stack is deep. eg:
>
> VMSTATE_INT32
> vmstate_info_int32
> put_int32
> qemu_put_sbe32s
> qemu_put_byte
> add_buf_to_iovec
> qemu_fflush
> qio_channel_writev_all
Right, right after I sent the email I noticed this too..
The chance is the new iochannel API only resolves whatever needed for cpr
early states, currently:
static const VMStateDescription vmstate_cpr_state = {
.name = CPR_STATE,
.version_id = 1,
.minimum_version_id = 1,
.pre_save = cpr_state_presave,
.fields = (VMStateField[]) {
VMSTATE_UINT32(mode, CprState),
VMSTATE_QLIST_V(fds, CprState, 1, vmstate_cpr_fd, CprFd, next),
VMSTATE_END_OF_LIST()
}
};
static const VMStateDescription vmstate_cpr_fd = {
.name = "cpr fd",
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_UINT32(namelen, CprFd),
VMSTATE_VBUFFER_ALLOC_UINT32(name, CprFd, 0, NULL, namelen),
VMSTATE_INT32(id, CprFd),
VMSTATE_FD(fd, CprFd),
VMSTATE_END_OF_LIST()
}
};
IIUC, a summary of a few things only so far: UINT32/INT32, VBUFFER, FD.
Here FD is the new one, which we'll need to support that anyway, either
with the qemufile API or iochannel API. Then the rest looks pretty
limited. IOW, the new iochannel-based vmstate_load() doesn't yet need to
know anything else but these types of fields. While I don't think I have a
full grasp yet on everything; that just sounds like the right direction to
go for the longer term.
Said that, as you said there can still be plenty of work there. Steve,
feel free to think about it and take whatever approach you like.
Thanks,
--
Peter Xu
^ permalink raw reply [flat|nested] 30+ messages in thread
* [RFC V1 2/6] migration: VMSTATE_FD
2024-06-30 19:44 [RFC V1 0/6] Live update: cpr-transfer Steve Sistare
2024-06-30 19:44 ` [RFC V1 1/6] migration: SCM_RIGHTS for QEMUFile Steve Sistare
@ 2024-06-30 19:44 ` Steve Sistare
2024-06-30 19:44 ` [RFC V1 3/6] migration: cpr-transfer save and load Steve Sistare
` (4 subsequent siblings)
6 siblings, 0 replies; 30+ messages in thread
From: Steve Sistare @ 2024-06-30 19:44 UTC (permalink / raw)
To: qemu-devel; +Cc: Peter Xu, Fabiano Rosas, Markus Armbruster, Steve Sistare
Define VMSTATE_FD for declaring a file descriptor field in a
VMStateDescription.
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
include/migration/vmstate.h | 9 +++++++++
migration/vmstate-types.c | 32 ++++++++++++++++++++++++++++++++
2 files changed, 41 insertions(+)
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index f313f2f..a1dfab4 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -230,6 +230,7 @@ extern const VMStateInfo vmstate_info_uint8;
extern const VMStateInfo vmstate_info_uint16;
extern const VMStateInfo vmstate_info_uint32;
extern const VMStateInfo vmstate_info_uint64;
+extern const VMStateInfo vmstate_info_fd;
/** Put this in the stream when migrating a null pointer.*/
#define VMS_NULLPTR_MARKER (0x30U) /* '0' */
@@ -902,6 +903,9 @@ extern const VMStateInfo vmstate_info_qlist;
#define VMSTATE_UINT64_V(_f, _s, _v) \
VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint64, uint64_t)
+#define VMSTATE_FD_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_fd, int32_t)
+
#ifdef CONFIG_LINUX
#define VMSTATE_U8_V(_f, _s, _v) \
@@ -936,6 +940,9 @@ extern const VMStateInfo vmstate_info_qlist;
#define VMSTATE_UINT64(_f, _s) \
VMSTATE_UINT64_V(_f, _s, 0)
+#define VMSTATE_FD(_f, _s) \
+ VMSTATE_FD_V(_f, _s, 0)
+
#ifdef CONFIG_LINUX
#define VMSTATE_U8(_f, _s) \
@@ -1009,6 +1016,8 @@ extern const VMStateInfo vmstate_info_qlist;
#define VMSTATE_UINT64_TEST(_f, _s, _t) \
VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint64, uint64_t)
+#define VMSTATE_FD_TEST(_f, _s, _t) \
+ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_fd, int32_t)
#define VMSTATE_TIMER_PTR_TEST(_f, _s, _test) \
VMSTATE_POINTER_TEST(_f, _s, _test, vmstate_info_timer, QEMUTimer *)
diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
index e83bfcc..6e45a4a 100644
--- a/migration/vmstate-types.c
+++ b/migration/vmstate-types.c
@@ -314,6 +314,38 @@ const VMStateInfo vmstate_info_uint64 = {
.put = put_uint64,
};
+/* File descriptor communicated via SCM_RIGHTS */
+
+static int get_fd(QEMUFile *f, void *pv, size_t size,
+ const VMStateField *field)
+{
+ int32_t *v = pv;
+ qemu_get_sbe32s(f, v);
+ if (*v < 0) {
+ return 0;
+ }
+ *v = qemu_file_get_fd(f);
+ return 0;
+}
+
+static int put_fd(QEMUFile *f, void *pv, size_t size,
+ const VMStateField *field, JSONWriter *vmdesc)
+{
+ int32_t *v = pv;
+
+ qemu_put_sbe32s(f, v);
+ if (*v < 0) {
+ return 0;
+ }
+ return qemu_file_put_fd(f, *v);
+}
+
+const VMStateInfo vmstate_info_fd = {
+ .name = "fd",
+ .get = get_fd,
+ .put = put_fd,
+};
+
static int get_nullptr(QEMUFile *f, void *pv, size_t size,
const VMStateField *field)
--
1.8.3.1
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [RFC V1 3/6] migration: cpr-transfer save and load
2024-06-30 19:44 [RFC V1 0/6] Live update: cpr-transfer Steve Sistare
2024-06-30 19:44 ` [RFC V1 1/6] migration: SCM_RIGHTS for QEMUFile Steve Sistare
2024-06-30 19:44 ` [RFC V1 2/6] migration: VMSTATE_FD Steve Sistare
@ 2024-06-30 19:44 ` Steve Sistare
2024-06-30 19:44 ` [RFC V1 4/6] migration: cpr-uri parameter Steve Sistare
` (3 subsequent siblings)
6 siblings, 0 replies; 30+ messages in thread
From: Steve Sistare @ 2024-06-30 19:44 UTC (permalink / raw)
To: qemu-devel; +Cc: Peter Xu, Fabiano Rosas, Markus Armbruster, Steve Sistare
Add functions to create a QEMUFile based on a unix URI, for saving or
loading, for use by cpr-transfer mode to preserve CPR state.
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
include/migration/cpr.h | 3 ++
migration/cpr-transfer.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++
migration/meson.build | 1 +
3 files changed, 85 insertions(+)
create mode 100644 migration/cpr-transfer.c
diff --git a/include/migration/cpr.h b/include/migration/cpr.h
index c6c60f8..9aae94c 100644
--- a/include/migration/cpr.h
+++ b/include/migration/cpr.h
@@ -32,4 +32,7 @@ bool cpr_exec_has_state(void);
void cpr_exec_unpersist_state(void);
void cpr_mig_init(void);
void cpr_unpreserve_fds(void);
+
+QEMUFile *cpr_transfer_output(const char *uri, Error **errp);
+QEMUFile *cpr_transfer_input(const char *uri, Error **errp);
#endif
diff --git a/migration/cpr-transfer.c b/migration/cpr-transfer.c
new file mode 100644
index 0000000..fb9ecd8
--- /dev/null
+++ b/migration/cpr-transfer.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2022, 2024 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "io/channel-file.h"
+#include "io/channel-socket.h"
+#include "io/net-listener.h"
+#include "migration/cpr.h"
+#include "migration/migration.h"
+#include "migration/savevm.h"
+#include "migration/qemu-file.h"
+#include "migration/vmstate.h"
+
+QEMUFile *cpr_transfer_output(const char *uri, Error **errp)
+{
+ g_autoptr(MigrationChannel) channel = NULL;
+ QIOChannel *ioc;
+
+ if (!migrate_uri_parse(uri, &channel, errp)) {
+ return NULL;
+ }
+
+ if (channel->addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET &&
+ channel->addr->u.socket.type == SOCKET_ADDRESS_TYPE_UNIX) {
+
+ QIOChannelSocket *sioc = qio_channel_socket_new();
+ SocketAddress *saddr = &channel->addr->u.socket;
+
+ if (qio_channel_socket_connect_sync(sioc, saddr, errp)) {
+ object_unref(OBJECT(sioc));
+ return NULL;
+ }
+ ioc = QIO_CHANNEL(sioc);
+
+ } else {
+ error_setg(errp, "bad cpr-uri %s; must be unix:", uri);
+ return NULL;
+ }
+
+ qio_channel_set_name(ioc, "cpr-out");
+ return qemu_file_new_output(ioc);
+}
+
+QEMUFile *cpr_transfer_input(const char *uri, Error **errp)
+{
+ g_autoptr(MigrationChannel) channel = NULL;
+ QIOChannel *ioc;
+
+ if (!migrate_uri_parse(uri, &channel, errp)) {
+ return NULL;
+ }
+
+ if (channel->addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET &&
+ channel->addr->u.socket.type == SOCKET_ADDRESS_TYPE_UNIX) {
+
+ QIOChannelSocket *sioc;
+ SocketAddress *saddr = &channel->addr->u.socket;
+ QIONetListener *listener = qio_net_listener_new();
+
+ qio_net_listener_set_name(listener, "cpr-socket-listener");
+ if (qio_net_listener_open_sync(listener, saddr, 1, errp) < 0) {
+ object_unref(OBJECT(listener));
+ return NULL;
+ }
+
+ sioc = qio_net_listener_wait_client(listener);
+ ioc = QIO_CHANNEL(sioc);
+
+ } else {
+ error_setg(errp, "bad cpr-uri %s; must be unix:", uri);
+ return NULL;
+ }
+
+ qio_channel_set_name(ioc, "cpr-in");
+ return qemu_file_new_input(ioc);
+}
diff --git a/migration/meson.build b/migration/meson.build
index dd1d315..f722980 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -15,6 +15,7 @@ system_ss.add(files(
'channel-block.c',
'cpr.c',
'cpr-exec.c',
+ 'cpr-transfer.c',
'dirtyrate.c',
'exec.c',
'fd.c',
--
1.8.3.1
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [RFC V1 4/6] migration: cpr-uri parameter
2024-06-30 19:44 [RFC V1 0/6] Live update: cpr-transfer Steve Sistare
` (2 preceding siblings ...)
2024-06-30 19:44 ` [RFC V1 3/6] migration: cpr-transfer save and load Steve Sistare
@ 2024-06-30 19:44 ` Steve Sistare
2024-08-15 20:46 ` Peter Xu
2024-06-30 19:44 ` [RFC V1 5/6] migration: cpr-uri option Steve Sistare
` (2 subsequent siblings)
6 siblings, 1 reply; 30+ messages in thread
From: Steve Sistare @ 2024-06-30 19:44 UTC (permalink / raw)
To: qemu-devel; +Cc: Peter Xu, Fabiano Rosas, Markus Armbruster, Steve Sistare
Define the cpr-uri migration parameter to specify the URI to which
CPR vmstate is saved for cpr-transfer mode.
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
migration/migration-hmp-cmds.c | 10 ++++++++++
migration/options.c | 29 +++++++++++++++++++++++++++++
migration/options.h | 1 +
qapi/migration.json | 12 ++++++++++++
4 files changed, 52 insertions(+)
diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 16a4b00..4ede831 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -371,6 +371,11 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
params->direct_io ? "on" : "off");
}
+ assert(params->cpr_uri);
+ monitor_printf(mon, "%s: '%s'\n",
+ MigrationParameter_str(MIGRATION_PARAMETER_CPR_URI),
+ params->cpr_uri);
+
assert(params->has_cpr_exec_command);
monitor_print_cpr_exec_command(mon, params->cpr_exec_command);
}
@@ -650,6 +655,11 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
p->has_direct_io = true;
visit_type_bool(v, param, &p->direct_io, &err);
break;
+ case MIGRATION_PARAMETER_CPR_URI:
+ p->cpr_uri = g_new0(StrOrNull, 1);
+ p->cpr_uri->type = QTYPE_QSTRING;
+ visit_type_str(v, param, &p->cpr_uri->u.s, &err);
+ break;
case MIGRATION_PARAMETER_CPR_EXEC_COMMAND: {
g_autofree char **strv = g_strsplit(valuestr ?: "", " ", -1);
strList **tail = &p->cpr_exec_command;
diff --git a/migration/options.c b/migration/options.c
index b8d5f72..7526f9f 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -163,6 +163,8 @@ Property migration_properties[] = {
DEFINE_PROP_ZERO_PAGE_DETECTION("zero-page-detection", MigrationState,
parameters.zero_page_detection,
ZERO_PAGE_DETECTION_MULTIFD),
+ DEFINE_PROP_STRING("cpr-uri", MigrationState,
+ parameters.cpr_uri),
/* Migration capabilities */
DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@@ -848,6 +850,13 @@ ZeroPageDetection migrate_zero_page_detection(void)
return s->parameters.zero_page_detection;
}
+const char *migrate_cpr_uri(void)
+{
+ MigrationState *s = migrate_get_current();
+
+ return s->parameters.cpr_uri;
+}
+
/* parameters helpers */
AnnounceParameters *migrate_announce_params(void)
@@ -931,6 +940,7 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
params->zero_page_detection = s->parameters.zero_page_detection;
params->has_direct_io = true;
params->direct_io = s->parameters.direct_io;
+ params->cpr_uri = g_strdup(s->parameters.cpr_uri);
params->has_cpr_exec_command = true;
params->cpr_exec_command = QAPI_CLONE(strList,
s->parameters.cpr_exec_command);
@@ -967,6 +977,7 @@ void migrate_params_init(MigrationParameters *params)
params->has_mode = true;
params->has_zero_page_detection = true;
params->has_direct_io = true;
+ params->cpr_uri = g_strdup("");
params->has_cpr_exec_command = true;
}
@@ -1257,9 +1268,15 @@ static void migrate_params_test_apply(MigrateSetParameters *params,
dest->direct_io = params->direct_io;
}
+ if (params->cpr_uri) {
+ assert(params->cpr_uri->type == QTYPE_QSTRING);
+ dest->cpr_uri = params->cpr_uri->u.s;
+ }
+
if (params->has_cpr_exec_command) {
dest->cpr_exec_command = params->cpr_exec_command;
}
+
}
static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@@ -1390,6 +1407,12 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
s->parameters.direct_io = params->direct_io;
}
+ if (params->cpr_uri) {
+ g_free(s->parameters.cpr_uri);
+ assert(params->cpr_uri->type == QTYPE_QSTRING);
+ s->parameters.cpr_uri = g_strdup(params->cpr_uri->u.s);
+ }
+
if (params->has_cpr_exec_command) {
qapi_free_strList(s->parameters.cpr_exec_command);
s->parameters.cpr_exec_command =
@@ -1421,6 +1444,12 @@ void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
params->tls_authz->u.s = strdup("");
}
+ if (params->cpr_uri && params->cpr_uri->type == QTYPE_QNULL) {
+ qobject_unref(params->cpr_uri->u.n);
+ params->cpr_uri->type = QTYPE_QSTRING;
+ params->cpr_uri->u.s = strdup("");
+ }
+
migrate_params_test_apply(params, &tmp);
if (!migrate_params_check(&tmp, errp)) {
diff --git a/migration/options.h b/migration/options.h
index a239702..7492fcf 100644
--- a/migration/options.h
+++ b/migration/options.h
@@ -85,6 +85,7 @@ const char *migrate_tls_creds(void);
const char *migrate_tls_hostname(void);
uint64_t migrate_xbzrle_cache_size(void);
ZeroPageDetection migrate_zero_page_detection(void);
+const char *migrate_cpr_uri(void);
/* parameters helpers */
diff --git a/qapi/migration.json b/qapi/migration.json
index 4e626df..df62456 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -851,6 +851,9 @@
# only has effect if the @mapped-ram capability is enabled.
# (Since 9.1)
#
+# @cpr-uri: URI for an additional migration channel needed by
+# @cpr-transfer mode. (Since 9.1)
+#
# @cpr-exec-command: Command to start the new QEMU process when @mode
# is @cpr-exec. The first list element is the program's filename,
# the remainder its arguments. (Since 9.1)
@@ -881,6 +884,7 @@
'mode',
'zero-page-detection',
'direct-io',
+ 'cpr-uri',
'cpr-exec-command'] }
##
@@ -1031,6 +1035,9 @@
# only has effect if the @mapped-ram capability is enabled.
# (Since 9.1)
#
+# @cpr-uri: URI for an additional migration channel needed by
+# @cpr-transfer mode. (Since 9.1)
+#
# @cpr-exec-command: Command to start the new QEMU process when @mode
# is @cpr-exec. The first list element is the program's filename,
# the remainder its arguments. (Since 9.1)
@@ -1076,6 +1083,7 @@
'*mode': 'MigMode',
'*zero-page-detection': 'ZeroPageDetection',
'*direct-io': 'bool',
+ '*cpr-uri': 'StrOrNull',
'*cpr-exec-command': [ 'str' ]} }
##
@@ -1240,6 +1248,9 @@
# only has effect if the @mapped-ram capability is enabled.
# (Since 9.1)
#
+# @cpr-uri: URI for an additional migration channel needed by
+# @cpr-transfer mode. (Since 9.1)
+#
# @cpr-exec-command: Command to start the new QEMU process when @mode
# is @cpr-exec. The first list element is the program's filename,
# the remainder its arguments. (Since 9.1)
@@ -1282,6 +1293,7 @@
'*mode': 'MigMode',
'*zero-page-detection': 'ZeroPageDetection',
'*direct-io': 'bool',
+ '*cpr-uri': 'str',
'*cpr-exec-command': [ 'str' ]} }
##
--
1.8.3.1
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [RFC V1 4/6] migration: cpr-uri parameter
2024-06-30 19:44 ` [RFC V1 4/6] migration: cpr-uri parameter Steve Sistare
@ 2024-08-15 20:46 ` Peter Xu
2024-08-16 15:13 ` Steven Sistare
0 siblings, 1 reply; 30+ messages in thread
From: Peter Xu @ 2024-08-15 20:46 UTC (permalink / raw)
To: Steve Sistare; +Cc: qemu-devel, Fabiano Rosas, Markus Armbruster
On Sun, Jun 30, 2024 at 12:44:06PM -0700, Steve Sistare wrote:
> Define the cpr-uri migration parameter to specify the URI to which
> CPR vmstate is saved for cpr-transfer mode.
>
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
So I left the idea in my reply to the cover letter to reuse "-cpr-uri" in
qemu cmdline on dst node, and I wonder if it'll also save this one on src
with the same idea.
In general, with cpr-transfer we always stick with unix socket, and only
one pair of it.
When migration starts, cpr_state_save() dumps things and cpr_state_load()
loads things. Then the socket gets reused on both sides to be the
migration channel.
IIUC we can already reuse QMP command URI on src so we don't need anything
new. On dst, we may need "-incoming-cpr" to tell QEMU we need to kick off
CPR early loads; it's a pity we don't have way to specify migration mode
there, otherwise IIUC we can even save "-incoming-cpr" but reuse "-incoming".
Thanks,
--
Peter Xu
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [RFC V1 4/6] migration: cpr-uri parameter
2024-08-15 20:46 ` Peter Xu
@ 2024-08-16 15:13 ` Steven Sistare
0 siblings, 0 replies; 30+ messages in thread
From: Steven Sistare @ 2024-08-16 15:13 UTC (permalink / raw)
To: Peter Xu; +Cc: qemu-devel, Fabiano Rosas, Markus Armbruster
On 8/15/2024 4:46 PM, Peter Xu wrote:
> On Sun, Jun 30, 2024 at 12:44:06PM -0700, Steve Sistare wrote:
>> Define the cpr-uri migration parameter to specify the URI to which
>> CPR vmstate is saved for cpr-transfer mode.
>>
>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>
> So I left the idea in my reply to the cover letter to reuse "-cpr-uri" in
> qemu cmdline on dst node, and I wonder if it'll also save this one on src
> with the same idea.
>
> In general, with cpr-transfer we always stick with unix socket, and only
> one pair of it.
>
> When migration starts, cpr_state_save() dumps things and cpr_state_load()
> loads things. Then the socket gets reused on both sides to be the
> migration channel.
>
> IIUC we can already reuse QMP command URI on src so we don't need anything
> new. On dst, we may need "-incoming-cpr" to tell QEMU we need to kick off
> CPR early loads; it's a pity we don't have way to specify migration mode
> there, otherwise IIUC we can even save "-incoming-cpr" but reuse "-incoming".
I also considered using the existing migration URI on both the source and target,
and accepting it twice, by defining an -incoming-mode parameter. But, I still
think it would be a dis-service to our users to eliminate all flexibility for the
URI type.
- Steve
^ permalink raw reply [flat|nested] 30+ messages in thread
* [RFC V1 5/6] migration: cpr-uri option
2024-06-30 19:44 [RFC V1 0/6] Live update: cpr-transfer Steve Sistare
` (3 preceding siblings ...)
2024-06-30 19:44 ` [RFC V1 4/6] migration: cpr-uri parameter Steve Sistare
@ 2024-06-30 19:44 ` Steve Sistare
2024-06-30 19:44 ` [RFC V1 6/6] migration: cpr-transfer mode Steve Sistare
2024-07-18 15:36 ` [RFC V1 0/6] Live update: cpr-transfer Peter Xu
6 siblings, 0 replies; 30+ messages in thread
From: Steve Sistare @ 2024-06-30 19:44 UTC (permalink / raw)
To: qemu-devel; +Cc: Peter Xu, Fabiano Rosas, Markus Armbruster, Steve Sistare
Define the cpr-uri QEMU command-line option to specify the URI from
which CPR vmstate is loaded for cpr-transfer mode.
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
include/migration/cpr.h | 1 +
migration/cpr.c | 7 +++++++
qemu-options.hx | 8 ++++++++
system/vl.c | 3 +++
4 files changed, 19 insertions(+)
diff --git a/include/migration/cpr.h b/include/migration/cpr.h
index 9aae94c..bc62493 100644
--- a/include/migration/cpr.h
+++ b/include/migration/cpr.h
@@ -22,6 +22,7 @@ int cpr_find_fd(const char *name, int id);
int cpr_walk_fd(cpr_walk_fd_cb cb);
void cpr_resave_fd(const char *name, int id, int fd);
+void cpr_set_cpr_uri(const char *uri);
int cpr_state_save(Error **errp);
int cpr_state_load(Error **errp);
diff --git a/migration/cpr.c b/migration/cpr.c
index f756c15..50c130c 100644
--- a/migration/cpr.c
+++ b/migration/cpr.c
@@ -157,6 +157,13 @@ static const VMStateDescription vmstate_cpr_state = {
};
/*************************************************************************/
+static char *cpr_uri;
+
+void cpr_set_cpr_uri(const char *uri)
+{
+ cpr_uri = g_strdup(uri);
+}
+
int cpr_state_save(Error **errp)
{
int ret;
diff --git a/qemu-options.hx b/qemu-options.hx
index 595b693..a6b5253 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4776,6 +4776,14 @@ SRST
ERST
+DEF("cpr-uri", HAS_ARG, QEMU_OPTION_cpr_uri, \
+ "-cpr-uri unix:socketpath\n",
+ QEMU_ARCH_ALL)
+SRST
+``-cpr-uri unix:socketpath``
+ URI for incoming CPR state, for the cpr-transfer migration mode.
+ERST
+
DEF("incoming", HAS_ARG, QEMU_OPTION_incoming, \
"-incoming tcp:[host]:port[,to=maxport][,ipv4=on|off][,ipv6=on|off]\n" \
"-incoming rdma:host:port[,ipv4=on|off][,ipv6=on|off]\n" \
diff --git a/system/vl.c b/system/vl.c
index 6521ee3..32015ac 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -3483,6 +3483,9 @@ void qemu_init(int argc, char **argv)
exit(1);
}
break;
+ case QEMU_OPTION_cpr_uri:
+ cpr_set_cpr_uri(optarg);
+ break;
case QEMU_OPTION_incoming:
if (!incoming) {
runstate_set(RUN_STATE_INMIGRATE);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [RFC V1 6/6] migration: cpr-transfer mode
2024-06-30 19:44 [RFC V1 0/6] Live update: cpr-transfer Steve Sistare
` (4 preceding siblings ...)
2024-06-30 19:44 ` [RFC V1 5/6] migration: cpr-uri option Steve Sistare
@ 2024-06-30 19:44 ` Steve Sistare
2024-08-13 21:27 ` Steven Sistare
2024-07-18 15:36 ` [RFC V1 0/6] Live update: cpr-transfer Peter Xu
6 siblings, 1 reply; 30+ messages in thread
From: Steve Sistare @ 2024-06-30 19:44 UTC (permalink / raw)
To: qemu-devel; +Cc: Peter Xu, Fabiano Rosas, Markus Armbruster, Steve Sistare
Add the cpr-transfer migration mode. Usage:
qemu-system-$arch -machine anon-alloc=memfd ...
start new QEMU with "-incoming <uri-1> -cpr-uri <uri-2>"
Issue commands to old QEMU:
migrate_set_parameter mode cpr-transfer
migrate_set_parameter cpr-uri <uri-2>
migrate -d <uri-1>
The migrate command stops the VM, saves CPR state to uri-2, saves
normal migration state to uri-1, and old QEMU enters the postmigrate
state. The user starts new QEMU on the same host as old QEMU, with the
same arguments as old QEMU, plus the -incoming option. Guest RAM is
preserved in place, albeit with new virtual addresses in new QEMU.
This mode requires a second migration channel, specified by the
cpr-uri migration property on the outgoing side, and by the cpr-uri
QEMU command-line option on the incoming side. The channel must
be a type, such as unix socket, that supports SCM_RIGHTS.
Memory-backend objects must have the share=on attribute, but
memory-backend-epc is not supported. The VM must be started with
the '-machine anon-alloc=memfd' option, which allows anonymous
memory to be transferred in place to the new process. The memfds
are kept open by sending the descriptors to new QEMU via the
cpr-uri, which must support SCM_RIGHTS, and they are mmap'd
in new QEMU.
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
migration/cpr.c | 9 ++++++++-
migration/migration.c | 37 +++++++++++++++++++++++++++++++++++++
migration/ram.c | 1 +
migration/vmstate-types.c | 5 +++--
qapi/migration.json | 26 +++++++++++++++++++++++++-
stubs/vmstate.c | 7 +++++++
6 files changed, 81 insertions(+), 4 deletions(-)
diff --git a/migration/cpr.c b/migration/cpr.c
index 50c130c..7ac01a9 100644
--- a/migration/cpr.c
+++ b/migration/cpr.c
@@ -58,7 +58,7 @@ static const VMStateDescription vmstate_cpr_fd = {
VMSTATE_UINT32(namelen, CprFd),
VMSTATE_VBUFFER_ALLOC_UINT32(name, CprFd, 0, NULL, namelen),
VMSTATE_INT32(id, CprFd),
- VMSTATE_INT32(fd, CprFd),
+ VMSTATE_FD(fd, CprFd),
VMSTATE_END_OF_LIST()
}
};
@@ -172,6 +172,8 @@ int cpr_state_save(Error **errp)
if (mode == MIG_MODE_CPR_EXEC) {
f = cpr_exec_output(errp);
+ } else if (mode == MIG_MODE_CPR_TRANSFER) {
+ f = cpr_transfer_output(migrate_cpr_uri(), errp);
} else {
return 0;
}
@@ -209,6 +211,11 @@ int cpr_state_load(Error **errp)
*/
if (cpr_exec_has_state()) {
f = cpr_exec_input(errp);
+ if (cpr_uri) {
+ warn_report("ignoring cpr-uri option for migration mode cpr-exec");
+ }
+ } else if (cpr_uri) {
+ f = cpr_transfer_input(cpr_uri, errp);
} else {
return 0;
}
diff --git a/migration/migration.c b/migration/migration.c
index a4a020e..65a36a6 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -77,6 +77,7 @@ static NotifierWithReturnList migration_state_notifiers[MIG_MODE__MAX] = {
NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_NORMAL),
NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_CPR_REBOOT),
NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_CPR_EXEC),
+ NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_CPR_TRANSFER),
};
/* Messages sent on the return path from destination to source */
@@ -205,6 +206,12 @@ migration_channels_and_transport_compatible(MigrationAddress *addr,
return false;
}
+ if (migrate_mode() == MIG_MODE_CPR_TRANSFER &&
+ addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
+ error_setg(errp, "Migration requires streamable transport (eg unix)");
+ return false;
+ }
+
return true;
}
@@ -1697,6 +1704,7 @@ bool migrate_mode_is_cpr(MigrationState *s)
{
MigMode mode = s->parameters.mode;
return mode == MIG_MODE_CPR_REBOOT ||
+ mode == MIG_MODE_CPR_TRANSFER ||
mode == MIG_MODE_CPR_EXEC;
}
@@ -2038,6 +2046,12 @@ static bool migrate_prepare(MigrationState *s, bool resume, Error **errp)
return false;
}
+ if (migrate_mode() == MIG_MODE_CPR_TRANSFER &&
+ !s->parameters.cpr_uri) {
+ error_setg(errp, "cpr-transfer mode requires setting cpr-uri");
+ return false;
+ }
+
if (migration_is_blocked(errp)) {
return false;
}
@@ -2144,6 +2158,29 @@ void qmp_migrate(const char *uri, bool has_channels,
goto out;
}
+ /*
+ * For cpr-transfer mode, the target first reads CPR state, which cannot
+ * complete until cpr_state_save above finishes, then the target creates
+ * the migration channel and listens. We must wait for the channel to
+ * be created before connecting to it.
+ *
+ * This implementation of waiting is a hack. It restricts the channel
+ * type, and will loop forever if the target dies. It should be defined
+ * as a main-loop event that calls connect on the back end.
+ */
+ if (s->parameters.mode == MIG_MODE_CPR_TRANSFER) {
+ SocketAddress *saddr = &addr->u.socket;
+ if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET &&
+ saddr->type == SOCKET_ADDRESS_TYPE_UNIX) {
+ while (access(saddr->u.fd.str, F_OK)) {
+ usleep(1000000);
+ }
+ } else {
+ error_setg(&local_err, "cpr-transfer requires a unix channel");
+ goto out;
+ }
+ }
+
if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
SocketAddress *saddr = &addr->u.socket;
if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
diff --git a/migration/ram.c b/migration/ram.c
index 45b8f00..1e1e05e 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -219,6 +219,7 @@ bool migrate_ram_is_ignored(RAMBlock *block)
MigMode mode = migrate_mode();
return !qemu_ram_is_migratable(block) ||
mode == MIG_MODE_CPR_EXEC ||
+ mode == MIG_MODE_CPR_TRANSFER ||
(migrate_ignore_shared() && qemu_ram_is_shared(block)
&& qemu_ram_is_named_file(block));
}
diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
index 6e45a4a..618b7fb 100644
--- a/migration/vmstate-types.c
+++ b/migration/vmstate-types.c
@@ -15,6 +15,7 @@
#include "qemu-file.h"
#include "migration.h"
#include "migration/vmstate.h"
+#include "migration/client-options.h"
#include "qemu/error-report.h"
#include "qemu/queue.h"
#include "trace.h"
@@ -321,7 +322,7 @@ static int get_fd(QEMUFile *f, void *pv, size_t size,
{
int32_t *v = pv;
qemu_get_sbe32s(f, v);
- if (*v < 0) {
+ if (*v < 0 || migrate_mode() == MIG_MODE_CPR_EXEC) {
return 0;
}
*v = qemu_file_get_fd(f);
@@ -334,7 +335,7 @@ static int put_fd(QEMUFile *f, void *pv, size_t size,
int32_t *v = pv;
qemu_put_sbe32s(f, v);
- if (*v < 0) {
+ if (*v < 0 || migrate_mode() == MIG_MODE_CPR_EXEC) {
return 0;
}
return qemu_file_put_fd(f, *v);
diff --git a/qapi/migration.json b/qapi/migration.json
index df62456..cd2d949 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -626,9 +626,33 @@
# with the '-machine anon-alloc=memfd' option.
#
# (since 9.1)
+#
+# @cpr-transfer: This mode allows the user to transfer a guest to a
+# new QEMU instance on the same host with minimal guest pause
+# time, by preserving guest RAM in place, albeit with new virtual
+# addresses in new QEMU.
+#
+# The user starts new QEMU on the same host as old QEMU, with the
+# the same arguments as old QEMU, plus the -incoming option. The
+# user issues the migrate command to old QEMU, which stops the VM,
+# saves state to the migration channels, and enters the postmigrate
+# state. Execution resumes in new QEMU. Guest RAM is preserved in
+# place, albeit with new virtual addresses in new QEMU.
+#
+# This mode requires a second migration channel, specified by the
+# cpr-uri migration property on the outgoing side, and by
+# the cpr-uri QEMU command-line option on the incoming
+# side. The channel must be a type, such as unix socket, that
+# supports SCM_RIGHTS.
+#
+# Memory-backend objects must have the share=on attribute, but
+# memory-backend-epc is not supported. The VM must be started
+# with the '-machine anon-alloc=memfd' option.
+#
+# (since 9.1)
##
{ 'enum': 'MigMode',
- 'data': [ 'normal', 'cpr-reboot', 'cpr-exec' ] }
+ 'data': [ 'normal', 'cpr-reboot', 'cpr-exec', 'cpr-transfer' ] }
##
# @ZeroPageDetection:
diff --git a/stubs/vmstate.c b/stubs/vmstate.c
index 8513d92..c190762 100644
--- a/stubs/vmstate.c
+++ b/stubs/vmstate.c
@@ -1,5 +1,7 @@
#include "qemu/osdep.h"
#include "migration/vmstate.h"
+#include "qapi/qapi-types-migration.h"
+#include "migration/client-options.h"
int vmstate_register_with_alias_id(VMStateIf *obj,
uint32_t instance_id,
@@ -21,3 +23,8 @@ bool vmstate_check_only_migratable(const VMStateDescription *vmsd)
{
return true;
}
+
+MigMode migrate_mode(void)
+{
+ return MIG_MODE_NORMAL;
+}
--
1.8.3.1
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [RFC V1 6/6] migration: cpr-transfer mode
2024-06-30 19:44 ` [RFC V1 6/6] migration: cpr-transfer mode Steve Sistare
@ 2024-08-13 21:27 ` Steven Sistare
0 siblings, 0 replies; 30+ messages in thread
From: Steven Sistare @ 2024-08-13 21:27 UTC (permalink / raw)
To: qemu-devel; +Cc: Peter Xu, Fabiano Rosas, Markus Armbruster
On 6/30/2024 3:44 PM, Steve Sistare wrote:
> Add the cpr-transfer migration mode. Usage:
[...]
> @@ -2144,6 +2158,29 @@ void qmp_migrate(const char *uri, bool has_channels,
> goto out;
> }
>
> + /*
> + * For cpr-transfer mode, the target first reads CPR state, which cannot
> + * complete until cpr_state_save above finishes, then the target creates
> + * the migration channel and listens. We must wait for the channel to
> + * be created before connecting to it.
> + *
> + * This implementation of waiting is a hack. It restricts the channel
> + * type, and will loop forever if the target dies. It should be defined
> + * as a main-loop event that calls connect on the back end.
> + */
> + if (s->parameters.mode == MIG_MODE_CPR_TRANSFER) {
> + SocketAddress *saddr = &addr->u.socket;
> + if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET &&
> + saddr->type == SOCKET_ADDRESS_TYPE_UNIX) {
> + while (access(saddr->u.fd.str, F_OK)) {
> + usleep(1000000);
> + }
I now have a proper fix for this handshake in my work space.
- Steve
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [RFC V1 0/6] Live update: cpr-transfer
2024-06-30 19:44 [RFC V1 0/6] Live update: cpr-transfer Steve Sistare
` (5 preceding siblings ...)
2024-06-30 19:44 ` [RFC V1 6/6] migration: cpr-transfer mode Steve Sistare
@ 2024-07-18 15:36 ` Peter Xu
2024-07-20 20:07 ` Steven Sistare
6 siblings, 1 reply; 30+ messages in thread
From: Peter Xu @ 2024-07-18 15:36 UTC (permalink / raw)
To: Steve Sistare; +Cc: qemu-devel, Fabiano Rosas, Markus Armbruster
On Sun, Jun 30, 2024 at 12:44:02PM -0700, Steve Sistare wrote:
> What?
>
> This patch series adds the live migration cpr-transfer mode, which
> allows the user to transfer a guest to a new QEMU instance on the same
> host. It is identical to cpr-exec in most respects, except as described
> below.
I definitely prefer this one more than the exec solution, thanks for trying
this out. It's a matter of whether we'll need both, my answer would be
no..
>
> The new user-visible interfaces are:
> * cpr-transfer (MigMode migration parameter)
> * cpr-uri (migration parameter)
I wonder whether this parameter can be avoided already, maybe we can let
cpr-transfer depend on unix socket in -incoming, then integrate fd sharing
in the same channel?
> * cpr-uri (command-line argument)
>
> In this mode, the user starts new QEMU on the same host as old QEMU, with
> the same arguments as old QEMU, plus the -incoming and the -cpr-uri options.
> The user issues the migrate command to old QEMU, which stops the VM, saves
> state to the migration channels, and enters the postmigrate state. Execution
> resumes in new QEMU.
>
> This mode requires a second migration channel, specified by the cpr-uri
> migration property on the outgoing side, and by the cpr-uri QEMU command-line
> option on the incoming side. The channel must be a type, such as unix socket,
> that supports SCM_RIGHTS.
>
> This series depends on the series "Live update: cpr-exec mode".
>
> Why?
>
> cpr-transfer offers the same benefits as cpr-exec mode, but with a model
> for launching new QEMU that may be more natural for some management packages.
>
> How?
>
> The file descriptors are kept open by sending them to new QEMU via the
> cpr-uri, which must support SCM_RIGHTS.
>
> Example:
>
> In this example, we simply restart the same version of QEMU, but in
> a real scenario one would use a new QEMU binary path in terminal 2.
>
> Terminal 1: start old QEMU
> # qemu-kvm -monitor stdio -object
> memory-backend-file,id=ram0,size=4G,mem-path=/dev/shm/ram0,share=on
> -m 4G -machine anon-alloc=memfd ...
>
> Terminal 2: start new QEMU
> # qemu-kvm ... -incoming unix:vm.sock -cpr-uri unix:cpr.sock
>
> Terminal 1:
> QEMU 9.1.50 monitor - type 'help' for more information
> (qemu) info status
> VM status: running
> (qemu) migrate_set_parameter mode cpr-transfer
> (qemu) migrate_set_parameter cpr-uri unix:cpr.sock
> (qemu) migrate -d unix:vm.sock
> (qemu) info status
> VM status: paused (postmigrate)
>
> Terminal 2:
> QEMU 9.1.50 monitor - type 'help' for more information
> (qemu) info status
> VM status: running
>
> Steve Sistare (6):
> migration: SCM_RIGHTS for QEMUFile
> migration: VMSTATE_FD
> migration: cpr-transfer save and load
> migration: cpr-uri parameter
> migration: cpr-uri option
> migration: cpr-transfer mode
>
> include/migration/cpr.h | 4 ++
> include/migration/vmstate.h | 9 +++++
> migration/cpr-transfer.c | 81 +++++++++++++++++++++++++++++++++++++++++
> migration/cpr.c | 16 +++++++-
> migration/meson.build | 1 +
> migration/migration-hmp-cmds.c | 10 +++++
> migration/migration.c | 37 +++++++++++++++++++
> migration/options.c | 29 +++++++++++++++
> migration/options.h | 1 +
> migration/qemu-file.c | 83 ++++++++++++++++++++++++++++++++++++++++--
> migration/qemu-file.h | 2 +
> migration/ram.c | 1 +
> migration/trace-events | 2 +
> migration/vmstate-types.c | 33 +++++++++++++++++
> qapi/migration.json | 38 ++++++++++++++++++-
> qemu-options.hx | 8 ++++
> stubs/vmstate.c | 7 ++++
> system/vl.c | 3 ++
> 18 files changed, 359 insertions(+), 6 deletions(-)
> create mode 100644 migration/cpr-transfer.c
>
> --
> 1.8.3.1
>
--
Peter Xu
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [RFC V1 0/6] Live update: cpr-transfer
2024-07-18 15:36 ` [RFC V1 0/6] Live update: cpr-transfer Peter Xu
@ 2024-07-20 20:07 ` Steven Sistare
2024-08-15 20:28 ` Peter Xu
0 siblings, 1 reply; 30+ messages in thread
From: Steven Sistare @ 2024-07-20 20:07 UTC (permalink / raw)
To: Peter Xu; +Cc: qemu-devel, Fabiano Rosas, Markus Armbruster
On 7/18/2024 11:36 AM, Peter Xu wrote:
> On Sun, Jun 30, 2024 at 12:44:02PM -0700, Steve Sistare wrote:
>> What?
>>
>> This patch series adds the live migration cpr-transfer mode, which
>> allows the user to transfer a guest to a new QEMU instance on the same
>> host. It is identical to cpr-exec in most respects, except as described
>> below.
>
> I definitely prefer this one more than the exec solution, thanks for trying
> this out. It's a matter of whether we'll need both, my answer would be
> no..
>
>>
>> The new user-visible interfaces are:
>> * cpr-transfer (MigMode migration parameter)
>> * cpr-uri (migration parameter)
>
> I wonder whether this parameter can be avoided already, maybe we can let
> cpr-transfer depend on unix socket in -incoming, then integrate fd sharing
> in the same channel?
You saw the answer in another thread, but I repeat it here for others benefit:
"CPR state cannot be sent over the normal migration channel, because devices
and backends are created prior to reading the channel, so this mode sends
CPR state over a second migration channel that is not visible to the user.
New QEMU reads the second channel prior to creating devices or backends."
- Steve
>> * cpr-uri (command-line argument)
>>
>> In this mode, the user starts new QEMU on the same host as old QEMU, with
>> the same arguments as old QEMU, plus the -incoming and the -cpr-uri options.
>> The user issues the migrate command to old QEMU, which stops the VM, saves
>> state to the migration channels, and enters the postmigrate state. Execution
>> resumes in new QEMU.
>>
>> This mode requires a second migration channel, specified by the cpr-uri
>> migration property on the outgoing side, and by the cpr-uri QEMU command-line
>> option on the incoming side. The channel must be a type, such as unix socket,
>> that supports SCM_RIGHTS.
>>
>> This series depends on the series "Live update: cpr-exec mode".
>>
>> Why?
>>
>> cpr-transfer offers the same benefits as cpr-exec mode, but with a model
>> for launching new QEMU that may be more natural for some management packages.
>>
>> How?
>>
>> The file descriptors are kept open by sending them to new QEMU via the
>> cpr-uri, which must support SCM_RIGHTS.
>>
>> Example:
>>
>> In this example, we simply restart the same version of QEMU, but in
>> a real scenario one would use a new QEMU binary path in terminal 2.
>>
>> Terminal 1: start old QEMU
>> # qemu-kvm -monitor stdio -object
>> memory-backend-file,id=ram0,size=4G,mem-path=/dev/shm/ram0,share=on
>> -m 4G -machine anon-alloc=memfd ...
>>
>> Terminal 2: start new QEMU
>> # qemu-kvm ... -incoming unix:vm.sock -cpr-uri unix:cpr.sock
>>
>> Terminal 1:
>> QEMU 9.1.50 monitor - type 'help' for more information
>> (qemu) info status
>> VM status: running
>> (qemu) migrate_set_parameter mode cpr-transfer
>> (qemu) migrate_set_parameter cpr-uri unix:cpr.sock
>> (qemu) migrate -d unix:vm.sock
>> (qemu) info status
>> VM status: paused (postmigrate)
>>
>> Terminal 2:
>> QEMU 9.1.50 monitor - type 'help' for more information
>> (qemu) info status
>> VM status: running
>>
>> Steve Sistare (6):
>> migration: SCM_RIGHTS for QEMUFile
>> migration: VMSTATE_FD
>> migration: cpr-transfer save and load
>> migration: cpr-uri parameter
>> migration: cpr-uri option
>> migration: cpr-transfer mode
>>
>> include/migration/cpr.h | 4 ++
>> include/migration/vmstate.h | 9 +++++
>> migration/cpr-transfer.c | 81 +++++++++++++++++++++++++++++++++++++++++
>> migration/cpr.c | 16 +++++++-
>> migration/meson.build | 1 +
>> migration/migration-hmp-cmds.c | 10 +++++
>> migration/migration.c | 37 +++++++++++++++++++
>> migration/options.c | 29 +++++++++++++++
>> migration/options.h | 1 +
>> migration/qemu-file.c | 83 ++++++++++++++++++++++++++++++++++++++++--
>> migration/qemu-file.h | 2 +
>> migration/ram.c | 1 +
>> migration/trace-events | 2 +
>> migration/vmstate-types.c | 33 +++++++++++++++++
>> qapi/migration.json | 38 ++++++++++++++++++-
>> qemu-options.hx | 8 ++++
>> stubs/vmstate.c | 7 ++++
>> system/vl.c | 3 ++
>> 18 files changed, 359 insertions(+), 6 deletions(-)
>> create mode 100644 migration/cpr-transfer.c
>>
>> --
>> 1.8.3.1
>>
>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [RFC V1 0/6] Live update: cpr-transfer
2024-07-20 20:07 ` Steven Sistare
@ 2024-08-15 20:28 ` Peter Xu
2024-08-16 8:42 ` Daniel P. Berrangé
2024-08-16 15:13 ` Steven Sistare
0 siblings, 2 replies; 30+ messages in thread
From: Peter Xu @ 2024-08-15 20:28 UTC (permalink / raw)
To: Steven Sistare; +Cc: qemu-devel, Fabiano Rosas, Markus Armbruster
On Sat, Jul 20, 2024 at 04:07:50PM -0400, Steven Sistare wrote:
> > > The new user-visible interfaces are:
> > > * cpr-transfer (MigMode migration parameter)
> > > * cpr-uri (migration parameter)
> >
> > I wonder whether this parameter can be avoided already, maybe we can let
> > cpr-transfer depend on unix socket in -incoming, then integrate fd sharing
> > in the same channel?
>
> You saw the answer in another thread, but I repeat it here for others benefit:
>
> "CPR state cannot be sent over the normal migration channel, because devices
> and backends are created prior to reading the channel, so this mode sends
> CPR state over a second migration channel that is not visible to the user.
> New QEMU reads the second channel prior to creating devices or backends."
Today when looking again, I wonder about the other way round: can we make
the new parameter called "-incoming-cpr", working exactly the same as
"cpr-uri" qemu cmdline, but then after cpr is loaded it'll be automatically
be reused for migration incoming ports?
After all, cpr needs to happen already with unix sockets. Having separate
cmdline options grants user to make the other one to be non-unix, but that
doesn't seem to buy us anything.. then it seems easier to always reuse it,
and restrict cpr-transfer to only work with unix sockets for incoming too?
--
Peter Xu
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [RFC V1 0/6] Live update: cpr-transfer
2024-08-15 20:28 ` Peter Xu
@ 2024-08-16 8:42 ` Daniel P. Berrangé
2024-08-16 15:14 ` Steven Sistare
2024-08-16 15:13 ` Steven Sistare
1 sibling, 1 reply; 30+ messages in thread
From: Daniel P. Berrangé @ 2024-08-16 8:42 UTC (permalink / raw)
To: Peter Xu; +Cc: Steven Sistare, qemu-devel, Fabiano Rosas, Markus Armbruster
On Thu, Aug 15, 2024 at 04:28:59PM -0400, Peter Xu wrote:
> On Sat, Jul 20, 2024 at 04:07:50PM -0400, Steven Sistare wrote:
> > > > The new user-visible interfaces are:
> > > > * cpr-transfer (MigMode migration parameter)
> > > > * cpr-uri (migration parameter)
> > >
> > > I wonder whether this parameter can be avoided already, maybe we can let
> > > cpr-transfer depend on unix socket in -incoming, then integrate fd sharing
> > > in the same channel?
> >
> > You saw the answer in another thread, but I repeat it here for others benefit:
> >
> > "CPR state cannot be sent over the normal migration channel, because devices
> > and backends are created prior to reading the channel, so this mode sends
> > CPR state over a second migration channel that is not visible to the user.
> > New QEMU reads the second channel prior to creating devices or backends."
>
> Today when looking again, I wonder about the other way round: can we make
> the new parameter called "-incoming-cpr", working exactly the same as
> "cpr-uri" qemu cmdline, but then after cpr is loaded it'll be automatically
> be reused for migration incoming ports?
>
> After all, cpr needs to happen already with unix sockets. Having separate
> cmdline options grants user to make the other one to be non-unix, but that
> doesn't seem to buy us anything.. then it seems easier to always reuse it,
> and restrict cpr-transfer to only work with unix sockets for incoming too?
IMHO we should not be adding any new command line parameter at all,
and in fact we should actually deprecate the existing "-incoming",
except when used with "defer".
An application managing migration should be doing all the configuration
via QMP
With regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [RFC V1 0/6] Live update: cpr-transfer
2024-08-16 8:42 ` Daniel P. Berrangé
@ 2024-08-16 15:14 ` Steven Sistare
2024-08-16 16:07 ` Peter Xu
0 siblings, 1 reply; 30+ messages in thread
From: Steven Sistare @ 2024-08-16 15:14 UTC (permalink / raw)
To: Daniel P. Berrangé, Peter Xu
Cc: qemu-devel, Fabiano Rosas, Markus Armbruster
On 8/16/2024 4:42 AM, Daniel P. Berrangé wrote:
> On Thu, Aug 15, 2024 at 04:28:59PM -0400, Peter Xu wrote:
>> On Sat, Jul 20, 2024 at 04:07:50PM -0400, Steven Sistare wrote:
>>>>> The new user-visible interfaces are:
>>>>> * cpr-transfer (MigMode migration parameter)
>>>>> * cpr-uri (migration parameter)
>>>>
>>>> I wonder whether this parameter can be avoided already, maybe we can let
>>>> cpr-transfer depend on unix socket in -incoming, then integrate fd sharing
>>>> in the same channel?
>>>
>>> You saw the answer in another thread, but I repeat it here for others benefit:
>>>
>>> "CPR state cannot be sent over the normal migration channel, because devices
>>> and backends are created prior to reading the channel, so this mode sends
>>> CPR state over a second migration channel that is not visible to the user.
>>> New QEMU reads the second channel prior to creating devices or backends."
>>
>> Today when looking again, I wonder about the other way round: can we make
>> the new parameter called "-incoming-cpr", working exactly the same as
>> "cpr-uri" qemu cmdline, but then after cpr is loaded it'll be automatically
>> be reused for migration incoming ports?
>>
>> After all, cpr needs to happen already with unix sockets. Having separate
>> cmdline options grants user to make the other one to be non-unix, but that
>> doesn't seem to buy us anything.. then it seems easier to always reuse it,
>> and restrict cpr-transfer to only work with unix sockets for incoming too?
>
> IMHO we should not be adding any new command line parameter at all,
> and in fact we should actually deprecate the existing "-incoming",
> except when used with "defer".
>
> An application managing migration should be doing all the configuration
> via QMP
This is devilish hard to implement for cpr-uri, because it must be known
before any backends or devices are created. The existing preconfig phase
occurs too late.
One must define a new precreate phase which occurs before any backends or
devices are created. Requires a new -precreate option and a precreate-exit
qmp command.
Untangle catch-22 dependencies amongst properties, machine, and accelerator,
so that migration_object_init() can be called early, so that migration
commands are supported in the monitor.
Extract monitor specific options and start a monitor (and first create
monitor chardevs, an exception to the "no creation" rule).
If/when someone tackles the "all configuration via QMP" project, I would
be happy to advise, but right now a cpr-uri command-line parameter is
a sane and simple option.
- Steve
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [RFC V1 0/6] Live update: cpr-transfer
2024-08-16 15:14 ` Steven Sistare
@ 2024-08-16 16:07 ` Peter Xu
0 siblings, 0 replies; 30+ messages in thread
From: Peter Xu @ 2024-08-16 16:07 UTC (permalink / raw)
To: Steven Sistare
Cc: Daniel P. Berrangé, qemu-devel, Fabiano Rosas,
Markus Armbruster, Paolo Bonzini
On Fri, Aug 16, 2024 at 11:14:38AM -0400, Steven Sistare wrote:
> On 8/16/2024 4:42 AM, Daniel P. Berrangé wrote:
> > On Thu, Aug 15, 2024 at 04:28:59PM -0400, Peter Xu wrote:
> > > On Sat, Jul 20, 2024 at 04:07:50PM -0400, Steven Sistare wrote:
> > > > > > The new user-visible interfaces are:
> > > > > > * cpr-transfer (MigMode migration parameter)
> > > > > > * cpr-uri (migration parameter)
> > > > >
> > > > > I wonder whether this parameter can be avoided already, maybe we can let
> > > > > cpr-transfer depend on unix socket in -incoming, then integrate fd sharing
> > > > > in the same channel?
> > > >
> > > > You saw the answer in another thread, but I repeat it here for others benefit:
> > > >
> > > > "CPR state cannot be sent over the normal migration channel, because devices
> > > > and backends are created prior to reading the channel, so this mode sends
> > > > CPR state over a second migration channel that is not visible to the user.
> > > > New QEMU reads the second channel prior to creating devices or backends."
> > >
> > > Today when looking again, I wonder about the other way round: can we make
> > > the new parameter called "-incoming-cpr", working exactly the same as
> > > "cpr-uri" qemu cmdline, but then after cpr is loaded it'll be automatically
> > > be reused for migration incoming ports?
> > >
> > > After all, cpr needs to happen already with unix sockets. Having separate
> > > cmdline options grants user to make the other one to be non-unix, but that
> > > doesn't seem to buy us anything.. then it seems easier to always reuse it,
> > > and restrict cpr-transfer to only work with unix sockets for incoming too?
> >
> > IMHO we should not be adding any new command line parameter at all,
> > and in fact we should actually deprecate the existing "-incoming",
> > except when used with "defer".
> >
> > An application managing migration should be doing all the configuration
> > via QMP
>
> This is devilish hard to implement for cpr-uri, because it must be known
> before any backends or devices are created. The existing preconfig phase
> occurs too late.
>
> One must define a new precreate phase which occurs before any backends or
> devices are created. Requires a new -precreate option and a precreate-exit
> qmp command.
>
> Untangle catch-22 dependencies amongst properties, machine, and accelerator,
> so that migration_object_init() can be called early, so that migration
> commands are supported in the monitor.
>
> Extract monitor specific options and start a monitor (and first create
> monitor chardevs, an exception to the "no creation" rule).
>
> If/when someone tackles the "all configuration via QMP" project, I would
> be happy to advise, but right now a cpr-uri command-line parameter is
> a sane and simple option.
So far it looks ok to me from migration POV, but that definitely sounds
like relevant to whoever cares about this go-via-QMP-only project. Let me
at least copy Paolo as I know he's in that, maybe anyone else too just to
collect feedbacks, or just to let people know one more exception might be
needed (if no NACK will come later).
Thanks,
--
Peter Xu
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [RFC V1 0/6] Live update: cpr-transfer
2024-08-15 20:28 ` Peter Xu
2024-08-16 8:42 ` Daniel P. Berrangé
@ 2024-08-16 15:13 ` Steven Sistare
2024-08-16 15:23 ` Peter Xu
1 sibling, 1 reply; 30+ messages in thread
From: Steven Sistare @ 2024-08-16 15:13 UTC (permalink / raw)
To: Peter Xu; +Cc: qemu-devel, Fabiano Rosas, Markus Armbruster
On 8/15/2024 4:28 PM, Peter Xu wrote:
> On Sat, Jul 20, 2024 at 04:07:50PM -0400, Steven Sistare wrote:
>>>> The new user-visible interfaces are:
>>>> * cpr-transfer (MigMode migration parameter)
>>>> * cpr-uri (migration parameter)
>>>
>>> I wonder whether this parameter can be avoided already, maybe we can let
>>> cpr-transfer depend on unix socket in -incoming, then integrate fd sharing
>>> in the same channel?
>>
>> You saw the answer in another thread, but I repeat it here for others benefit:
>>
>> "CPR state cannot be sent over the normal migration channel, because devices
>> and backends are created prior to reading the channel, so this mode sends
>> CPR state over a second migration channel that is not visible to the user.
>> New QEMU reads the second channel prior to creating devices or backends."
>
> Today when looking again, I wonder about the other way round: can we make
> the new parameter called "-incoming-cpr", working exactly the same as
> "cpr-uri" qemu cmdline, but then after cpr is loaded it'll be automatically
> be reused for migration incoming ports?
>
> After all, cpr needs to happen already with unix sockets. Having separate
> cmdline options grants user to make the other one to be non-unix, but that
> doesn't seem to buy us anything.. then it seems easier to always reuse it,
> and restrict cpr-transfer to only work with unix sockets for incoming too?
This idea also occurred to me, but I dislike the loss of flexibility for
the incoming socket type. The exec URI in particular can do anything, and
we would be eliminating it.
I also think -incoming-cpr has equal or greater "specification complexity" than
-cpr-uri. They both add a new option, and the former also modifies the behavior
of an existing option (disallows it and subsumes it).
- Steve
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [RFC V1 0/6] Live update: cpr-transfer
2024-08-16 15:13 ` Steven Sistare
@ 2024-08-16 15:23 ` Peter Xu
2024-08-16 15:36 ` Daniel P. Berrangé
0 siblings, 1 reply; 30+ messages in thread
From: Peter Xu @ 2024-08-16 15:23 UTC (permalink / raw)
To: Steven Sistare; +Cc: qemu-devel, Fabiano Rosas, Markus Armbruster
On Fri, Aug 16, 2024 at 11:13:36AM -0400, Steven Sistare wrote:
> On 8/15/2024 4:28 PM, Peter Xu wrote:
> > On Sat, Jul 20, 2024 at 04:07:50PM -0400, Steven Sistare wrote:
> > > > > The new user-visible interfaces are:
> > > > > * cpr-transfer (MigMode migration parameter)
> > > > > * cpr-uri (migration parameter)
> > > >
> > > > I wonder whether this parameter can be avoided already, maybe we can let
> > > > cpr-transfer depend on unix socket in -incoming, then integrate fd sharing
> > > > in the same channel?
> > >
> > > You saw the answer in another thread, but I repeat it here for others benefit:
> > >
> > > "CPR state cannot be sent over the normal migration channel, because devices
> > > and backends are created prior to reading the channel, so this mode sends
> > > CPR state over a second migration channel that is not visible to the user.
> > > New QEMU reads the second channel prior to creating devices or backends."
> >
> > Today when looking again, I wonder about the other way round: can we make
> > the new parameter called "-incoming-cpr", working exactly the same as
> > "cpr-uri" qemu cmdline, but then after cpr is loaded it'll be automatically
> > be reused for migration incoming ports?
> >
> > After all, cpr needs to happen already with unix sockets. Having separate
> > cmdline options grants user to make the other one to be non-unix, but that
> > doesn't seem to buy us anything.. then it seems easier to always reuse it,
> > and restrict cpr-transfer to only work with unix sockets for incoming too?
>
> This idea also occurred to me, but I dislike the loss of flexibility for
> the incoming socket type. The exec URI in particular can do anything, and
> we would be eliminating it.
Ah, I would be guessing that if Juan is still around then exec URI should
already been marked deprecated and prone to removal soon.. while I tend to
agree that exec does introduce some complexity meanwhile iiuc nobody uses
that in production systems.
What's the exec use case you're picturing? Would that mostly for debugging
purpose, and would that be easily replaceable with another tunnelling like
"ncat" or so?
>
> I also think -incoming-cpr has equal or greater "specification complexity" than
> -cpr-uri. They both add a new option, and the former also modifies the behavior
> of an existing option (disallows it and subsumes it).
Please see Dan's comment elsewhere. I wonder whether we can do it without
new qemu cmdlines if that's what we're looking for for the long term. If
QMP is accessbile before cpr early loads, then we can set cpr mode and
reuse migrate-incoming with no new parameter added.
--
Peter Xu
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [RFC V1 0/6] Live update: cpr-transfer
2024-08-16 15:23 ` Peter Xu
@ 2024-08-16 15:36 ` Daniel P. Berrangé
2024-08-16 15:59 ` Peter Xu
0 siblings, 1 reply; 30+ messages in thread
From: Daniel P. Berrangé @ 2024-08-16 15:36 UTC (permalink / raw)
To: Peter Xu; +Cc: Steven Sistare, qemu-devel, Fabiano Rosas, Markus Armbruster
On Fri, Aug 16, 2024 at 11:23:01AM -0400, Peter Xu wrote:
> On Fri, Aug 16, 2024 at 11:13:36AM -0400, Steven Sistare wrote:
> > On 8/15/2024 4:28 PM, Peter Xu wrote:
> > > On Sat, Jul 20, 2024 at 04:07:50PM -0400, Steven Sistare wrote:
> > > > > > The new user-visible interfaces are:
> > > > > > * cpr-transfer (MigMode migration parameter)
> > > > > > * cpr-uri (migration parameter)
> > > > >
> > > > > I wonder whether this parameter can be avoided already, maybe we can let
> > > > > cpr-transfer depend on unix socket in -incoming, then integrate fd sharing
> > > > > in the same channel?
> > > >
> > > > You saw the answer in another thread, but I repeat it here for others benefit:
> > > >
> > > > "CPR state cannot be sent over the normal migration channel, because devices
> > > > and backends are created prior to reading the channel, so this mode sends
> > > > CPR state over a second migration channel that is not visible to the user.
> > > > New QEMU reads the second channel prior to creating devices or backends."
> > >
> > > Today when looking again, I wonder about the other way round: can we make
> > > the new parameter called "-incoming-cpr", working exactly the same as
> > > "cpr-uri" qemu cmdline, but then after cpr is loaded it'll be automatically
> > > be reused for migration incoming ports?
> > >
> > > After all, cpr needs to happen already with unix sockets. Having separate
> > > cmdline options grants user to make the other one to be non-unix, but that
> > > doesn't seem to buy us anything.. then it seems easier to always reuse it,
> > > and restrict cpr-transfer to only work with unix sockets for incoming too?
> >
> > This idea also occurred to me, but I dislike the loss of flexibility for
> > the incoming socket type. The exec URI in particular can do anything, and
> > we would be eliminating it.
>
> Ah, I would be guessing that if Juan is still around then exec URI should
> already been marked deprecated and prone to removal soon.. while I tend to
> agree that exec does introduce some complexity meanwhile iiuc nobody uses
> that in production systems.
>
> What's the exec use case you're picturing? Would that mostly for debugging
> purpose, and would that be easily replaceable with another tunnelling like
> "ncat" or so?
Conceptually "exec:" is a nice thing, but from a practical POV it
introduces difficulties for QEMU. QEMU doesn't know if the exec'd
command will provide a unidirectional channel or bidirectional
channel, so has to assume the worst - unidirectional. It also can't
know if it is safe to run the exec multiple times, or is only valid
to run it once - so afgai nhas to assume once only.
We could fix those by adding further flags in the migration address
to indicate if its bi-directional & multi-channel safe.
Technically "exec" is obsolete given "fd", but then that applies to
literally all protocols. Implementing them in QEMU is a more user
friendly thing.
Exec was more compelling when QEMU's other protocols were less
mature, lacking TLS for example, but I still find it interesting
as a facility.
With regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [RFC V1 0/6] Live update: cpr-transfer
2024-08-16 15:36 ` Daniel P. Berrangé
@ 2024-08-16 15:59 ` Peter Xu
2024-08-16 18:34 ` Steven Sistare
0 siblings, 1 reply; 30+ messages in thread
From: Peter Xu @ 2024-08-16 15:59 UTC (permalink / raw)
To: Daniel P. Berrangé
Cc: Steven Sistare, qemu-devel, Fabiano Rosas, Markus Armbruster
On Fri, Aug 16, 2024 at 04:36:58PM +0100, Daniel P. Berrangé wrote:
> On Fri, Aug 16, 2024 at 11:23:01AM -0400, Peter Xu wrote:
> > On Fri, Aug 16, 2024 at 11:13:36AM -0400, Steven Sistare wrote:
> > > On 8/15/2024 4:28 PM, Peter Xu wrote:
> > > > On Sat, Jul 20, 2024 at 04:07:50PM -0400, Steven Sistare wrote:
> > > > > > > The new user-visible interfaces are:
> > > > > > > * cpr-transfer (MigMode migration parameter)
> > > > > > > * cpr-uri (migration parameter)
> > > > > >
> > > > > > I wonder whether this parameter can be avoided already, maybe we can let
> > > > > > cpr-transfer depend on unix socket in -incoming, then integrate fd sharing
> > > > > > in the same channel?
> > > > >
> > > > > You saw the answer in another thread, but I repeat it here for others benefit:
> > > > >
> > > > > "CPR state cannot be sent over the normal migration channel, because devices
> > > > > and backends are created prior to reading the channel, so this mode sends
> > > > > CPR state over a second migration channel that is not visible to the user.
> > > > > New QEMU reads the second channel prior to creating devices or backends."
> > > >
> > > > Today when looking again, I wonder about the other way round: can we make
> > > > the new parameter called "-incoming-cpr", working exactly the same as
> > > > "cpr-uri" qemu cmdline, but then after cpr is loaded it'll be automatically
> > > > be reused for migration incoming ports?
> > > >
> > > > After all, cpr needs to happen already with unix sockets. Having separate
> > > > cmdline options grants user to make the other one to be non-unix, but that
> > > > doesn't seem to buy us anything.. then it seems easier to always reuse it,
> > > > and restrict cpr-transfer to only work with unix sockets for incoming too?
> > >
> > > This idea also occurred to me, but I dislike the loss of flexibility for
> > > the incoming socket type. The exec URI in particular can do anything, and
> > > we would be eliminating it.
> >
> > Ah, I would be guessing that if Juan is still around then exec URI should
> > already been marked deprecated and prone to removal soon.. while I tend to
> > agree that exec does introduce some complexity meanwhile iiuc nobody uses
> > that in production systems.
> >
> > What's the exec use case you're picturing? Would that mostly for debugging
> > purpose, and would that be easily replaceable with another tunnelling like
> > "ncat" or so?
>
> Conceptually "exec:" is a nice thing, but from a practical POV it
> introduces difficulties for QEMU. QEMU doesn't know if the exec'd
> command will provide a unidirectional channel or bidirectional
> channel, so has to assume the worst - unidirectional. It also can't
> know if it is safe to run the exec multiple times, or is only valid
> to run it once - so afgai nhas to assume once only.
>
> We could fix those by adding further flags in the migration address
> to indicate if its bi-directional & multi-channel safe.
>
> Technically "exec" is obsolete given "fd", but then that applies to
> literally all protocols. Implementing them in QEMU is a more user
> friendly thing.
>
> Exec was more compelling when QEMU's other protocols were less
> mature, lacking TLS for example, but I still find it interesting
> as a facility.
Right, it's an interesting idea on its own. It's just that when QEMU grows
into not only a tool anymore it adds burden on top as you discussed, in
which case we consider dropping things as wins (and we already started
doing so at least in migration, but iiuc it's not limited to migration).
Again, it looks reasonable to drop because I think it's too easy to tool-up
the same "exec:" function with ncat or similar things. E.g. kubevirt does
TLS even today without qemu's TLS, and AFAIU that's based on unix sockets
not exec, and it tunnels to the daemon for TLS encryption (which is prone
of removal, though). So even that is not leveraged as we thought.
Thanks,
--
Peter Xu
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [RFC V1 0/6] Live update: cpr-transfer
2024-08-16 15:59 ` Peter Xu
@ 2024-08-16 18:34 ` Steven Sistare
2024-08-20 16:29 ` Steven Sistare
0 siblings, 1 reply; 30+ messages in thread
From: Steven Sistare @ 2024-08-16 18:34 UTC (permalink / raw)
To: Peter Xu, Daniel P. Berrangé
Cc: qemu-devel, Fabiano Rosas, Markus Armbruster
On 8/16/2024 11:59 AM, Peter Xu wrote:
> On Fri, Aug 16, 2024 at 04:36:58PM +0100, Daniel P. Berrangé wrote:
>> On Fri, Aug 16, 2024 at 11:23:01AM -0400, Peter Xu wrote:
>>> On Fri, Aug 16, 2024 at 11:13:36AM -0400, Steven Sistare wrote:
>>>> On 8/15/2024 4:28 PM, Peter Xu wrote:
>>>>> On Sat, Jul 20, 2024 at 04:07:50PM -0400, Steven Sistare wrote:
>>>>>>>> The new user-visible interfaces are:
>>>>>>>> * cpr-transfer (MigMode migration parameter)
>>>>>>>> * cpr-uri (migration parameter)
>>>>>>>
>>>>>>> I wonder whether this parameter can be avoided already, maybe we can let
>>>>>>> cpr-transfer depend on unix socket in -incoming, then integrate fd sharing
>>>>>>> in the same channel?
>>>>>>
>>>>>> You saw the answer in another thread, but I repeat it here for others benefit:
>>>>>>
>>>>>> "CPR state cannot be sent over the normal migration channel, because devices
>>>>>> and backends are created prior to reading the channel, so this mode sends
>>>>>> CPR state over a second migration channel that is not visible to the user.
>>>>>> New QEMU reads the second channel prior to creating devices or backends."
>>>>>
>>>>> Today when looking again, I wonder about the other way round: can we make
>>>>> the new parameter called "-incoming-cpr", working exactly the same as
>>>>> "cpr-uri" qemu cmdline, but then after cpr is loaded it'll be automatically
>>>>> be reused for migration incoming ports?
>>>>>
>>>>> After all, cpr needs to happen already with unix sockets. Having separate
>>>>> cmdline options grants user to make the other one to be non-unix, but that
>>>>> doesn't seem to buy us anything.. then it seems easier to always reuse it,
>>>>> and restrict cpr-transfer to only work with unix sockets for incoming too?
>>>>
>>>> This idea also occurred to me, but I dislike the loss of flexibility for
>>>> the incoming socket type. The exec URI in particular can do anything, and
>>>> we would be eliminating it.
>>>
>>> Ah, I would be guessing that if Juan is still around then exec URI should
>>> already been marked deprecated and prone to removal soon.. while I tend to
>>> agree that exec does introduce some complexity meanwhile iiuc nobody uses
>>> that in production systems.
>>>
>>> What's the exec use case you're picturing? Would that mostly for debugging
>>> purpose, and would that be easily replaceable with another tunnelling like
>>> "ncat" or so?
>>
>> Conceptually "exec:" is a nice thing, but from a practical POV it
>> introduces difficulties for QEMU. QEMU doesn't know if the exec'd
>> command will provide a unidirectional channel or bidirectional
>> channel, so has to assume the worst - unidirectional. It also can't
>> know if it is safe to run the exec multiple times, or is only valid
>> to run it once - so afgai nhas to assume once only.
>>
>> We could fix those by adding further flags in the migration address
>> to indicate if its bi-directional & multi-channel safe.
>>
>> Technically "exec" is obsolete given "fd", but then that applies to
>> literally all protocols. Implementing them in QEMU is a more user
>> friendly thing.
>>
>> Exec was more compelling when QEMU's other protocols were less
>> mature, lacking TLS for example, but I still find it interesting
>> as a facility.
>
> Right, it's an interesting idea on its own. It's just that when QEMU grows
> into not only a tool anymore it adds burden on top as you discussed, in
> which case we consider dropping things as wins (and we already started
> doing so at least in migration, but iiuc it's not limited to migration).
>
> Again, it looks reasonable to drop because I think it's too easy to tool-up
> the same "exec:" function with ncat or similar things. E.g. kubevirt does
> TLS even today without qemu's TLS, and AFAIU that's based on unix sockets
> not exec, and it tunnels to the daemon for TLS encryption (which is prone
> of removal, though). So even that is not leveraged as we thought.
Also, the "fd" URI would not work. We could not read from it once for cpr state,
reopen it, and read again for migration state.
Nor multifd.
- Steve
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [RFC V1 0/6] Live update: cpr-transfer
2024-08-16 18:34 ` Steven Sistare
@ 2024-08-20 16:29 ` Steven Sistare
2024-09-04 21:14 ` Steven Sistare
2024-09-05 17:30 ` Peter Xu
0 siblings, 2 replies; 30+ messages in thread
From: Steven Sistare @ 2024-08-20 16:29 UTC (permalink / raw)
To: Peter Xu, Daniel P. Berrangé
Cc: qemu-devel, Fabiano Rosas, Markus Armbruster
On 8/16/2024 2:34 PM, Steven Sistare wrote:
> On 8/16/2024 11:59 AM, Peter Xu wrote:
>> On Fri, Aug 16, 2024 at 04:36:58PM +0100, Daniel P. Berrangé wrote:
>>> On Fri, Aug 16, 2024 at 11:23:01AM -0400, Peter Xu wrote:
>>>> On Fri, Aug 16, 2024 at 11:13:36AM -0400, Steven Sistare wrote:
>>>>> On 8/15/2024 4:28 PM, Peter Xu wrote:
>>>>>> On Sat, Jul 20, 2024 at 04:07:50PM -0400, Steven Sistare wrote:
>>>>>>>>> The new user-visible interfaces are:
>>>>>>>>> * cpr-transfer (MigMode migration parameter)
>>>>>>>>> * cpr-uri (migration parameter)
>>>>>>>>
>>>>>>>> I wonder whether this parameter can be avoided already, maybe we can let
>>>>>>>> cpr-transfer depend on unix socket in -incoming, then integrate fd sharing
>>>>>>>> in the same channel?
>>>>>>>
>>>>>>> You saw the answer in another thread, but I repeat it here for others benefit:
>>>>>>>
>>>>>>> "CPR state cannot be sent over the normal migration channel, because devices
>>>>>>> and backends are created prior to reading the channel, so this mode sends
>>>>>>> CPR state over a second migration channel that is not visible to the user.
>>>>>>> New QEMU reads the second channel prior to creating devices or backends."
>>>>>>
>>>>>> Today when looking again, I wonder about the other way round: can we make
>>>>>> the new parameter called "-incoming-cpr", working exactly the same as
>>>>>> "cpr-uri" qemu cmdline, but then after cpr is loaded it'll be automatically
>>>>>> be reused for migration incoming ports?
>>>>>>
>>>>>> After all, cpr needs to happen already with unix sockets. Having separate
>>>>>> cmdline options grants user to make the other one to be non-unix, but that
>>>>>> doesn't seem to buy us anything.. then it seems easier to always reuse it,
>>>>>> and restrict cpr-transfer to only work with unix sockets for incoming too?
>>>>>
>>>>> This idea also occurred to me, but I dislike the loss of flexibility for
>>>>> the incoming socket type. The exec URI in particular can do anything, and
>>>>> we would be eliminating it.
>>>>
>>>> Ah, I would be guessing that if Juan is still around then exec URI should
>>>> already been marked deprecated and prone to removal soon.. while I tend to
>>>> agree that exec does introduce some complexity meanwhile iiuc nobody uses
>>>> that in production systems.
>>>>
>>>> What's the exec use case you're picturing? Would that mostly for debugging
>>>> purpose, and would that be easily replaceable with another tunnelling like
>>>> "ncat" or so?
>>>
>>> Conceptually "exec:" is a nice thing, but from a practical POV it
>>> introduces difficulties for QEMU. QEMU doesn't know if the exec'd
>>> command will provide a unidirectional channel or bidirectional
>>> channel, so has to assume the worst - unidirectional. It also can't
>>> know if it is safe to run the exec multiple times, or is only valid
>>> to run it once - so afgai nhas to assume once only.
>>>
>>> We could fix those by adding further flags in the migration address
>>> to indicate if its bi-directional & multi-channel safe.
>>>
>>> Technically "exec" is obsolete given "fd", but then that applies to
>>> literally all protocols. Implementing them in QEMU is a more user
>>> friendly thing.
>>>
>>> Exec was more compelling when QEMU's other protocols were less
>>> mature, lacking TLS for example, but I still find it interesting
>>> as a facility.
>>
>> Right, it's an interesting idea on its own. It's just that when QEMU grows
>> into not only a tool anymore it adds burden on top as you discussed, in
>> which case we consider dropping things as wins (and we already started
>> doing so at least in migration, but iiuc it's not limited to migration).
>>
>> Again, it looks reasonable to drop because I think it's too easy to tool-up
>> the same "exec:" function with ncat or similar things. E.g. kubevirt does
>> TLS even today without qemu's TLS, and AFAIU that's based on unix sockets
>> not exec, and it tunnels to the daemon for TLS encryption (which is prone
>> of removal, though). So even that is not leveraged as we thought.
>
> Also, the "fd" URI would not work. We could not read from it once for cpr state,
> reopen it, and read again for migration state.
>
> Nor multifd.
Am I wrong?
I still go back to my original statement: -incoming-cpr has equal or greater
"specification complexity" than -cpr-uri. It is not simpler, and comes with
restrictions.
- Steve
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [RFC V1 0/6] Live update: cpr-transfer
2024-08-20 16:29 ` Steven Sistare
@ 2024-09-04 21:14 ` Steven Sistare
2024-09-04 22:09 ` Peter Xu
2024-09-05 17:30 ` Peter Xu
1 sibling, 1 reply; 30+ messages in thread
From: Steven Sistare @ 2024-09-04 21:14 UTC (permalink / raw)
To: Peter Xu, Daniel P. Berrangé
Cc: qemu-devel, Fabiano Rosas, Markus Armbruster
On 8/20/2024 12:29 PM, Steven Sistare wrote:
> On 8/16/2024 2:34 PM, Steven Sistare wrote:
>> On 8/16/2024 11:59 AM, Peter Xu wrote:
>>> On Fri, Aug 16, 2024 at 04:36:58PM +0100, Daniel P. Berrangé wrote:
>>>> On Fri, Aug 16, 2024 at 11:23:01AM -0400, Peter Xu wrote:
>>>>> On Fri, Aug 16, 2024 at 11:13:36AM -0400, Steven Sistare wrote:
>>>>>> On 8/15/2024 4:28 PM, Peter Xu wrote:
>>>>>>> On Sat, Jul 20, 2024 at 04:07:50PM -0400, Steven Sistare wrote:
>>>>>>>>>> The new user-visible interfaces are:
>>>>>>>>>> * cpr-transfer (MigMode migration parameter)
>>>>>>>>>> * cpr-uri (migration parameter)
>>>>>>>>>
>>>>>>>>> I wonder whether this parameter can be avoided already, maybe we can let
>>>>>>>>> cpr-transfer depend on unix socket in -incoming, then integrate fd sharing
>>>>>>>>> in the same channel?
>>>>>>>>
>>>>>>>> You saw the answer in another thread, but I repeat it here for others benefit:
>>>>>>>>
>>>>>>>> "CPR state cannot be sent over the normal migration channel, because devices
>>>>>>>> and backends are created prior to reading the channel, so this mode sends
>>>>>>>> CPR state over a second migration channel that is not visible to the user.
>>>>>>>> New QEMU reads the second channel prior to creating devices or backends."
>>>>>>>
>>>>>>> Today when looking again, I wonder about the other way round: can we make
>>>>>>> the new parameter called "-incoming-cpr", working exactly the same as
>>>>>>> "cpr-uri" qemu cmdline, but then after cpr is loaded it'll be automatically
>>>>>>> be reused for migration incoming ports?
>>>>>>>
>>>>>>> After all, cpr needs to happen already with unix sockets. Having separate
>>>>>>> cmdline options grants user to make the other one to be non-unix, but that
>>>>>>> doesn't seem to buy us anything.. then it seems easier to always reuse it,
>>>>>>> and restrict cpr-transfer to only work with unix sockets for incoming too?
>>>>>>
>>>>>> This idea also occurred to me, but I dislike the loss of flexibility for
>>>>>> the incoming socket type. The exec URI in particular can do anything, and
>>>>>> we would be eliminating it.
>>>>>
>>>>> Ah, I would be guessing that if Juan is still around then exec URI should
>>>>> already been marked deprecated and prone to removal soon.. while I tend to
>>>>> agree that exec does introduce some complexity meanwhile iiuc nobody uses
>>>>> that in production systems.
>>>>>
>>>>> What's the exec use case you're picturing? Would that mostly for debugging
>>>>> purpose, and would that be easily replaceable with another tunnelling like
>>>>> "ncat" or so?
>>>>
>>>> Conceptually "exec:" is a nice thing, but from a practical POV it
>>>> introduces difficulties for QEMU. QEMU doesn't know if the exec'd
>>>> command will provide a unidirectional channel or bidirectional
>>>> channel, so has to assume the worst - unidirectional. It also can't
>>>> know if it is safe to run the exec multiple times, or is only valid
>>>> to run it once - so afgai nhas to assume once only.
>>>>
>>>> We could fix those by adding further flags in the migration address
>>>> to indicate if its bi-directional & multi-channel safe.
>>>>
>>>> Technically "exec" is obsolete given "fd", but then that applies to
>>>> literally all protocols. Implementing them in QEMU is a more user
>>>> friendly thing.
>>>>
>>>> Exec was more compelling when QEMU's other protocols were less
>>>> mature, lacking TLS for example, but I still find it interesting
>>>> as a facility.
>>>
>>> Right, it's an interesting idea on its own. It's just that when QEMU grows
>>> into not only a tool anymore it adds burden on top as you discussed, in
>>> which case we consider dropping things as wins (and we already started
>>> doing so at least in migration, but iiuc it's not limited to migration).
>>>
>>> Again, it looks reasonable to drop because I think it's too easy to tool-up
>>> the same "exec:" function with ncat or similar things. E.g. kubevirt does
>>> TLS even today without qemu's TLS, and AFAIU that's based on unix sockets
>>> not exec, and it tunnels to the daemon for TLS encryption (which is prone
>>> of removal, though). So even that is not leveraged as we thought.
>>
>> Also, the "fd" URI would not work. We could not read from it once for cpr state,
>> reopen it, and read again for migration state.
>>
>> Nor multifd.
>
> Am I wrong?
>
> I still go back to my original statement: -incoming-cpr has equal or greater
> "specification complexity" than -cpr-uri. It is not simpler, and comes with
> restrictions.
Hi Peter, before I post V2 of this series, I would like to reach agreement on this
interface. I cannot tell if you have gone quiet on this thread because you agree,
disagree, or are on vacation!
- Steve
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [RFC V1 0/6] Live update: cpr-transfer
2024-09-04 21:14 ` Steven Sistare
@ 2024-09-04 22:09 ` Peter Xu
0 siblings, 0 replies; 30+ messages in thread
From: Peter Xu @ 2024-09-04 22:09 UTC (permalink / raw)
To: Steven Sistare
Cc: Daniel P. Berrangé, qemu-devel, Fabiano Rosas,
Markus Armbruster
On Wed, Sep 04, 2024 at 05:14:37PM -0400, Steven Sistare wrote:
> Hi Peter, before I post V2 of this series, I would like to reach agreement on this
> interface. I cannot tell if you have gone quiet on this thread because you agree,
> disagree, or are on vacation!
Hey Steve,
I'm not on vacation. :) But indeed I had some other stuff to look at
recently, so can be slower (than my usual slow..).
I think I finished reading this one, but I'll double check tomorrow and let
you know if there's something.
--
Peter Xu
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [RFC V1 0/6] Live update: cpr-transfer
2024-08-20 16:29 ` Steven Sistare
2024-09-04 21:14 ` Steven Sistare
@ 2024-09-05 17:30 ` Peter Xu
1 sibling, 0 replies; 30+ messages in thread
From: Peter Xu @ 2024-09-05 17:30 UTC (permalink / raw)
To: Steven Sistare
Cc: Daniel P. Berrangé, qemu-devel, Fabiano Rosas,
Markus Armbruster, Paolo Bonzini
On Tue, Aug 20, 2024 at 12:29:07PM -0400, Steven Sistare wrote:
> I still go back to my original statement: -incoming-cpr has equal or greater
> "specification complexity" than -cpr-uri. It is not simpler, and comes with
> restrictions.
I'm ok with it, as long as Dan and others are ok. I remember I should have
copied Paolo but I didn't see him in the loop, do it again.
Some context: it's about introducing yet another qemu cmdline for
cpr-transfer to specify a URI port for early stage loading of QEMU internal
stuff, which Dan raised concern on going against managing the VM using QMP
only and removing cmdline efforts.
For a generic follow up to yesterday's email: I was trying to re-read the
thread but I found that there can be a lot changes, so I figured maybe I
should wait for the new version. IOW nothing else I can think of yet to
comment on the series.
Thanks,
--
Peter Xu
^ permalink raw reply [flat|nested] 30+ messages in thread