From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
To: Yan Zhao <yan.y.zhao@intel.com>
Cc: alex.williamson@redhat.com, qemu-devel@nongnu.org,
intel-gvt-dev@lists.freedesktop.org,
Zhengxiao.zx@alibaba-inc.com, yi.l.liu@intel.com,
eskultet@redhat.com, ziye.yang@intel.com, cohuck@redhat.com,
shuangtai.tst@alibaba-inc.com, zhi.a.wang@intel.com,
mlevitsk@redhat.com, pasic@linux.ibm.com, aik@ozlabs.ru,
eauger@redhat.com, felipe@nutanix.com,
jonathan.davies@nutanix.com, changpeng.liu@intel.com,
Ken.Xue@amd.com, kwankhede@nvidia.com, kevin.tian@intel.com,
cjia@nvidia.com, arei.gonglei@huawei.com, kvm@vger.kernel.org
Subject: Re: [Qemu-devel] [PATCH 5/5] vfio/migration: support device memory capability
Date: Tue, 19 Feb 2019 11:25:43 +0000 [thread overview]
Message-ID: <20190219112542.GB2941@work-vm> (raw)
In-Reply-To: <1550566380-3788-1-git-send-email-yan.y.zhao@intel.com>
* Yan Zhao (yan.y.zhao@intel.com) wrote:
> If a device has device memory capability, save/load data from device memory
> in pre-copy and stop-and-copy phases.
>
> LOGGING state is set for device memory for dirty page logging:
> in LOGGING state, get device memory returns whole device memory snapshot;
> outside LOGGING state, get device memory returns dirty data since last get
> operation.
>
> Usually, device memory is very big, qemu needs to chunk it into several
> pieces each with size of device memory region.
>
> Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> ---
> hw/vfio/migration.c | 235 ++++++++++++++++++++++++++++++++++++++++++++++++++--
> hw/vfio/pci.h | 1 +
> 2 files changed, 231 insertions(+), 5 deletions(-)
>
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index 16d6395..f1e9309 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -203,6 +203,201 @@ static int vfio_load_data_device_config(VFIOPCIDevice *vdev,
> return 0;
> }
>
> +static int vfio_get_device_memory_size(VFIOPCIDevice *vdev)
> +{
> + VFIODevice *vbasedev = &vdev->vbasedev;
> + VFIORegion *region_ctl =
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_CTL];
> + uint64_t len;
> + int sz;
> +
> + sz = sizeof(len);
> + if (pread(vbasedev->fd, &len, sz,
> + region_ctl->fd_offset +
> + offsetof(struct vfio_device_state_ctl, device_memory.size))
> + != sz) {
> + error_report("vfio: Failed to get length of device memory");
> + return -1;
> + }
> + vdev->migration->devmem_size = len;
> + return 0;
> +}
> +
> +static int vfio_set_device_memory_size(VFIOPCIDevice *vdev, uint64_t size)
> +{
> + VFIODevice *vbasedev = &vdev->vbasedev;
> + VFIORegion *region_ctl =
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_CTL];
> + int sz;
> +
> + sz = sizeof(size);
> + if (pwrite(vbasedev->fd, &size, sz,
> + region_ctl->fd_offset +
> + offsetof(struct vfio_device_state_ctl, device_memory.size))
> + != sz) {
> + error_report("vfio: Failed to set length of device comemory");
> + return -1;
> + }
> + vdev->migration->devmem_size = size;
> + return 0;
> +}
> +
> +static
> +int vfio_save_data_device_memory_chunk(VFIOPCIDevice *vdev, QEMUFile *f,
> + uint64_t pos, uint64_t len)
> +{
> + VFIODevice *vbasedev = &vdev->vbasedev;
> + VFIORegion *region_ctl =
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_CTL];
> + VFIORegion *region_devmem =
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_DATA_DEVICE_MEMORY];
> + void *dest;
> + uint32_t sz;
> + uint8_t *buf = NULL;
> + uint32_t action = VFIO_DEVICE_DATA_ACTION_GET_BUFFER;
> +
> + if (len > region_devmem->size) {
> + return -1;
> + }
> +
> + sz = sizeof(pos);
> + if (pwrite(vbasedev->fd, &pos, sz,
> + region_ctl->fd_offset +
> + offsetof(struct vfio_device_state_ctl, device_memory.pos))
> + != sz) {
> + error_report("vfio: Failed to set save buffer pos");
> + return -1;
> + }
> + sz = sizeof(action);
> + if (pwrite(vbasedev->fd, &action, sz,
> + region_ctl->fd_offset +
> + offsetof(struct vfio_device_state_ctl, device_memory.action))
> + != sz) {
> + error_report("vfio: Failed to set save buffer action");
> + return -1;
> + }
> +
> + if (!vfio_device_state_region_mmaped(region_devmem)) {
> + buf = g_malloc(len);
> + if (buf == NULL) {
> + error_report("vfio: Failed to allocate memory for migrate");
> + return -1;
> + }
> + if (pread(vbasedev->fd, buf, len, region_devmem->fd_offset) != len) {
> + error_report("vfio: error load device memory buffer");
This error path forgets to g_free(buf), so the buffer is leaked.
> + return -1;
> + }
> + qemu_put_be64(f, len);
> + qemu_put_be64(f, pos);
> + qemu_put_buffer(f, buf, len);
> + g_free(buf);
> + } else {
> + dest = region_devmem->mmaps[0].mmap;
> + qemu_put_be64(f, len);
> + qemu_put_be64(f, pos);
> + qemu_put_buffer(f, dest, len);
> + }
> + return 0;
> +}
> +
> +static int vfio_save_data_device_memory(VFIOPCIDevice *vdev, QEMUFile *f)
> +{
> + VFIORegion *region_devmem =
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_DATA_DEVICE_MEMORY];
> + uint64_t total_len = vdev->migration->devmem_size;
> + uint64_t pos = 0;
> +
> + qemu_put_be64(f, total_len);
> + while (pos < total_len) {
> + uint64_t len = region_devmem->size;
> +
> + if (pos + len >= total_len) {
> + len = total_len - pos;
> + }
> + if (vfio_save_data_device_memory_chunk(vdev, f, pos, len)) {
> + return -1;
> + }
> + }
> +
> + return 0;
> +}
> +
> +static
> +int vfio_load_data_device_memory_chunk(VFIOPCIDevice *vdev, QEMUFile *f,
> + uint64_t pos, uint64_t len)
> +{
> + VFIODevice *vbasedev = &vdev->vbasedev;
> + VFIORegion *region_ctl =
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_CTL];
> + VFIORegion *region_devmem =
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_DATA_DEVICE_MEMORY];
> +
> + void *dest;
> + uint32_t sz;
> + uint8_t *buf = NULL;
> + uint32_t action = VFIO_DEVICE_DATA_ACTION_SET_BUFFER;
> +
> + if (len > region_devmem->size) {
> + return -1;
> + }
> +
> + sz = sizeof(pos);
> + if (pwrite(vbasedev->fd, &pos, sz,
> + region_ctl->fd_offset +
> + offsetof(struct vfio_device_state_ctl, device_memory.pos))
> + != sz) {
> + error_report("vfio: Failed to set device memory buffer pos");
> + return -1;
> + }
> + if (!vfio_device_state_region_mmaped(region_devmem)) {
> + buf = g_malloc(len);
> + if (buf == NULL) {
> + error_report("vfio: Failed to allocate memory for migrate");
> + return -1;
> + }
> + qemu_get_buffer(f, buf, len);
> + if (pwrite(vbasedev->fd, buf, len,
> + region_devmem->fd_offset) != len) {
> + error_report("vfio: Failed to load devie memory buffer");
Again, this error path fails to free buf.
> + return -1;
> + }
> + g_free(buf);
> + } else {
> + dest = region_devmem->mmaps[0].mmap;
> + qemu_get_buffer(f, dest, len);
> + }
You might want to use qemu_file_get_error(f) before writing the data
to the device, to check for the case of a read error on the migration
stream that happened somewhere in the previous qemu_get's
> + sz = sizeof(action);
> + if (pwrite(vbasedev->fd, &action, sz,
> + region_ctl->fd_offset +
> + offsetof(struct vfio_device_state_ctl, device_memory.action))
> + != sz) {
> + error_report("vfio: Failed to set load device memory buffer action");
> + return -1;
> + }
> +
> + return 0;
> +
> +}
> +
> +static int vfio_load_data_device_memory(VFIOPCIDevice *vdev,
> + QEMUFile *f, uint64_t total_len)
> +{
> + uint64_t pos = 0, len = 0;
> +
> + vfio_set_device_memory_size(vdev, total_len);
> +
> + while (pos + len < total_len) {
> + len = qemu_get_be64(f);
> + pos = qemu_get_be64(f);
Please check len/pos - always assume that the migration stream could
be (maliciously or accidentally) corrupt.
> + vfio_load_data_device_memory_chunk(vdev, f, pos, len);
> + }
> +
> + return 0;
> +}
> +
> +
> static int vfio_set_dirty_page_bitmap_chunk(VFIOPCIDevice *vdev,
> uint64_t start_addr, uint64_t page_nr)
> {
> @@ -377,6 +572,10 @@ static void vfio_save_live_pending(QEMUFile *f, void *opaque,
> return;
> }
>
> + /* get dirty data size of device memory */
> + vfio_get_device_memory_size(vdev);
> +
> + *res_precopy_only += vdev->migration->devmem_size;
> return;
> }
>
> @@ -388,7 +587,9 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque)
> return 0;
> }
>
> - return 0;
> + qemu_put_byte(f, VFIO_SAVE_FLAG_DEVMEMORY);
> + /* get dirty data of device memory */
> + return vfio_save_data_device_memory(vdev, f);
> }
>
> static void vfio_pci_load_config(VFIOPCIDevice *vdev, QEMUFile *f)
> @@ -458,6 +659,10 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
> len = qemu_get_be64(f);
> vfio_load_data_device_config(vdev, f, len);
> break;
> + case VFIO_SAVE_FLAG_DEVMEMORY:
> + len = qemu_get_be64(f);
> + vfio_load_data_device_memory(vdev, f, len);
> + break;
> default:
> ret = -EINVAL;
> }
> @@ -503,6 +708,13 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
> VFIOPCIDevice *vdev = opaque;
> int rc = 0;
>
> + if (vfio_device_data_cap_device_memory(vdev)) {
> + qemu_put_byte(f, VFIO_SAVE_FLAG_DEVMEMORY | VFIO_SAVE_FLAG_CONTINUE);
> + /* get dirty data of device memory */
> + vfio_get_device_memory_size(vdev);
> + rc = vfio_save_data_device_memory(vdev, f);
> + }
> +
> qemu_put_byte(f, VFIO_SAVE_FLAG_PCI | VFIO_SAVE_FLAG_CONTINUE);
> vfio_pci_save_config(vdev, f);
>
> @@ -515,12 +727,22 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
>
> static int vfio_save_setup(QEMUFile *f, void *opaque)
> {
> + int rc = 0;
> VFIOPCIDevice *vdev = opaque;
> - qemu_put_byte(f, VFIO_SAVE_FLAG_SETUP);
> +
> + if (vfio_device_data_cap_device_memory(vdev)) {
> + qemu_put_byte(f, VFIO_SAVE_FLAG_SETUP | VFIO_SAVE_FLAG_CONTINUE);
> + qemu_put_byte(f, VFIO_SAVE_FLAG_DEVMEMORY);
> + /* get whole snapshot of device memory */
> + vfio_get_device_memory_size(vdev);
> + rc = vfio_save_data_device_memory(vdev, f);
> + } else {
> + qemu_put_byte(f, VFIO_SAVE_FLAG_SETUP);
> + }
>
> vfio_set_device_state(vdev, VFIO_DEVICE_STATE_RUNNING |
> VFIO_DEVICE_STATE_LOGGING);
> - return 0;
> + return rc;
> }
>
> static int vfio_load_setup(QEMUFile *f, void *opaque)
> @@ -576,8 +798,11 @@ int vfio_migration_init(VFIOPCIDevice *vdev, Error **errp)
> goto error;
> }
>
> - if (vfio_device_data_cap_device_memory(vdev)) {
> - error_report("No suppport of data cap device memory Yet");
> + if (vfio_device_data_cap_device_memory(vdev) &&
> + vfio_device_state_region_setup(vdev,
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_DATA_DEVICE_MEMORY],
> + VFIO_REGION_SUBTYPE_DEVICE_STATE_DATA_MEMORY,
> + "device-state-data-device-memory")) {
> goto error;
> }
>
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index 4b7b1bb..a2cc64b 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -69,6 +69,7 @@ typedef struct VFIOMigration {
> uint32_t data_caps;
> uint32_t device_state;
> uint64_t devconfig_size;
> + uint64_t devmem_size;
> VMChangeStateEntry *vm_state;
> } VFIOMigration;
>
> --
> 2.7.4
>
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
next prev parent reply other threads:[~2019-02-19 11:26 UTC|newest]
Thread overview: 56+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-02-19 8:50 [Qemu-devel] [PATCH 0/5] QEMU VFIO live migration Yan Zhao
2019-02-19 8:52 ` [Qemu-devel] [PATCH 1/5] vfio/migration: define kernel interfaces Yan Zhao
2019-02-19 13:09 ` Cornelia Huck
2019-02-20 7:36 ` Zhao Yan
2019-02-20 17:08 ` Cornelia Huck
2019-02-21 1:47 ` Zhao Yan
2019-02-19 8:52 ` [Qemu-devel] [PATCH 2/5] vfio/migration: support device of device config capability Yan Zhao
2019-02-19 11:01 ` Dr. David Alan Gilbert
2019-02-20 5:12 ` Zhao Yan
2019-02-20 10:57 ` Dr. David Alan Gilbert
2019-02-19 14:37 ` Cornelia Huck
2019-02-20 22:54 ` Zhao Yan
2019-02-21 10:56 ` Cornelia Huck
2019-02-19 8:52 ` [Qemu-devel] [PATCH 3/5] vfio/migration: tracking of dirty page in system memory Yan Zhao
2019-02-19 8:52 ` [Qemu-devel] [PATCH 4/5] vfio/migration: turn on migration Yan Zhao
2019-02-19 8:53 ` [Qemu-devel] [PATCH 5/5] vfio/migration: support device memory capability Yan Zhao
2019-02-19 11:25 ` Dr. David Alan Gilbert [this message]
2019-02-20 5:17 ` Zhao Yan
2019-02-19 14:42 ` Christophe de Dinechin
2019-02-20 7:58 ` Zhao Yan
2019-02-20 10:14 ` Christophe de Dinechin
2019-02-21 0:07 ` Zhao Yan
2019-02-19 11:32 ` [Qemu-devel] [PATCH 0/5] QEMU VFIO live migration Dr. David Alan Gilbert
2019-02-20 5:28 ` Zhao Yan
2019-02-20 11:01 ` Dr. David Alan Gilbert
2019-02-20 11:28 ` Gonglei (Arei)
2019-02-20 11:42 ` Cornelia Huck
2019-02-20 12:07 ` Gonglei (Arei)
[not found] ` <20190327063509.GD14681@joy-OptiPlex-7040>
[not found] ` <20190327201854.GG2636@work-vm>
[not found] ` <20190327161020.1c013e65@x1.home>
2019-04-01 8:14 ` Cornelia Huck
2019-04-01 8:40 ` Yan Zhao
2019-04-01 14:15 ` Alex Williamson
2019-02-21 0:31 ` Zhao Yan
2019-02-21 9:15 ` Dr. David Alan Gilbert
2019-02-20 11:56 ` Gonglei (Arei)
2019-02-21 0:24 ` Zhao Yan
2019-02-21 1:35 ` Gonglei (Arei)
2019-02-21 1:58 ` Zhao Yan
2019-02-21 3:33 ` Gonglei (Arei)
2019-02-21 4:08 ` Zhao Yan
2019-02-21 5:46 ` Gonglei (Arei)
2019-02-21 2:04 ` Zhao Yan
2019-02-21 3:16 ` Gonglei (Arei)
2019-02-21 4:21 ` Zhao Yan
2019-02-21 5:56 ` Gonglei (Arei)
2019-02-21 20:40 ` Alex Williamson
2019-02-25 2:22 ` Zhao Yan
2019-03-06 0:22 ` Zhao Yan
2019-03-07 17:44 ` Alex Williamson
2019-03-07 23:20 ` Tian, Kevin
2019-03-08 16:11 ` Alex Williamson
2019-03-08 16:21 ` Dr. David Alan Gilbert
2019-03-08 22:02 ` Alex Williamson
2019-03-11 2:33 ` Tian, Kevin
2019-03-11 20:19 ` Alex Williamson
2019-03-12 2:48 ` Tian, Kevin
2019-03-12 2:57 ` Zhao Yan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190219112542.GB2941@work-vm \
--to=dgilbert@redhat.com \
--cc=Ken.Xue@amd.com \
--cc=Zhengxiao.zx@alibaba-inc.com \
--cc=aik@ozlabs.ru \
--cc=alex.williamson@redhat.com \
--cc=arei.gonglei@huawei.com \
--cc=changpeng.liu@intel.com \
--cc=cjia@nvidia.com \
--cc=cohuck@redhat.com \
--cc=eauger@redhat.com \
--cc=eskultet@redhat.com \
--cc=felipe@nutanix.com \
--cc=intel-gvt-dev@lists.freedesktop.org \
--cc=jonathan.davies@nutanix.com \
--cc=kevin.tian@intel.com \
--cc=kvm@vger.kernel.org \
--cc=kwankhede@nvidia.com \
--cc=mlevitsk@redhat.com \
--cc=pasic@linux.ibm.com \
--cc=qemu-devel@nongnu.org \
--cc=shuangtai.tst@alibaba-inc.com \
--cc=yan.y.zhao@intel.com \
--cc=yi.l.liu@intel.com \
--cc=zhi.a.wang@intel.com \
--cc=ziye.yang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).