From: Lan Tianyu <tianyu.lan@intel.com>
To: amit.shah@redhat.com, eblake@redhat.com, eddie.dong@intel.com,
nrupal.jani@intel.com, yang.z.zhang@intel.com, agraf@suse.de,
kvm@vger.kernel.org, pbonzini@redhat.com, qemu-devel@nongnu.org,
emil.s.tantilov@intel.com, ehabkost@redhat.com,
lcapitulino@redhat.com, lersek@redhat.com, mst@redhat.com,
quintela@redhat.com, rth@twiddle.net
Cc: Lan Tianyu <tianyu.lan@intel.com>
Subject: [Qemu-devel] [RFC PATCH 3/3] Qemu: Introduce pci-sriov device type to support VF live migration
Date: Thu, 22 Oct 2015 00:52:37 +0800 [thread overview]
Message-ID: <1445446357-5539-4-git-send-email-tianyu.lan@intel.com> (raw)
In-Reply-To: <1445446357-5539-1-git-send-email-tianyu.lan@intel.com>
This patch migrates VF state between the source and target machines
during live migration.
Three kinds of VF state are involved:
1) PCI configuration space registers
2) MSI-X configuration
3) VF state held in the PF driver
The PCI configuration space registers and the MSI-X configuration are
already stored in Qemu.
The VF state in the PF driver can be saved and restored via a new sysfs
node, state_in_pf, under the VF's sysfs directory.
Fake PCI configuration space registers at offsets 0xF0 and 0xF1 let the VF
driver and Qemu exchange migration status. Qemu sets register 0xF0 to 1 when
migration starts and back to 0 when migration completes. The VF driver tells
Qemu it is ready for migration by writing 1 to register 0xF1.
Qemu notifies the VF driver about migration status changes via a new sysfs
node, notify_vf, which sends a mailbox message to the VF driver.
Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
---
hw/i386/kvm/Makefile.objs | 2 +-
hw/i386/kvm/pci-assign.c | 2 +-
hw/i386/kvm/sriov.c | 213 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 215 insertions(+), 2 deletions(-)
create mode 100644 hw/i386/kvm/sriov.c
diff --git a/hw/i386/kvm/Makefile.objs b/hw/i386/kvm/Makefile.objs
index d8bce20..09324e9 100644
--- a/hw/i386/kvm/Makefile.objs
+++ b/hw/i386/kvm/Makefile.objs
@@ -1 +1 @@
-obj-y += clock.o apic.o i8259.o ioapic.o i8254.o pci-assign.o
+obj-y += clock.o apic.o i8259.o ioapic.o i8254.o pci-assign.o sriov.o
diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
index 616532d..84c5ff5 100644
--- a/hw/i386/kvm/pci-assign.c
+++ b/hw/i386/kvm/pci-assign.c
@@ -1770,7 +1770,7 @@ static void assign_class_init(ObjectClass *klass, void *data)
k->config_read = assigned_dev_pci_read_config;
k->config_write = assigned_dev_pci_write_config;
dc->props = assigned_dev_properties;
- dc->vmsd = &vmstate_assigned_device;
+// dc->vmsd = &vmstate_assigned_device;
dc->reset = reset_assigned_device;
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
dc->desc = "KVM-based PCI passthrough";
diff --git a/hw/i386/kvm/sriov.c b/hw/i386/kvm/sriov.c
new file mode 100644
index 0000000..ac37035
--- /dev/null
+++ b/hw/i386/kvm/sriov.c
@@ -0,0 +1,213 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/io.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <glob.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+
+#include "hw/hw.h"
+#include "hw/i386/pc.h"
+#include "pci-assign.h"
+
+
+/* QOM type name of the device type registered by this file. */
+#define TYPE_PCI_SRIOV "pci-sriov"
+
+/*
+ * Marker byte written first in each migration section so that
+ * assign_dev_load() can tell the sections apart:
+ * SRIOV_LM_SETUP is emitted by assign_dev_setup() and has no payload,
+ * SRIOV_LM_COMPLETE is emitted by assign_dev_save_complete() and is
+ * followed by the saved device state.
+ */
+#define SRIOV_LM_SETUP 0x01
+#define SRIOV_LM_COMPLETE 0x02
+
+/*
+ * Read the PF driver's view of this VF from the "state_in_pf" sysfs node.
+ *
+ * @pdev: assigned device; its host address selects the sysfs directory
+ * @buf:  out; set to a g_malloc()ed buffer holding the state, or to NULL
+ *        on failure.  The caller owns the buffer and must g_free() it.
+ * @len:  out; number of bytes stored in *buf (0 on failure)
+ *
+ * At most 4096 bytes are read from the node.
+ *
+ * Returns 0 on success or a negative errno value on failure.
+ */
+static int pt_save_pf_buf(struct PCIDevice *pdev, unsigned char **buf,
+                          int *len)
+{
+    enum { PF_STATE_MAX = 4096 };
+    AssignedDevice *adev = PCI_ASSIGN(pdev);
+    char file[128];
+    size_t nread;
+    FILE *f;
+    int err;
+
+    /* Keep the out-parameters well defined on every failure path. */
+    *buf = NULL;
+    *len = 0;
+
+    snprintf(file, sizeof(file),
+             "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/state_in_pf",
+             adev->host.domain, adev->host.bus, adev->host.slot,
+             adev->host.function);
+
+    f = fopen(file, "rb");
+    if (!f) {
+        /* Report the real reason for the failure instead of EEXIST. */
+        return errno ? -errno : -ENOENT;
+    }
+
+    *buf = g_malloc(PF_STATE_MAX);
+    nread = fread(*buf, 1, PF_STATE_MAX, f);
+    if (ferror(f)) {
+        /* Capture errno before fclose()/g_free() can overwrite it. */
+        err = errno ? -errno : -EIO;
+        fclose(f);
+        g_free(*buf);
+        *buf = NULL;
+        return err;
+    }
+    fclose(f);
+
+    *len = (int)nread;
+    return 0;
+}
+
+/*
+ * Write a saved PF-side state blob back to the VF's "state_in_pf" sysfs
+ * node.
+ *
+ * @pdev: assigned device; its host address selects the sysfs directory
+ * @buf:  state bytes to write
+ * @len:  number of bytes in @buf; nothing is written when it is <= 0
+ *
+ * The function has no way to return an error, so open, write and close
+ * failures are reported on stderr via perror().
+ */
+static void pt_restore_pf_buf(struct PCIDevice *pdev, unsigned char *buf, int len)
+{
+    AssignedDevice *adev = PCI_ASSIGN(pdev);
+    FILE *f;
+    char file[128];
+
+    snprintf(file, sizeof(file),
+             "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/state_in_pf",
+             adev->host.domain, adev->host.bus, adev->host.slot,
+             adev->host.function);
+
+    printf("path: %s\n", file);
+    f = fopen(file, "wb");
+    if (!f) {
+        perror(file);
+        return;
+    }
+
+    /* A negative len would otherwise be converted to a huge size_t. */
+    if (len > 0 && fwrite(buf, 1, len, f) != (size_t)len) {
+        perror(file);
+    }
+
+    /* stdio buffers the data, so a failed write may only show up here. */
+    if (fclose(f) != 0) {
+        perror(file);
+    }
+}
+
+/*
+ * post_load_state handler: write the single character "1" to the VF's
+ * "notify_vf" sysfs node.
+ *
+ * @opaque: the PCIDevice this handler was registered for
+ *
+ * Returns without doing anything further if the node cannot be opened.
+ */
+static void assign_dev_post_load(void *opaque)
+{
+    struct PCIDevice *dev = opaque;
+    AssignedDevice *assigned = PCI_ASSIGN(dev);
+    char path[128];
+    FILE *node;
+
+    snprintf(path, sizeof(path),
+             "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/notify_vf",
+             assigned->host.domain, assigned->host.bus, assigned->host.slot,
+             assigned->host.function);
+    printf("notify path %s\n", path);
+
+    node = fopen(path, "wb");
+    if (node == NULL) {
+        return;
+    }
+
+    fwrite("1", 1, 1, node);
+    fclose(node);
+}
+
+/*
+ * load_state handler: restore the state written by
+ * assign_dev_save_complete().
+ *
+ * @f:          incoming migration stream
+ * @opaque:     the PCIDevice this handler was registered for
+ * @version_id: section version (unused)
+ *
+ * A section whose first byte is not SRIOV_LM_COMPLETE is accepted and
+ * ignored.  Otherwise the section holds, in order: the PCI device state,
+ * a 32-bit MSI-X entry count, that many MSI-X table entries, a 32-bit
+ * length and that many bytes of PF-side state.
+ *
+ * Returns 0 on success, the pci_device_load() error, or -EINVAL when a
+ * count or length read from the stream is out of range.
+ */
+static int assign_dev_load(QEMUFile *f, void *opaque, int version_id)
+{
+    struct PCIDevice *pdev = (struct PCIDevice *)opaque;
+    AssignedDevice *adev = PCI_ASSIGN(pdev);
+    unsigned char *buf = NULL;
+    int ret, len, num;
+
+    if (qemu_get_byte(f) != SRIOV_LM_COMPLETE) {
+        return 0;
+    }
+
+    ret = pci_device_load(pdev, f);
+    if (ret) {
+        printf("pci config error %d\n", ret);
+        return ret;
+    }
+
+    /*
+     * The count comes from the migration stream; never copy more entries
+     * than the table holds.  The save side walks msix_max entries of
+     * msix_table, so msix_max is used as the capacity here.
+     */
+    qemu_get_sbe32s(f, &num);
+    if (num < 0 || num > adev->msix_max) {
+        printf("invalid MSI-X entry count %d\n", num);
+        return -EINVAL;
+    }
+    qemu_get_buffer(f, (unsigned char *)adev->msix_table,
+                    num * PCI_MSIX_ENTRY_SIZE);
+    assigned_dev_update_msix(pdev);
+
+    /*
+     * pt_save_pf_buf() reads at most 4096 bytes, so anything larger (or
+     * negative after the conversion to int) cannot be a valid blob.
+     */
+    len = qemu_get_be32(f);
+    if (len < 0 || len > 4096) {
+        printf("invalid pf state size %d\n", len);
+        return -EINVAL;
+    }
+    if (len) {
+        buf = g_malloc(len);
+        qemu_get_buffer(f, buf, len);
+        pt_restore_pf_buf(pdev, buf, len);
+        g_free(buf);
+    }
+
+    /* Clear the fake migration-status registers at 0xf0 and 0xf1. */
+    pci_default_write_config(pdev, 0xf0, 0x00, 1);
+    pci_default_write_config(pdev, 0xf1, 0x00, 1);
+    return 0;
+}
+
+/*
+ * save_live_complete handler: write the device state to the migration
+ * stream.
+ *
+ * @f:      outgoing migration stream
+ * @opaque: the PCIDevice this handler was registered for
+ *
+ * Stream layout: SRIOV_LM_COMPLETE marker byte, PCI device state, 32-bit
+ * MSI-X entry count, the MSI-X table entries, 32-bit length of the
+ * PF-side state and, if non-zero, the PF-side state bytes.
+ *
+ * Returns 0 on success or -EFAULT if the PF-side state cannot be read.
+ */
+static int assign_dev_save_complete(QEMUFile *f, void *opaque)
+{
+    struct PCIDevice *pdev = (struct PCIDevice *)opaque;
+    AssignedDevice *adev = PCI_ASSIGN(pdev);
+    int len, entries_nr = 0;
+    unsigned char *buf = NULL;
+    int i;
+    MSIXTableEntry *entry = adev->msix_table;
+
+    qemu_put_byte(f, SRIOV_LM_COMPLETE);
+    pci_device_save(pdev, f);
+
+    /* Count the entries that assigned_dev_msix_skipped() does not skip. */
+    for (i = 0; i < adev->msix_max; i++, entry++) {
+        if (assigned_dev_msix_skipped(entry)) {
+            continue;
+        }
+        entries_nr++;
+    }
+
+    /*
+     * NOTE(review): the first entries_nr table entries are sent, which
+     * only matches the counted entries if no skipped entry precedes a
+     * used one -- confirm this is intended.
+     */
+    qemu_put_sbe32s(f, &entries_nr);
+    qemu_put_buffer(f, (unsigned char *)adev->msix_table,
+                    entries_nr * PCI_MSIX_ENTRY_SIZE);
+
+    if (pt_save_pf_buf(pdev, &buf, &len)) {
+        g_free(buf);
+        return -EFAULT;
+    }
+
+    qemu_put_be32(f, len);
+    if (len) {
+        printf("pf state saved, size %d\n", len);
+        qemu_put_buffer(f, buf, len);
+    }
+
+    /* pt_save_pf_buf() hands ownership of the buffer to the caller. */
+    g_free(buf);
+    return 0;
+}
+
+/*
+ * save_live_setup handler: flag the start of migration to the VF driver.
+ *
+ * Sets the fake config register 0xf0 to 1, writes "1" to the VF's
+ * "notify_vf" sysfs node and then puts the SRIOV_LM_SETUP marker byte on
+ * the stream.
+ *
+ * @f:      outgoing migration stream
+ * @opaque: the PCIDevice this handler was registered for
+ *
+ * Returns 0 on success or -EFAULT if the sysfs node cannot be opened.
+ */
+static int assign_dev_setup(QEMUFile *f, void *opaque)
+{
+    struct PCIDevice *dev = opaque;
+    AssignedDevice *assigned = PCI_ASSIGN(dev);
+    char path[128];
+    FILE *node;
+
+    pci_default_write_config(dev, 0xf0, 0x01, 1);
+
+    snprintf(path, sizeof(path),
+             "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/notify_vf",
+             assigned->host.domain, assigned->host.bus, assigned->host.slot,
+             assigned->host.function);
+
+    node = fopen(path, "wb");
+    if (node == NULL) {
+        return -EFAULT;
+    }
+    fwrite("1", 1, 1, node);
+    fclose(node);
+
+    printf("notify path %s\n", path);
+    qemu_put_byte(f, SRIOV_LM_SETUP);
+    return 0;
+}
+
+/*
+ * save_live_pending handler.
+ *
+ * Returns 0 once the fake config register 0xf1 reads non-zero, and
+ * @max_size while it still reads zero.  @f is unused.
+ */
+static uint64_t assign_dev_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
+{
+    struct PCIDevice *dev = opaque;
+
+    if (pci_default_read_config(dev, 0xf1, 1)) {
+        return 0;
+    }
+    return max_size;
+}
+
+/* Migration callbacks for the pci-sriov device; save side first, then load. */
+static SaveVMHandlers savevm_pt_handlers = {
+    .save_live_setup    = assign_dev_setup,
+    .save_live_pending  = assign_dev_save_pending,
+    .save_live_complete = assign_dev_save_complete,
+    .load_state         = assign_dev_load,
+    .post_load_state    = assign_dev_post_load,
+};
+
+/*
+ * Instance initialiser: register the migration handlers above for this
+ * device, with the device itself as the opaque pointer.
+ *
+ * NOTE(review): the handlers are registered under the name "pci-assign"
+ * with the same constant arguments (1, 1) for every instance -- confirm
+ * this works when more than one such device is present.
+ */
+static void sriov_pci_instance_init(Object *obj)
+{
+    register_savevm_live(NULL, "pci-assign", 1, 1,
+                         &savevm_pt_handlers, PCI_DEVICE(obj));
+}
+
+/*
+ * Type description for "pci-sriov": a child of TYPE_PCI_ASSIGN whose only
+ * addition here is the instance_init hook.
+ */
+static const TypeInfo sriov_pci_type_info = {
+ .name = TYPE_PCI_SRIOV,
+ .parent = TYPE_PCI_ASSIGN,
+ .instance_init = sriov_pci_instance_init,
+};
+
+/* Register the pci-sriov type; passed to type_init() below. */
+static void sriov_register_types(void)
+{
+ type_register_static(&sriov_pci_type_info);
+}
+type_init(sriov_register_types)
--
1.9.3
next prev parent reply other threads:[~2015-10-21 17:04 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-10-21 16:52 [Qemu-devel] [RFC PATCH 0/3] Qemu/IXGBE: Add live migration support for SRIOV NIC Lan Tianyu
2015-10-21 16:52 ` [Qemu-devel] [RFC PATCH 1/3] Qemu: Add pci-assign.h to share functions and struct definition with new file Lan Tianyu
2015-10-21 16:52 ` [Qemu-devel] [RFC PATCH 2/3] Qemu: Add post_load_state() to run after restoring CPU state Lan Tianyu
2015-10-21 16:52 ` Lan Tianyu [this message]
2015-10-21 18:39 ` [Qemu-devel] [RFC PATCH 0/3] Qemu/IXGBE: Add live migration support for SRIOV NIC Alex Williamson
2015-10-23 3:10 ` Lan Tianyu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1445446357-5539-4-git-send-email-tianyu.lan@intel.com \
--to=tianyu.lan@intel.com \
--cc=agraf@suse.de \
--cc=amit.shah@redhat.com \
--cc=eblake@redhat.com \
--cc=eddie.dong@intel.com \
--cc=ehabkost@redhat.com \
--cc=emil.s.tantilov@intel.com \
--cc=kvm@vger.kernel.org \
--cc=lcapitulino@redhat.com \
--cc=lersek@redhat.com \
--cc=mst@redhat.com \
--cc=nrupal.jani@intel.com \
--cc=pbonzini@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=quintela@redhat.com \
--cc=rth@twiddle.net \
--cc=yang.z.zhang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).