* [PATCH 4/4] kvm: qemu: fix hot remove device
@ 2009-02-10 12:40 Han, Weidong
2009-02-11 18:59 ` Mark McLoughlin
0 siblings, 1 reply; 3+ messages in thread
From: Han, Weidong @ 2009-02-10 12:40 UTC (permalink / raw)
To: 'Avi Kivity'; +Cc: 'kvm@vger.kernel.org'
[-- Attachment #1: Type: text/plain, Size: 11034 bytes --]
when hot remove a device with iommu, should deassign it from guest,
and free it from qemu.
assign_dev_update_irqs may not be invoked when hot add a device,
so need to assign irq after assign device in init_assigned_device.
Signed-off-by: Weidong Han <weidong.han@intel.com>
---
qemu/hw/device-assignment.c | 187 ++++++++++++++++++++++++++++++-------------
qemu/hw/device-assignment.h | 3 +-
qemu/hw/device-hotplug.c | 18 ++++-
3 files changed, 151 insertions(+), 57 deletions(-)
diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
index e6d2352..6362798 100644
--- a/qemu/hw/device-assignment.c
+++ b/qemu/hw/device-assignment.c
@@ -443,7 +443,7 @@ again:
static LIST_HEAD(, AssignedDevInfo) adev_head;
-void free_assigned_device(AssignedDevInfo *adev)
+static void free_assigned_device(AssignedDevInfo *adev)
{
AssignedDevice *dev = adev->assigned_dev;
@@ -487,6 +487,116 @@ static uint32_t calc_assigned_dev_id(uint8_t bus, uint8_t devfn)
return (uint32_t)bus << 8 | (uint32_t)devfn;
}
+static int assign_device(AssignedDevInfo *adev)
+{
+ struct kvm_assigned_pci_dev assigned_dev_data;
+ AssignedDevice *dev = adev->assigned_dev;
+ int r;
+
+ memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
+ assigned_dev_data.assigned_dev_id =
+ calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
+ assigned_dev_data.busnr = dev->h_busnr;
+ assigned_dev_data.devfn = dev->h_devfn;
+
+#ifdef KVM_CAP_IOMMU
+ /* We always enable the IOMMU if present
+ * (or when not disabled on the command line)
+ */
+ r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU);
+ if (r && !adev->disable_iommu)
+ assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
+#endif
+
+ r = kvm_assign_pci_device(kvm_context, &assigned_dev_data);
+ if (r < 0)
+ fprintf(stderr, "Failed to assign device \"%s\" : %s\n",
+ adev->name, strerror(-r));
+ return r;
+}
+
+static void deassign_device(AssignedDevInfo *adev)
+{
+ struct kvm_assigned_pci_dev assigned_dev_data;
+ AssignedDevice *dev = adev->assigned_dev;
+ int r;
+
+ memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
+ assigned_dev_data.assigned_dev_id =
+ calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
+ assigned_dev_data.busnr = dev->h_busnr;
+ assigned_dev_data.devfn = dev->h_devfn;
+
+#ifdef KVM_CAP_IOMMU
+ /* We always enable the IOMMU if present
+ * (or when not disabled on the command line)
+ */
+ r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU);
+ if (r && !adev->disable_iommu)
+ assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
+#endif
+
+ if (kvm_deassign_pci_device(kvm_context, &assigned_dev_data))
+ fprintf(stderr, "Could not notify kernel about "
+ "deassigned device (%x:%x.%x)\n",
+ dev->h_busnr, PCI_SLOT(dev->h_devfn), PCI_FUNC(dev->h_devfn));
+}
+
+static int assign_irq(AssignedDevInfo *adev)
+{
+ struct kvm_assigned_irq assigned_irq_data;
+ AssignedDevice *dev = adev->assigned_dev;
+ int irq, r = 0;
+
+ irq = pci_map_irq(&dev->dev, dev->intpin);
+ irq = piix_get_irq(irq);
+
+#ifdef TARGET_IA64
+ irq = ipf_map_irq(&dev->dev, irq);
+#endif
+
+ if (dev->girq == irq)
+ return r;
+
+ memset(&assigned_irq_data, 0, sizeof(assigned_irq_data));
+ assigned_irq_data.assigned_dev_id =
+ calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
+ assigned_irq_data.guest_irq = irq;
+ assigned_irq_data.host_irq = dev->real_device.irq;
+ r = kvm_assign_irq(kvm_context, &assigned_irq_data);
+ if (r < 0) {
+ fprintf(stderr, "Failed to assign irq for \"%s\": %s\n",
+ adev->name, strerror(-r));
+ fprintf(stderr, "Perhaps you are assigning a device "
+ "that shares an IRQ with another device?\n");
+ return r;
+ }
+
+ dev->girq = irq;
+ return r;
+}
+
+void remove_assigned_device(AssignedDevInfo *adev)
+{
+ deassign_device(adev);
+ free_assigned_device(adev);
+}
+
+AssignedDevInfo *get_assigned_device(int pcibus, int slot)
+{
+ AssignedDevice *assigned_dev = NULL;
+ AssignedDevInfo *adev = NULL;
+
+ LIST_FOREACH(adev, &adev_head, next) {
+ assigned_dev = adev->assigned_dev;
+ if (pci_bus_num(assigned_dev->dev.bus) == pcibus &&
+ PCI_SLOT(assigned_dev->dev.devfn) == slot)
+ return adev;
+ }
+
+ return NULL;
+}
+
/* The pci config space got updated. Check if irq numbers have changed
* for our devices
*/
@@ -496,38 +606,12 @@ void assigned_dev_update_irqs()
adev = LIST_FIRST(&adev_head);
while (adev) {
- AssignedDevInfo *next = LIST_NEXT(adev, next);
- AssignedDevice *assigned_dev = adev->assigned_dev;
- int irq, r;
+ int r;
+ struct AssignedDevInfo *next = LIST_NEXT(adev, next);
- irq = pci_map_irq(&assigned_dev->dev, assigned_dev->intpin);
- irq = piix_get_irq(irq);
-
-#ifdef TARGET_IA64
- irq = ipf_map_irq(&assigned_dev->dev, irq);
-#endif
-
- if (irq != assigned_dev->girq) {
- struct kvm_assigned_irq assigned_irq_data;
-
- memset(&assigned_irq_data, 0, sizeof(assigned_irq_data));
- assigned_irq_data.assigned_dev_id =
- calc_assigned_dev_id(assigned_dev->h_busnr,
- (uint8_t) assigned_dev->h_devfn);
- assigned_irq_data.guest_irq = irq;
- assigned_irq_data.host_irq = assigned_dev->real_device.irq;
- r = kvm_assign_irq(kvm_context, &assigned_irq_data);
- if (r < 0) {
- fprintf(stderr, "Failed to assign irq for \"%s\": %s\n",
- adev->name, strerror(-r));
- fprintf(stderr, "Perhaps you are assigning a device "
- "that shares an IRQ with another device?\n");
- free_assigned_device(adev);
- adev = next;
- continue;
- }
- assigned_dev->girq = irq;
- }
+ r = assign_irq(adev);
+ if (r < 0)
+ remove_assigned_device(adev);
adev = next;
}
@@ -538,7 +622,6 @@ struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus)
int r;
AssignedDevice *dev;
uint8_t e_device, e_intx;
- struct kvm_assigned_pci_dev assigned_dev_data;
DEBUG("Registering real physical device %s (bus=%x dev=%x func=%x)\n",
adev->name, adev->bus, adev->dev, adev->func);
@@ -558,14 +641,14 @@ struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus)
if (get_real_device(dev, adev->bus, adev->dev, adev->func)) {
fprintf(stderr, "%s: Error: Couldn't get real device (%s)!\n",
__func__, adev->name);
- return NULL;
+ goto out;
}
/* handle real device's MMIO/PIO BARs */
if (assigned_dev_register_regions(dev->real_device.regions,
dev->real_device.region_number,
dev))
- return NULL;
+ goto out;
/* handle interrupt routing */
e_device = (dev->dev.devfn >> 3) & 0x1f;
@@ -576,29 +659,23 @@ struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus)
dev->h_busnr = adev->bus;
dev->h_devfn = PCI_DEVFN(adev->dev, adev->func);
- memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
- assigned_dev_data.assigned_dev_id =
- calc_assigned_dev_id(dev->h_busnr, (uint32_t)dev->h_devfn);
- assigned_dev_data.busnr = dev->h_busnr;
- assigned_dev_data.devfn = dev->h_devfn;
-
-#ifdef KVM_CAP_IOMMU
- /* We always enable the IOMMU if present
- * (or when not disabled on the command line)
- */
- r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU);
- if (r && !adev->disable_iommu)
- assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
-#endif
+ /* assign device */
+ r = assign_device(adev);
+ if (r < 0)
+ goto assigned_out;
- r = kvm_assign_pci_device(kvm_context, &assigned_dev_data);
- if (r < 0) {
- fprintf(stderr, "Failed to assign device \"%s\" : %s\n",
- adev->name, strerror(-r));
- return NULL;
- }
+ /* assign irq for the device */
+ r = assign_irq(adev);
+ if (r < 0)
+ goto assigned_out;
return &dev->dev;
+
+assigned_out:
+ deassign_device(adev);
+out:
+ free_assigned_device(adev);
+ return NULL;
}
/*
diff --git a/qemu/hw/device-assignment.h b/qemu/hw/device-assignment.h
index f216bb0..da775d7 100644
--- a/qemu/hw/device-assignment.h
+++ b/qemu/hw/device-assignment.h
@@ -94,10 +94,11 @@ struct AssignedDevInfo {
int disable_iommu;
};
-void free_assigned_device(AssignedDevInfo *adev);
PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus);
AssignedDevInfo *add_assigned_device(const char *arg);
void add_assigned_devices(PCIBus *bus, const char **devices, int n_devices);
+void remove_assigned_device(AssignedDevInfo *adev);
+AssignedDevInfo *get_assigned_device(int pcibus, int slot);
ram_addr_t assigned_dev_load_option_roms(ram_addr_t rom_base_offset);
void assigned_dev_update_irqs(void);
diff --git a/qemu/hw/device-hotplug.c b/qemu/hw/device-hotplug.c
index d8f0fcc..9b213ad 100644
--- a/qemu/hw/device-hotplug.c
+++ b/qemu/hw/device-hotplug.c
@@ -51,7 +51,6 @@ static PCIDevice *qemu_system_hot_assign_device(const char *opts, int bus_nr)
ret = init_assigned_device(adev, pci_bus);
if (ret == NULL) {
term_printf("Failed to assign device\n");
- free_assigned_device(adev);
return NULL;
}
@@ -64,6 +63,14 @@ static PCIDevice *qemu_system_hot_assign_device(const char *opts, int bus_nr)
return ret;
}
+static void qemu_system_hot_deassign_device(AssignedDevInfo *adev)
+{
+ remove_assigned_device(adev);
+
+ term_printf("Unregistered host PCI device %02x:%02x.%1x "
+ "(\"%s\") from guest\n",
+ adev->bus, adev->dev, adev->func, adev->name);
+}
#endif /* USE_KVM_DEVICE_ASSIGNMENT */
static int add_init_drive(const char *opts)
@@ -241,12 +248,21 @@ void device_hot_remove_success(int pcibus, int slot)
{
PCIDevice *d = pci_find_device(pcibus, slot);
int class_code;
+ AssignedDevInfo *adev;
if (!d) {
term_printf("invalid slot %d\n", slot);
return;
}
+#ifdef USE_KVM_DEVICE_ASSIGNMENT
+ adev = get_assigned_device(pcibus, slot);
+ if (adev) {
+ qemu_system_hot_deassign_device(adev);
+ return;
+ }
+#endif /* USE_KVM_DEVICE_ASSIGNMENT */
+
class_code = d->config_read(d, PCI_CLASS_DEVICE+1, 1);
pci_unregister_device(d);
--
1.6.0.4
[-- Attachment #2: 0004-kvm-qemu-fix-hot-remove-device.patch --]
[-- Type: application/octet-stream, Size: 10913 bytes --]
From d75c6bbf025c3b4c3939cc5601f01facb70d1329 Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Tue, 10 Feb 2009 17:25:25 +0800
Subject: [PATCH] kvm: qemu: fix hot remove device
when hot remove a device with iommu, should deassign it from guest,
and free it from qemu.
assign_dev_update_irqs may not be invoked when hot add a device,
so need to assign irq after assign device in init_assigned_device.
Signed-off-by: Weidong Han <weidong.han@intel.com>
---
qemu/hw/device-assignment.c | 187 ++++++++++++++++++++++++++++++-------------
qemu/hw/device-assignment.h | 3 +-
qemu/hw/device-hotplug.c | 18 ++++-
3 files changed, 151 insertions(+), 57 deletions(-)
diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
index e6d2352..6362798 100644
--- a/qemu/hw/device-assignment.c
+++ b/qemu/hw/device-assignment.c
@@ -443,7 +443,7 @@ again:
static LIST_HEAD(, AssignedDevInfo) adev_head;
-void free_assigned_device(AssignedDevInfo *adev)
+static void free_assigned_device(AssignedDevInfo *adev)
{
AssignedDevice *dev = adev->assigned_dev;
@@ -487,6 +487,116 @@ static uint32_t calc_assigned_dev_id(uint8_t bus, uint8_t devfn)
return (uint32_t)bus << 8 | (uint32_t)devfn;
}
+static int assign_device(AssignedDevInfo *adev)
+{
+ struct kvm_assigned_pci_dev assigned_dev_data;
+ AssignedDevice *dev = adev->assigned_dev;
+ int r;
+
+ memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
+ assigned_dev_data.assigned_dev_id =
+ calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
+ assigned_dev_data.busnr = dev->h_busnr;
+ assigned_dev_data.devfn = dev->h_devfn;
+
+#ifdef KVM_CAP_IOMMU
+ /* We always enable the IOMMU if present
+ * (or when not disabled on the command line)
+ */
+ r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU);
+ if (r && !adev->disable_iommu)
+ assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
+#endif
+
+ r = kvm_assign_pci_device(kvm_context, &assigned_dev_data);
+ if (r < 0)
+ fprintf(stderr, "Failed to assign device \"%s\" : %s\n",
+ adev->name, strerror(-r));
+ return r;
+}
+
+static void deassign_device(AssignedDevInfo *adev)
+{
+ struct kvm_assigned_pci_dev assigned_dev_data;
+ AssignedDevice *dev = adev->assigned_dev;
+ int r;
+
+ memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
+ assigned_dev_data.assigned_dev_id =
+ calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
+ assigned_dev_data.busnr = dev->h_busnr;
+ assigned_dev_data.devfn = dev->h_devfn;
+
+#ifdef KVM_CAP_IOMMU
+ /* We always enable the IOMMU if present
+ * (or when not disabled on the command line)
+ */
+ r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU);
+ if (r && !adev->disable_iommu)
+ assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
+#endif
+
+ if (kvm_deassign_pci_device(kvm_context, &assigned_dev_data))
+ fprintf(stderr, "Could not notify kernel about "
+ "deassigned device (%x:%x.%x)\n",
+ dev->h_busnr, PCI_SLOT(dev->h_devfn), PCI_FUNC(dev->h_devfn));
+}
+
+static int assign_irq(AssignedDevInfo *adev)
+{
+ struct kvm_assigned_irq assigned_irq_data;
+ AssignedDevice *dev = adev->assigned_dev;
+ int irq, r = 0;
+
+ irq = pci_map_irq(&dev->dev, dev->intpin);
+ irq = piix_get_irq(irq);
+
+#ifdef TARGET_IA64
+ irq = ipf_map_irq(&dev->dev, irq);
+#endif
+
+ if (dev->girq == irq)
+ return r;
+
+ memset(&assigned_irq_data, 0, sizeof(assigned_irq_data));
+ assigned_irq_data.assigned_dev_id =
+ calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
+ assigned_irq_data.guest_irq = irq;
+ assigned_irq_data.host_irq = dev->real_device.irq;
+ r = kvm_assign_irq(kvm_context, &assigned_irq_data);
+ if (r < 0) {
+ fprintf(stderr, "Failed to assign irq for \"%s\": %s\n",
+ adev->name, strerror(-r));
+ fprintf(stderr, "Perhaps you are assigning a device "
+ "that shares an IRQ with another device?\n");
+ return r;
+ }
+
+ dev->girq = irq;
+ return r;
+}
+
+void remove_assigned_device(AssignedDevInfo *adev)
+{
+ deassign_device(adev);
+ free_assigned_device(adev);
+}
+
+AssignedDevInfo *get_assigned_device(int pcibus, int slot)
+{
+ AssignedDevice *assigned_dev = NULL;
+ AssignedDevInfo *adev = NULL;
+
+ LIST_FOREACH(adev, &adev_head, next) {
+ assigned_dev = adev->assigned_dev;
+ if (pci_bus_num(assigned_dev->dev.bus) == pcibus &&
+ PCI_SLOT(assigned_dev->dev.devfn) == slot)
+ return adev;
+ }
+
+ return NULL;
+}
+
/* The pci config space got updated. Check if irq numbers have changed
* for our devices
*/
@@ -496,38 +606,12 @@ void assigned_dev_update_irqs()
adev = LIST_FIRST(&adev_head);
while (adev) {
- AssignedDevInfo *next = LIST_NEXT(adev, next);
- AssignedDevice *assigned_dev = adev->assigned_dev;
- int irq, r;
+ int r;
+ struct AssignedDevInfo *next = LIST_NEXT(adev, next);
- irq = pci_map_irq(&assigned_dev->dev, assigned_dev->intpin);
- irq = piix_get_irq(irq);
-
-#ifdef TARGET_IA64
- irq = ipf_map_irq(&assigned_dev->dev, irq);
-#endif
-
- if (irq != assigned_dev->girq) {
- struct kvm_assigned_irq assigned_irq_data;
-
- memset(&assigned_irq_data, 0, sizeof(assigned_irq_data));
- assigned_irq_data.assigned_dev_id =
- calc_assigned_dev_id(assigned_dev->h_busnr,
- (uint8_t) assigned_dev->h_devfn);
- assigned_irq_data.guest_irq = irq;
- assigned_irq_data.host_irq = assigned_dev->real_device.irq;
- r = kvm_assign_irq(kvm_context, &assigned_irq_data);
- if (r < 0) {
- fprintf(stderr, "Failed to assign irq for \"%s\": %s\n",
- adev->name, strerror(-r));
- fprintf(stderr, "Perhaps you are assigning a device "
- "that shares an IRQ with another device?\n");
- free_assigned_device(adev);
- adev = next;
- continue;
- }
- assigned_dev->girq = irq;
- }
+ r = assign_irq(adev);
+ if (r < 0)
+ remove_assigned_device(adev);
adev = next;
}
@@ -538,7 +622,6 @@ struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus)
int r;
AssignedDevice *dev;
uint8_t e_device, e_intx;
- struct kvm_assigned_pci_dev assigned_dev_data;
DEBUG("Registering real physical device %s (bus=%x dev=%x func=%x)\n",
adev->name, adev->bus, adev->dev, adev->func);
@@ -558,14 +641,14 @@ struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus)
if (get_real_device(dev, adev->bus, adev->dev, adev->func)) {
fprintf(stderr, "%s: Error: Couldn't get real device (%s)!\n",
__func__, adev->name);
- return NULL;
+ goto out;
}
/* handle real device's MMIO/PIO BARs */
if (assigned_dev_register_regions(dev->real_device.regions,
dev->real_device.region_number,
dev))
- return NULL;
+ goto out;
/* handle interrupt routing */
e_device = (dev->dev.devfn >> 3) & 0x1f;
@@ -576,29 +659,23 @@ struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus)
dev->h_busnr = adev->bus;
dev->h_devfn = PCI_DEVFN(adev->dev, adev->func);
- memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
- assigned_dev_data.assigned_dev_id =
- calc_assigned_dev_id(dev->h_busnr, (uint32_t)dev->h_devfn);
- assigned_dev_data.busnr = dev->h_busnr;
- assigned_dev_data.devfn = dev->h_devfn;
-
-#ifdef KVM_CAP_IOMMU
- /* We always enable the IOMMU if present
- * (or when not disabled on the command line)
- */
- r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU);
- if (r && !adev->disable_iommu)
- assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
-#endif
+ /* assign device */
+ r = assign_device(adev);
+ if (r < 0)
+ goto assigned_out;
- r = kvm_assign_pci_device(kvm_context, &assigned_dev_data);
- if (r < 0) {
- fprintf(stderr, "Failed to assign device \"%s\" : %s\n",
- adev->name, strerror(-r));
- return NULL;
- }
+ /* assign irq for the device */
+ r = assign_irq(adev);
+ if (r < 0)
+ goto assigned_out;
return &dev->dev;
+
+assigned_out:
+ deassign_device(adev);
+out:
+ free_assigned_device(adev);
+ return NULL;
}
/*
diff --git a/qemu/hw/device-assignment.h b/qemu/hw/device-assignment.h
index f216bb0..da775d7 100644
--- a/qemu/hw/device-assignment.h
+++ b/qemu/hw/device-assignment.h
@@ -94,10 +94,11 @@ struct AssignedDevInfo {
int disable_iommu;
};
-void free_assigned_device(AssignedDevInfo *adev);
PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus);
AssignedDevInfo *add_assigned_device(const char *arg);
void add_assigned_devices(PCIBus *bus, const char **devices, int n_devices);
+void remove_assigned_device(AssignedDevInfo *adev);
+AssignedDevInfo *get_assigned_device(int pcibus, int slot);
ram_addr_t assigned_dev_load_option_roms(ram_addr_t rom_base_offset);
void assigned_dev_update_irqs(void);
diff --git a/qemu/hw/device-hotplug.c b/qemu/hw/device-hotplug.c
index d8f0fcc..9b213ad 100644
--- a/qemu/hw/device-hotplug.c
+++ b/qemu/hw/device-hotplug.c
@@ -51,7 +51,6 @@ static PCIDevice *qemu_system_hot_assign_device(const char *opts, int bus_nr)
ret = init_assigned_device(adev, pci_bus);
if (ret == NULL) {
term_printf("Failed to assign device\n");
- free_assigned_device(adev);
return NULL;
}
@@ -64,6 +63,14 @@ static PCIDevice *qemu_system_hot_assign_device(const char *opts, int bus_nr)
return ret;
}
+static void qemu_system_hot_deassign_device(AssignedDevInfo *adev)
+{
+ remove_assigned_device(adev);
+
+ term_printf("Unregistered host PCI device %02x:%02x.%1x "
+ "(\"%s\") from guest\n",
+ adev->bus, adev->dev, adev->func, adev->name);
+}
#endif /* USE_KVM_DEVICE_ASSIGNMENT */
static int add_init_drive(const char *opts)
@@ -241,12 +248,21 @@ void device_hot_remove_success(int pcibus, int slot)
{
PCIDevice *d = pci_find_device(pcibus, slot);
int class_code;
+ AssignedDevInfo *adev;
if (!d) {
term_printf("invalid slot %d\n", slot);
return;
}
+#ifdef USE_KVM_DEVICE_ASSIGNMENT
+ adev = get_assigned_device(pcibus, slot);
+ if (adev) {
+ qemu_system_hot_deassign_device(adev);
+ return;
+ }
+#endif /* USE_KVM_DEVICE_ASSIGNMENT */
+
class_code = d->config_read(d, PCI_CLASS_DEVICE+1, 1);
pci_unregister_device(d);
--
1.6.0.4
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH 4/4] kvm: qemu: fix hot remove device
2009-02-10 12:40 [PATCH 4/4] kvm: qemu: fix hot remove device Han, Weidong
@ 2009-02-11 18:59 ` Mark McLoughlin
2009-02-13 2:25 ` Han, Weidong
0 siblings, 1 reply; 3+ messages in thread
From: Mark McLoughlin @ 2009-02-11 18:59 UTC (permalink / raw)
To: Han, Weidong; +Cc: 'Avi Kivity', 'kvm@vger.kernel.org'
Hi Weidong,
In general, this looks like a good cleanup.
With deassign_device() fixed to only require assigned_dev_id, I would be
happy to ACK this whole patch.
However, it would be much, much easier to review the patch if you had
split it into multiple patches e.g.
1) Make init_assigned_device() call free_assigned_device on error
2) Split out assign_device() with no functional changes
3) Split out assign_irq() with no functional changes
4) Add deassign_device() and make init_assigned_device() and
assigned_dev_update_irqs() use it
5) Add device hotunplug
On Tue, 2009-02-10 at 20:40 +0800, Han, Weidong wrote:
> when hot remove a device with iommu, should deassign it from guest,
> and free it from qemu.
>
> assign_dev_update_irqs may not be invoked when hot add a device,
> so need to assign irq after assign device in init_assigned_device.
>
> Signed-off-by: Weidong Han <weidong.han@intel.com>
> ---
> qemu/hw/device-assignment.c | 187 ++++++++++++++++++++++++++++++-------------
> qemu/hw/device-assignment.h | 3 +-
> qemu/hw/device-hotplug.c | 18 ++++-
> 3 files changed, 151 insertions(+), 57 deletions(-)
>
> diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
> index e6d2352..6362798 100644
> --- a/qemu/hw/device-assignment.c
> +++ b/qemu/hw/device-assignment.c
> @@ -443,7 +443,7 @@ again:
>
> static LIST_HEAD(, AssignedDevInfo) adev_head;
>
> -void free_assigned_device(AssignedDevInfo *adev)
> +static void free_assigned_device(AssignedDevInfo *adev)
Right, if init_assigned_device() fails, the device gets freed, so
nothing outside of this file needs this.
> @@ -487,6 +487,116 @@ static uint32_t calc_assigned_dev_id(uint8_t bus, uint8_t devfn)
> return (uint32_t)bus << 8 | (uint32_t)devfn;
> }
>
> +static int assign_device(AssignedDevInfo *adev)
> +{
> + struct kvm_assigned_pci_dev assigned_dev_data;
> + AssignedDevice *dev = adev->assigned_dev;
> + int r;
> +
> + memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
> + assigned_dev_data.assigned_dev_id =
> + calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
> + assigned_dev_data.busnr = dev->h_busnr;
> + assigned_dev_data.devfn = dev->h_devfn;
> +
> +#ifdef KVM_CAP_IOMMU
> + /* We always enable the IOMMU if present
> + * (or when not disabled on the command line)
> + */
> + r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU);
> + if (r && !adev->disable_iommu)
> + assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
> +#endif
> +
> + r = kvm_assign_pci_device(kvm_context, &assigned_dev_data);
> + if (r < 0)
> + fprintf(stderr, "Failed to assign device \"%s\" : %s\n",
> + adev->name, strerror(-r));
> + return r;
> +}
Just a copy of the code from init_assigned_device() with no functional
changes.
> +static void deassign_device(AssignedDevInfo *adev)
> +{
> + struct kvm_assigned_pci_dev assigned_dev_data;
> + AssignedDevice *dev = adev->assigned_dev;
> + int r;
> +
> + memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
> + assigned_dev_data.assigned_dev_id =
> + calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
We should only need to set assigned_dev_id for deassignment, so these
lines should not be needed:
> + assigned_dev_data.busnr = dev->h_busnr;
> + assigned_dev_data.devfn = dev->h_devfn;
> +
> +#ifdef KVM_CAP_IOMMU
> + /* We always enable the IOMMU if present
> + * (or when not disabled on the command line)
> + */
> + r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU);
> + if (r && !adev->disable_iommu)
> + assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
> +#endif
I think the kernel side needs this fix:
- if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
+ if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
kvm_deassign_device(kvm, match);
> + if (kvm_deassign_pci_device(kvm_context, &assigned_dev_data))
> + fprintf(stderr, "Could not notify kernel about "
> + "deassigned device (%x:%x.%x)\n",
> + dev->h_busnr, PCI_SLOT(dev->h_devfn), PCI_FUNC(dev->h_devfn));
> +}
> +
> +static int assign_irq(AssignedDevInfo *adev)
> +{
> + struct kvm_assigned_irq assigned_irq_data;
> + AssignedDevice *dev = adev->assigned_dev;
> + int irq, r = 0;
> +
> + irq = pci_map_irq(&dev->dev, dev->intpin);
> + irq = piix_get_irq(irq);
> +
> +#ifdef TARGET_IA64
> + irq = ipf_map_irq(&dev->dev, irq);
> +#endif
> +
> + if (dev->girq == irq)
> + return r;
> +
> + memset(&assigned_irq_data, 0, sizeof(assigned_irq_data));
> + assigned_irq_data.assigned_dev_id =
> + calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
> + assigned_irq_data.guest_irq = irq;
> + assigned_irq_data.host_irq = dev->real_device.irq;
> + r = kvm_assign_irq(kvm_context, &assigned_irq_data);
> + if (r < 0) {
> + fprintf(stderr, "Failed to assign irq for \"%s\": %s\n",
> + adev->name, strerror(-r));
> + fprintf(stderr, "Perhaps you are assigning a device "
> + "that shares an IRQ with another device?\n");
> + return r;
> + }
> +
> + dev->girq = irq;
> + return r;
> +}
Just a copy of the code in assigned_dev_update_irqs(), no functional
changes.
> +void remove_assigned_device(AssignedDevInfo *adev)
> +{
> + deassign_device(adev);
> + free_assigned_device(adev);
> +}
Okay, this is what the irq assignment code uses in its error handling
and what the device hotunplug uses.
> +AssignedDevInfo *get_assigned_device(int pcibus, int slot)
> +{
> + AssignedDevice *assigned_dev = NULL;
> + AssignedDevInfo *adev = NULL;
> +
> + LIST_FOREACH(adev, &adev_head, next) {
> + assigned_dev = adev->assigned_dev;
> + if (pci_bus_num(assigned_dev->dev.bus) == pcibus &&
> + PCI_SLOT(assigned_dev->dev.devfn) == slot)
> + return adev;
> + }
> +
> + return NULL;
> +}
Fine.
> /* The pci config space got updated. Check if irq numbers have changed
> * for our devices
> */
> @@ -496,38 +606,12 @@ void assigned_dev_update_irqs()
>
> adev = LIST_FIRST(&adev_head);
> while (adev) {
> - AssignedDevInfo *next = LIST_NEXT(adev, next);
> + struct AssignedDevInfo *next = LIST_NEXT(adev, next);
This is a spurious change, we don't need it.
> - AssignedDevice *assigned_dev = adev->assigned_dev;
> - int irq, r;
> + int r;
>
> - irq = pci_map_irq(&assigned_dev->dev, assigned_dev->intpin);
> - irq = piix_get_irq(irq);
> -
> -#ifdef TARGET_IA64
> - irq = ipf_map_irq(&assigned_dev->dev, irq);
> -#endif
> -
> - if (irq != assigned_dev->girq) {
> - struct kvm_assigned_irq assigned_irq_data;
> -
> - memset(&assigned_irq_data, 0, sizeof(assigned_irq_data));
> - assigned_irq_data.assigned_dev_id =
> - calc_assigned_dev_id(assigned_dev->h_busnr,
> - (uint8_t) assigned_dev->h_devfn);
> - assigned_irq_data.guest_irq = irq;
> - assigned_irq_data.host_irq = assigned_dev->real_device.irq;
> - r = kvm_assign_irq(kvm_context, &assigned_irq_data);
> - if (r < 0) {
> - fprintf(stderr, "Failed to assign irq for \"%s\": %s\n",
> - adev->name, strerror(-r));
> - fprintf(stderr, "Perhaps you are assigning a device "
> - "that shares an IRQ with another device?\n");
> - free_assigned_device(adev);
> - adev = next;
> - continue;
> - }
> - assigned_dev->girq = irq;
> - }
> + r = assign_irq(adev);
> + if (r < 0)
> + remove_assigned_device(adev);
Okay, the addition of deassignment is the only functional change.
> @@ -576,29 +659,23 @@ struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus)
> dev->h_busnr = adev->bus;
> dev->h_devfn = PCI_DEVFN(adev->dev, adev->func);
>
> - memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
> - assigned_dev_data.assigned_dev_id =
> - calc_assigned_dev_id(dev->h_busnr, (uint32_t)dev->h_devfn);
> - assigned_dev_data.busnr = dev->h_busnr;
> - assigned_dev_data.devfn = dev->h_devfn;
> -
> -#ifdef KVM_CAP_IOMMU
> - /* We always enable the IOMMU if present
> - * (or when not disabled on the command line)
> - */
> - r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU);
> - if (r && !adev->disable_iommu)
> - assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
> -#endif
> + /* assign device */
> + r = assign_device(adev);
> + if (r < 0)
> + goto assigned_out;
>
> - r = kvm_assign_pci_device(kvm_context, &assigned_dev_data);
> - if (r < 0) {
> - fprintf(stderr, "Failed to assign device \"%s\" : %s\n",
> - adev->name, strerror(-r));
> - return NULL;
> - }
> + /* assign irq for the device */
> + r = assign_irq(adev);
> + if (r < 0)
> + goto assigned_out;
>
> return &dev->dev;
> +
> +assigned_out:
> + deassign_device(adev);
> +out:
> + free_assigned_device(adev);
> + return NULL;
> }
The addition of deassignment and freeing are the only functional
changes.
Cheers,
Mark.
^ permalink raw reply [flat|nested] 3+ messages in thread
* RE: [PATCH 4/4] kvm: qemu: fix hot remove device
2009-02-11 18:59 ` Mark McLoughlin
@ 2009-02-13 2:25 ` Han, Weidong
0 siblings, 0 replies; 3+ messages in thread
From: Han, Weidong @ 2009-02-13 2:25 UTC (permalink / raw)
To: 'Mark McLoughlin'
Cc: 'Avi Kivity', 'kvm@vger.kernel.org'
Good suggestion. I will split it and resend them.
Regards,
Weidong
Mark McLoughlin wrote:
> Hi Weidong,
>
> In general, this looks like a good cleanup.
>
> With deassign_device() fixed to only require assigned_dev_id, I would
> be happy to ACK this whole patch.
>
> However, it would be much, much easier to review the patch if you had
> split it into multiple patches e.g.
>
> 1) Make init_assigned_device() call free_assigned_device on error
> 2) Split out assign_device() with no functional changes
> 3) Split out assign_irq() with no functional changes
> 4) Add deassign_device() and make init_assigned_device() and
> assigned_dev_update_irqs() use it
> 5) Add device hotunplug
>
> On Tue, 2009-02-10 at 20:40 +0800, Han, Weidong wrote:
>> when hot remove a device with iommu, should deassign it from guest,
>> and free it from qemu.
>>
>> assign_dev_update_irqs may not be invoked when hot add a device,
>> so need to assign irq after assign device in init_assigned_device.
>>
>> Signed-off-by: Weidong Han <weidong.han@intel.com>
>> ---
>> qemu/hw/device-assignment.c | 187
>> ++++++++++++++++++++++++++++++-------------
>> qemu/hw/device-assignment.h | 3 +- qemu/hw/device-hotplug.c |
>> 18 ++++- 3 files changed, 151 insertions(+), 57 deletions(-)
>>
>> diff --git a/qemu/hw/device-assignment.c
>> b/qemu/hw/device-assignment.c
>> index e6d2352..6362798 100644
>> --- a/qemu/hw/device-assignment.c
>> +++ b/qemu/hw/device-assignment.c
>> @@ -443,7 +443,7 @@ again:
>>
>> static LIST_HEAD(, AssignedDevInfo) adev_head;
>>
>> -void free_assigned_device(AssignedDevInfo *adev)
>> +static void free_assigned_device(AssignedDevInfo *adev)
>
> Right, if init_assigned_device() fails, the device gets freed, so
> nothing outside of this file needs this.
>
>
>> @@ -487,6 +487,116 @@ static uint32_t calc_assigned_dev_id(uint8_t
>> bus, uint8_t devfn) return (uint32_t)bus << 8 | (uint32_t)devfn;
>> }
>>
>> +static int assign_device(AssignedDevInfo *adev)
>> +{
>> + struct kvm_assigned_pci_dev assigned_dev_data;
>> + AssignedDevice *dev = adev->assigned_dev;
>> + int r;
>> +
>> + memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
>> + assigned_dev_data.assigned_dev_id =
>> + calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
>> + assigned_dev_data.busnr = dev->h_busnr;
>> + assigned_dev_data.devfn = dev->h_devfn;
>> +
>> +#ifdef KVM_CAP_IOMMU
>> + /* We always enable the IOMMU if present
>> + * (or when not disabled on the command line)
>> + */
>> + r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU);
>> + if (r && !adev->disable_iommu)
>> + assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU; +#endif
>> +
>> + r = kvm_assign_pci_device(kvm_context, &assigned_dev_data); +
>> if (r < 0) + fprintf(stderr, "Failed to assign device \"%s\" : %s\n",
>> + adev->name, strerror(-r));
>> + return r;
>> +}
>
> Just a copy of the code from init_assigned_device() with no functional
> changes.
>
>> +static void deassign_device(AssignedDevInfo *adev) +{
>> + struct kvm_assigned_pci_dev assigned_dev_data;
>> + AssignedDevice *dev = adev->assigned_dev;
>> + int r;
>> +
>> + memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
>> + assigned_dev_data.assigned_dev_id =
>> + calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
>
> We should only need to set assigned_dev_id for deassignment, so these
> lines should not be needed:
>
>> + assigned_dev_data.busnr = dev->h_busnr;
>> + assigned_dev_data.devfn = dev->h_devfn;
>> +
>> +#ifdef KVM_CAP_IOMMU
>> + /* We always enable the IOMMU if present
>> + * (or when not disabled on the command line)
>> + */
>> + r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU);
>> + if (r && !adev->disable_iommu)
>> + assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
>> +#endif
>
> I think the kernel side needs this fix:
>
> - if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
> + if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
> kvm_deassign_device(kvm, match);
>
>> + if (kvm_deassign_pci_device(kvm_context, &assigned_dev_data))
>> + fprintf(stderr, "Could not notify kernel about "
>> + "deassigned device (%x:%x.%x)\n",
>> + dev->h_busnr, PCI_SLOT(dev->h_devfn),
>> PCI_FUNC(dev->h_devfn)); +} +
>> +static int assign_irq(AssignedDevInfo *adev)
>> +{
>> + struct kvm_assigned_irq assigned_irq_data;
>> + AssignedDevice *dev = adev->assigned_dev;
>> + int irq, r = 0;
>> +
>> + irq = pci_map_irq(&dev->dev, dev->intpin);
>> + irq = piix_get_irq(irq);
>> +
>> +#ifdef TARGET_IA64
>> + irq = ipf_map_irq(&dev->dev, irq);
>> +#endif
>> +
>> + if (dev->girq == irq)
>> + return r;
>> +
>> + memset(&assigned_irq_data, 0, sizeof(assigned_irq_data));
>> + assigned_irq_data.assigned_dev_id =
>> + calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
>> + assigned_irq_data.guest_irq = irq;
>> + assigned_irq_data.host_irq = dev->real_device.irq;
>> + r = kvm_assign_irq(kvm_context, &assigned_irq_data); + if (r
>> < 0) { + fprintf(stderr, "Failed to assign irq for \"%s\":
>> %s\n", + adev->name, strerror(-r));
>> + fprintf(stderr, "Perhaps you are assigning a device "
>> + "that shares an IRQ with another device?\n"); +
>> return r; + }
>> +
>> + dev->girq = irq;
>> + return r;
>> +}
>
> Just a copy of the code in assigned_dev_update_irqs(), no functional
> changes.
>
>> +void remove_assigned_device(AssignedDevInfo *adev) +{
>> + deassign_device(adev);
>> + free_assigned_device(adev);
>> +}
>
> Okay, this is what the irq assignment code uses in its error handling
> and what the device hotunplug uses.
>
>> +AssignedDevInfo *get_assigned_device(int pcibus, int slot) +{
>> + AssignedDevice *assigned_dev = NULL;
>> + AssignedDevInfo *adev = NULL;
>> +
>> + LIST_FOREACH(adev, &adev_head, next) {
>> + assigned_dev = adev->assigned_dev;
>> + if (pci_bus_num(assigned_dev->dev.bus) == pcibus &&
>> + PCI_SLOT(assigned_dev->dev.devfn) == slot) +
>> return adev; + }
>> +
>> + return NULL;
>> +}
>
> Fine.
>
>> /* The pci config space got updated. Check if irq numbers have
>> changed
>> * for our devices
>> */
>> @@ -496,38 +606,12 @@ void assigned_dev_update_irqs()
>>
>> adev = LIST_FIRST(&adev_head);
>> while (adev) {
>> - AssignedDevInfo *next = LIST_NEXT(adev, next);
>> + struct AssignedDevInfo *next = LIST_NEXT(adev, next);
>
> This is a spurious change, we don't need it.
>
>> - AssignedDevice *assigned_dev = adev->assigned_dev;
>> - int irq, r;
>> + int r;
>
>>
>> - irq = pci_map_irq(&assigned_dev->dev, assigned_dev->intpin);
>> - irq = piix_get_irq(irq);
>> -
>> -#ifdef TARGET_IA64
>> - irq = ipf_map_irq(&assigned_dev->dev, irq);
>> -#endif
>> -
>> - if (irq != assigned_dev->girq) {
>> - struct kvm_assigned_irq assigned_irq_data; -
>> - memset(&assigned_irq_data, 0,
>> sizeof(assigned_irq_data));
>> - assigned_irq_data.assigned_dev_id =
>> - calc_assigned_dev_id(assigned_dev->h_busnr,
>> - (uint8_t)
>> assigned_dev->h_devfn);
>> - assigned_irq_data.guest_irq = irq;
>> - assigned_irq_data.host_irq =
>> assigned_dev->real_device.irq;
>> - r = kvm_assign_irq(kvm_context, &assigned_irq_data);
>> - if (r < 0) {
>> - fprintf(stderr, "Failed to assign irq for \"%s\":
>> %s\n",
>> - adev->name, strerror(-r));
>> - fprintf(stderr, "Perhaps you are assigning a device
>> "
>> - "that shares an IRQ with another
>> device?\n");
>> - free_assigned_device(adev);
>> - adev = next;
>> - continue;
>> - }
>> - assigned_dev->girq = irq;
>> - }
>> + r = assign_irq(adev);
>> + if (r < 0)
>> + remove_assigned_device(adev);
>
> Okay, the addition of deassignment is the only functional change.
>
>> @@ -576,29 +659,23 @@ struct PCIDevice
>> *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus)
>> dev->h_busnr = adev->bus; dev->h_devfn = PCI_DEVFN(adev->dev,
>> adev->func);
>>
>> - memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
>> - assigned_dev_data.assigned_dev_id =
>> - calc_assigned_dev_id(dev->h_busnr, (uint32_t)dev->h_devfn);
>> - assigned_dev_data.busnr = dev->h_busnr;
>> - assigned_dev_data.devfn = dev->h_devfn;
>> -
>> -#ifdef KVM_CAP_IOMMU
>> - /* We always enable the IOMMU if present
>> - * (or when not disabled on the command line)
>> - */
>> - r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU);
>> - if (r && !adev->disable_iommu)
>> - assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
>> -#endif
>> + /* assign device */
>> + r = assign_device(adev);
>> + if (r < 0)
>> + goto assigned_out;
>>
>> - r = kvm_assign_pci_device(kvm_context, &assigned_dev_data);
>> - if (r < 0) {
>> - fprintf(stderr, "Failed to assign device \"%s\" : %s\n",
>> - adev->name, strerror(-r));
>> - return NULL;
>> - }
>> + /* assign irq for the device */
>> + r = assign_irq(adev);
>> + if (r < 0)
>> + goto assigned_out;
>>
>> return &dev->dev;
>> +
>> +assigned_out:
>> + deassign_device(adev);
>> +out:
>> + free_assigned_device(adev);
>> + return NULL;
>> }
>
> The addition of deassignment and freeing are the only functional
> changes.
>
> Cheers,
> Mark.
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2009-02-13 2:25 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-02-10 12:40 [PATCH 4/4] kvm: qemu: fix hot remove device Han, Weidong
2009-02-11 18:59 ` Mark McLoughlin
2009-02-13 2:25 ` Han, Weidong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox