qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 0/6] PCI hotplug improvements
@ 2012-03-07  0:13 Alex Williamson
  2012-03-07  0:13 ` [Qemu-devel] [PATCH 1/6] acpi_piix4: Disallow write to up/down PCI hotplug registers Alex Williamson
                   ` (6 more replies)
  0 siblings, 7 replies; 19+ messages in thread
From: Alex Williamson @ 2012-03-07  0:13 UTC (permalink / raw)
  To: qemu-devel; +Cc: alex.williamson, ddutile, gleb, mst

Here's a re-work of the patch that added _STA for the purpose of
using it as an ack from the guest.  Instead of that, add a notifier
for device access.  Once the guest reads from device config space,
it owns it.  Until that point, we can remove it directly.  As pointed
out by MST, this passes test b) below, which the _STA method would not.
As a bonus, no bios change is required for this.  Patches 5 & 6 are
just cleanups that can be applied independently.  Thanks,

Alex

Tested using Linux guest:
a) without acpiphp loaded:
    - device_add (nothing happens)
    - device_del (device removed directly)
b) without acpiphp loaded:
    - device_add (nothing happens)
    - echo 1 > /sys/bus/pci/rescan (device discovered)
    - device_del (nothing happens, guest owns device)
    - modprobe acpiphp
    - device_del (guest releases device)
c) with acpiphp loaded:
    - device_add/del behave as expected (automatic add + coordinated removal)
Tested using WinXP guest:
    - device_add/del behave as expected (automatic add + coordinated removal)

---

Alex Williamson (6):
      acpi_piix4: Remove PCI_RMV_BASE write code
      acpi_piix4: Use pci_get/set_byte
      acpi_piix4: Track PCI hotplug status and allow non-ACPI remove path
      pci: Add notifier for device probing
      acpi_piix4: Only allow writes to PCI hotplug eject register
      acpi_piix4: Disallow write to up/down PCI hotplug registers


 hw/acpi_piix4.c |  175 ++++++++++++++++++++++++++++---------------------------
 hw/pci_host.c   |   19 ++++++
 hw/pci_host.h   |    2 +
 3 files changed, 111 insertions(+), 85 deletions(-)

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 1/6] acpi_piix4: Disallow write to up/down PCI hotplug registers
  2012-03-07  0:13 [Qemu-devel] [PATCH 0/6] PCI hotplug improvements Alex Williamson
@ 2012-03-07  0:13 ` Alex Williamson
  2012-03-07  0:14 ` [Qemu-devel] [PATCH 2/6] acpi_piix4: Only allow writes to PCI hotplug eject register Alex Williamson
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 19+ messages in thread
From: Alex Williamson @ 2012-03-07  0:13 UTC (permalink / raw)
  To: qemu-devel; +Cc: alex.williamson, ddutile, gleb, mst

These are never written.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

 hw/acpi_piix4.c |   45 +++++++++++++--------------------------------
 1 files changed, 13 insertions(+), 32 deletions(-)

diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c
index 30d37f9..5960b7f 100644
--- a/hw/acpi_piix4.c
+++ b/hw/acpi_piix4.c
@@ -41,7 +41,8 @@
 #define GPE_BASE 0xafe0
 #define PROC_BASE 0xaf00
 #define GPE_LEN 4
-#define PCI_BASE 0xae00
+#define PCI_UP_BASE 0xae00
+#define PCI_DOWN_BASE 0xae04
 #define PCI_EJ_BASE 0xae08
 #define PCI_RMV_BASE 0xae0c
 
@@ -472,40 +473,22 @@ static void gpe_writeb(void *opaque, uint32_t addr, uint32_t val)
     PIIX4_DPRINTF("gpe write %x <== %d\n", addr, val);
 }
 
-static uint32_t pcihotplug_read(void *opaque, uint32_t addr)
+static uint32_t pci_updown_read(void *opaque, uint32_t addr)
 {
-    uint32_t val = 0;
-    struct pci_status *g = opaque;
-    switch (addr) {
-        case PCI_BASE:
-            val = g->up;
-            break;
-        case PCI_BASE + 4:
-            val = g->down;
-            break;
-        default:
-            break;
+    PIIX4PMState *s = opaque;
+    uint32_t val;
+
+    if (addr == PCI_UP_BASE) {
+        val = s->pci0_status.up;
+    } else {
+        val = s->pci0_status.down;
     }
 
-    PIIX4_DPRINTF("pcihotplug read %x == %x\n", addr, val);
+    PIIX4_DPRINTF("pci %s read %x\n",
+                  (addr == PCI_UP_BASE) ? "up" : "down", val);
     return val;
 }
 
-static void pcihotplug_write(void *opaque, uint32_t addr, uint32_t val)
-{
-    struct pci_status *g = opaque;
-    switch (addr) {
-        case PCI_BASE:
-            g->up = val;
-            break;
-        case PCI_BASE + 4:
-            g->down = val;
-            break;
-   }
-
-    PIIX4_DPRINTF("pcihotplug write %x <== %d\n", addr, val);
-}
-
 static uint32_t pciej_read(void *opaque, uint32_t addr)
 {
     PIIX4_DPRINTF("pciej read %x\n", addr);
@@ -549,7 +532,6 @@ static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev,
 
 static void piix4_acpi_system_hot_add_init(PCIBus *bus, PIIX4PMState *s)
 {
-    struct pci_status *pci0_status = &s->pci0_status;
     int i = 0, cpus = smp_cpus;
 
     while (cpus > 0) {
@@ -564,8 +546,7 @@ static void piix4_acpi_system_hot_add_init(PCIBus *bus, PIIX4PMState *s)
     register_ioport_write(PROC_BASE, 32, 1, gpe_writeb, s);
     register_ioport_read(PROC_BASE, 32, 1,  gpe_readb, s);
 
-    register_ioport_write(PCI_BASE, 8, 4, pcihotplug_write, pci0_status);
-    register_ioport_read(PCI_BASE, 8, 4,  pcihotplug_read, pci0_status);
+    register_ioport_read(PCI_UP_BASE, 8, 4, pci_updown_read, s);
 
     register_ioport_write(PCI_EJ_BASE, 4, 4, pciej_write, bus);
     register_ioport_read(PCI_EJ_BASE, 4, 4,  pciej_read, bus);

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 2/6] acpi_piix4: Only allow writes to PCI hotplug eject register
  2012-03-07  0:13 [Qemu-devel] [PATCH 0/6] PCI hotplug improvements Alex Williamson
  2012-03-07  0:13 ` [Qemu-devel] [PATCH 1/6] acpi_piix4: Disallow write to up/down PCI hotplug registers Alex Williamson
@ 2012-03-07  0:14 ` Alex Williamson
  2012-03-07  0:14 ` [Qemu-devel] [PATCH 3/6] pci: Add notifier for device probing Alex Williamson
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 19+ messages in thread
From: Alex Williamson @ 2012-03-07  0:14 UTC (permalink / raw)
  To: qemu-devel; +Cc: alex.williamson, ddutile, gleb, mst

This is never read.  We can also derive bus from the write handler,
which later makes this easier to call directly.  Note that pciej_write
was actually called with (PCIBus *)dev->bus, which is cast as a void*
allowing us to pretend it's a BusState*.  Fix this so we don't depend
on the BusState location within PCIBus.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

 hw/acpi_piix4.c |   13 ++++---------
 1 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c
index 5960b7f..4d88e23 100644
--- a/hw/acpi_piix4.c
+++ b/hw/acpi_piix4.c
@@ -489,15 +489,11 @@ static uint32_t pci_updown_read(void *opaque, uint32_t addr)
     return val;
 }
 
-static uint32_t pciej_read(void *opaque, uint32_t addr)
-{
-    PIIX4_DPRINTF("pciej read %x\n", addr);
-    return 0;
-}
-
 static void pciej_write(void *opaque, uint32_t addr, uint32_t val)
 {
-    BusState *bus = opaque;
+    PIIX4PMState *s = opaque;
+    PCIDevice *dev = &s->dev;
+    BusState *bus = qdev_get_parent_bus(&dev->qdev);
     DeviceState *qdev, *next;
     int slot = ffs(val) - 1;
 
@@ -548,8 +544,7 @@ static void piix4_acpi_system_hot_add_init(PCIBus *bus, PIIX4PMState *s)
 
     register_ioport_read(PCI_UP_BASE, 8, 4, pci_updown_read, s);
 
-    register_ioport_write(PCI_EJ_BASE, 4, 4, pciej_write, bus);
-    register_ioport_read(PCI_EJ_BASE, 4, 4,  pciej_read, bus);
+    register_ioport_write(PCI_EJ_BASE, 4, 4, pciej_write, s);
 
     register_ioport_write(PCI_RMV_BASE, 4, 4, pcirmv_write, s);
     register_ioport_read(PCI_RMV_BASE, 4, 4,  pcirmv_read, s);

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 3/6] pci: Add notifier for device probing
  2012-03-07  0:13 [Qemu-devel] [PATCH 0/6] PCI hotplug improvements Alex Williamson
  2012-03-07  0:13 ` [Qemu-devel] [PATCH 1/6] acpi_piix4: Disallow write to up/down PCI hotplug registers Alex Williamson
  2012-03-07  0:14 ` [Qemu-devel] [PATCH 2/6] acpi_piix4: Only allow writes to PCI hotplug eject register Alex Williamson
@ 2012-03-07  0:14 ` Alex Williamson
  2012-03-07  9:19   ` Paolo Bonzini
  2012-03-07  0:14 ` [Qemu-devel] [PATCH 4/6] acpi_piix4: Track PCI hotplug status and allow non-ACPI remove path Alex Williamson
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 19+ messages in thread
From: Alex Williamson @ 2012-03-07  0:14 UTC (permalink / raw)
  To: qemu-devel; +Cc: alex.williamson, ddutile, gleb, mst

It's sometimes useful to know when the guest probes a device.  For
example, during PCI hotplug once the guest accesses the device, we
can no longer remove it without guest permission.  Add a notifier
list that gets called when the vendor ID of a device is read.  The
PCI spec mandates that configuration software must read this
register when probing the slot.  Ideally we'd notify only on the
first vendor ID read, but this requires state, which has migration
implications.  For now, notify on every read of the vendor ID.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

 hw/pci_host.c |   19 +++++++++++++++++++
 hw/pci_host.h |    2 ++
 2 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/hw/pci_host.c b/hw/pci_host.c
index 44c6c20..7eb1eca 100644
--- a/hw/pci_host.c
+++ b/hw/pci_host.c
@@ -20,6 +20,7 @@
 
 #include "pci.h"
 #include "pci_host.h"
+#include "notify.h"
 
 /* debug PCI */
 //#define DEBUG_PCI
@@ -31,6 +32,19 @@ do { printf("pci_host_data: " fmt , ## __VA_ARGS__); } while (0)
 #define PCI_DPRINTF(fmt, ...)
 #endif
 
+static NotifierList pci_host_dev_probe_notifiers =
+    NOTIFIER_LIST_INITIALIZER(pci_host_dev_probe_notifiers);
+
+void pci_host_add_dev_probe_notifier(Notifier *notify)
+{
+    notifier_list_add(&pci_host_dev_probe_notifiers, notify);
+}
+
+void pci_host_remove_dev_probe_notifier(Notifier *notify)
+{
+    notifier_list_remove(&pci_host_dev_probe_notifiers, notify);
+}
+
 /*
  * PCI address
  * bit 16 - 24: bus number
@@ -58,6 +72,11 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
                                      uint32_t limit, uint32_t len)
 {
     assert(len <= 4);
+
+    if (addr == PCI_VENDOR_ID) {
+        notifier_list_notify(&pci_host_dev_probe_notifiers, pci_dev);
+    }
+
     return pci_dev->config_read(pci_dev, addr, MIN(len, limit - addr));
 }
 
diff --git a/hw/pci_host.h b/hw/pci_host.h
index 359e38f..c5491ca 100644
--- a/hw/pci_host.h
+++ b/hw/pci_host.h
@@ -45,6 +45,8 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
                                   uint32_t limit, uint32_t val, uint32_t len);
 uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
                                      uint32_t limit, uint32_t len);
+void pci_host_add_dev_probe_notifier(Notifier *notify);
+void pci_host_remove_dev_probe_notifier(Notifier *notify);
 
 void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len);
 uint32_t pci_data_read(PCIBus *s, uint32_t addr, int len);

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 4/6] acpi_piix4: Track PCI hotplug status and allow non-ACPI remove path
  2012-03-07  0:13 [Qemu-devel] [PATCH 0/6] PCI hotplug improvements Alex Williamson
                   ` (2 preceding siblings ...)
  2012-03-07  0:14 ` [Qemu-devel] [PATCH 3/6] pci: Add notifier for device probing Alex Williamson
@ 2012-03-07  0:14 ` Alex Williamson
  2012-03-11 21:57   ` Michael S. Tsirkin
  2012-03-07  0:15 ` [Qemu-devel] [PATCH 5/6] acpi_piix4: Use pci_get/set_byte Alex Williamson
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 19+ messages in thread
From: Alex Williamson @ 2012-03-07  0:14 UTC (permalink / raw)
  To: qemu-devel; +Cc: alex.williamson, ddutile, gleb, mst

When a guest probes a device, clear the "up" bit in the hotplug
register.  This allows us to enable a non-ACPI remove path for
devices added, but never accessed by the guest.  This is useful
when a guest does not have ACPI PCI hotplug support to avoid losing
devices to a guest.  We also now individually track bits for "up"
and "down" rather than clearing both on each PCI hotplug action.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

 hw/acpi_piix4.c |   58 ++++++++++++++++++++++++++++++++++++++++++++-----------
 1 files changed, 46 insertions(+), 12 deletions(-)

diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c
index 4d88e23..7e766e5 100644
--- a/hw/acpi_piix4.c
+++ b/hw/acpi_piix4.c
@@ -27,6 +27,7 @@
 #include "sysemu.h"
 #include "range.h"
 #include "ioport.h"
+#include "pci_host.h"
 
 //#define DEBUG
 
@@ -75,6 +76,7 @@ typedef struct PIIX4PMState {
     qemu_irq smi_irq;
     int kvm_enabled;
     Notifier machine_ready;
+    Notifier device_probe;
 
     /* for pci hotplug */
     ACPIGPE gpe;
@@ -336,6 +338,16 @@ static void piix4_pm_machine_ready(Notifier *n, void *opaque)
 
 }
 
+static void piix4_pm_device_probe(Notifier *n, void *opaque)
+{
+    PIIX4PMState *s = container_of(n, PIIX4PMState, device_probe);
+    PCIDevice *pdev = opaque;
+
+    if (pci_find_domain(pdev->bus) == 0 && pci_bus_num(pdev->bus) == 0) {
+        s->pci0_status.up &= ~(1U << PCI_SLOT(pdev->devfn));
+    }
+}
+
 static PIIX4PMState *global_piix4_pm_state; /* cpu hotadd */
 
 static int piix4_pm_initfn(PCIDevice *dev)
@@ -383,6 +395,8 @@ static int piix4_pm_initfn(PCIDevice *dev)
     qemu_add_machine_init_done_notifier(&s->machine_ready);
     qemu_register_reset(piix4_reset, s);
     piix4_acpi_system_hot_add_init(dev->bus, s);
+    s->device_probe.notify = piix4_pm_device_probe;
+    pci_host_add_dev_probe_notifier(&s->device_probe);
 
     return 0;
 }
@@ -502,6 +516,7 @@ static void pciej_write(void *opaque, uint32_t addr, uint32_t val)
         PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
         if (PCI_SLOT(dev->devfn) == slot && !pc->no_hotplug) {
             qdev_free(qdev);
+            s->pci0_status.down &= ~(1U << slot);
         }
     }
 
@@ -594,16 +609,41 @@ void qemu_system_cpu_hot_add(int cpu, int state)
 }
 #endif
 
-static void enable_device(PIIX4PMState *s, int slot)
+static int enable_device(PIIX4PMState *s, int slot)
 {
+    uint32_t mask = 1U << slot;
+
+    if ((s->pci0_status.up | s->pci0_status.down) & mask) {
+        return -1;
+    }
+
     s->gpe.sts[0] |= PIIX4_PCI_HOTPLUG_STATUS;
-    s->pci0_status.up |= (1 << slot);
+    s->pci0_status.up |= mask;
+
+    pm_update_sci(s);
+    return 0;
 }
 
-static void disable_device(PIIX4PMState *s, int slot)
+static int disable_device(PIIX4PMState *s, int slot)
 {
+    uint32_t mask = 1U << slot;
+
+    if (s->pci0_status.up & mask) {
+        s->pci0_status.up &= ~mask;
+        pciej_write(s, PCI_EJ_BASE, mask);
+
+        /* Clear GPE PCI hotplug status if nothing left pending */
+        if (!(s->pci0_status.up | s->pci0_status.down)) {
+            s->gpe.sts[0] &= ~PIIX4_PCI_HOTPLUG_STATUS;
+        }
+        return 0;
+    }
+
     s->gpe.sts[0] |= PIIX4_PCI_HOTPLUG_STATUS;
-    s->pci0_status.down |= (1 << slot);
+    s->pci0_status.down |= mask;
+
+    pm_update_sci(s);
+    return 0;
 }
 
 static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev,
@@ -620,15 +660,9 @@ static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev,
         return 0;
     }
 
-    s->pci0_status.up = 0;
-    s->pci0_status.down = 0;
     if (state == PCI_HOTPLUG_ENABLED) {
-        enable_device(s, slot);
+        return enable_device(s, slot);
     } else {
-        disable_device(s, slot);
+        return disable_device(s, slot);
     }
-
-    pm_update_sci(s);
-
-    return 0;
 }

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 5/6] acpi_piix4: Use pci_get/set_byte
  2012-03-07  0:13 [Qemu-devel] [PATCH 0/6] PCI hotplug improvements Alex Williamson
                   ` (3 preceding siblings ...)
  2012-03-07  0:14 ` [Qemu-devel] [PATCH 4/6] acpi_piix4: Track PCI hotplug status and allow non-ACPI remove path Alex Williamson
@ 2012-03-07  0:15 ` Alex Williamson
  2012-03-07  0:15 ` [Qemu-devel] [PATCH 6/6] api_piix4: Remove PCI_RMV_BASE write code Alex Williamson
  2012-03-07 12:43 ` [Qemu-devel] [PATCH 0/6] PCI hotplug improvements Gleb Natapov
  6 siblings, 0 replies; 19+ messages in thread
From: Alex Williamson @ 2012-03-07  0:15 UTC (permalink / raw)
  To: qemu-devel; +Cc: alex.williamson, ddutile, gleb, mst

Remove stray direct access

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

 hw/acpi_piix4.c |   53 +++++++++++++++++++++++++++--------------------------
 1 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c
index 7e766e5..4640e2e 100644
--- a/hw/acpi_piix4.c
+++ b/hw/acpi_piix4.c
@@ -178,11 +178,12 @@ static const IORangeOps pm_iorange_ops = {
 static void apm_ctrl_changed(uint32_t val, void *arg)
 {
     PIIX4PMState *s = arg;
+    PCIDevice *dev = &s->dev;
 
     /* ACPI specs 3.0, 4.7.2.5 */
     acpi_pm1_cnt_update(&s->pm1_cnt, val == ACPI_ENABLE, val == ACPI_DISABLE);
 
-    if (s->dev.config[0x5b] & (1 << 1)) {
+    if (pci_get_byte(dev->config + 0x5b) & (1 << 1)) {
         if (s->smi_irq) {
             qemu_irq_raise(s->smi_irq);
         }
@@ -196,10 +197,11 @@ static void acpi_dbg_writel(void *opaque, uint32_t addr, uint32_t val)
 
 static void pm_io_space_update(PIIX4PMState *s)
 {
+    PCIDevice *dev = &s->dev;
     uint32_t pm_io_base;
 
-    if (s->dev.config[0x80] & 1) {
-        pm_io_base = le32_to_cpu(*(uint32_t *)(s->dev.config + 0x40));
+    if (pci_get_byte(dev->config + 0x80) & 1) {
+        pm_io_base = pci_get_long(dev->config + 0x40);
         pm_io_base &= 0xffc0;
 
         /* XXX: need to improve memory and ioport allocation */
@@ -302,16 +304,16 @@ static void piix4_update_hotplug(PIIX4PMState *s)
 static void piix4_reset(void *opaque)
 {
     PIIX4PMState *s = opaque;
-    uint8_t *pci_conf = s->dev.config;
+    PCIDevice *dev = &s->dev;
 
-    pci_conf[0x58] = 0;
-    pci_conf[0x59] = 0;
-    pci_conf[0x5a] = 0;
-    pci_conf[0x5b] = 0;
+    pci_set_byte(dev->config + 0x58, 0);
+    pci_set_byte(dev->config + 0x59, 0);
+    pci_set_byte(dev->config + 0x5a, 0);
+    pci_set_byte(dev->config + 0x5b, 0);
 
     if (s->kvm_enabled) {
         /* Mark SMM as already inited (until KVM supports SMM). */
-        pci_conf[0x5B] = 0x02;
+        pci_set_byte(dev->config + 0x5B, 0x02);
     }
     piix4_update_hotplug(s);
 }
@@ -328,13 +330,14 @@ static void piix4_powerdown(void *opaque, int irq, int power_failing)
 static void piix4_pm_machine_ready(Notifier *n, void *opaque)
 {
     PIIX4PMState *s = container_of(n, PIIX4PMState, machine_ready);
-    uint8_t *pci_conf;
+    PCIDevice *dev = &s->dev;
 
-    pci_conf = s->dev.config;
-    pci_conf[0x5f] = (isa_is_ioport_assigned(0x378) ? 0x80 : 0) | 0x10;
-    pci_conf[0x63] = 0x60;
-    pci_conf[0x67] = (isa_is_ioport_assigned(0x3f8) ? 0x08 : 0) |
-	(isa_is_ioport_assigned(0x2f8) ? 0x90 : 0);
+    pci_set_byte(dev->config + 0x5f,
+                 (isa_is_ioport_assigned(0x378) ? 0x80 : 0) | 0x10);
+    pci_set_byte(dev->config + 0x63, 0x60);
+    pci_set_byte(dev->config + 0x67,
+                 (isa_is_ioport_assigned(0x3f8) ? 0x08 : 0) |
+                 (isa_is_ioport_assigned(0x2f8) ? 0x90 : 0));
 
 }
 
@@ -353,18 +356,16 @@ static PIIX4PMState *global_piix4_pm_state; /* cpu hotadd */
 static int piix4_pm_initfn(PCIDevice *dev)
 {
     PIIX4PMState *s = DO_UPCAST(PIIX4PMState, dev, dev);
-    uint8_t *pci_conf;
 
     /* for cpu hotadd */
     global_piix4_pm_state = s;
 
-    pci_conf = s->dev.config;
-    pci_conf[0x06] = 0x80;
-    pci_conf[0x07] = 0x02;
-    pci_conf[0x09] = 0x00;
-    pci_conf[0x3d] = 0x01; // interrupt pin 1
+    pci_set_byte(dev->config + 0x06, 0x80);
+    pci_set_byte(dev->config + 0x07, 0x02);
+    pci_set_byte(dev->config + 0x09, 0x00);
+    pci_set_byte(dev->config + 0x3d, 0x01); /* interrupt pin 1 */
 
-    pci_conf[0x40] = 0x01; /* PM io base read only bit */
+    pci_set_byte(dev->config + 0x40, 0x01); /* PM io base read only bit */
 
     /* APM */
     apm_init(&s->apm, apm_ctrl_changed, s);
@@ -374,14 +375,14 @@ static int piix4_pm_initfn(PCIDevice *dev)
     if (s->kvm_enabled) {
         /* Mark SMM as already inited to prevent SMM from running.  KVM does not
          * support SMM mode. */
-        pci_conf[0x5B] = 0x02;
+        pci_set_byte(dev->config + 0x5B, 0x02);
     }
 
     /* XXX: which specification is used ? The i82731AB has different
        mappings */
-    pci_conf[0x90] = s->smb_io_base | 1;
-    pci_conf[0x91] = s->smb_io_base >> 8;
-    pci_conf[0xd2] = 0x09;
+    pci_set_byte(dev->config + 0x90, s->smb_io_base | 1);
+    pci_set_byte(dev->config + 0x91, s->smb_io_base >> 8);
+    pci_set_byte(dev->config + 0xd2, 0x09);
     register_ioport_write(s->smb_io_base, 64, 1, smb_ioport_writeb, &s->smb);
     register_ioport_read(s->smb_io_base, 64, 1, smb_ioport_readb, &s->smb);
 

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 6/6] api_piix4: Remove PCI_RMV_BASE write code
  2012-03-07  0:13 [Qemu-devel] [PATCH 0/6] PCI hotplug improvements Alex Williamson
                   ` (4 preceding siblings ...)
  2012-03-07  0:15 ` [Qemu-devel] [PATCH 5/6] acpi_piix4: Use pci_get/set_byte Alex Williamson
@ 2012-03-07  0:15 ` Alex Williamson
  2012-03-07 12:43 ` [Qemu-devel] [PATCH 0/6] PCI hotplug improvements Gleb Natapov
  6 siblings, 0 replies; 19+ messages in thread
From: Alex Williamson @ 2012-03-07  0:15 UTC (permalink / raw)
  To: qemu-devel; +Cc: alex.williamson, ddutile, gleb, mst

Achieves the same result with less code.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

 hw/acpi_piix4.c |    6 ------
 1 files changed, 0 insertions(+), 6 deletions(-)

diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c
index 4640e2e..3db0efd 100644
--- a/hw/acpi_piix4.c
+++ b/hw/acpi_piix4.c
@@ -532,11 +532,6 @@ static uint32_t pcirmv_read(void *opaque, uint32_t addr)
     return s->pci0_hotplug_enable;
 }
 
-static void pcirmv_write(void *opaque, uint32_t addr, uint32_t val)
-{
-    return;
-}
-
 extern const char *global_cpu_model;
 
 static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev,
@@ -562,7 +557,6 @@ static void piix4_acpi_system_hot_add_init(PCIBus *bus, PIIX4PMState *s)
 
     register_ioport_write(PCI_EJ_BASE, 4, 4, pciej_write, s);
 
-    register_ioport_write(PCI_RMV_BASE, 4, 4, pcirmv_write, s);
     register_ioport_read(PCI_RMV_BASE, 4, 4,  pcirmv_read, s);
 
     pci_bus_hotplug(bus, piix4_device_hotplug, &s->dev.qdev);

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 3/6] pci: Add notifier for device probing
  2012-03-07  0:14 ` [Qemu-devel] [PATCH 3/6] pci: Add notifier for device probing Alex Williamson
@ 2012-03-07  9:19   ` Paolo Bonzini
  2012-03-07 20:12     ` Alex Williamson
  0 siblings, 1 reply; 19+ messages in thread
From: Paolo Bonzini @ 2012-03-07  9:19 UTC (permalink / raw)
  To: Alex Williamson; +Cc: ddutile, qemu-devel, gleb, mst

Il 07/03/2012 01:14, Alex Williamson ha scritto:
> +void pci_host_remove_dev_probe_notifier(Notifier *notify)
> +{
> +    notifier_list_remove(&pci_host_dev_probe_notifiers, notify);
> +}
> +

Looks like qemu-kvm needs a merge. :)

Upstream has notifier_remove, not notifier_list_remove.

Paolo

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 0/6] PCI hotplug improvements
  2012-03-07  0:13 [Qemu-devel] [PATCH 0/6] PCI hotplug improvements Alex Williamson
                   ` (5 preceding siblings ...)
  2012-03-07  0:15 ` [Qemu-devel] [PATCH 6/6] api_piix4: Remove PCI_RMV_BASE write code Alex Williamson
@ 2012-03-07 12:43 ` Gleb Natapov
  2012-03-07 17:20   ` Alex Williamson
  6 siblings, 1 reply; 19+ messages in thread
From: Gleb Natapov @ 2012-03-07 12:43 UTC (permalink / raw)
  To: Alex Williamson; +Cc: ddutile, qemu-devel, mst

On Tue, Mar 06, 2012 at 05:13:36PM -0700, Alex Williamson wrote:
> Here's a re-work of the patch that added _STA for the purpose of
> using it as an ack from the guest.  Instead of that, add a notifier
> for device access.  Once the guest reads from device config space,
> it owns it.  Until that point, we can remove it directly.  As pointed
> out by MST, this passes test b) below, which the _STA method would not.
> As a bonus, no bios change is required for this.  Patches 5 & 6 are
> just cleanups that can be applied independently.  Thanks,
> 
While I agree with Michael that using _STA as an ack is a hack, I think
this approach is no less of a hack. It is unlikely that this is how it
works on bare metal, and we should follow real HW if possible.

> Alex
> 
> Tested using Linux guest:
> a) without acpiphp loaded:
>     - device_add (nothing happens)
>     - device_del (device removed directly)
How does it work on real HW? On a non-ACPI-compliant guest, hot plug/unplug
is not supposed to work.

> b) without acpiphp loaded:
>     - device_add (nothing happens)
>     - echo 1 > /sys/bus/pci/rescan (device discovered)
>     - device_del (nothing happens, guest owns device)
So a guest can block a device from ever being removed?

>     - modprobe acpiphp
>     - device_del (guest releases device)
> c) with acpiphp loaded:
>     - device_add/del behave as expected (automatic add + coordinated removal)
> Tested using WinXP guest:
>     - device_add/del behave as expected (automatic add + coordinated removal)
> 
> ---
> 
> Alex Williamson (6):
>       api_piix4: Remove PCI_RMV_BASE write code
>       acpi_piix4: Use pci_get/set_byte
>       acpi_piix4: Track PCI hotplug status and allow non-ACPI remove path
>       pci: Add notifier for device probing
>       acpi_piix4: Only allow writes to PCI hotplug eject register
>       acpi_piix4: Disallow write to up/down PCI hotplug registers
> 
> 
>  hw/acpi_piix4.c |  175 ++++++++++++++++++++++++++++---------------------------
>  hw/pci_host.c   |   19 ++++++
>  hw/pci_host.h   |    2 +
>  3 files changed, 111 insertions(+), 85 deletions(-)

--
			Gleb.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 0/6] PCI hotplug improvements
  2012-03-07 12:43 ` [Qemu-devel] [PATCH 0/6] PCI hotplug improvements Gleb Natapov
@ 2012-03-07 17:20   ` Alex Williamson
  2012-03-07 18:59     ` Gleb Natapov
  0 siblings, 1 reply; 19+ messages in thread
From: Alex Williamson @ 2012-03-07 17:20 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: ddutile, qemu-devel, mst

On Wed, 2012-03-07 at 14:43 +0200, Gleb Natapov wrote:
> On Tue, Mar 06, 2012 at 05:13:36PM -0700, Alex Williamson wrote:
> > Here's a re-work of the patch that added _STA for the purpose of
> > using it as an ack from the guest.  Instead of that, add a notifier
> > for device access.  Once the guest reads from device config space,
> > it owns it.  Until that point, we can remove it directly.  As pointed
> > out by MST, this passes test b) below, which the _STA method would not.
> > As a bonus, no bios change is required for this.  Patches 5 & 6 are
> > just cleanups that can be applied independently.  Thanks,
> > 
> While I agree with Michael that using _STA as ack is a hack I think
> this approach is not less of a hack. It is unlikely that this is how it
> work on bare metal and we should follow real HW if possible.

The test below is the only thing that proved to me it was less of a
hack.  Introducing a _LCK method for a slot may be another way to do
this.  Unfortunately it's not required that the OSPM call _LCK and it's
not mentioned in the msft document referenced previously.
 
> > Tested using Linux guest:
> > a) without acpiphp loaded:
> >     - device_add (nothing happens)
> >     - device_del (device removed directly)
> How it works on real HW? On non ACPI compliant guest hot plug unplug is
> not suppose to work.

We're dealing with ACPI hotplug, so it's not as completely defined as
something like SHPC.  My understanding is that add and remove can be
initiated by either an OS defined software method or via a physical
button on the slot.  What we do in qemu is more akin to the physical
button.  The user places a card in a slot, presses the button, which
will signal the OS to power up the slot, discover the device, and start
making use of it.  An indicator LED is optional in the PCI hotplug spec,
so the user is left to check the OS or look for device activity to
determine if the card was inserted.  If nothing happens, I suspect the
user would likely pull the card back out, which is effectively what
we're allowing here.

> > b) without acpiphp loaded:
> >     - device_add (nothing happens)
> >     - echo 1 > /sys/bus/pci/rescan (device discovered)
> >     - device_del (nothing happens, guest owns device)
> So guest can block a device from being ever removed?

Yes, surprise removal is beyond the scope of this series.  We can always
shut down a guest to remove the device, but surprise removal risks the
integrity of the guest.  Thanks,

Alex

> >     - modprobe acpiphp
> >     - device_del (guest releases device)
> > c) with acpiphp loaded:
> >     - device_add/del behave as expected (automatic add + coordinated removal)
> > Tested using WinXP guest:
> >     - device_add/del behave as expected (automatic add + coordinated removal)
> > 
> > ---
> > 
> > Alex Williamson (6):
> >       api_piix4: Remove PCI_RMV_BASE write code
> >       acpi_piix4: Use pci_get/set_byte
> >       acpi_piix4: Track PCI hotplug status and allow non-ACPI remove path
> >       pci: Add notifier for device probing
> >       acpi_piix4: Only allow writes to PCI hotplug eject register
> >       acpi_piix4: Disallow write to up/down PCI hotplug registers
> > 
> > 
> >  hw/acpi_piix4.c |  175 ++++++++++++++++++++++++++++---------------------------
> >  hw/pci_host.c   |   19 ++++++
> >  hw/pci_host.h   |    2 +
> >  3 files changed, 111 insertions(+), 85 deletions(-)
> 
> --
> 			Gleb.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 0/6] PCI hotplug improvements
  2012-03-07 17:20   ` Alex Williamson
@ 2012-03-07 18:59     ` Gleb Natapov
  2012-03-07 19:51       ` Alex Williamson
  0 siblings, 1 reply; 19+ messages in thread
From: Gleb Natapov @ 2012-03-07 18:59 UTC (permalink / raw)
  To: Alex Williamson; +Cc: ddutile, qemu-devel, mst

On Wed, Mar 07, 2012 at 10:20:49AM -0700, Alex Williamson wrote:
> On Wed, 2012-03-07 at 14:43 +0200, Gleb Natapov wrote:
> > On Tue, Mar 06, 2012 at 05:13:36PM -0700, Alex Williamson wrote:
> > > Here's a re-work of the patch that added _STA for the purpose of
> > > using it as an ack from the guest.  Instead of that, add a notifier
> > > for device access.  Once the guest reads from device config space,
> > > it owns it.  Until that point, we can remove it directly.  As pointed
> > > out by MST, this passes test b) below, which the _STA method would not.
> > > As a bonus, no bios change is required for this.  Patches 5 & 6 are
> > > just cleanups that can be applied independently.  Thanks,
> > > 
> > While I agree with Michael that using _STA as ack is a hack I think
> > this approach is not less of a hack. It is unlikely that this is how it
> > work on bare metal and we should follow real HW if possible.
> 
> The test below is the only thing that proved to me it was less of a
> hack.  Introducing a _LCK method for a slot may be another way to do
> this.  Unfortunately it's not required that the OSPM call _LCK and it's
> not mentioned in the msft document referenced previously.
>  
I do not understand where this requirement, that device_del should work
if non-acpi guest is running, is coming from? Because if there is no
such requirement then the hack is not needed.

> > > Tested using Linux guest:
> > > a) without acpiphp loaded:
> > >     - device_add (nothing happens)
> > >     - device_del (device removed directly)
> > How it works on real HW? On non ACPI compliant guest hot plug unplug is
> > not suppose to work.
> 
> We're dealing with ACPI hotplug, so it's not as completely defined as
> something like SHPC.  My understanding is that add and remove can be
> initiated by either an OS defined software method or via a physical
> button on the slot.  What we do in qemu is more akin to the physical
> button.  The user places a card in a slot, presses the button, which
> will signal the OS to power up the slot, discover the device, and start
> making use of it.  An indicator LED is optional in the PCI hotplug spec,
> so the user is left to check the OS or look for device activity to
> determine if the card was inserted.  If nothing happens, I suspect the
> user would likely pull the card back out, which is effectively what
> we're allowing here.
> 
> > > b) without acpiphp loaded:
> > >     - device_add (nothing happens)
> > >     - echo 1 > /sys/bus/pci/rescan (device discovered)
> > >     - device_del (nothing happens, guest owns device)
> > So guest can block a device from being ever removed?
> 
> Yes, surprise removal is beyond the scope of this series.  We can always
> shutdown a guest to remove the device, but surprise removal risks the
> integrity of the guest.  Thanks,
Surprise removal is a guest driver issue, not ACPI AFAIK. 

> 
> Alex
> 
> > >     - modprobe acpiphp
> > >     - device_del (guest releases device)
> > > c) with acpiphp loaded:
> > >     - device_add/del behave as expected (automatic add + coordinated removal)
> > > Tested using WinXP guest:
> > >     - device_add/del behave as expected (automatic add + coordinated removal)
> > > 
> > > ---
> > > 
> > > Alex Williamson (6):
> > >       api_piix4: Remove PCI_RMV_BASE write code
> > >       acpi_piix4: Use pci_get/set_byte
> > >       acpi_piix4: Track PCI hotplug status and allow non-ACPI remove path
> > >       pci: Add notifier for device probing
> > >       acpi_piix4: Only allow writes to PCI hotplug eject register
> > >       acpi_piix4: Disallow write to up/down PCI hotplug registers
> > > 
> > > 
> > >  hw/acpi_piix4.c |  175 ++++++++++++++++++++++++++++---------------------------
> > >  hw/pci_host.c   |   19 ++++++
> > >  hw/pci_host.h   |    2 +
> > >  3 files changed, 111 insertions(+), 85 deletions(-)
> > 
> > --
> > 			Gleb.
> 
> 

--
			Gleb.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 0/6] PCI hotplug improvements
  2012-03-07 18:59     ` Gleb Natapov
@ 2012-03-07 19:51       ` Alex Williamson
  2012-03-07 21:00         ` Gleb Natapov
  0 siblings, 1 reply; 19+ messages in thread
From: Alex Williamson @ 2012-03-07 19:51 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: ddutile, qemu-devel, mst

On Wed, 2012-03-07 at 20:59 +0200, Gleb Natapov wrote:
> On Wed, Mar 07, 2012 at 10:20:49AM -0700, Alex Williamson wrote:
> > On Wed, 2012-03-07 at 14:43 +0200, Gleb Natapov wrote:
> > > On Tue, Mar 06, 2012 at 05:13:36PM -0700, Alex Williamson wrote:
> > > > Here's a re-work of the patch that added _STA for the purpose of
> > > > using it as an ack from the guest.  Instead of that, add a notifier
> > > > for device access.  Once the guest reads from device config space,
> > > > it owns it.  Until that point, we can remove it directly.  As pointed
> > > > out by MST, this passes test b) below, which the _STA method would not.
> > > > As a bonus, no bios change is required for this.  Patches 5 & 6 are
> > > > just cleanups that can be applied independently.  Thanks,
> > > > 
> > > While I agree with Michael that using _STA as ack is a hack I think
> > > this approach is not less of a hack. It is unlikely that this is how it
> > > work on bare metal and we should follow real HW if possible.
> > 
> > The test below is the only thing that proved to me it was less of a
> > hack.  Introducing a _LCK method for a slot may be another way to do
> > this.  Unfortunately it's not required that the OSPM call _LCK and it's
> > not mentioned in the msft document referenced previously.
> >  
> I do not understand where this requirement, that device_del should work
> if non-acpi guest is running, is coming from? Because if there is no
> such requirement then the hack is not needed.

Where do I file my TPS report? ;)  This is simply trying to add some
definition to "when is a hot attach completed".  Once we know that, we
can consider the device owned by the guest after that point.  Prior to
that, we can allow the cancellation of a hot attach, by directly
removing the device.  After that point, we have to ask permission from
the guest or deal with surprise removal.

There are two use cases I know of that make the non-ACPI device_del, or
really device_add cancellation, useful.  The first is what we've been
discussing, that not all guests will support ACPI based PCI hotplug and
devices can be lost to the guest until shutdown, including assigned
devices.  The second is something we more commonly run into in testing,
that between an 'add' & 'del' (or del & add), there's some undefined
delay required to prevent us stepping on ourselves.  For instance, if we
do an 'add' immediately followed by a 'del', we clear the 'up' register,
set the 'down' register and hope that the guest removes a device that it
never knew existed.  This code allows that to work as expected, removing
the device even though the guest never saw it.  In the other direction
(del->add), PCI won't create a device in a slot that's already occupied,
so we never actually get to the race there.  Overall, an improvement in
usability IMHO.

Once we define an end point for addition, we could actually take it a
step further and add a timeout parameter to device_add, where if the
guest hasn't taken the device before the timeout, we automatically
remove it and device_add returns error.  We might consider doing the
same for device_del.  Thanks,

Alex

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 3/6] pci: Add notifier for device probing
  2012-03-07  9:19   ` Paolo Bonzini
@ 2012-03-07 20:12     ` Alex Williamson
  0 siblings, 0 replies; 19+ messages in thread
From: Alex Williamson @ 2012-03-07 20:12 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: ddutile, qemu-devel, gleb, mst

On Wed, 2012-03-07 at 10:19 +0100, Paolo Bonzini wrote:
> Il 07/03/2012 01:14, Alex Williamson ha scritto:
> > +void pci_host_remove_dev_probe_notifier(Notifier *notify)
> > +{
> > +    notifier_list_remove(&pci_host_dev_probe_notifiers, notify);
> > +}
> > +
> 
> Looks like qemu-kvm needs a merge. :)
> 
> Upstream has notifier_remove, not notifier_list_remove.

Thanks Paolo,

Alex

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 0/6] PCI hotplug improvements
  2012-03-07 19:51       ` Alex Williamson
@ 2012-03-07 21:00         ` Gleb Natapov
  2012-03-07 21:44           ` Alex Williamson
  0 siblings, 1 reply; 19+ messages in thread
From: Gleb Natapov @ 2012-03-07 21:00 UTC (permalink / raw)
  To: Alex Williamson; +Cc: ddutile, qemu-devel, mst

On Wed, Mar 07, 2012 at 12:51:48PM -0700, Alex Williamson wrote:
> On Wed, 2012-03-07 at 20:59 +0200, Gleb Natapov wrote:
> > On Wed, Mar 07, 2012 at 10:20:49AM -0700, Alex Williamson wrote:
> > > On Wed, 2012-03-07 at 14:43 +0200, Gleb Natapov wrote:
> > > > On Tue, Mar 06, 2012 at 05:13:36PM -0700, Alex Williamson wrote:
> > > > > Here's a re-work of the patch that added _STA for the purpose of
> > > > > using it as an ack from the guest.  Instead of that, add a notifier
> > > > > for device access.  Once the guest reads from device config space,
> > > > > it owns it.  Until that point, we can remove it directly.  As pointed
> > > > > out by MST, this passes test b) below, which the _STA method would not.
> > > > > As a bonus, no bios change is required for this.  Patches 5 & 6 are
> > > > > just cleanups that can be applied independently.  Thanks,
> > > > > 
> > > > While I agree with Michael that using _STA as ack is a hack I think
> > > > this approach is not less of a hack. It is unlikely that this is how it
> > > > work on bare metal and we should follow real HW if possible.
> > > 
> > > The test below is the only thing that proved to me it was less of a
> > > hack.  Introducing a _LCK method for a slot may be another way to do
> > > this.  Unfortunately it's not required that the OSPM call _LCK and it's
> > > not mentioned in the msft document referenced previously.
> > >  
> > I do not understand where this requirement, that device_del should work
> > if non-acpi guest is running, is coming from? Because if there is no
> > such requirement then the hack is not needed.
> 
> Where do I file my TPS report? ;)  This is simply trying to add some
> definition to "when is a hot attach completed".  Once we know that, we
> can consider the device owned by the guest after that point.  Prior to
> that, we can allow the cancellation of a hot attach, by directly
> removing the device.  After that point, we have to ask permission from
> the guest or deal with surprise removal.
> 
That's just a heuristic based on observation of several guests, no? device_add
sends sci interrupt to a guest. How do we know that some guest will not
get upset if it will not find promised device after getting the interrupt
and executing Notify()?

> There are two use cases I know of that make the non-ACPI device_del, or
> really device_add cancellation, useful.  The first is what we've been
> discussing, that not all guests will support ACPI based PCI hotplug and
> devices can be lost to the guest until shutdown, including assigned
I do not know how non ACPI based PCI hotplug works. I can only assume
that they have a way to notify device that it can be removed. The patch
series is about ACPI though. The patch series also does not solve lost
device problem since guest can rescan the bus and claim device forever.
You showed this in your original mail yourself.

> devices.  The second is something we more commonly run into in testing,
> that between and 'add' & 'del' (or del & add), there's some undefined
> delay required to prevent us stepping on ourselves.  For instance, if we
> do an 'add' immediately followed by a 'del', we clear the 'up' register,
> set the 'down' register and hope that the guest removes a device that it
> never knew existed.  This code allows that to work as expected, removing
That's QEMU problem and it should be solved in QEMU. What if we will not
clear 'up' bit and let guest process both events add and del?

> the device even thought the guest never saw it.  In the other direction
> (del->add), PCI won't create a device in a slot that's already occupied,
> so we never actually get to the race there.  Overall, an improvement in
> usability IMHO.
> 
> Once we define an end point for addition, we could actually take it a
> step further and add a timeout parameter to device_add, where if the
> guest hasn't taken the device before the timeout, we automatically
> remove it and device_add returns error.  We might consider doing the
> same for device_del.  Thanks,
>
IMO end point of addition should be sending of sci interrupt.  After that
device should not be removed without guest participation. We can provide
forced device_del that yanks device anyway for the cases when device was
erroneously added while non-ACPI guest was running.

--
			Gleb.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 0/6] PCI hotplug improvements
  2012-03-07 21:00         ` Gleb Natapov
@ 2012-03-07 21:44           ` Alex Williamson
  2012-03-07 22:17             ` Gleb Natapov
  0 siblings, 1 reply; 19+ messages in thread
From: Alex Williamson @ 2012-03-07 21:44 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: ddutile, qemu-devel, mst

On Wed, 2012-03-07 at 23:00 +0200, Gleb Natapov wrote:
> On Wed, Mar 07, 2012 at 12:51:48PM -0700, Alex Williamson wrote:
> > On Wed, 2012-03-07 at 20:59 +0200, Gleb Natapov wrote:
> > > On Wed, Mar 07, 2012 at 10:20:49AM -0700, Alex Williamson wrote:
> > > > On Wed, 2012-03-07 at 14:43 +0200, Gleb Natapov wrote:
> > > > > On Tue, Mar 06, 2012 at 05:13:36PM -0700, Alex Williamson wrote:
> > > > > > Here's a re-work of the patch that added _STA for the purpose of
> > > > > > using it as an ack from the guest.  Instead of that, add a notifier
> > > > > > for device access.  Once the guest reads from device config space,
> > > > > > it owns it.  Until that point, we can remove it directly.  As pointed
> > > > > > out by MST, this passes test b) below, which the _STA method would not.
> > > > > > As a bonus, no bios change is required for this.  Patches 5 & 6 are
> > > > > > just cleanups that can be applied independently.  Thanks,
> > > > > > 
> > > > > While I agree with Michael that using _STA as ack is a hack I think
> > > > > this approach is not less of a hack. It is unlikely that this is how it
> > > > > work on bare metal and we should follow real HW if possible.
> > > > 
> > > > The test below is the only thing that proved to me it was less of a
> > > > hack.  Introducing a _LCK method for a slot may be another way to do
> > > > this.  Unfortunately it's not required that the OSPM call _LCK and it's
> > > > not mentioned in the msft document referenced previously.
> > > >  
> > > I do not understand where this requirement, that device_del should work
> > > if non-acpi guest is running, is coming from? Because if there is no
> > > such requirement then the hack is not needed.
> > 
> > Where do I file my TPS report? ;)  This is simply trying to add some
> > definition to "when is a hot attach completed".  Once we know that, we
> > can consider the device owned by the guest after that point.  Prior to
> > that, we can allow the cancellation of a hot attach, by directly
> > removing the device.  After that point, we have to ask permission from
> > the guest or deal with surprise removal.
> > 
> That just heuristic based on observation of several guests, no? device_add
> sends sci interrupt to a guest. How do we know that some guest will not
> get upset if it will not find promised device after getting the interrupt
> and executing Notify()?

There's no promise of a device with a device check notify.  It's defined
as both addition and removal (I imagine this is what we'd call if we did
a surprise removal).  The guest needs to go test if there's something
there (we don't provide a _STA method, so it has to go probe the
device).  Hopefully guests are robust enough to not fall over when they
discover there was nothing there and there still is nothing there.  This
is the same thing that might happen on a physical system if a power
latch is broken and the card fails to power up.

> 
> > There are two use cases I know of that make the non-ACPI device_del, or
> > really device_add cancellation, useful.  The first is what we've been
> > discussing, that not all guests will support ACPI based PCI hotplug and
> > devices can be lost to the guest until shutdown, including assigned
> I do not know how non ACPI based PCI hotplug works. I can only assume
> that they have a way to notify device that it can be removed. The patch
> series is about ACPI though. The patch series also does not solve lost
> device problem since guest can rescan the bus and claim device forever.
> You showed this in your original mail yourself.

That falls under surprise removal, which as noted before, I'm not
attempting to address here.  Once the guest has accessed the device,
using the spec defined probe method, we have to assume it owns it.

> > devices.  The second is something we more commonly run into in testing,
> > that between and 'add' & 'del' (or del & add), there's some undefined
> > delay required to prevent us stepping on ourselves.  For instance, if we
> > do an 'add' immediately followed by a 'del', we clear the 'up' register,
> > set the 'down' register and hope that the guest removes a device that it
> > never knew existed.  This code allows that to work as expected, removing
> That's QEMU problem and it should be solved in QEMU. What if we will not
> clear 'up' bit and let guest process both events add and del?

This *is* an attempt to solve it in Qemu.  Letting the guest deal with
both events seems far riskier than this approach.  If we just want to
clear bits, PCNF should write back each slot to the up/down register as
it sends the Notify.  That's hardly much of an improvement IMHO though.

> > the device even thought the guest never saw it.  In the other direction
> > (del->add), PCI won't create a device in a slot that's already occupied,
> > so we never actually get to the race there.  Overall, an improvement in
> > usability IMHO.
> > 
> > Once we define an end point for addition, we could actually take it a
> > step further and add a timeout parameter to device_add, where if the
> > guest hasn't taken the device before the timeout, we automatically
> > remove it and device_add returns error.  We might consider doing the
> > same for device_del.  Thanks,
> >
> IMO end point of addition should be sending of sci interrupt After that
> device should not be removed without guest participation. We can provide
> forced device_del that yanks device anyway for the cases when device was
> erroneously added while non-ACPI guest were running.

In effect, you'd rather give the user a loaded gun for surprise removal,
pat them on the back and move on?  Thanks,

Alex

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 0/6] PCI hotplug improvements
  2012-03-07 21:44           ` Alex Williamson
@ 2012-03-07 22:17             ` Gleb Natapov
  2012-03-07 22:46               ` Alex Williamson
  0 siblings, 1 reply; 19+ messages in thread
From: Gleb Natapov @ 2012-03-07 22:17 UTC (permalink / raw)
  To: Alex Williamson; +Cc: ddutile, qemu-devel, mst

On Wed, Mar 07, 2012 at 02:44:13PM -0700, Alex Williamson wrote:
> On Wed, 2012-03-07 at 23:00 +0200, Gleb Natapov wrote:
> > On Wed, Mar 07, 2012 at 12:51:48PM -0700, Alex Williamson wrote:
> > > On Wed, 2012-03-07 at 20:59 +0200, Gleb Natapov wrote:
> > > > On Wed, Mar 07, 2012 at 10:20:49AM -0700, Alex Williamson wrote:
> > > > > On Wed, 2012-03-07 at 14:43 +0200, Gleb Natapov wrote:
> > > > > > On Tue, Mar 06, 2012 at 05:13:36PM -0700, Alex Williamson wrote:
> > > > > > > Here's a re-work of the patch that added _STA for the purpose of
> > > > > > > using it as an ack from the guest.  Instead of that, add a notifier
> > > > > > > for device access.  Once the guest reads from device config space,
> > > > > > > it owns it.  Until that point, we can remove it directly.  As pointed
> > > > > > > out by MST, this passes test b) below, which the _STA method would not.
> > > > > > > As a bonus, no bios change is required for this.  Patches 5 & 6 are
> > > > > > > just cleanups that can be applied independently.  Thanks,
> > > > > > > 
> > > > > > While I agree with Michael that using _STA as ack is a hack I think
> > > > > > this approach is not less of a hack. It is unlikely that this is how it
> > > > > > work on bare metal and we should follow real HW if possible.
> > > > > 
> > > > > The test below is the only thing that proved to me it was less of a
> > > > > hack.  Introducing a _LCK method for a slot may be another way to do
> > > > > this.  Unfortunately it's not required that the OSPM call _LCK and it's
> > > > > not mentioned in the msft document referenced previously.
> > > > >  
> > > > I do not understand where this requirement, that device_del should work
> > > > if non-acpi guest is running, is coming from? Because if there is no
> > > > such requirement then the hack is not needed.
> > > 
> > > Where do I file my TPS report? ;)  This is simply trying to add some
> > > definition to "when is a hot attach completed".  Once we know that, we
> > > can consider the device owned by the guest after that point.  Prior to
> > > that, we can allow the cancellation of a hot attach, by directly
> > > removing the device.  After that point, we have to ask permission from
> > > the guest or deal with surprise removal.
> > > 
> > That just heuristic based on observation of several guests, no? device_add
> > sends sci interrupt to a guest. How do we know that some guest will not
> > get upset if it will not find promised device after getting the interrupt
> > and executing Notify()?
> 
> There's no promise of a device with a device check notify.  It's defined
What do you mean?

> as both addition and removal (I imagine this is what we'd call if we did
> a surprise removal).  The guest needs to go test if there's something
Notify is called with different parameters for addition and removal.
Addition and removal look very different for a guest.

> there (we don't provide a _STA method, so it has to go probe the
We should. Just not use it to ack insertion.

> device).  Hopefully guests are robust enough to not fall over when they
> discover there was nothing there and there still is nothing there.  This
> is the same thing that might happen on a physical system if a power
> latch is broken and the card fails to power up.
> 
> > 
> > > There are two use cases I know of that make the non-ACPI device_del, or
> > > really device_add cancellation, useful.  The first is what we've been
> > > discussing, that not all guests will support ACPI based PCI hotplug and
> > > devices can be lost to the guest until shutdown, including assigned
> > I do not know how non ACPI based PCI hotplug works. I can only assume
> > that they have a way to notify device that it can be removed. The patch
> > series is about ACPI though. The patch series also does not solve lost
> > device problem since guest can rescan the bus and claim device forever.
> > You showed this in your original mail yourself.
> 
> That falls under surprise removal, which as noted before, I'm not
> attempting to address here.  Once the guest has accessed the device,
> using the spec defined probe method, we have to assume it owns it.
I do not understand what you mean by surprise removal. I am not
talking about surprise removal here.

> 
> > > devices.  The second is something we more commonly run into in testing,
> > > that between and 'add' & 'del' (or del & add), there's some undefined
> > > delay required to prevent us stepping on ourselves.  For instance, if we
> > > do an 'add' immediately followed by a 'del', we clear the 'up' register,
> > > set the 'down' register and hope that the guest removes a device that it
> > > never knew existed.  This code allows that to work as expected, removing
> > That's QEMU problem and it should be solved in QEMU. What if we will not
> > clear 'up' bit and let guest process both events add and del?
> 
> This *is* an attempt to solve it in Qemu.  Letting the guest deal with
> both events seems far riskier than this approach.  If we just want to
> clear bits, PCNF should write back each slot to the up/down register as
> it sends the Notify.  That's hardly much of an improvement IMHO though.
> 
I already proposed dropping up/down thing and moving to how cpu hotplug
works. I also do not see why you do not like clearing bits in PCNF.

And we also should have a way to initiate device ejection from a guest.
If a Windows user presses the eject button for a device in the GUI, the next
device_del should succeed without even sending sci.

> > > the device even thought the guest never saw it.  In the other direction
> > > (del->add), PCI won't create a device in a slot that's already occupied,
> > > so we never actually get to the race there.  Overall, an improvement in
> > > usability IMHO.
> > > 
> > > Once we define an end point for addition, we could actually take it a
> > > step further and add a timeout parameter to device_add, where if the
> > > guest hasn't taken the device before the timeout, we automatically
> > > remove it and device_add returns error.  We might consider doing the
> > > same for device_del.  Thanks,
> > >
> > IMO end point of addition should be sending of sci interrupt After that
> > device should not be removed without guest participation. We can provide
> > forced device_del that yanks device anyway for the cases when device was
> > erroneously added while non-ACPI guest were running.
> 
> In effect, you'd rather give the user a loaded gun for surprise removal,
> pat them on the back and move on?  Thanks,
> 
I'd rather have things working like real HW does. And on real HW you
have this option if nothing else works. And on VM you want this option
anyway since guest may not cooperate but management is determined to get
its assigned device back, so it either has to kill the guest or force
device removal. We have enough ammo in the monitor to harm a guest already.
This is not a user interface; this is a management interface.

--
			Gleb.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 0/6] PCI hotplug improvements
  2012-03-07 22:17             ` Gleb Natapov
@ 2012-03-07 22:46               ` Alex Williamson
  2012-03-08 12:39                 ` Gleb Natapov
  0 siblings, 1 reply; 19+ messages in thread
From: Alex Williamson @ 2012-03-07 22:46 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: ddutile, qemu-devel, mst

On Thu, 2012-03-08 at 00:17 +0200, Gleb Natapov wrote:
> On Wed, Mar 07, 2012 at 02:44:13PM -0700, Alex Williamson wrote:
> > On Wed, 2012-03-07 at 23:00 +0200, Gleb Natapov wrote:
> > > On Wed, Mar 07, 2012 at 12:51:48PM -0700, Alex Williamson wrote:
> > > > On Wed, 2012-03-07 at 20:59 +0200, Gleb Natapov wrote:
> > > > > On Wed, Mar 07, 2012 at 10:20:49AM -0700, Alex Williamson wrote:
> > > > > > On Wed, 2012-03-07 at 14:43 +0200, Gleb Natapov wrote:
> > > > > > > On Tue, Mar 06, 2012 at 05:13:36PM -0700, Alex Williamson wrote:
> > > > > > > > Here's a re-work of the patch that added _STA for the purpose of
> > > > > > > > using it as an ack from the guest.  Instead of that, add a notifier
> > > > > > > > for device access.  Once the guest reads from device config space,
> > > > > > > > it owns it.  Until that point, we can remove it directly.  As pointed
> > > > > > > > out by MST, this passes test b) below, which the _STA method would not.
> > > > > > > > As a bonus, no bios change is required for this.  Patches 5 & 6 are
> > > > > > > > just cleanups that can be applied independently.  Thanks,
> > > > > > > > 
> > > > > > > While I agree with Michael that using _STA as ack is a hack I think
> > > > > > > this approach is not less of a hack. It is unlikely that this is how it
> > > > > > > work on bare metal and we should follow real HW if possible.
> > > > > > 
> > > > > > The test below is the only thing that proved to me it was less of a
> > > > > > hack.  Introducing a _LCK method for a slot may be another way to do
> > > > > > this.  Unfortunately it's not required that the OSPM call _LCK and it's
> > > > > > not mentioned in the msft document referenced previously.
> > > > > >  
> > > > > I do not understand where this requirement, that device_del should work
> > > > > if non-acpi guest is running, is coming from? Because if there is no
> > > > > such requirement then the hack is not needed.
> > > > 
> > > > Where do I file my TPS report? ;)  This is simply trying to add some
> > > > definition to "when is a hot attach completed".  Once we know that, we
> > > > can consider the device owned by the guest after that point.  Prior to
> > > > that, we can allow the cancellation of a hot attach, by directly
> > > > removing the device.  After that point, we have to ask permission from
> > > > the guest or deal with surprise removal.
> > > > 
> > > That just heuristic based on observation of several guests, no? device_add
> > > sends sci interrupt to a guest. How do we know that some guest will not
> > > get upset if it will not find promised device after getting the interrupt
> > > and executing Notify()?
> > 
> > There's no promise of a device with a device check notify.  It's defined
> What do you mean?

I read it more as "something happened, go find out what".

> > as both addition and removal (I imagine this is what we'd call if we did
> > a surprise removal).  The guest needs to go test if there's something
> Notify is called with different parameters for addition and removal.
> Addition and removal look very different for a guest.

Right, device check(1) on addition vs eject request(3) on removal.
However, device check can also mean something went away, which is why I
describe it that way above.

> > there (we don't provide a _STA method, so it has to go probe the
> We should. Just not use it to ack insertion.

Both Linux and Windows seem happy enough to probe for the device, so I'm
not sure it buys us anything but completeness.

> > device).  Hopefully guests are robust enough to not fall over when they
> > discover there was nothing there and there still is nothing there.  This
> > is the same thing that might happen on a physical system if a power
> > latch is broken and the card fails to power up.
> > 
> > > 
> > > > There are two use cases I know of that make the non-ACPI device_del, or
> > > > really device_add cancellation, useful.  The first is what we've been
> > > > discussing, that not all guests will support ACPI based PCI hotplug and
> > > > devices can be lost to the guest until shutdown, including assigned
> > > I do not know how non ACPI based PCI hotplug works. I can only assume
> > > that they have a way to notify device that it can be removed. The patch
> > > series is about ACPI though. The patch series also does not solve lost
> > > device problem since guest can rescan the bus and claim device forever.
> > > You showed this in your original mail yourself.
> > 
> > That falls under surprise removal, which as noted before, I'm not
> > attempting to address here.  Once the guest has accessed the device,
> > using the spec defined probe method, we have to assume it owns it.
> I do not understand what do you mean by surprise removal. I am not
> talking about surprise removal here.

Any removal of the device without the guest agreeing to it is surprise
removal:

        The patch series also does not solve lost device problem since
        guest can rescan the bus and claim device forever.  You showed
        this in your original mail yourself.

If the guest rescans the bus and touches the device, it owns it.  Taking
it back at that point would be a surprise removal.

> > > > devices.  The second is something we more commonly run into in testing,
> > > > that between and 'add' & 'del' (or del & add), there's some undefined
> > > > delay required to prevent us stepping on ourselves.  For instance, if we
> > > > do an 'add' immediately followed by a 'del', we clear the 'up' register,
> > > > set the 'down' register and hope that the guest removes a device that it
> > > > never knew existed.  This code allows that to work as expected, removing
> > > That's QEMU problem and it should be solved in QEMU. What if we will not
> > > clear 'up' bit and let guest process both events add and del?
> > 
> > This *is* an attempt to solve it in Qemu.  Letting the guest deal with
> > both events seems far riskier than this approach.  If we just want to
> > clear bits, PCNF should write back each slot to the up/down register as
> > it sends the Notify.  That's hardly much of an improvement IMHO though.
> > 
> I already proposed dropping up/down thing and moving to how cpu hotplug
> works. I also do not see why you do not like clearing bits in PCNF.

I'm not opposed to it, I just don't know how much value it adds.  All
that tells us is that a Notify was sent, not whether there was any
response to it.  Even a non-hotplug capable ACPI guest will do this.  I
haven't looked at cpu hotplug, but this series maintains compatibility
with existing bios code.

> And we also should have a way to initiate device ejection from a guest.
> If Windows user press eject button for a device in the GUI next
> device_del should succeed without even sending sci.

We have this AFAIK.  Isn't the guest able to call _EJ0 independent of us
signaling it to do so?

> > > > the device even thought the guest never saw it.  In the other direction
> > > > (del->add), PCI won't create a device in a slot that's already occupied,
> > > > so we never actually get to the race there.  Overall, an improvement in
> > > > usability IMHO.
> > > > 
> > > > Once we define an end point for addition, we could actually take it a
> > > > step further and add a timeout parameter to device_add, where if the
> > > > guest hasn't taken the device before the timeout, we automatically
> > > > remove it and device_add returns error.  We might consider doing the
> > > > same for device_del.  Thanks,
> > > >
> > > IMO end point of addition should be sending of sci interrupt After that
> > > device should not be removed without guest participation. We can provide
> > > forced device_del that yanks device anyway for the cases when device was
> > > erroneously added while non-ACPI guest were running.
> > 
> > In effect, you'd rather give the user a loaded gun for surprise removal,
> > pat them on the back and move on?  Thanks,
> > 
> I'd rather have things working like real HW does. And on real HW you
> have this option if nothing else works. And on VM you want this option
> anyway since guest may not cooperate but management is determined to get
> its assigned device back, so it either has to kill the guest or force
> device removal. We have enough ammo in teh monitor to harm a guest already.
> This is not user interface this is management interface.

Show me a sysadmin that would rip a running card out of a system rather
than shut it down to remove it.  Management tools already have that
option.  This series allows a "safe" bypass for the case when the guest
hasn't touched the device.  I don't know why we'd tell a user to use a
"force remove" option when they don't have the visibility to determine
whether the device is unused that we have in qemu.  Thanks,

Alex

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 0/6] PCI hotplug improvements
  2012-03-07 22:46               ` Alex Williamson
@ 2012-03-08 12:39                 ` Gleb Natapov
  0 siblings, 0 replies; 19+ messages in thread
From: Gleb Natapov @ 2012-03-08 12:39 UTC (permalink / raw)
  To: Alex Williamson; +Cc: ddutile, qemu-devel, mst

On Wed, Mar 07, 2012 at 03:46:40PM -0700, Alex Williamson wrote:
> On Thu, 2012-03-08 at 00:17 +0200, Gleb Natapov wrote:
> > On Wed, Mar 07, 2012 at 02:44:13PM -0700, Alex Williamson wrote:
> > > On Wed, 2012-03-07 at 23:00 +0200, Gleb Natapov wrote:
> > > > On Wed, Mar 07, 2012 at 12:51:48PM -0700, Alex Williamson wrote:
> > > > > On Wed, 2012-03-07 at 20:59 +0200, Gleb Natapov wrote:
> > > > > > On Wed, Mar 07, 2012 at 10:20:49AM -0700, Alex Williamson wrote:
> > > > > > > On Wed, 2012-03-07 at 14:43 +0200, Gleb Natapov wrote:
> > > > > > > > On Tue, Mar 06, 2012 at 05:13:36PM -0700, Alex Williamson wrote:
> > > > > > > > > Here's a re-work of the patch that added _STA for the purpose of
> > > > > > > > > using it as an ack from the guest.  Instead of that, add a notifier
> > > > > > > > > for device access.  Once the guest reads from device config space,
> > > > > > > > > it owns it.  Until that point, we can remove it directly.  As pointed
> > > > > > > > > out by MST, this passes test b) below, which the _STA method would not.
> > > > > > > > > As a bonus, no bios change is required for this.  Patches 5 & 6 are
> > > > > > > > > just cleanups that can be applied independently.  Thanks,
> > > > > > > > > 
> > > > > > > > While I agree with Michael that using _STA as ack is a hack I think
> > > > > > > > this approach is not less of a hack. It is unlikely that this is how it
> > > > > > > > work on bare metal and we should follow real HW if possible.
> > > > > > > 
> > > > > > > The test below is the only thing that proved to me it was less of a
> > > > > > > hack.  Introducing a _LCK method for a slot may be another way to do
> > > > > > > this.  Unfortunately it's not required that the OSPM call _LCK and it's
> > > > > > > not mentioned in the msft document referenced previously.
> > > > > > >  
> > > > > > I do not understand where this requirement, that device_del should work
> > > > > > if non-acpi guest is running, is coming from? Because if there is no
> > > > > > such requirement then the hack is not needed.
> > > > > 
> > > > > Where do I file my TPS report? ;)  This is simply trying to add some
> > > > > definition to "when is a hot attach completed".  Once we know that, we
> > > > > can consider the device owned by the guest after that point.  Prior to
> > > > > that, we can allow the cancellation of a hot attach, by directly
> > > > > removing the device.  After that point, we have to ask permission from
> > > > > the guest or deal with surprise removal.
> > > > > 
> > > > That just heuristic based on observation of several guests, no? device_add
> > > > sends sci interrupt to a guest. How do we know that some guest will not
> > > > get upset if it will not find promised device after getting the interrupt
> > > > and executing Notify()?
> > > 
> > > There's no promise of a device with a device check notify.  It's defined
> > What do you mean?
> 
> I read it more as "something happened, go find out what".
>
According to spec Notify(1) is used to notify OSPM that the device either
appeared or disappeared. Since OSPM knows current state of the device it
knows what to expect from the notification. Spec even says "OSPM may
optimize out reenumeration" which it can do based on that knowledge.

Notify(0) (Bus Check) looks more like "something happened, go find out
what".

> > > as both addition and removal (I imagine this is what we'd call if we did
> > > a surprise removal).  The guest needs to go test if there's something
> > Notify is called with different parameters for addition and removal.
> > Addition and removal look very different for a guest.
> 
> Right, device check(1) on addition vs eject request(3) on removal.
> However, device check can also mean something went away, which is why I
> describe it that way above.
> 
See above.

> > > there (we don't provide a _STA method, so it has to go probe the
> > We should. Just not use it to ack insertion.
> 
> Both Linux and Windows seem happy enough to probe for the device, so I'm
> not sure it buys us anything but completeness.
> 
Spec says "If a device object (including the processor object) does not
have an _STA object, then OSPM assumes that the device is present, enabled,
shown in the UI, and functioning." It would be interesting to see DSDT
from machine that support ACPI pci hotplug.

> > > device).  Hopefully guests are robust enough to not fall over when they
> > > discover there was nothing there and there still is nothing there.  This
> > > is the same thing that might happen on a physical system if a power
> > > latch is broken and the card fails to power up.
> > > 
> > > > 
> > > > > There are two use cases I know of that make the non-ACPI device_del, or
> > > > > really device_add cancellation, useful.  The first is what we've been
> > > > > discussing, that not all guests will support ACPI based PCI hotplug and
> > > > > devices can be lost to the guest until shutdown, including assigned
> > > > I do not know how non ACPI based PCI hotplug works. I can only assume
> > > > that they have a way to notify device that it can be removed. The patch
> > > > series is about ACPI though. The patch series also does not solve lost
> > > > device problem since guest can rescan the bus and claim device forever.
> > > > You showed this in your original mail yourself.
> > > 
> > > That falls under surprise removal, which as noted before, I'm not
> > > attempting to address here.  Once the guest has accessed the device,
> > > using the spec defined probe method, we have to assume it owns it.
> > I do not understand what do you mean by surprise removal. I am not
> > talking about surprise removal here.
> 
> Any removal of the device without the guest agreeing to it is surprise
> removal:
> 
>         The patch series also does not solve lost device problem since
>         guest can rescan the bus and claim device forever.  You showed
>         this in your original mail yourself.
> 
> If the guest rescans the bus and touches the device, it owns it.  Taking
> it back at that point would be a surprise removal.
It may touch it but not load a driver for it and not configure it in any
way (not allocating MMIO and PIO for it). It will still be OK to remove it.
You just use a heuristic here. I'd hate to debug a bug report like "why
can't I hot unplug my device when 'info pci' says it is unconfigured".
The right user expectation should be that a device can be unplugged only
with an ACPI capable guest.

> 
> > > > > devices.  The second is something we more commonly run into in testing,
> > > > > that between and 'add' & 'del' (or del & add), there's some undefined
> > > > > delay required to prevent us stepping on ourselves.  For instance, if we
> > > > > do an 'add' immediately followed by a 'del', we clear the 'up' register,
> > > > > set the 'down' register and hope that the guest removes a device that it
> > > > > never knew existed.  This code allows that to work as expected, removing
> > > > That's QEMU problem and it should be solved in QEMU. What if we will not
> > > > clear 'up' bit and let guest process both events add and del?
> > > 
> > > This *is* an attempt to solve it in Qemu.  Letting the guest deal with
> > > both events seems far riskier than this approach.  If we just want to
> > > clear bits, PCNF should write back each slot to the up/down register as
> > > it sends the Notify.  That's hardly much of an improvement IMHO though.
> > > 
> > I already proposed dropping up/down thing and moving to how cpu hotplug
> > works. I also do not see why you do not like clearing bits in PCNF.
> 
> I'm not opposed to it, I just don't know how much value it adds.  All
> that tells us is that a Notify was sent, not whether there was any
> response to it.  Even a non-hotplug capable ACPI guest will do this.  I
> haven't looked at cpu hotplug, but this series maintains compatibility
> with existing bios code.
If Notify was evaluated we are clearly dealing with ACPI capable guest.
If it does not work correctly this is not our problem.

> 
> > And we also should have a way to initiate device ejection from a guest.
> > If Windows user press eject button for a device in the GUI next
> > device_del should succeed without even sending sci.
> 
> We have this AFAIK.  Isn't the guest able to call _EJ0 independent of us
> signaling it to do so?
> 
That is exactly what guest is doing. If it works this is great.

> > > > > the device even though the guest never saw it.  In the other direction
> > > > > (del->add), PCI won't create a device in a slot that's already occupied,
> > > > > so we never actually get to the race there.  Overall, an improvement in
> > > > > usability IMHO.
> > > > > 
> > > > > Once we define an end point for addition, we could actually take it a
> > > > > step further and add a timeout parameter to device_add, where if the
> > > > > guest hasn't taken the device before the timeout, we automatically
> > > > > remove it and device_add returns error.  We might consider doing the
> > > > > same for device_del.  Thanks,
> > > > >
> > > > > IMO end point of addition should be sending of sci interrupt. After that
> > > > device should not be removed without guest participation. We can provide
> > > > forced device_del that yanks device anyway for the cases when device was
> > > > erroneously added while non-ACPI guest were running.
> > > 
> > > In effect, you'd rather give the user a loaded gun for surprise removal,
> > > pat them on the back and move on?  Thanks,
> > > 
> > I'd rather have things working like real HW does. And on real HW you
> > have this option if nothing else works. And on VM you want this option
> > anyway since guest may not cooperate but management is determined to get
> > its assigned device back, so it either has to kill the guest or force
> > device removal. We have enough ammo in the monitor to harm a guest already.
> > This is not user interface this is management interface.
> 
> Show me a sysadmin that would rip a running card out of a system rather
> than shut it down to remove it.  Management tools already have that
If a system has chassis that allows pci hot plug and sysadmin plugged in
a card while DOS was running I do not see why she will be paranoid
enough to not unplug it without powering down the system. Unlikely
scenario, I agree. But, for some reason, you are trying to add heuristics
to handle exactly that kind of scenarios in QEMU.

> option.  This series allows a "safe" bypass for the case when the guest
> hasn't touched the device.  I don't know why we'd tell a user to use a
> "force remove" option when they don't have the visibility to determine
> whether the device is unused that we have in qemu.  Thanks,
> 
Actually user that has access to the monitor has a visibility to
determine whether the device is unused. "info pci" should tell him it
right away. But I am not insisting on "forced remove" either. Rebooting
non ACPI capable OS to do unplug is very reasonable requirement.

--
			Gleb.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 4/6] acpi_piix4: Track PCI hotplug status and allow non-ACPI remove path
  2012-03-07  0:14 ` [Qemu-devel] [PATCH 4/6] acpi_piix4: Track PCI hotplug status and allow non-ACPI remove path Alex Williamson
@ 2012-03-11 21:57   ` Michael S. Tsirkin
  0 siblings, 0 replies; 19+ messages in thread
From: Michael S. Tsirkin @ 2012-03-11 21:57 UTC (permalink / raw)
  To: Alex Williamson; +Cc: ddutile, qemu-devel, gleb

On Tue, Mar 06, 2012 at 05:14:51PM -0700, Alex Williamson wrote:
> When a guest probes a device, clear the "up" bit in the hotplug
> register.  This allows us to enable a non-ACPI remove path for
> devices added, but never accessed by the guest.  This is useful
> when a guest does not have ACPI PCI hotplug support to avoid losing
> devices to a guest.  We also now individually track bits for "up"
> and "down" rather than clearing both on each PCI hotplug action.
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>

There are two features here:
1. Fixing up/down handling

2. non ACPI removal

I think 1 is done correctly here. But 2
seems like something completely unrelated to ACPI.
How about tracking access in pci core?

> ---
> 
>  hw/acpi_piix4.c |   58 ++++++++++++++++++++++++++++++++++++++++++++-----------
>  1 files changed, 46 insertions(+), 12 deletions(-)
> 
> diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c
> index 4d88e23..7e766e5 100644
> --- a/hw/acpi_piix4.c
> +++ b/hw/acpi_piix4.c
> @@ -27,6 +27,7 @@
>  #include "sysemu.h"
>  #include "range.h"
>  #include "ioport.h"
> +#include "pci_host.h"
>  
>  //#define DEBUG
>  
> @@ -75,6 +76,7 @@ typedef struct PIIX4PMState {
>      qemu_irq smi_irq;
>      int kvm_enabled;
>      Notifier machine_ready;
> +    Notifier device_probe;
>  
>      /* for pci hotplug */
>      ACPIGPE gpe;
> @@ -336,6 +338,16 @@ static void piix4_pm_machine_ready(Notifier *n, void *opaque)
>  
>  }
>  
> +static void piix4_pm_device_probe(Notifier *n, void *opaque)
> +{
> +    PIIX4PMState *s = container_of(n, PIIX4PMState, device_probe);
> +    PCIDevice *pdev = opaque;
> +
> +    if (pci_find_domain(pdev->bus) == 0 && pci_bus_num(pdev->bus) == 0) {
> +        s->pci0_status.up &= ~(1U << PCI_SLOT(pdev->devfn));
> +    }

Seems ugly.  How about we register notifiers per bus?

> +}
> +
>  static PIIX4PMState *global_piix4_pm_state; /* cpu hotadd */
>  
>  static int piix4_pm_initfn(PCIDevice *dev)
> @@ -383,6 +395,8 @@ static int piix4_pm_initfn(PCIDevice *dev)
>      qemu_add_machine_init_done_notifier(&s->machine_ready);
>      qemu_register_reset(piix4_reset, s);
>      piix4_acpi_system_hot_add_init(dev->bus, s);
> +    s->device_probe.notify = piix4_pm_device_probe;
> +    pci_host_add_dev_probe_notifier(&s->device_probe);
>  
>      return 0;
>  }
> @@ -502,6 +516,7 @@ static void pciej_write(void *opaque, uint32_t addr, uint32_t val)
>          PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
>          if (PCI_SLOT(dev->devfn) == slot && !pc->no_hotplug) {
>              qdev_free(qdev);
> +            s->pci0_status.down &= ~(1U << slot);
>          }
>      }
>  
> @@ -594,16 +609,41 @@ void qemu_system_cpu_hot_add(int cpu, int state)
>  }
>  #endif
>  
> -static void enable_device(PIIX4PMState *s, int slot)
> +static int enable_device(PIIX4PMState *s, int slot)
>  {
> +    uint32_t mask = 1U << slot;
> +
> +    if ((s->pci0_status.up | s->pci0_status.down) & mask) {
> +        return -1;
> +    }
> +
>      s->gpe.sts[0] |= PIIX4_PCI_HOTPLUG_STATUS;
> -    s->pci0_status.up |= (1 << slot);
> +    s->pci0_status.up |= mask;
> +
> +    pm_update_sci(s);
> +    return 0;
>  }
>  
> -static void disable_device(PIIX4PMState *s, int slot)
> +static int disable_device(PIIX4PMState *s, int slot)
>  {
> +    uint32_t mask = 1U << slot;
> +
> +    if (s->pci0_status.up & mask) {
> +        s->pci0_status.up &= ~mask;
> +        pciej_write(s, PCI_EJ_BASE, mask);
> +
> +        /* Clear GPE PCI hotplug status if nothing left pending */
> +        if (!(s->pci0_status.up | s->pci0_status.down)) {
> +            s->gpe.sts[0] &= ~PIIX4_PCI_HOTPLUG_STATUS;
> +        }
> +        return 0;
> +    }
> +
>      s->gpe.sts[0] |= PIIX4_PCI_HOTPLUG_STATUS;
> -    s->pci0_status.down |= (1 << slot);
> +    s->pci0_status.down |= mask;
> +
> +    pm_update_sci(s);
> +    return 0;
>  }
>  
>  static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev,
> @@ -620,15 +660,9 @@ static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev,
>          return 0;
>      }
>  
> -    s->pci0_status.up = 0;
> -    s->pci0_status.down = 0;
>      if (state == PCI_HOTPLUG_ENABLED) {
> -        enable_device(s, slot);
> +        return enable_device(s, slot);
>      } else {
> -        disable_device(s, slot);
> +        return disable_device(s, slot);
>      }
> -
> -    pm_update_sci(s);
> -
> -    return 0;
>  }

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2012-03-11 21:57 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-03-07  0:13 [Qemu-devel] [PATCH 0/6] PCI hotplug improvements Alex Williamson
2012-03-07  0:13 ` [Qemu-devel] [PATCH 1/6] acpi_piix4: Disallow write to up/down PCI hotplug registers Alex Williamson
2012-03-07  0:14 ` [Qemu-devel] [PATCH 2/6] acpi_piix4: Only allow writes to PCI hotplug eject register Alex Williamson
2012-03-07  0:14 ` [Qemu-devel] [PATCH 3/6] pci: Add notifier for device probing Alex Williamson
2012-03-07  9:19   ` Paolo Bonzini
2012-03-07 20:12     ` Alex Williamson
2012-03-07  0:14 ` [Qemu-devel] [PATCH 4/6] acpi_piix4: Track PCI hotplug status and allow non-ACPI remove path Alex Williamson
2012-03-11 21:57   ` Michael S. Tsirkin
2012-03-07  0:15 ` [Qemu-devel] [PATCH 5/6] acpi_piix4: Use pci_get/set_byte Alex Williamson
2012-03-07  0:15 ` [Qemu-devel] [PATCH 6/6] api_piix4: Remove PCI_RMV_BASE write code Alex Williamson
2012-03-07 12:43 ` [Qemu-devel] [PATCH 0/6] PCI hotplug improvements Gleb Natapov
2012-03-07 17:20   ` Alex Williamson
2012-03-07 18:59     ` Gleb Natapov
2012-03-07 19:51       ` Alex Williamson
2012-03-07 21:00         ` Gleb Natapov
2012-03-07 21:44           ` Alex Williamson
2012-03-07 22:17             ` Gleb Natapov
2012-03-07 22:46               ` Alex Williamson
2012-03-08 12:39                 ` Gleb Natapov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).