* [PATCHv3 01/13] qemu: make default_write_config use mask table
[not found] <cover.1244192535.git.mst@redhat.com>
@ 2009-06-05 10:22 ` Michael S. Tsirkin
2009-06-05 10:23 ` [PATCHv3 02/13] qemu: capability bits in pci save/restore Michael S. Tsirkin
` (11 subsequent siblings)
12 siblings, 0 replies; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:22 UTC (permalink / raw)
To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, vi
Change much of hw/pci to use symbolic constants and a table-driven
design: add a mask table with writable bits set and readonly bits unset.
Detect change by comparing original and new registers.
This makes it easy to support capabilities where read-only/writeable
bit layout differs between devices, depending on capabilities present.
As a result, writing a single byte in BAR registers now works as
it should. Writing to upper limit registers in the bridge
also works as it should. Code is also shorter.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/pci.c | 145 ++++++++++++-------------------------------------------------
hw/pci.h | 18 +++++++-
2 files changed, 46 insertions(+), 117 deletions(-)
diff --git a/hw/pci.c b/hw/pci.c
index 0ab5b94..235191d 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -239,6 +239,17 @@ int pci_assign_devaddr(const char *addr, int *domp, int *busp, unsigned *slotp)
return pci_parse_devaddr(devaddr, domp, busp, slotp);
}
+static void pci_init_mask(PCIDevice *dev)
+{
+ int i;
+ dev->wmask[PCI_CACHE_LINE_SIZE] = 0xff;
+ dev->wmask[PCI_INTERRUPT_LINE] = 0xff;
+ dev->wmask[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY
+ | PCI_COMMAND_MASTER;
+ for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
+ dev->wmask[i] = 0xff;
+}
+
/* -1 for devfn means auto assign */
static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
const char *name, int devfn,
@@ -261,6 +272,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
memset(pci_dev->irq_state, 0, sizeof(pci_dev->irq_state));
pci_set_default_subsystem_id(pci_dev);
+ pci_init_mask(pci_dev);
if (!config_read)
config_read = pci_default_read_config;
@@ -334,6 +346,7 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
{
PCIIORegion *r;
uint32_t addr;
+ uint32_t wmask;
if ((unsigned int)region_num >= PCI_NUM_REGIONS)
return;
@@ -349,12 +362,17 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
r->size = size;
r->type = type;
r->map_func = map_func;
+
+ wmask = ~(size - 1);
if (region_num == PCI_ROM_SLOT) {
addr = 0x30;
+ /* ROM enable bit is writeable */
+ wmask |= 1;
} else {
addr = 0x10 + region_num * 4;
}
*(uint32_t *)(pci_dev->config + addr) = cpu_to_le32(type);
+ *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
}
static void pci_update_mappings(PCIDevice *d)
@@ -463,118 +481,21 @@ uint32_t pci_default_read_config(PCIDevice *d,
return val;
}
-void pci_default_write_config(PCIDevice *d,
- uint32_t address, uint32_t val, int len)
+void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
{
- int can_write, i;
- uint32_t end, addr;
-
- if (len == 4 && ((address >= 0x10 && address < 0x10 + 4 * 6) ||
- (address >= 0x30 && address < 0x34))) {
- PCIIORegion *r;
- int reg;
+ uint8_t orig[PCI_CONFIG_SPACE_SIZE];
+ int i;
- if ( address >= 0x30 ) {
- reg = PCI_ROM_SLOT;
- }else{
- reg = (address - 0x10) >> 2;
- }
- r = &d->io_regions[reg];
- if (r->size == 0)
- goto default_config;
- /* compute the stored value */
- if (reg == PCI_ROM_SLOT) {
- /* keep ROM enable bit */
- val &= (~(r->size - 1)) | 1;
- } else {
- val &= ~(r->size - 1);
- val |= r->type;
- }
- *(uint32_t *)(d->config + address) = cpu_to_le32(val);
- pci_update_mappings(d);
- return;
- }
- default_config:
/* not efficient, but simple */
- addr = address;
- for(i = 0; i < len; i++) {
- /* default read/write accesses */
- switch(d->config[0x0e]) {
- case 0x00:
- case 0x80:
- switch(addr) {
- case 0x00:
- case 0x01:
- case 0x02:
- case 0x03:
- case 0x06:
- case 0x07:
- case 0x08:
- case 0x09:
- case 0x0a:
- case 0x0b:
- case 0x0e:
- case 0x10 ... 0x27: /* base */
- case 0x2c ... 0x2f: /* read-only subsystem ID & vendor ID */
- case 0x30 ... 0x33: /* rom */
- case 0x3d:
- can_write = 0;
- break;
- default:
- can_write = 1;
- break;
- }
- break;
- default:
- case 0x01:
- switch(addr) {
- case 0x00:
- case 0x01:
- case 0x02:
- case 0x03:
- case 0x06:
- case 0x07:
- case 0x08:
- case 0x09:
- case 0x0a:
- case 0x0b:
- case 0x0e:
- case 0x2c ... 0x2f: /* read-only subsystem ID & vendor ID */
- case 0x38 ... 0x3b: /* rom */
- case 0x3d:
- can_write = 0;
- break;
- default:
- can_write = 1;
- break;
- }
- break;
- }
- if (can_write) {
- /* Mask out writes to reserved bits in registers */
- switch (addr) {
- case 0x05:
- val &= ~PCI_COMMAND_RESERVED_MASK_HI;
- break;
- case 0x06:
- val &= ~PCI_STATUS_RESERVED_MASK_LO;
- break;
- case 0x07:
- val &= ~PCI_STATUS_RESERVED_MASK_HI;
- break;
- }
- d->config[addr] = val;
- }
- if (++addr > 0xff)
- break;
- val >>= 8;
+ memcpy(orig, d->config, PCI_CONFIG_SPACE_SIZE);
+ for(i = 0; i < l && addr < PCI_CONFIG_SPACE_SIZE; val >>= 8, ++i, ++addr) {
+ uint8_t wmask = d->wmask[addr];
+ d->config[addr] = (d->config[addr] & ~wmask) | (val & wmask);
}
-
- end = address + len;
- if (end > PCI_COMMAND && address < (PCI_COMMAND + 2)) {
- /* if the command register is modified, we must modify the mappings */
+ if (memcmp(orig + PCI_BASE_ADDRESS_0, d->config + PCI_BASE_ADDRESS_0, 24)
+ || ((orig[PCI_COMMAND] ^ d->config[PCI_COMMAND])
+ & (PCI_COMMAND_MEMORY | PCI_COMMAND_IO)))
pci_update_mappings(d);
- }
}
void pci_data_write(void *opaque, uint32_t addr, uint32_t val, int len)
@@ -847,16 +768,8 @@ static void pci_bridge_write_config(PCIDevice *d,
{
PCIBridge *s = (PCIBridge *)d;
- if (address == 0x19 || (address == 0x18 && len > 1)) {
- if (address == 0x19)
- s->bus->bus_num = val & 0xff;
- else
- s->bus->bus_num = (val >> 8) & 0xff;
-#if defined(DEBUG_PCI)
- printf ("pci-bridge: %s: Assigned bus %d\n", d->name, s->bus->bus_num);
-#endif
- }
pci_default_write_config(d, address, val, len);
+ s->bus->bus_num = d->config[PCI_SECONDARY_BUS];
}
PCIBus *pci_find_bus(int bus_num)
diff --git a/hw/pci.h b/hw/pci.h
index 0405837..d0db402 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -100,16 +100,24 @@ typedef struct PCIIORegion {
#define PCI_COMMAND 0x04 /* 16 bits */
#define PCI_COMMAND_IO 0x1 /* Enable response in I/O space */
#define PCI_COMMAND_MEMORY 0x2 /* Enable response in Memory space */
+#define PCI_COMMAND_MASTER 0x4 /* Enable bus master */
#define PCI_STATUS 0x06 /* 16 bits */
#define PCI_REVISION_ID 0x08 /* 8 bits */
#define PCI_CLASS_DEVICE 0x0a /* Device class */
+#define PCI_CACHE_LINE_SIZE 0x0c /* 8 bits */
+#define PCI_LATENCY_TIMER 0x0d /* 8 bits */
#define PCI_HEADER_TYPE 0x0e /* 8 bits */
#define PCI_HEADER_TYPE_NORMAL 0
#define PCI_HEADER_TYPE_BRIDGE 1
#define PCI_HEADER_TYPE_CARDBUS 2
#define PCI_HEADER_TYPE_MULTI_FUNCTION 0x80
+#define PCI_BASE_ADDRESS_0 0x10 /* 32 bits */
+#define PCI_PRIMARY_BUS 0x18 /* Primary bus number */
+#define PCI_SECONDARY_BUS 0x19 /* Secondary bus number */
+#define PCI_SEC_STATUS 0x1e /* Secondary status register, only bit 14 used */
#define PCI_SUBSYSTEM_VENDOR_ID 0x2c /* 16 bits */
#define PCI_SUBSYSTEM_ID 0x2e /* 16 bits */
+#define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */
#define PCI_INTERRUPT_LINE 0x3c /* 8 bits */
#define PCI_INTERRUPT_PIN 0x3d /* 8 bits */
#define PCI_MIN_GNT 0x3e /* 8 bits */
@@ -139,10 +147,18 @@ typedef struct PCIIORegion {
#define PCI_COMMAND_RESERVED_MASK_HI (PCI_COMMAND_RESERVED >> 8)
+/* Size of the standard PCI config header */
+#define PCI_CONFIG_HEADER_SIZE 0x40
+/* Size of the standard PCI config space */
+#define PCI_CONFIG_SPACE_SIZE 0x100
+
struct PCIDevice {
DeviceState qdev;
/* PCI config space */
- uint8_t config[256];
+ uint8_t config[PCI_CONFIG_SPACE_SIZE];
+
+ /* Used to implement R/W bytes */
+ uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
/* the following fields are read only */
PCIBus *bus;
--
1.6.3.1.56.g79e1.dirty
^ permalink raw reply related [flat|nested] 139+ messages in thread
* [PATCHv3 02/13] qemu: capability bits in pci save/restore
[not found] <cover.1244192535.git.mst@redhat.com>
2009-06-05 10:22 ` [PATCHv3 01/13] qemu: make default_write_config use mask table Michael S. Tsirkin
@ 2009-06-05 10:23 ` Michael S. Tsirkin
2009-06-05 10:23 ` [PATCHv3 03/13] qemu: add routines to manage PCI capabilities Michael S. Tsirkin
` (10 subsequent siblings)
12 siblings, 0 replies; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, vi
Add support for capability bits in save/restore for pci.
These will be used for MSI, where the capability might
be present or not as requested by user, which does not
map well into a single version number.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/pci.c | 14 ++++++++++++--
hw/pci.h | 4 ++++
2 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/hw/pci.c b/hw/pci.c
index 235191d..361d741 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -127,12 +127,15 @@ int pci_bus_num(PCIBus *s)
void pci_device_save(PCIDevice *s, QEMUFile *f)
{
+ int version = s->cap_present ? 3 : 2;
int i;
- qemu_put_be32(f, 2); /* PCI device version */
+ qemu_put_be32(f, version); /* PCI device version */
qemu_put_buffer(f, s->config, 256);
for (i = 0; i < 4; i++)
qemu_put_be32(f, s->irq_state[i]);
+ if (version >= 3)
+ qemu_put_be32(f, s->cap_present);
}
int pci_device_load(PCIDevice *s, QEMUFile *f)
@@ -141,7 +144,7 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
int i;
version_id = qemu_get_be32(f);
- if (version_id > 2)
+ if (version_id > 3)
return -EINVAL;
qemu_get_buffer(f, s->config, 256);
pci_update_mappings(s);
@@ -149,6 +152,13 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
if (version_id >= 2)
for (i = 0; i < 4; i ++)
s->irq_state[i] = qemu_get_be32(f);
+ if (version_id >= 3)
+ s->cap_present = qemu_get_be32(f);
+ else
+ s->cap_present = 0;
+
+ if (s->cap_present & ~s->cap_supported)
+ return -EINVAL;
return 0;
}
diff --git a/hw/pci.h b/hw/pci.h
index d0db402..6f0803f 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -178,6 +178,10 @@ struct PCIDevice {
/* Current IRQ levels. Used internally by the generic PCI code. */
int irq_state[4];
+
+ /* Capability bits for save/load */
+ uint32_t cap_supported;
+ uint32_t cap_present;
};
PCIDevice *pci_register_device(PCIBus *bus, const char *name,
--
1.6.3.1.56.g79e1.dirty
^ permalink raw reply related [flat|nested] 139+ messages in thread
* [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
[not found] <cover.1244192535.git.mst@redhat.com>
2009-06-05 10:22 ` [PATCHv3 01/13] qemu: make default_write_config use mask table Michael S. Tsirkin
2009-06-05 10:23 ` [PATCHv3 02/13] qemu: capability bits in pci save/restore Michael S. Tsirkin
@ 2009-06-05 10:23 ` Michael S. Tsirkin
2009-06-09 17:11 ` [Qemu-devel] " Glauber Costa
2009-06-05 10:23 ` [PATCHv3 04/13] qemu: helper routines for pci access Michael S. Tsirkin
` (9 subsequent siblings)
12 siblings, 1 reply; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, vi
Add routines to manage PCI capability list. First user will be MSI-X.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/pci.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
hw/pci.h | 18 +++++++++++-
2 files changed, 106 insertions(+), 10 deletions(-)
diff --git a/hw/pci.c b/hw/pci.c
index 361d741..ed011b5 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -130,12 +130,13 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
int version = s->cap_present ? 3 : 2;
int i;
- qemu_put_be32(f, version); /* PCI device version */
+ /* PCI device version and capabilities */
+ qemu_put_be32(f, version);
+ if (version >= 3)
+ qemu_put_be32(f, s->cap_present);
qemu_put_buffer(f, s->config, 256);
for (i = 0; i < 4; i++)
qemu_put_be32(f, s->irq_state[i]);
- if (version >= 3)
- qemu_put_be32(f, s->cap_present);
}
int pci_device_load(PCIDevice *s, QEMUFile *f)
@@ -146,12 +147,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
version_id = qemu_get_be32(f);
if (version_id > 3)
return -EINVAL;
- qemu_get_buffer(f, s->config, 256);
- pci_update_mappings(s);
-
- if (version_id >= 2)
- for (i = 0; i < 4; i ++)
- s->irq_state[i] = qemu_get_be32(f);
if (version_id >= 3)
s->cap_present = qemu_get_be32(f);
else
@@ -160,6 +155,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
if (s->cap_present & ~s->cap_supported)
return -EINVAL;
+ qemu_get_buffer(f, s->config, 256);
+ pci_update_mappings(s);
+
+ if (version_id >= 2)
+ for (i = 0; i < 4; i ++)
+ s->irq_state[i] = qemu_get_be32(f);
+ /* Clear wmask and used bits for capabilities.
+ Must be restored separately, since capabilities can
+ be placed anywhere in config space. */
+ memset(s->used, 0, PCI_CONFIG_SPACE_SIZE);
+ for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
+ s->wmask[i] = 0xff;
return 0;
}
@@ -870,3 +877,76 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
return (PCIDevice *)dev;
}
+
+static int pci_find_space(PCIDevice *pdev, uint8_t size)
+{
+ int offset = PCI_CONFIG_HEADER_SIZE;
+ int i;
+ for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
+ if (pdev->used[i])
+ offset = i + 1;
+ else if (i - offset + 1 == size)
+ return offset;
+ return 0;
+}
+
+static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id,
+ uint8_t *prev_p)
+{
+ uint8_t next, prev;
+
+ if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST))
+ return 0;
+
+ for (prev = PCI_CAPABILITY_LIST; (next = pdev->config[prev]);
+ prev = next + PCI_CAP_LIST_NEXT)
+ if (pdev->config[next + PCI_CAP_LIST_ID] == cap_id)
+ break;
+
+ *prev_p = prev;
+ return next;
+}
+
+/* Reserve space and add capability to the linked list in pci config space */
+int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
+{
+ uint8_t offset = pci_find_space(pdev, size);
+ uint8_t *config = pdev->config + offset;
+ if (!offset)
+ return -ENOSPC;
+ config[PCI_CAP_LIST_ID] = cap_id;
+ config[PCI_CAP_LIST_NEXT] = pdev->config[PCI_CAPABILITY_LIST];
+ pdev->config[PCI_CAPABILITY_LIST] = offset;
+ pdev->config[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
+ memset(pdev->used + offset, 0xFF, size);
+ /* Make capability read-only by default */
+ memset(pdev->wmask + offset, 0, size);
+ return offset;
+}
+
+/* Unlink capability from the pci config space. */
+void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
+{
+ uint8_t prev, offset = pci_find_capability_list(pdev, cap_id, &prev);
+ if (!offset)
+ return;
+ pdev->config[prev] = pdev->config[offset + PCI_CAP_LIST_NEXT];
+ /* Make capability writeable again */
+ memset(pdev->wmask + offset, 0xff, size);
+ memset(pdev->used + offset, 0, size);
+
+ if (!pdev->config[PCI_CAPABILITY_LIST])
+ pdev->config[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;
+}
+
+/* Reserve space for capability at a known offset (to call after load). */
+void pci_reserve_capability(PCIDevice *pdev, uint8_t offset, uint8_t size)
+{
+ memset(pdev->used + offset, 0xff, size);
+}
+
+uint8_t pci_find_capability(PCIDevice *pdev, uint8_t cap_id)
+{
+ uint8_t prev;
+ return pci_find_capability_list(pdev, cap_id, &prev);
+}
diff --git a/hw/pci.h b/hw/pci.h
index 6f0803f..4838c59 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -123,6 +123,10 @@ typedef struct PCIIORegion {
#define PCI_MIN_GNT 0x3e /* 8 bits */
#define PCI_MAX_LAT 0x3f /* 8 bits */
+/* Capability lists */
+#define PCI_CAP_LIST_ID 0 /* Capability ID */
+#define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */
+
#define PCI_REVISION 0x08 /* obsolete, use PCI_REVISION_ID */
#define PCI_SUBVENDOR_ID 0x2c /* obsolete, use PCI_SUBSYSTEM_VENDOR_ID */
#define PCI_SUBDEVICE_ID 0x2e /* obsolete, use PCI_SUBSYSTEM_ID */
@@ -130,7 +134,7 @@ typedef struct PCIIORegion {
/* Bits in the PCI Status Register (PCI 2.3 spec) */
#define PCI_STATUS_RESERVED1 0x007
#define PCI_STATUS_INT_STATUS 0x008
-#define PCI_STATUS_CAPABILITIES 0x010
+#define PCI_STATUS_CAP_LIST 0x010
#define PCI_STATUS_66MHZ 0x020
#define PCI_STATUS_RESERVED2 0x040
#define PCI_STATUS_FAST_BACK 0x080
@@ -160,6 +164,9 @@ struct PCIDevice {
/* Used to implement R/W bytes */
uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
+ /* Used to allocate config space for capabilities. */
+ uint8_t used[PCI_CONFIG_SPACE_SIZE];
+
/* the following fields are read only */
PCIBus *bus;
int devfn;
@@ -194,6 +201,15 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
uint32_t size, int type,
PCIMapIORegionFunc *map_func);
+int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
+
+void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
+
+void pci_reserve_capability(PCIDevice *pci_dev, uint8_t offset, uint8_t size);
+
+uint8_t pci_find_capability(PCIDevice *pci_dev, uint8_t cap_id);
+
+
uint32_t pci_default_read_config(PCIDevice *d,
uint32_t address, int len);
void pci_default_write_config(PCIDevice *d,
--
1.6.3.1.56.g79e1.dirty
^ permalink raw reply related [flat|nested] 139+ messages in thread
* [PATCHv3 04/13] qemu: helper routines for pci access.
[not found] <cover.1244192535.git.mst@redhat.com>
` (2 preceding siblings ...)
2009-06-05 10:23 ` [PATCHv3 03/13] qemu: add routines to manage PCI capabilities Michael S. Tsirkin
@ 2009-06-05 10:23 ` Michael S. Tsirkin
2009-06-05 10:23 ` [PATCHv3 05/13] qemu: MSI-X support functions Michael S. Tsirkin
` (8 subsequent siblings)
12 siblings, 0 replies; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, vi
Add inline routines for convenient access to pci devices
with correct (little) endianness. Will be used by MSI-X support.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/pci.h | 30 +++++++++++++++++++++++++++---
1 files changed, 27 insertions(+), 3 deletions(-)
diff --git a/hw/pci.h b/hw/pci.h
index 4838c59..477aa64 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -240,21 +240,45 @@ PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did,
pci_map_irq_fn map_irq, const char *name);
static inline void
+pci_set_word(uint8_t *config, uint16_t val)
+{
+ cpu_to_le16wu((uint16_t *)config, val);
+}
+
+static inline uint16_t
+pci_get_word(uint8_t *config)
+{
+ return le16_to_cpupu((uint16_t *)config);
+}
+
+static inline void
+pci_set_long(uint8_t *config, uint32_t val)
+{
+ cpu_to_le32wu((uint32_t *)config, val);
+}
+
+static inline uint32_t
+pci_get_long(uint8_t *config)
+{
+ return le32_to_cpupu((uint32_t *)config);
+}
+
+static inline void
pci_config_set_vendor_id(uint8_t *pci_config, uint16_t val)
{
- cpu_to_le16wu((uint16_t *)&pci_config[PCI_VENDOR_ID], val);
+ pci_set_word(&pci_config[PCI_VENDOR_ID], val);
}
static inline void
pci_config_set_device_id(uint8_t *pci_config, uint16_t val)
{
- cpu_to_le16wu((uint16_t *)&pci_config[PCI_DEVICE_ID], val);
+ pci_set_word(&pci_config[PCI_DEVICE_ID], val);
}
static inline void
pci_config_set_class(uint8_t *pci_config, uint16_t val)
{
- cpu_to_le16wu((uint16_t *)&pci_config[PCI_CLASS_DEVICE], val);
+ pci_set_word(&pci_config[PCI_CLASS_DEVICE], val);
}
typedef void (*pci_qdev_initfn)(PCIDevice *dev);
--
1.6.3.1.56.g79e1.dirty
^ permalink raw reply related [flat|nested] 139+ messages in thread
* [PATCHv3 05/13] qemu: MSI-X support functions
[not found] <cover.1244192535.git.mst@redhat.com>
` (3 preceding siblings ...)
2009-06-05 10:23 ` [PATCHv3 04/13] qemu: helper routines for pci access Michael S. Tsirkin
@ 2009-06-05 10:23 ` Michael S. Tsirkin
2009-06-09 17:26 ` [Qemu-devel] " Glauber Costa
2009-06-05 10:23 ` [PATCHv3 06/13] qemu: add flag to disable MSI-X by default Michael S. Tsirkin
` (7 subsequent siblings)
12 siblings, 1 reply; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, vi
Add functions implementing MSI-X support. The first user will be virtio-pci.
Note that the platform must set a flag (msix_supported) to declare MSI-X support.
For the PC this will be set by the APIC.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
Makefile.target | 2 +-
hw/msix.c | 423 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
hw/msix.h | 35 +++++
hw/pci.h | 20 +++
4 files changed, 479 insertions(+), 1 deletions(-)
create mode 100644 hw/msix.c
create mode 100644 hw/msix.h
diff --git a/Makefile.target b/Makefile.target
index 664a1e3..87b2859 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -486,7 +486,7 @@ endif #CONFIG_BSD_USER
ifndef CONFIG_USER_ONLY
OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o \
- gdbstub.o gdbstub-xml.o
+ gdbstub.o gdbstub-xml.o msix.o
# virtio has to be here due to weird dependency between PCI and virtio-net.
# need to fix this properly
OBJS+=virtio-blk.o virtio-balloon.o virtio-net.o virtio-console.o
diff --git a/hw/msix.c b/hw/msix.c
new file mode 100644
index 0000000..1b5aec8
--- /dev/null
+++ b/hw/msix.c
@@ -0,0 +1,423 @@
+/*
+ * MSI-X device support
+ *
+ * This module includes support for MSI-X in pci devices.
+ *
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ *
+ * Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "hw.h"
+#include "msix.h"
+#include "pci.h"
+
+/* Declaration from linux/pci_regs.h */
+#define PCI_CAP_ID_MSIX 0x11 /* MSI-X */
+#define PCI_MSIX_FLAGS 2 /* Table at lower 11 bits */
+#define PCI_MSIX_FLAGS_QSIZE 0x7FF
+#define PCI_MSIX_FLAGS_ENABLE (1 << 15)
+#define PCI_MSIX_FLAGS_BIRMASK (7 << 0)
+
+/* MSI-X capability structure */
+#define MSIX_TABLE_OFFSET 4
+#define MSIX_PBA_OFFSET 8
+#define MSIX_CAP_LENGTH 12
+
+/* MSI enable bit is in byte 1 in FLAGS register */
+#define MSIX_ENABLE_OFFSET (PCI_MSIX_FLAGS + 1)
+#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
+
+/* MSI-X table format */
+#define MSIX_MSG_ADDR 0
+#define MSIX_MSG_UPPER_ADDR 4
+#define MSIX_MSG_DATA 8
+#define MSIX_VECTOR_CTRL 12
+#define MSIX_ENTRY_SIZE 16
+#define MSIX_VECTOR_MASK 0x1
+
+/* How much space does an MSIX table need. */
+/* The spec requires giving the table structure
+ * a 4K aligned region all by itself. Align it to
+ * target pages so that drivers can do passthrough
+ * on the rest of the region. */
+#define MSIX_PAGE_SIZE TARGET_PAGE_ALIGN(0x1000)
+/* Reserve second half of the page for pending bits */
+#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
+#define MSIX_MAX_ENTRIES 32
+
+
+#ifdef MSIX_DEBUG
+#define DEBUG(fmt, ...) \
+ do { \
+ fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__); \
+ } while (0)
+#else
+#define DEBUG(fmt, ...) do { } while(0)
+#endif
+
+/* Flag to globally disable MSI-X support */
+int msix_disable;
+
+/* Flag for interrupt controller to declare MSI-X support */
+int msix_supported;
+
+/* Add the MSI-X capability to the config space of the device.
+ * The MSI-X table is placed at offset bar_size of BAR bar_nr, with the
+ * pending bits MSIX_PAGE_PENDING bytes above it.
+ * Original bar size must be a power of 2 or 0; the enlarged size is stored
+ * in pdev->msix_bar_size. Returns 0 on success, negative errno on failure. */
+static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
+ unsigned bar_nr, unsigned bar_size)
+{
+ int config_offset;
+ uint8_t *config;
+ uint32_t new_size;
+
+ if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
+ return -EINVAL;
+ if (bar_size > 0x80000000)
+ return -ENOSPC;
+
+ /* Add space for MSI-X structures; a small BAR is first grown to a page */
+ if (!bar_size)
+ new_size = MSIX_PAGE_SIZE;
+ else if (bar_size < MSIX_PAGE_SIZE) {
+ bar_size = MSIX_PAGE_SIZE;
+ new_size = MSIX_PAGE_SIZE * 2;
+ } else
+ new_size = bar_size * 2;
+
+ pdev->msix_bar_size = new_size;
+ config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
+ if (config_offset < 0)
+ return config_offset;
+ config = pdev->config + config_offset;
+
+ pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
+ /* Table on top of BAR: offset in the high bits, BAR number in the low bits */
+ pci_set_long(config + MSIX_TABLE_OFFSET, bar_size | bar_nr);
+ /* Pending bits on top of that */
+ pci_set_long(config + MSIX_PBA_OFFSET, (bar_size + MSIX_PAGE_PENDING) |
+ bar_nr);
+ pdev->msix_cap = config_offset;
+ /* Make the MSI-X enable bit in the flags register writeable. */
+ pdev->wmask[config_offset + MSIX_ENABLE_OFFSET] |= MSIX_ENABLE_MASK;
+ return 0;
+}
+
+static void msix_free_irq_entries(PCIDevice *dev)
+{
+ int vector;
+
+ for (vector = 0; vector < dev->msix_entries_nr; ++vector)
+ dev->msix_entry_used[vector] = 0;
+}
+
+/* Handle MSI-X capability config write. */
+void msix_write_config(PCIDevice *dev, uint32_t addr,
+ uint32_t val, int len)
+{
+ unsigned enable_pos = dev->msix_cap + MSIX_ENABLE_OFFSET;
+ if (addr + len <= enable_pos || addr > enable_pos)
+ return;
+
+ if (msix_enabled(dev))
+ qemu_set_irq(dev->irq[0], 0);
+}
+
+static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
+{
+ PCIDevice *dev = opaque;
+ unsigned int offset = addr & (MSIX_PAGE_SIZE - 1);
+ void *page = dev->msix_table_page;
+ uint32_t val = 0;
+
+ memcpy(&val, (void *)((char *)page + offset), 4);
+
+ return val;
+}
+
+static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr)
+{
+ fprintf(stderr, "MSI-X: only dword read is allowed!\n");
+ return 0;
+}
+
+static uint8_t msix_pending_mask(int vector)
+{
+ return 1 << (vector % 8);
+}
+
+static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
+{
+ return dev->msix_table_page + MSIX_PAGE_PENDING + vector / 8;
+}
+
+static int msix_is_pending(PCIDevice *dev, int vector)
+{
+ return *msix_pending_byte(dev, vector) & msix_pending_mask(vector);
+}
+
+static void msix_set_pending(PCIDevice *dev, int vector)
+{
+ *msix_pending_byte(dev, vector) |= msix_pending_mask(vector);
+}
+
+static void msix_clr_pending(PCIDevice *dev, int vector)
+{
+ *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector);
+}
+
+static int msix_is_masked(PCIDevice *dev, int vector)
+{
+ unsigned offset = vector * MSIX_ENTRY_SIZE + MSIX_VECTOR_CTRL;
+ return dev->msix_table_page[offset] & MSIX_VECTOR_MASK;
+}
+
+static void msix_mmio_writel(void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+ PCIDevice *dev = opaque;
+ unsigned int offset = addr & (MSIX_PAGE_SIZE - 1);
+ int vector = offset / MSIX_ENTRY_SIZE;
+ memcpy(dev->msix_table_page + offset, &val, 4);
+ if (!msix_is_masked(dev, vector) && msix_is_pending(dev, vector)) {
+ msix_clr_pending(dev, vector);
+ msix_notify(dev, vector);
+ }
+}
+
+static void msix_mmio_write_unallowed(void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+ fprintf(stderr, "MSI-X: only dword write is allowed!\n");
+}
+
+static CPUWriteMemoryFunc *msix_mmio_write[] = {
+ msix_mmio_write_unallowed, msix_mmio_write_unallowed, msix_mmio_writel
+};
+
+static CPUReadMemoryFunc *msix_mmio_read[] = {
+ msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl
+};
+
+/* Should be called from device's map method. */
+void msix_mmio_map(PCIDevice *d, int region_num,
+ uint32_t addr, uint32_t size, int type)
+{
+ uint8_t *config = d->config + d->msix_cap;
+ uint32_t table = pci_get_long(config + MSIX_TABLE_OFFSET);
+ uint32_t offset = table & ~(MSIX_PAGE_SIZE - 1);
+ /* TODO: for assigned devices, we'll want to make it possible to map
+ * pending bits separately in case they are in a separate bar. */
+ int table_bir = table & PCI_MSIX_FLAGS_BIRMASK;
+
+ if (table_bir != region_num)
+ return;
+ if (size <= offset)
+ return;
+ cpu_register_physical_memory(addr + offset, size - offset,
+ d->msix_mmio_index);
+}
+
+/* Initialize the MSI-X structures. Returns 0 or a negative errno. Note: if
+ * MSI-X is supported, BAR size is modified; retrieve it with msix_bar_size. */
+int msix_init(struct PCIDevice *dev, unsigned short nentries,
+              unsigned bar_nr, unsigned bar_size)
+{
+    int ret = -ENOMEM;
+    /* Nothing to do if MSI is not supported by interrupt controller */
+    if (!msix_supported)
+        return -ENOTTY;
+    if (nentries > MSIX_MAX_ENTRIES)
+        return -EINVAL;
+
+    dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
+                                        sizeof *dev->msix_entry_used);
+    if (!dev->msix_entry_used)
+        goto err_used;
+
+    dev->msix_table_page = qemu_mallocz(MSIX_PAGE_SIZE);
+    if (!dev->msix_table_page)
+        goto err_page;
+
+    dev->msix_mmio_index = cpu_register_io_memory(0, msix_mmio_read,
+                                                  msix_mmio_write, dev);
+    if (dev->msix_mmio_index == -1) {
+        ret = -EBUSY;
+        goto err_index;
+    }
+
+    dev->msix_entries_nr = nentries;
+    dev->cap_supported |= QEMU_PCI_CAP_MSIX;
+    /* If disabled, stop here. User can later load configuration with MSI-X
+     * enabled. */
+    if (msix_disable)
+        return 0;
+
+    ret = msix_add_config(dev, nentries, bar_nr, bar_size);
+    if (ret)
+        goto err_config;
+    dev->cap_present |= QEMU_PCI_CAP_MSIX;
+    return 0;
+
+err_config:
+    dev->cap_supported &= ~QEMU_PCI_CAP_MSIX; /* state is freed below */
+    dev->msix_entries_nr = 0;
+    cpu_unregister_io_memory(dev->msix_mmio_index);
+err_index:
+    qemu_free(dev->msix_table_page);
+    dev->msix_table_page = NULL;
+err_page:
+    qemu_free(dev->msix_entry_used);
+    dev->msix_entry_used = NULL;
+err_used:
+    return ret;
+}
+
+/* Clean up resources for the device. */
+int msix_uninit(PCIDevice *dev)
+{
+ if (!(dev->cap_supported & QEMU_PCI_CAP_MSIX))
+ return 0;
+ pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
+ dev->msix_cap = 0;
+ msix_free_irq_entries(dev);
+ dev->msix_entries_nr = 0;
+ cpu_unregister_io_memory(dev->msix_mmio_index);
+ qemu_free(dev->msix_table_page);
+ dev->msix_table_page = NULL;
+ qemu_free(dev->msix_entry_used);
+ dev->msix_entry_used = NULL;
+ dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
+ dev->cap_supported &= ~QEMU_PCI_CAP_MSIX;
+ return 0;
+}
+
+/* Save the MSI-X table and pending bits; size comes from the capability. */
+void msix_save(PCIDevice *dev, QEMUFile *f)
+{
+    uint8_t *flags = dev->config + dev->msix_cap + PCI_MSIX_FLAGS;
+    unsigned n = (pci_get_word(flags) & PCI_MSIX_FLAGS_QSIZE) + 1;
+    qemu_put_buffer(f, dev->msix_table_page, n * MSIX_ENTRY_SIZE);
+    qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
+}
+
+/* Restore MSI-X state. Should be called after restoring the config space. */
+int msix_load(PCIDevice *dev, QEMUFile *f)
+{
+    uint8_t offset = pci_find_capability(dev, PCI_CAP_ID_MSIX);
+    unsigned nentries;
+
+    /* The saved capability bit must agree with the restored config space. */
+    if (!!(dev->cap_present & QEMU_PCI_CAP_MSIX) != !!offset) {
+        fprintf(stderr, "MSI-X bit set but no capability is present\n");
+        return -EINVAL;
+    }
+
+    msix_free_irq_entries(dev);
+
+    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
+        return 0;
+
+    /* Sanity check: we probably could add more of these. */
+    nentries = (pci_get_word(dev->config + offset + PCI_MSIX_FLAGS) &
+                PCI_MSIX_FLAGS_QSIZE) + 1;
+    if (nentries > MSIX_MAX_ENTRIES) {
+        fprintf(stderr, "msix_load: nentries mismatch: %u > %d\n",
+                nentries, MSIX_MAX_ENTRIES);
+        return -EINVAL;
+    }
+
+    /* Make the enable bit in the flags register writeable. */
+    dev->wmask[offset + MSIX_ENABLE_OFFSET] |= MSIX_ENABLE_MASK;
+    /* Reserve space used by this capability */
+    pci_reserve_capability(dev, offset, MSIX_CAP_LENGTH);
+    /* Store the new offset and the table size read from the capability */
+    dev->msix_cap = offset;
+    dev->msix_entries_nr = nentries;
+
+    qemu_get_buffer(f, dev->msix_table_page, nentries * MSIX_ENTRY_SIZE);
+    qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING,
+                    (nentries + 7) / 8);
+
+    return 0;
+}
+
+/* Does device support MSI-X? */
+int msix_present(PCIDevice *dev)
+{
+ return dev->cap_present & QEMU_PCI_CAP_MSIX;
+}
+
+/* Is MSI-X enabled? */
+int msix_enabled(PCIDevice *dev)
+{
+ return (dev->cap_present & QEMU_PCI_CAP_MSIX) &&
+ (dev->config[dev->msix_cap + MSIX_ENABLE_OFFSET] &
+ MSIX_ENABLE_MASK);
+}
+
+/* Size of bar where MSI-X table resides, or 0 if MSI-X not supported. */
+uint32_t msix_bar_size(PCIDevice *dev)
+{
+ return (dev->cap_present & QEMU_PCI_CAP_MSIX) ?
+ dev->msix_bar_size : 0;
+}
+
+/* Send an MSI-X message */
+void msix_notify(PCIDevice *dev, unsigned vector)
+{
+ uint8_t *table_entry = dev->msix_table_page + vector * MSIX_ENTRY_SIZE;
+ uint64_t address;
+ uint32_t data;
+
+ if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
+ return;
+ if (msix_is_masked(dev, vector)) {
+ msix_set_pending(dev, vector);
+ return;
+ }
+
+ address = pci_get_long(table_entry + MSIX_MSG_UPPER_ADDR);
+ address = (address << 32) | pci_get_long(table_entry + MSIX_MSG_ADDR);
+ data = pci_get_long(table_entry + MSIX_MSG_DATA);
+ stl_phys(address, data);
+}
+
+void msix_reset(PCIDevice *dev)
+{
+    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
+        return; /* no MSI-X capability exposed: nothing to reset */
+    msix_free_irq_entries(dev); /* zero every vector use count */
+    dev->config[dev->msix_cap + MSIX_ENABLE_OFFSET] &= ~MSIX_ENABLE_MASK;
+    memset(dev->msix_table_page, 0, MSIX_PAGE_SIZE); /* table + pending bits */
+}
+
+/* The PCI spec suggests that devices make it possible for software to
+ * configure fewer vectors than supported by the device, but does not specify
+ * a standard mechanism for devices to do so.
+ *
+ * We support this by asking devices to declare vectors software is going to
+ * actually use, and checking this on the notification path. Devices that
+ * don't want to follow the spec suggestion can declare all vectors as used. */
+
+/* Mark vector as used. */
+int msix_vector_use(PCIDevice *dev, unsigned vector)
+{
+ if (vector >= dev->msix_entries_nr)
+ return -EINVAL;
+ dev->msix_entry_used[vector]++;
+ return 0;
+}
+
+/* Mark vector as unused. */
+void msix_vector_unuse(PCIDevice *dev, unsigned vector)
+{
+ if (vector < dev->msix_entries_nr && dev->msix_entry_used[vector])
+ --dev->msix_entry_used[vector];
+}
diff --git a/hw/msix.h b/hw/msix.h
new file mode 100644
index 0000000..79e84a3
--- /dev/null
+++ b/hw/msix.h
@@ -0,0 +1,35 @@
+#ifndef QEMU_MSIX_H
+#define QEMU_MSIX_H
+
+#include "qemu-common.h"
+
+int msix_init(PCIDevice *pdev, unsigned short nentries,
+ unsigned bar_nr, unsigned bar_size);
+
+void msix_write_config(PCIDevice *pci_dev, uint32_t address,
+ uint32_t val, int len);
+
+void msix_mmio_map(PCIDevice *pci_dev, int region_num,
+ uint32_t addr, uint32_t size, int type);
+
+int msix_uninit(PCIDevice *d);
+
+void msix_save(PCIDevice *dev, QEMUFile *f);
+int msix_load(PCIDevice *dev, QEMUFile *f);
+
+int msix_enabled(PCIDevice *dev);
+int msix_present(PCIDevice *dev);
+
+uint32_t msix_bar_size(PCIDevice *dev);
+
+int msix_vector_use(PCIDevice *dev, unsigned vector);
+void msix_vector_unuse(PCIDevice *dev, unsigned vector);
+
+void msix_notify(PCIDevice *dev, unsigned vector);
+
+void msix_reset(PCIDevice *dev);
+
+extern int msix_disable;
+extern int msix_supported;
+
+#endif
diff --git a/hw/pci.h b/hw/pci.h
index 477aa64..98a34ee 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -156,6 +156,11 @@ typedef struct PCIIORegion {
/* Size of the standard PCI config space */
#define PCI_CONFIG_SPACE_SIZE 0x100
+/* Bits in cap_supported/cap_present fields. */
+enum {
+ QEMU_PCI_CAP_MSIX = 0x1,
+};
+
struct PCIDevice {
DeviceState qdev;
/* PCI config space */
@@ -189,6 +194,21 @@ struct PCIDevice {
/* Capability bits for save/load */
uint32_t cap_supported;
uint32_t cap_present;
+
+ /* Offset of MSI-X capability in config space */
+ uint8_t msix_cap;
+
+ /* MSI-X entries */
+ int msix_entries_nr;
+
+ /* Space to store MSIX table */
+ uint8_t *msix_table_page;
+ /* MMIO index used to map MSIX table and pending bit entries. */
+ int msix_mmio_index;
+ /* Reference-count for entries actually in use by driver. */
+ unsigned *msix_entry_used;
+ /* Region including the MSI-X table */
+ uint32_t msix_bar_size;
};
PCIDevice *pci_register_device(PCIBus *bus, const char *name,
--
1.6.3.1.56.g79e1.dirty
^ permalink raw reply related [flat|nested] 139+ messages in thread
* [PATCHv3 06/13] qemu: add flag to disable MSI-X by default
[not found] <cover.1244192535.git.mst@redhat.com>
` (4 preceding siblings ...)
2009-06-05 10:23 ` [PATCHv3 05/13] qemu: MSI-X support functions Michael S. Tsirkin
@ 2009-06-05 10:23 ` Michael S. Tsirkin
2009-06-05 10:23 ` [PATCHv3 07/13] qemu: minimal MSI/MSI-X implementation for PC Michael S. Tsirkin
` (6 subsequent siblings)
12 siblings, 0 replies; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, vi
Add global flag to disable MSI-X by default. This is useful primarily
to make images loadable by older qemu (without msix). Even when MSI-X
is disabled by flag, you can still load images that have MSI-X enabled.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/msix.c | 3 +++
qemu-options.hx | 2 ++
vl.c | 3 +++
3 files changed, 8 insertions(+), 0 deletions(-)
diff --git a/hw/msix.c b/hw/msix.c
index 1b5aec8..ada81d8 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -122,6 +122,9 @@ void msix_write_config(PCIDevice *dev, uint32_t addr,
uint32_t val, int len)
{
unsigned enable_pos = dev->msix_cap + MSIX_ENABLE_OFFSET;
+ if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
+ return;
+
if (addr + len <= enable_pos || addr > enable_pos)
return;
diff --git a/qemu-options.hx b/qemu-options.hx
index 87af798..fd041a4 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1575,3 +1575,5 @@ DEF("semihosting", 0, QEMU_OPTION_semihosting,
DEF("old-param", 0, QEMU_OPTION_old_param,
"-old-param old param mode\n")
#endif
+DEF("disable-msix", 0, QEMU_OPTION_disable_msix,
+ "-disable-msix disable msix support for PCI devices (enabled by default)\n")
diff --git a/vl.c b/vl.c
index 2c1f0e0..2757d4f 100644
--- a/vl.c
+++ b/vl.c
@@ -134,6 +134,7 @@ int main(int argc, char **argv)
#include "hw/usb.h"
#include "hw/pcmcia.h"
#include "hw/pc.h"
+#include "hw/msix.h"
#include "hw/audiodev.h"
#include "hw/isa.h"
#include "hw/baum.h"
@@ -5557,6 +5558,8 @@ int main(int argc, char **argv, char **envp)
xen_mode = XEN_ATTACH;
break;
#endif
+ case QEMU_OPTION_disable_msix:
+ msix_disable = 1;
}
}
}
--
1.6.3.1.56.g79e1.dirty
^ permalink raw reply related [flat|nested] 139+ messages in thread
* [PATCHv3 07/13] qemu: minimal MSI/MSI-X implementation for PC
[not found] <cover.1244192535.git.mst@redhat.com>
` (5 preceding siblings ...)
2009-06-05 10:23 ` [PATCHv3 06/13] qemu: add flag to disable MSI-X by default Michael S. Tsirkin
@ 2009-06-05 10:23 ` Michael S. Tsirkin
2009-06-09 17:33 ` [Qemu-devel] " Glauber Costa
2009-06-05 10:23 ` [PATCHv3 08/13] qemu: add support for resizing regions Michael S. Tsirkin
` (5 subsequent siblings)
12 siblings, 1 reply; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, vi
Implement MSI support in APIC. Note that MSI and MMIO APIC registers
are at the same memory location, but actually not on the global bus: MSI
is on PCI bus, APIC is connected directly to the CPU. We map them on the
global bus at the same address which happens to work because MSI
registers are reserved in APIC MMIO and vice versa.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/apic.c | 43 +++++++++++++++++++++++++++++++++++++++----
1 files changed, 39 insertions(+), 4 deletions(-)
diff --git a/hw/apic.c b/hw/apic.c
index 8c8b2de..ed03a36 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -19,6 +19,8 @@
*/
#include "hw.h"
#include "pc.h"
+#include "pci.h"
+#include "msix.h"
#include "qemu-timer.h"
#include "host-utils.h"
@@ -63,6 +65,19 @@
#define MAX_APICS 255
#define MAX_APIC_WORDS 8
+/* Intel APIC constants: from include/asm/msidef.h */
+#define MSI_DATA_VECTOR_SHIFT 0
+#define MSI_DATA_VECTOR_MASK 0x000000ff
+#define MSI_DATA_DELIVERY_MODE_SHIFT 8
+#define MSI_DATA_TRIGGER_SHIFT 15
+#define MSI_DATA_LEVEL_SHIFT 14
+#define MSI_ADDR_DEST_MODE_SHIFT 2
+#define MSI_ADDR_DEST_ID_SHIFT 12
+#define MSI_ADDR_DEST_ID_MASK 0x00ffff0
+
+#define MSI_ADDR_BASE 0xfee00000
+#define MSI_ADDR_SIZE 0x100000
+
typedef struct APICState {
CPUState *cpu_env;
uint32_t apicbase;
@@ -712,11 +727,31 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr)
return val;
}
+static void apic_send_msi(target_phys_addr_t addr, uint32_t data)
+{
+ uint8_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+ uint8_t vector = (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
+ uint8_t dest_mode = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
+ uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
+ uint8_t delivery = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7;
+ /* XXX: Ignore redirection hint. */
+ apic_deliver_irq(dest, dest_mode, delivery, vector, 0, trigger_mode);
+}
+
static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
{
CPUState *env;
APICState *s;
- int index;
+ int index = (addr >> 4) & 0xff;
+ if (addr > 0xfff || !index) {
+ /* MSI and MMIO APIC are at the same memory location,
+ * but actually not on the global bus: MSI is on PCI bus
+ * APIC is connected directly to the CPU.
+ * Mapping them on the global bus happens to work because
+ * MSI registers are reserved in APIC MMIO and vice versa. */
+ apic_send_msi(addr, val);
+ return;
+ }
env = cpu_single_env;
if (!env)
@@ -727,7 +762,6 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
printf("APIC write: %08x = %08x\n", (uint32_t)addr, val);
#endif
- index = (addr >> 4) & 0xff;
switch(index) {
case 0x02:
s->id = (val >> 24);
@@ -911,6 +945,7 @@ int apic_init(CPUState *env)
s->cpu_env = env;
apic_reset(s);
+ msix_supported = 1;
/* XXX: mapping more APICs at the same memory location */
if (apic_io_memory == 0) {
@@ -918,7 +953,8 @@ int apic_init(CPUState *env)
on the global memory bus. */
apic_io_memory = cpu_register_io_memory(0, apic_mem_read,
apic_mem_write, NULL);
- cpu_register_physical_memory(s->apicbase & ~0xfff, 0x1000,
+ /* XXX: what if the base changes? */
+ cpu_register_physical_memory(MSI_ADDR_BASE, MSI_ADDR_SIZE,
apic_io_memory);
}
s->timer = qemu_new_timer(vm_clock, apic_timer, s);
@@ -929,4 +965,3 @@ int apic_init(CPUState *env)
local_apics[s->id] = s;
return 0;
}
-
--
1.6.3.1.56.g79e1.dirty
^ permalink raw reply related [flat|nested] 139+ messages in thread
* [PATCHv3 08/13] qemu: add support for resizing regions
[not found] <cover.1244192535.git.mst@redhat.com>
` (6 preceding siblings ...)
2009-06-05 10:23 ` [PATCHv3 07/13] qemu: minimal MSI/MSI-X implementation for PC Michael S. Tsirkin
@ 2009-06-05 10:23 ` Michael S. Tsirkin
2009-06-09 17:36 ` [Qemu-devel] " Glauber Costa
2009-06-05 10:24 ` [PATCHv3 09/13] qemu: virtio support for many interrupt vectors Michael S. Tsirkin
` (4 subsequent siblings)
12 siblings, 1 reply; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, vi
Make it possible to resize PCI regions. This will be used by virtio
with MSI-X, where the region size depends on whether MSI-X is enabled,
and can change across load/save.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/pci.c | 54 ++++++++++++++++++++++++++++++++++++------------------
hw/pci.h | 3 +++
2 files changed, 39 insertions(+), 18 deletions(-)
diff --git a/hw/pci.c b/hw/pci.c
index ed011b5..042a216 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -392,6 +392,41 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
*(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
}
+static void pci_unmap_region(PCIDevice *d, PCIIORegion *r)
+{
+ if (r->addr == -1)
+ return;
+ if (r->type & PCI_ADDRESS_SPACE_IO) {
+ int class;
+ /* NOTE: specific hack for IDE in PC case:
+ only one byte must be mapped. */
+ class = pci_get_word(d->config + PCI_CLASS_DEVICE);
+ if (class == 0x0101 && r->size == 4) {
+ isa_unassign_ioport(r->addr + 2, 1);
+ } else {
+ isa_unassign_ioport(r->addr, r->size);
+ }
+ } else {
+ cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
+ r->size,
+ IO_MEM_UNASSIGNED);
+ qemu_unregister_coalesced_mmio(r->addr, r->size);
+ }
+}
+
+void pci_resize_io_region(PCIDevice *pci_dev, int region_num,
+ uint32_t size)
+{
+ /* Unmap using the OLD size before recording the new one, then remap. */
+ PCIIORegion *r = &pci_dev->io_regions[region_num];
+ if (r->size == size)
+ return;
+ pci_unmap_region(pci_dev, r);
+ r->size = size;
+ r->addr = -1;
+ pci_update_mappings(pci_dev);
+}
+
static void pci_update_mappings(PCIDevice *d)
{
PCIIORegion *r;
@@ -445,24 +480,7 @@ static void pci_update_mappings(PCIDevice *d)
}
/* now do the real mapping */
if (new_addr != r->addr) {
- if (r->addr != -1) {
- if (r->type & PCI_ADDRESS_SPACE_IO) {
- int class;
- /* NOTE: specific hack for IDE in PC case:
- only one byte must be mapped. */
- class = d->config[0x0a] | (d->config[0x0b] << 8);
- if (class == 0x0101 && r->size == 4) {
- isa_unassign_ioport(r->addr + 2, 1);
- } else {
- isa_unassign_ioport(r->addr, r->size);
- }
- } else {
- cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
- r->size,
- IO_MEM_UNASSIGNED);
- qemu_unregister_coalesced_mmio(r->addr, r->size);
- }
- }
+ pci_unmap_region(d, r);
r->addr = new_addr;
if (r->addr != -1) {
r->map_func(d, i, r->addr, r->size, r->type);
diff --git a/hw/pci.h b/hw/pci.h
index 98a34ee..8e74033 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -221,6 +221,9 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
uint32_t size, int type,
PCIMapIORegionFunc *map_func);
+void pci_resize_io_region(PCIDevice *pci_dev, int region_num,
+ uint32_t size);
+
int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
--
1.6.3.1.56.g79e1.dirty
^ permalink raw reply related [flat|nested] 139+ messages in thread
* [PATCHv3 09/13] qemu: virtio support for many interrupt vectors
[not found] <cover.1244192535.git.mst@redhat.com>
` (7 preceding siblings ...)
2009-06-05 10:23 ` [PATCHv3 08/13] qemu: add support for resizing regions Michael S. Tsirkin
@ 2009-06-05 10:24 ` Michael S. Tsirkin
2009-06-05 10:24 ` [PATCHv3 10/13] qemu: MSI-X support in virtio PCI Michael S. Tsirkin
` (3 subsequent siblings)
12 siblings, 0 replies; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:24 UTC (permalink / raw)
To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, vi
Extend virtio to support many interrupt vectors, and rearrange code in
preparation for multi-vector support (mostly move reset out to bindings,
because we will have to reset the vectors in transport-specific code).
Actual bindings in pci, and use in net, to follow.
Load and save are not connected to bindings yet, so they are left
stubbed out for now.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/syborg_virtio.c | 13 ++++++++--
hw/virtio-pci.c | 24 +++++++++++++++----
hw/virtio.c | 63 ++++++++++++++++++++++++++++++++++++++-------------
hw/virtio.h | 10 ++++++-
4 files changed, 84 insertions(+), 26 deletions(-)
diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c
index 37c219c..d8c978a 100644
--- a/hw/syborg_virtio.c
+++ b/hw/syborg_virtio.c
@@ -134,7 +134,10 @@ static void syborg_virtio_writel(void *opaque, target_phys_addr_t offset,
vdev->features = value;
break;
case SYBORG_VIRTIO_QUEUE_BASE:
- virtio_queue_set_addr(vdev, vdev->queue_sel, value);
+ if (value == 0)
+ virtio_reset(vdev);
+ else
+ virtio_queue_set_addr(vdev, vdev->queue_sel, value);
break;
case SYBORG_VIRTIO_QUEUE_SEL:
if (value < VIRTIO_PCI_QUEUE_MAX)
@@ -228,7 +231,7 @@ static CPUWriteMemoryFunc *syborg_virtio_writefn[] = {
syborg_virtio_writel
};
-static void syborg_virtio_update_irq(void *opaque)
+static void syborg_virtio_update_irq(void *opaque, uint16_t vector)
{
SyborgVirtIOProxy *proxy = opaque;
int level;
@@ -239,7 +242,7 @@ static void syborg_virtio_update_irq(void *opaque)
}
static VirtIOBindings syborg_virtio_bindings = {
- .update_irq = syborg_virtio_update_irq
+ .notify = syborg_virtio_update_irq
};
static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
@@ -248,6 +251,8 @@ static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
proxy->vdev = vdev;
+ /* Don't support multiple vectors */
+ proxy->vdev->nvectors = 0;
sysbus_init_irq(&proxy->busdev, &proxy->irq);
iomemtype = cpu_register_io_memory(0, syborg_virtio_readfn,
syborg_virtio_writefn, proxy);
@@ -255,6 +260,8 @@ static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
proxy->id = ((uint32_t)0x1af4 << 16) | vdev->device_id;
+ qemu_register_reset(virtio_reset, 0, vdev);
+
virtio_bind_device(vdev, &syborg_virtio_bindings, proxy);
}
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index c072423..7dfdd80 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -78,13 +78,19 @@ typedef struct {
/* virtio device */
-static void virtio_pci_update_irq(void *opaque)
+static void virtio_pci_notify(void *opaque, uint16_t vector)
{
VirtIOPCIProxy *proxy = opaque;
qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
}
+static void virtio_pci_reset(void *opaque)
+{
+ VirtIOPCIProxy *proxy = opaque;
+ virtio_reset(proxy->vdev);
+}
+
static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
{
VirtIOPCIProxy *proxy = opaque;
@@ -108,7 +114,10 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
break;
case VIRTIO_PCI_QUEUE_PFN:
pa = (target_phys_addr_t)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
- virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
+ if (pa == 0)
+ virtio_pci_reset(proxy);
+ else
+ virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
break;
case VIRTIO_PCI_QUEUE_SEL:
if (val < VIRTIO_PCI_QUEUE_MAX)
@@ -120,7 +129,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
case VIRTIO_PCI_STATUS:
vdev->status = val & 0xFF;
if (vdev->status == 0)
- virtio_reset(vdev);
+ virtio_pci_reset(proxy);
break;
}
}
@@ -158,7 +167,7 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
/* reading from the ISR also clears it. */
ret = vdev->isr;
vdev->isr = 0;
- virtio_update_irq(vdev);
+ qemu_set_irq(proxy->pci_dev.irq[0], 0);
break;
default:
break;
@@ -243,7 +252,7 @@ static void virtio_map(PCIDevice *pci_dev, int region_num,
}
static const VirtIOBindings virtio_pci_bindings = {
- .update_irq = virtio_pci_update_irq
+ .notify = virtio_pci_notify
};
static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
@@ -255,6 +264,9 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
proxy->vdev = vdev;
+ /* No support for multiple vectors yet. */
+ proxy->vdev->nvectors = 0;
+
config = proxy->pci_dev.config;
pci_config_set_vendor_id(config, vendor);
pci_config_set_device_id(config, device);
@@ -279,6 +291,8 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
pci_register_io_region(&proxy->pci_dev, 0, size, PCI_ADDRESS_SPACE_IO,
virtio_map);
+ qemu_register_reset(virtio_pci_reset, 0, proxy);
+
virtio_bind_device(vdev, &virtio_pci_bindings, proxy);
}
diff --git a/hw/virtio.c b/hw/virtio.c
index 45a49fa..63ffcff 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -68,6 +68,7 @@ struct VirtQueue
target_phys_addr_t pa;
uint16_t last_avail_idx;
int inuse;
+ uint16_t vector;
void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
};
@@ -373,12 +374,16 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
}
/* virtio device */
+static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
+{
+ if (vdev->binding->notify) {
+ vdev->binding->notify(vdev->binding_opaque, vector);
+ }
+}
void virtio_update_irq(VirtIODevice *vdev)
{
- if (vdev->binding->update_irq) {
- vdev->binding->update_irq(vdev->binding_opaque);
- }
+ virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}
void virtio_reset(void *opaque)
@@ -393,7 +398,8 @@ void virtio_reset(void *opaque)
vdev->queue_sel = 0;
vdev->status = 0;
vdev->isr = 0;
- virtio_update_irq(vdev);
+ vdev->config_vector = VIRTIO_NO_VECTOR;
+ virtio_notify_vector(vdev, vdev->config_vector);
for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
vdev->vq[i].vring.desc = 0;
@@ -401,6 +407,7 @@ void virtio_reset(void *opaque)
vdev->vq[i].vring.used = 0;
vdev->vq[i].last_avail_idx = 0;
vdev->vq[i].pa = 0;
+ vdev->vq[i].vector = VIRTIO_NO_VECTOR;
}
}
@@ -484,12 +491,8 @@ void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr)
{
- if (addr == 0) {
- virtio_reset(vdev);
- } else {
- vdev->vq[n].pa = addr;
- virtqueue_init(&vdev->vq[n]);
- }
+ vdev->vq[n].pa = addr;
+ virtqueue_init(&vdev->vq[n]);
}
target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n)
@@ -509,6 +512,18 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
}
}
+uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
+{
+ return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
+ VIRTIO_NO_VECTOR;
+}
+
+void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
+{
+ if (n < VIRTIO_PCI_QUEUE_MAX)
+ vdev->vq[n].vector = vector;
+}
+
VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
void (*handle_output)(VirtIODevice *, VirtQueue *))
{
@@ -537,7 +552,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
return;
vdev->isr |= 0x01;
- virtio_update_irq(vdev);
+ virtio_notify_vector(vdev, vq->vector);
}
void virtio_notify_config(VirtIODevice *vdev)
@@ -546,7 +561,7 @@ void virtio_notify_config(VirtIODevice *vdev)
return;
vdev->isr |= 0x03;
- virtio_update_irq(vdev);
+ virtio_notify_vector(vdev, vdev->config_vector);
}
void virtio_save(VirtIODevice *vdev, QEMUFile *f)
@@ -555,6 +570,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
/* FIXME: load/save binding. */
//pci_device_save(&vdev->pci_dev, f);
+ //msix_save(&vdev->pci_dev, f);
qemu_put_8s(f, &vdev->status);
qemu_put_8s(f, &vdev->isr);
@@ -563,6 +579,9 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
qemu_put_be32(f, vdev->config_len);
qemu_put_buffer(f, vdev->config, vdev->config_len);
+ if (vdev->nvectors)
+ qemu_put_be16s(f, &vdev->config_vector);
+
for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
if (vdev->vq[i].vring.num == 0)
break;
@@ -577,15 +596,19 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
qemu_put_be32(f, vdev->vq[i].vring.num);
qemu_put_be64(f, vdev->vq[i].pa);
qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
+ if (vdev->nvectors)
+ qemu_put_be16s(f, &vdev->vq[i].vector);
}
}
-void virtio_load(VirtIODevice *vdev, QEMUFile *f)
+int virtio_load(VirtIODevice *vdev, QEMUFile *f)
{
int num, i;
/* FIXME: load/save binding. */
//pci_device_load(&vdev->pci_dev, f);
+ //r = msix_load(&vdev->pci_dev, f);
+ //pci_resize_io_region(&vdev->pci_dev, 1, msix_bar_size(&vdev->pci_dev));
qemu_get_8s(f, &vdev->status);
qemu_get_8s(f, &vdev->isr);
@@ -594,6 +617,10 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
vdev->config_len = qemu_get_be32(f);
qemu_get_buffer(f, vdev->config, vdev->config_len);
+ if (vdev->nvectors) {
+ qemu_get_be16s(f, &vdev->config_vector);
+ //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
+ }
num = qemu_get_be32(f);
for (i = 0; i < num; i++) {
@@ -604,9 +631,14 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
if (vdev->vq[i].pa) {
virtqueue_init(&vdev->vq[i]);
}
+ if (vdev->nvectors) {
+ qemu_get_be16s(f, &vdev->vq[i].vector);
+ //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
+ }
}
- virtio_update_irq(vdev);
+ virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
+ return 0;
}
void virtio_cleanup(VirtIODevice *vdev)
@@ -627,6 +659,7 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
vdev->status = 0;
vdev->isr = 0;
vdev->queue_sel = 0;
+ vdev->config_vector = VIRTIO_NO_VECTOR;
vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
vdev->name = name;
@@ -636,8 +669,6 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
else
vdev->config = NULL;
- qemu_register_reset(virtio_reset, 0, vdev);
-
return vdev;
}
diff --git a/hw/virtio.h b/hw/virtio.h
index 425727e..04a3c3d 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -71,11 +71,13 @@ typedef struct VirtQueueElement
} VirtQueueElement;
typedef struct {
- void (*update_irq)(void * opaque);
+ void (*notify)(void * opaque, uint16_t vector);
} VirtIOBindings;
#define VIRTIO_PCI_QUEUE_MAX 16
+#define VIRTIO_NO_VECTOR 0xffff
+
struct VirtIODevice
{
const char *name;
@@ -85,6 +87,8 @@ struct VirtIODevice
uint32_t features;
size_t config_len;
void *config;
+ uint16_t config_vector;
+ int nvectors;
uint32_t (*get_features)(VirtIODevice *vdev);
uint32_t (*bad_features)(VirtIODevice *vdev);
void (*set_features)(VirtIODevice *vdev, uint32_t val);
@@ -114,7 +118,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
void virtio_save(VirtIODevice *vdev, QEMUFile *f);
-void virtio_load(VirtIODevice *vdev, QEMUFile *f);
+int virtio_load(VirtIODevice *vdev, QEMUFile *f);
void virtio_cleanup(VirtIODevice *vdev);
@@ -140,6 +144,8 @@ void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr);
target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n);
int virtio_queue_get_num(VirtIODevice *vdev, int n);
void virtio_queue_notify(VirtIODevice *vdev, int n);
+uint16_t virtio_queue_vector(VirtIODevice *vdev, int n);
+void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector);
void virtio_reset(void *opaque);
void virtio_update_irq(VirtIODevice *vdev);
--
1.6.3.1.56.g79e1.dirty
^ permalink raw reply related [flat|nested] 139+ messages in thread
* [PATCHv3 10/13] qemu: MSI-X support in virtio PCI
[not found] <cover.1244192535.git.mst@redhat.com>
` (8 preceding siblings ...)
2009-06-05 10:24 ` [PATCHv3 09/13] qemu: virtio support for many interrupt vectors Michael S. Tsirkin
@ 2009-06-05 10:24 ` Michael S. Tsirkin
2009-06-05 10:24 ` [PATCHv3 11/13] qemu: request 3 vectors in virtio-net Michael S. Tsirkin
` (2 subsequent siblings)
12 siblings, 0 replies; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:24 UTC (permalink / raw)
To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, vi
This enables actual support for MSI-X in virtio PCI.
First user will be virtio-net.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/virtio-pci.c | 152 ++++++++++++++++++++++++++++++++++++++++--------------
1 files changed, 112 insertions(+), 40 deletions(-)
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 7dfdd80..294f4c7 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -18,6 +18,7 @@
#include "virtio.h"
#include "pci.h"
//#include "sysemu.h"
+#include "msix.h"
/* from Linux's linux/virtio_pci.h */
@@ -47,7 +48,24 @@
* a read-and-acknowledge. */
#define VIRTIO_PCI_ISR 19
-#define VIRTIO_PCI_CONFIG 20
+/* MSI-X registers: only enabled if MSI-X is enabled. */
+/* A 16-bit vector for configuration changes. */
+#define VIRTIO_MSI_CONFIG_VECTOR 20
+/* A 16-bit vector for selected queue notifications. */
+#define VIRTIO_MSI_QUEUE_VECTOR 22
+
+/* Config space size */
+#define VIRTIO_PCI_CONFIG_NOMSI 20
+#define VIRTIO_PCI_CONFIG_MSI 24
+#define VIRTIO_PCI_REGION_SIZE(dev) (msix_present(dev) ? \
+ VIRTIO_PCI_CONFIG_MSI : \
+ VIRTIO_PCI_CONFIG_NOMSI)
+
+/* The remaining space is defined by each driver as the per-driver
+ * configuration space */
+#define VIRTIO_PCI_CONFIG(dev) (msix_enabled(dev) ? \
+ VIRTIO_PCI_CONFIG_MSI : \
+ VIRTIO_PCI_CONFIG_NOMSI)
/* Virtio ABI version, if we increment this, we break the guest driver. */
#define VIRTIO_PCI_ABI_VERSION 0
@@ -81,14 +99,17 @@ typedef struct {
static void virtio_pci_notify(void *opaque, uint16_t vector)
{
VirtIOPCIProxy *proxy = opaque;
-
- qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
+ if (msix_enabled(&proxy->pci_dev))
+ msix_notify(&proxy->pci_dev, vector);
+ else
+ qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
}
static void virtio_pci_reset(void *opaque)
{
VirtIOPCIProxy *proxy = opaque;
virtio_reset(proxy->vdev);
+ msix_reset(&proxy->pci_dev);
}
static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
@@ -97,8 +118,6 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
VirtIODevice *vdev = proxy->vdev;
target_phys_addr_t pa;
- addr -= proxy->addr;
-
switch (addr) {
case VIRTIO_PCI_GUEST_FEATURES:
/* Guest does not negotiate properly? We have to assume nothing. */
@@ -131,17 +150,33 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
if (vdev->status == 0)
virtio_pci_reset(proxy);
break;
+ case VIRTIO_MSI_CONFIG_VECTOR:
+ msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
+ /* Make it possible for guest to discover an error took place. */
+ if (msix_vector_use(&proxy->pci_dev, val) < 0)
+ val = VIRTIO_NO_VECTOR;
+ vdev->config_vector = val;
+ break;
+ case VIRTIO_MSI_QUEUE_VECTOR:
+ msix_vector_unuse(&proxy->pci_dev,
+ virtio_queue_vector(vdev, vdev->queue_sel));
+ /* Make it possible for guest to discover an error took place. */
+ if (msix_vector_use(&proxy->pci_dev, val) < 0)
+ val = VIRTIO_NO_VECTOR;
+ virtio_queue_set_vector(vdev, vdev->queue_sel, val);
+ break;
+ default:
+ fprintf(stderr, "%s: unexpected address 0x%x value 0x%x\n",
+ __func__, addr, val);
+ break;
}
}
-static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
+static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr)
{
- VirtIOPCIProxy *proxy = opaque;
VirtIODevice *vdev = proxy->vdev;
uint32_t ret = 0xFFFFFFFF;
- addr -= proxy->addr;
-
switch (addr) {
case VIRTIO_PCI_HOST_FEATURES:
ret = vdev->get_features(vdev);
@@ -169,6 +204,12 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
vdev->isr = 0;
qemu_set_irq(proxy->pci_dev.irq[0], 0);
break;
+ case VIRTIO_MSI_CONFIG_VECTOR:
+ ret = vdev->config_vector;
+ break;
+ case VIRTIO_MSI_QUEUE_VECTOR:
+ ret = virtio_queue_vector(vdev, vdev->queue_sel);
+ break;
default:
break;
}
@@ -179,42 +220,72 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
static uint32_t virtio_pci_config_readb(void *opaque, uint32_t addr)
{
VirtIOPCIProxy *proxy = opaque;
- addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+ uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+ addr -= proxy->addr;
+ if (addr < config)
+ return virtio_ioport_read(proxy, addr);
+ addr -= config;
return virtio_config_readb(proxy->vdev, addr);
}
static uint32_t virtio_pci_config_readw(void *opaque, uint32_t addr)
{
VirtIOPCIProxy *proxy = opaque;
- addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+ uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+ addr -= proxy->addr;
+ if (addr < config)
+ return virtio_ioport_read(proxy, addr);
+ addr -= config;
return virtio_config_readw(proxy->vdev, addr);
}
static uint32_t virtio_pci_config_readl(void *opaque, uint32_t addr)
{
VirtIOPCIProxy *proxy = opaque;
- addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+ uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+ addr -= proxy->addr;
+ if (addr < config)
+ return virtio_ioport_read(proxy, addr);
+ addr -= config;
return virtio_config_readl(proxy->vdev, addr);
}
static void virtio_pci_config_writeb(void *opaque, uint32_t addr, uint32_t val)
{
VirtIOPCIProxy *proxy = opaque;
- addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+ uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+ addr -= proxy->addr;
+ if (addr < config) {
+ virtio_ioport_write(proxy, addr, val);
+ return;
+ }
+ addr -= config;
virtio_config_writeb(proxy->vdev, addr, val);
}
static void virtio_pci_config_writew(void *opaque, uint32_t addr, uint32_t val)
{
VirtIOPCIProxy *proxy = opaque;
- addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+ uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+ addr -= proxy->addr;
+ if (addr < config) {
+ virtio_ioport_write(proxy, addr, val);
+ return;
+ }
+ addr -= config;
virtio_config_writew(proxy->vdev, addr, val);
}
static void virtio_pci_config_writel(void *opaque, uint32_t addr, uint32_t val)
{
VirtIOPCIProxy *proxy = opaque;
- addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+ uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+ addr -= proxy->addr;
+ if (addr < config) {
+ virtio_ioport_write(proxy, addr, val);
+ return;
+ }
+ addr -= config;
virtio_config_writel(proxy->vdev, addr, val);
}
@@ -223,32 +294,26 @@ static void virtio_map(PCIDevice *pci_dev, int region_num,
{
VirtIOPCIProxy *proxy = container_of(pci_dev, VirtIOPCIProxy, pci_dev);
VirtIODevice *vdev = proxy->vdev;
- int i;
+ unsigned config_len = VIRTIO_PCI_REGION_SIZE(pci_dev) + vdev->config_len;
proxy->addr = addr;
- for (i = 0; i < 3; i++) {
- register_ioport_write(addr, VIRTIO_PCI_CONFIG, 1 << i,
- virtio_ioport_write, proxy);
- register_ioport_read(addr, VIRTIO_PCI_CONFIG, 1 << i,
- virtio_ioport_read, proxy);
- }
- if (vdev->config_len) {
- register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 1,
- virtio_pci_config_writeb, proxy);
- register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 2,
- virtio_pci_config_writew, proxy);
- register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 4,
- virtio_pci_config_writel, proxy);
- register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 1,
- virtio_pci_config_readb, proxy);
- register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 2,
- virtio_pci_config_readw, proxy);
- register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 4,
- virtio_pci_config_readl, proxy);
+ register_ioport_write(addr, config_len, 1, virtio_pci_config_writeb, proxy);
+ register_ioport_write(addr, config_len, 2, virtio_pci_config_writew, proxy);
+ register_ioport_write(addr, config_len, 4, virtio_pci_config_writel, proxy);
+ register_ioport_read(addr, config_len, 1, virtio_pci_config_readb, proxy);
+ register_ioport_read(addr, config_len, 2, virtio_pci_config_readw, proxy);
+ register_ioport_read(addr, config_len, 4, virtio_pci_config_readl, proxy);
+ if (vdev->config_len)
vdev->get_config(vdev, vdev->config);
- }
+}
+
+static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
+ uint32_t val, int len)
+{
+ pci_default_write_config(pci_dev, address, val, len);
+ msix_write_config(pci_dev, address, val, len);
}
static const VirtIOBindings virtio_pci_bindings = {
@@ -264,9 +329,6 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
proxy->vdev = vdev;
- /* No support for multiple vectors yet. */
- proxy->vdev->nvectors = 0;
-
config = proxy->pci_dev.config;
pci_config_set_vendor_id(config, vendor);
pci_config_set_device_id(config, device);
@@ -284,7 +346,17 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
config[0x3d] = 1;
- size = 20 + vdev->config_len;
+ if (vdev->nvectors && !msix_init(&proxy->pci_dev, vdev->nvectors, 1, 0)) {
+ pci_register_io_region(&proxy->pci_dev, 1,
+ msix_bar_size(&proxy->pci_dev),
+ PCI_ADDRESS_SPACE_MEM,
+ msix_mmio_map);
+ proxy->pci_dev.config_write = virtio_write_config;
+ proxy->pci_dev.unregister = msix_uninit;
+ } else
+ vdev->nvectors = 0;
+
+ size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev) + vdev->config_len;
if (size & (size-1))
size = 1 << qemu_fls(size);
--
1.6.3.1.56.g79e1.dirty
^ permalink raw reply related [flat|nested] 139+ messages in thread
* [PATCHv3 11/13] qemu: request 3 vectors in virtio-net
[not found] <cover.1244192535.git.mst@redhat.com>
` (9 preceding siblings ...)
2009-06-05 10:24 ` [PATCHv3 10/13] qemu: MSI-X support in virtio PCI Michael S. Tsirkin
@ 2009-06-05 10:24 ` Michael S. Tsirkin
2009-06-05 10:24 ` [PATCHv3 12/13] qemu: virtio save/load bindings Michael S. Tsirkin
2009-06-05 10:24 ` [PATCHv3 13/13] qemu: add pci_get/set_byte Michael S. Tsirkin
12 siblings, 0 replies; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:24 UTC (permalink / raw)
To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, vi
Request up to 3 vectors in virtio-net. Actual bindings might supply
fewer.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/virtio-net.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 60aa6da..6118fe3 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -621,6 +621,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev)
n->mac_table.macs = qemu_mallocz(MAC_TABLE_ENTRIES * ETH_ALEN);
n->vlans = qemu_mallocz(MAX_VLAN >> 3);
+ n->vdev.nvectors = 3;
register_savevm("virtio-net", virtio_net_id++, VIRTIO_NET_VM_VERSION,
virtio_net_save, virtio_net_load, n);
--
1.6.3.1.56.g79e1.dirty
^ permalink raw reply related [flat|nested] 139+ messages in thread
* [PATCHv3 12/13] qemu: virtio save/load bindings
[not found] <cover.1244192535.git.mst@redhat.com>
` (10 preceding siblings ...)
2009-06-05 10:24 ` [PATCHv3 11/13] qemu: request 3 vectors in virtio-net Michael S. Tsirkin
@ 2009-06-05 10:24 ` Michael S. Tsirkin
2009-06-09 17:45 ` [Qemu-devel] " Glauber Costa
2009-06-05 10:24 ` [PATCHv3 13/13] qemu: add pci_get/set_byte Michael S. Tsirkin
12 siblings, 1 reply; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:24 UTC (permalink / raw)
To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, vi
Implement bindings for virtio save/load. Use them in virtio pci.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/virtio-pci.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++-
hw/virtio.c | 31 ++++++++++++++-----------------
hw/virtio.h | 4 ++++
3 files changed, 66 insertions(+), 18 deletions(-)
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 294f4c7..589fbb1 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -105,6 +105,48 @@ static void virtio_pci_notify(void *opaque, uint16_t vector)
qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
}
+static void virtio_pci_save_config(void * opaque, QEMUFile *f)
+{
+ VirtIOPCIProxy *proxy = opaque;
+ pci_device_save(&proxy->pci_dev, f);
+ msix_save(&proxy->pci_dev, f);
+ if (msix_present(&proxy->pci_dev))
+ qemu_put_be16(f, proxy->vdev->config_vector);
+}
+
+static void virtio_pci_save_queue(void * opaque, int n, QEMUFile *f)
+{
+ VirtIOPCIProxy *proxy = opaque;
+ if (msix_present(&proxy->pci_dev))
+ qemu_put_be16(f, virtio_queue_vector(proxy->vdev, n));
+}
+
+static int virtio_pci_load_config(void * opaque, QEMUFile *f)
+{
+ VirtIOPCIProxy *proxy = opaque;
+ int ret;
+ ret = pci_device_load(&proxy->pci_dev, f);
+ if (ret)
+ return ret;
+ ret = msix_load(&proxy->pci_dev, f);
+ if (ret)
+ return ret;
+ if (msix_present(&proxy->pci_dev))
+ qemu_get_be16s(f, &proxy->vdev->config_vector);
+ return 0;
+}
+
+static int virtio_pci_load_queue(void * opaque, int n, QEMUFile *f)
+{
+ VirtIOPCIProxy *proxy = opaque;
+ uint16_t vector;
+ if (!msix_present(&proxy->pci_dev))
+ return 0;
+ qemu_get_be16s(f, &vector);
+ virtio_queue_set_vector(proxy->vdev, n, vector);
+ return 0;
+}
+
static void virtio_pci_reset(void *opaque)
{
VirtIOPCIProxy *proxy = opaque;
@@ -317,7 +359,12 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
}
static const VirtIOBindings virtio_pci_bindings = {
- .notify = virtio_pci_notify
+ .notify = virtio_pci_notify,
+ .save_config = virtio_pci_save_config,
+ .load_config = virtio_pci_load_config,
+ .save_config = virtio_pci_save_config,
+ .save_queue = virtio_pci_save_queue,
+ .load_queue = virtio_pci_load_queue,
};
static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
diff --git a/hw/virtio.c b/hw/virtio.c
index 63ffcff..b773dff 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -568,9 +568,8 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
int i;
- /* FIXME: load/save binding. */
- //pci_device_save(&vdev->pci_dev, f);
- //msix_save(&vdev->pci_dev, f);
+ if (vdev->binding->save_config)
+ vdev->binding->save_config(vdev->binding_opaque, f);
qemu_put_8s(f, &vdev->status);
qemu_put_8s(f, &vdev->isr);
@@ -596,19 +595,20 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
qemu_put_be32(f, vdev->vq[i].vring.num);
qemu_put_be64(f, vdev->vq[i].pa);
qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
- if (vdev->nvectors)
- qemu_put_be16s(f, &vdev->vq[i].vector);
+ if (vdev->binding->save_queue)
+ vdev->binding->save_queue(vdev->binding_opaque, i, f);
}
}
int virtio_load(VirtIODevice *vdev, QEMUFile *f)
{
- int num, i;
+ int num, i, ret;
- /* FIXME: load/save binding. */
- //pci_device_load(&vdev->pci_dev, f);
- //r = msix_load(&vdev->pci_dev, f);
- //pci_resize_io_region(&vdev->pci_dev, 1, msix_bar_size(&vdev->pci_dev));
+ if (vdev->binding->load_config) {
+ ret = vdev->binding->load_config(vdev->binding_opaque, f);
+ if (ret)
+ return ret;
+ }
qemu_get_8s(f, &vdev->status);
qemu_get_8s(f, &vdev->isr);
@@ -617,10 +617,6 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f)
vdev->config_len = qemu_get_be32(f);
qemu_get_buffer(f, vdev->config, vdev->config_len);
- if (vdev->nvectors) {
- qemu_get_be16s(f, &vdev->config_vector);
- //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
- }
num = qemu_get_be32(f);
for (i = 0; i < num; i++) {
@@ -631,9 +627,10 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f)
if (vdev->vq[i].pa) {
virtqueue_init(&vdev->vq[i]);
}
- if (vdev->nvectors) {
- qemu_get_be16s(f, &vdev->vq[i].vector);
- //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
+ if (vdev->binding->load_queue) {
+ ret = vdev->binding->load_queue(vdev->binding_opaque, i, f);
+ if (ret)
+ return ret;
}
}
diff --git a/hw/virtio.h b/hw/virtio.h
index 04a3c3d..ce05517 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -72,6 +72,10 @@ typedef struct VirtQueueElement
typedef struct {
void (*notify)(void * opaque, uint16_t vector);
+ void (*save_config)(void * opaque, QEMUFile *f);
+ void (*save_queue)(void * opaque, int n, QEMUFile *f);
+ int (*load_config)(void * opaque, QEMUFile *f);
+ int (*load_queue)(void * opaque, int n, QEMUFile *f);
} VirtIOBindings;
#define VIRTIO_PCI_QUEUE_MAX 16
--
1.6.3.1.56.g79e1.dirty
^ permalink raw reply related [flat|nested] 139+ messages in thread
* [PATCHv3 13/13] qemu: add pci_get/set_byte
[not found] <cover.1244192535.git.mst@redhat.com>
` (11 preceding siblings ...)
2009-06-05 10:24 ` [PATCHv3 12/13] qemu: virtio save/load bindings Michael S. Tsirkin
@ 2009-06-05 10:24 ` Michael S. Tsirkin
12 siblings, 0 replies; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:24 UTC (permalink / raw)
To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, vi
Add pci_get/set_byte to keep *_word and *_long access functions company.
They are unused for now.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/pci.h | 12 ++++++++++++
1 files changed, 12 insertions(+), 0 deletions(-)
diff --git a/hw/pci.h b/hw/pci.h
index 8e74033..7cc9a8a 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -263,6 +263,18 @@ PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did,
pci_map_irq_fn map_irq, const char *name);
static inline void
+pci_set_byte(uint8_t *config, uint8_t val)
+{
+ *config = val;
+}
+
+static inline uint8_t
+pci_get_byte(uint8_t *config)
+{
+ return *config;
+}
+
+static inline void
pci_set_word(uint8_t *config, uint16_t val)
{
cpu_to_le16wu((uint16_t *)config, val);
--
1.6.3.1.56.g79e1.dirty
^ permalink raw reply related [flat|nested] 139+ messages in thread
* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
2009-06-05 10:23 ` [PATCHv3 03/13] qemu: add routines to manage PCI capabilities Michael S. Tsirkin
@ 2009-06-09 17:11 ` Glauber Costa
2009-06-10 9:54 ` Michael S. Tsirkin
0 siblings, 1 reply; 139+ messages in thread
From: Glauber Costa @ 2009-06-09 17:11 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
Anthony Liguori
On Fri, Jun 05, 2009 at 01:23:15PM +0300, Michael S. Tsirkin wrote:
> Add routines to manage PCI capability list. First user will be MSI-X.
>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> hw/pci.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
> hw/pci.h | 18 +++++++++++-
> 2 files changed, 106 insertions(+), 10 deletions(-)
>
> diff --git a/hw/pci.c b/hw/pci.c
> index 361d741..ed011b5 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
> @@ -130,12 +130,13 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
> int version = s->cap_present ? 3 : 2;
> int i;
>
> - qemu_put_be32(f, version); /* PCI device version */
> + /* PCI device version and capabilities */
> + qemu_put_be32(f, version);
> + if (version >= 3)
> + qemu_put_be32(f, s->cap_present);
> qemu_put_buffer(f, s->config, 256);
> for (i = 0; i < 4; i++)
> qemu_put_be32(f, s->irq_state[i]);
> - if (version >= 3)
> - qemu_put_be32(f, s->cap_present);
> }
What is it doing here?
You should just do it right in the first patch, instead of doing in
one way there, and fixing here.
>
> int pci_device_load(PCIDevice *s, QEMUFile *f)
> @@ -146,12 +147,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> version_id = qemu_get_be32(f);
> if (version_id > 3)
> return -EINVAL;
> - qemu_get_buffer(f, s->config, 256);
> - pci_update_mappings(s);
> -
> - if (version_id >= 2)
> - for (i = 0; i < 4; i ++)
> - s->irq_state[i] = qemu_get_be32(f);
> if (version_id >= 3)
> s->cap_present = qemu_get_be32(f);
> else
ditto.
> @@ -160,6 +155,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> if (s->cap_present & ~s->cap_supported)
> return -EINVAL;
>
> + qemu_get_buffer(f, s->config, 256);
> + pci_update_mappings(s);
> +
> + if (version_id >= 2)
> + for (i = 0; i < 4; i ++)
> + s->irq_state[i] = qemu_get_be32(f);
> + /* Clear wmask and used bits for capabilities.
> + Must be restored separately, since capabilities can
> + be placed anywhere in config space. */
> + memset(s->used, 0, PCI_CONFIG_SPACE_SIZE);
> + for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> + s->wmask[i] = 0xff;
> return 0;
> }
Sorry, I don't exactly understand it. Although it can be anywhere, what do we actually
lose by keeping it at the same place in config space?
>
> @@ -870,3 +877,76 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
>
> return (PCIDevice *)dev;
> }
> +
> +static int pci_find_space(PCIDevice *pdev, uint8_t size)
> +{
> + int offset = PCI_CONFIG_HEADER_SIZE;
> + int i;
> + for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> + if (pdev->used[i])
> + offset = i + 1;
> + else if (i - offset + 1 == size)
> + return offset;
> + return 0;
> +}
> +
> +static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id,
> + uint8_t *prev_p)
> +{
> + uint8_t next, prev;
> +
> + if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST))
> + return 0;
> +
> + for (prev = PCI_CAPABILITY_LIST; (next = pdev->config[prev]);
> + prev = next + PCI_CAP_LIST_NEXT)
> + if (pdev->config[next + PCI_CAP_LIST_ID] == cap_id)
> + break;
> +
> + *prev_p = prev;
> + return next;
> +}
I'd prefer to do:
if (prev_p)
*prev_p = prev;
so we don't have to always pass a prev_p pointer. You have yourself a user
where you don't need it in this very patch.
> +
> +/* Reserve space and add capability to the linked list in pci config space */
> +int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
> +{
> + uint8_t offset = pci_find_space(pdev, size);
> + uint8_t *config = pdev->config + offset;
> + if (!offset)
> + return -ENOSPC;
> + config[PCI_CAP_LIST_ID] = cap_id;
> + config[PCI_CAP_LIST_NEXT] = pdev->config[PCI_CAPABILITY_LIST];
> + pdev->config[PCI_CAPABILITY_LIST] = offset;
> + pdev->config[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
> + memset(pdev->used + offset, 0xFF, size);
> + /* Make capability read-only by default */
> + memset(pdev->wmask + offset, 0, size);
> + return offset;
> +}
> +
> +/* Unlink capability from the pci config space. */
> +void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
> +{
> + uint8_t prev, offset = pci_find_capability_list(pdev, cap_id, &prev);
> + if (!offset)
> + return;
> + pdev->config[prev] = pdev->config[offset + PCI_CAP_LIST_NEXT];
> + /* Make capability writeable again */
> + memset(pdev->wmask + offset, 0xff, size);
> + memset(pdev->used + offset, 0, size);
> +
> + if (!pdev->config[PCI_CAPABILITY_LIST])
> + pdev->config[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;
> +}
> +
> +/* Reserve space for capability at a known offset (to call after load). */
> +void pci_reserve_capability(PCIDevice *pdev, uint8_t offset, uint8_t size)
> +{
> + memset(pdev->used + offset, 0xff, size);
> +}
> +
> +uint8_t pci_find_capability(PCIDevice *pdev, uint8_t cap_id)
> +{
> + uint8_t prev;
> + return pci_find_capability_list(pdev, cap_id, &prev);
> +}
> diff --git a/hw/pci.h b/hw/pci.h
> index 6f0803f..4838c59 100644
> --- a/hw/pci.h
> +++ b/hw/pci.h
> @@ -123,6 +123,10 @@ typedef struct PCIIORegion {
> #define PCI_MIN_GNT 0x3e /* 8 bits */
> #define PCI_MAX_LAT 0x3f /* 8 bits */
>
> +/* Capability lists */
> +#define PCI_CAP_LIST_ID 0 /* Capability ID */
> +#define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */
> +
> #define PCI_REVISION 0x08 /* obsolete, use PCI_REVISION_ID */
> #define PCI_SUBVENDOR_ID 0x2c /* obsolete, use PCI_SUBSYSTEM_VENDOR_ID */
> #define PCI_SUBDEVICE_ID 0x2e /* obsolete, use PCI_SUBSYSTEM_ID */
> @@ -130,7 +134,7 @@ typedef struct PCIIORegion {
> /* Bits in the PCI Status Register (PCI 2.3 spec) */
> #define PCI_STATUS_RESERVED1 0x007
> #define PCI_STATUS_INT_STATUS 0x008
> -#define PCI_STATUS_CAPABILITIES 0x010
> +#define PCI_STATUS_CAP_LIST 0x010
> #define PCI_STATUS_66MHZ 0x020
> #define PCI_STATUS_RESERVED2 0x040
> #define PCI_STATUS_FAST_BACK 0x080
> @@ -160,6 +164,9 @@ struct PCIDevice {
> /* Used to implement R/W bytes */
> uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
>
> + /* Used to allocate config space for capabilities. */
> + uint8_t used[PCI_CONFIG_SPACE_SIZE];
> +
> /* the following fields are read only */
> PCIBus *bus;
> int devfn;
> @@ -194,6 +201,15 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
> uint32_t size, int type,
> PCIMapIORegionFunc *map_func);
>
> +int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
> +
> +void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
> +
> +void pci_reserve_capability(PCIDevice *pci_dev, uint8_t offset, uint8_t size);
> +
> +uint8_t pci_find_capability(PCIDevice *pci_dev, uint8_t cap_id);
> +
> +
> uint32_t pci_default_read_config(PCIDevice *d,
> uint32_t address, int len);
> void pci_default_write_config(PCIDevice *d,
> --
> 1.6.3.1.56.g79e1.dirty
>
>
>
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: [Qemu-devel] [PATCHv3 05/13] qemu: MSI-X support functions
2009-06-05 10:23 ` [PATCHv3 05/13] qemu: MSI-X support functions Michael S. Tsirkin
@ 2009-06-09 17:26 ` Glauber Costa
2009-06-10 9:58 ` Michael S. Tsirkin
0 siblings, 1 reply; 139+ messages in thread
From: Glauber Costa @ 2009-06-09 17:26 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
Anthony Liguori
On Fri, Jun 05, 2009 at 01:23:31PM +0300, Michael S. Tsirkin wrote:
> Add functions implementing MSI-X support. First user will be virtio-pci.
> Note that platform must set a flag to declare MSI supported.
> For PC this will be set by APIC.
>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> Makefile.target | 2 +-
> hw/msix.c | 423 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> hw/msix.h | 35 +++++
> hw/pci.h | 20 +++
> 4 files changed, 479 insertions(+), 1 deletions(-)
> create mode 100644 hw/msix.c
> create mode 100644 hw/msix.h
>
> diff --git a/Makefile.target b/Makefile.target
> index 664a1e3..87b2859 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -486,7 +486,7 @@ endif #CONFIG_BSD_USER
> ifndef CONFIG_USER_ONLY
>
> OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o \
> - gdbstub.o gdbstub-xml.o
> + gdbstub.o gdbstub-xml.o msix.o
> # virtio has to be here due to weird dependency between PCI and virtio-net.
> # need to fix this properly
> OBJS+=virtio-blk.o virtio-balloon.o virtio-net.o virtio-console.o
> diff --git a/hw/msix.c b/hw/msix.c
> new file mode 100644
> index 0000000..1b5aec8
> --- /dev/null
> +++ b/hw/msix.c
> @@ -0,0 +1,423 @@
> +/*
> + * MSI-X device support
> + *
> + * This module includes support for MSI-X in pci devices.
> + *
> + * Author: Michael S. Tsirkin <mst@redhat.com>
> + *
> + * Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2. See
> + * the COPYING file in the top-level directory.
> + */
> +
> +#include "hw.h"
> +#include "msix.h"
> +#include "pci.h"
> +
> +/* Declaration from linux/pci_regs.h */
> +#define PCI_CAP_ID_MSIX 0x11 /* MSI-X */
> +#define PCI_MSIX_FLAGS 2 /* Table at lower 11 bits */
> +#define PCI_MSIX_FLAGS_QSIZE 0x7FF
> +#define PCI_MSIX_FLAGS_ENABLE (1 << 15)
> +#define PCI_MSIX_FLAGS_BIRMASK (7 << 0)
> +
> +/* MSI-X capability structure */
> +#define MSIX_TABLE_OFFSET 4
> +#define MSIX_PBA_OFFSET 8
> +#define MSIX_CAP_LENGTH 12
> +
> +/* MSI enable bit is in byte 1 in FLAGS register */
> +#define MSIX_ENABLE_OFFSET (PCI_MSIX_FLAGS + 1)
> +#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
> +
> +/* MSI-X table format */
> +#define MSIX_MSG_ADDR 0
> +#define MSIX_MSG_UPPER_ADDR 4
> +#define MSIX_MSG_DATA 8
> +#define MSIX_VECTOR_CTRL 12
> +#define MSIX_ENTRY_SIZE 16
> +#define MSIX_VECTOR_MASK 0x1
> +
> +/* How much space does an MSIX table need. */
> +/* The spec requires giving the table structure
> + * a 4K aligned region all by itself. Align it to
> + * target pages so that drivers can do passthrough
> + * on the rest of the region. */
> +#define MSIX_PAGE_SIZE TARGET_PAGE_ALIGN(0x1000)
> +/* Reserve second half of the page for pending bits */
> +#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
> +#define MSIX_MAX_ENTRIES 32
> +
> +
> +#ifdef MSIX_DEBUG
> +#define DEBUG(fmt, ...) \
> + do { \
> + fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__); \
> + } while (0)
> +#else
> +#define DEBUG(fmt, ...) do { } while(0)
> +#endif
> +
> +/* Flag to globally disable MSI-X support */
> +int msix_disable;
> +
> +/* Flag for interrupt controller to declare MSI-X support */
> +int msix_supported;
maybe better to make it static, and provide msi_state() returning -1 for disabled,
0 for supported, etc...
> +
> +/* Add MSI-X capability to the config space for the device. */
> +/* Given a bar and its size, add MSI-X table on top of it
> + * and fill MSI-X capability in the config space.
> + * Original bar size must be a power of 2 or 0.
> + * New bar size is returned. */
> +static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
> + unsigned bar_nr, unsigned bar_size)
> +{
> + int config_offset;
> + uint8_t *config;
> + uint32_t new_size;
> +
> + if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
> + return -EINVAL;
> + if (bar_size > 0x80000000)
> + return -ENOSPC;
> +
> + /* Add space for MSI-X structures */
> + if (!bar_size)
> + new_size = MSIX_PAGE_SIZE;
> + else if (bar_size < MSIX_PAGE_SIZE) {
> + bar_size = MSIX_PAGE_SIZE;
> + new_size = MSIX_PAGE_SIZE * 2;
> + } else
> + new_size = bar_size * 2;
> +
> + pdev->msix_bar_size = new_size;
> + config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
> + if (config_offset < 0)
> + return config_offset;
> + config = pdev->config + config_offset;
> +
> + pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
> + /* Table on top of BAR */
> + pci_set_long(config + MSIX_TABLE_OFFSET, bar_size | bar_nr);
> + /* Pending bits on top of that */
> + pci_set_long(config + MSIX_PBA_OFFSET, (bar_size + MSIX_PAGE_PENDING) |
> + bar_nr);
> + pdev->msix_cap = config_offset;
> + /* Make flags bit writeable. */
> + pdev->wmask[config_offset + MSIX_ENABLE_OFFSET] |= MSIX_ENABLE_MASK;
> + return 0;
> +}
> +
> +
> +/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
> + * modified, it should be retrieved with msix_bar_size. */
> +int msix_init(struct PCIDevice *dev, unsigned short nentries,
> + unsigned bar_nr, unsigned bar_size)
> +{
> + int ret = -ENOMEM;
> + /* Nothing to do if MSI is not supported by interrupt controller */
> + if (!msix_supported)
> + return -ENOTTY;
> +
> + if (nentries > MSIX_MAX_ENTRIES)
> + return -EINVAL;
> +
> + dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
> + sizeof *dev->msix_entry_used);
> + if (!dev->msix_entry_used)
> + goto err_used;
no need to check. oom_checker will kill qemu if it fails.
> +
> + dev->msix_table_page = qemu_mallocz(MSIX_PAGE_SIZE);
> + if (!dev->msix_table_page)
> + goto err_page;
ditto.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: [Qemu-devel] [PATCHv3 07/13] qemu: minimal MSI/MSI-X implementation for PC
2009-06-05 10:23 ` [PATCHv3 07/13] qemu: minimal MSI/MSI-X implementation for PC Michael S. Tsirkin
@ 2009-06-09 17:33 ` Glauber Costa
2009-06-10 9:59 ` Michael S. Tsirkin
0 siblings, 1 reply; 139+ messages in thread
From: Glauber Costa @ 2009-06-09 17:33 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
Anthony Liguori
> env = cpu_single_env;
> if (!env)
> @@ -727,7 +762,6 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
> printf("APIC write: %08x = %08x\n", (uint32_t)addr, val);
> #endif
>
> - index = (addr >> 4) & 0xff;
> switch(index) {
> case 0x02:
> s->id = (val >> 24);
> @@ -911,6 +945,7 @@ int apic_init(CPUState *env)
> s->cpu_env = env;
>
> apic_reset(s);
> + msix_supported = 1;
>
> /* XXX: mapping more APICs at the same memory location */
> if (apic_io_memory == 0) {
> @@ -918,7 +953,8 @@ int apic_init(CPUState *env)
> on the global memory bus. */
> apic_io_memory = cpu_register_io_memory(0, apic_mem_read,
> apic_mem_write, NULL);
> - cpu_register_physical_memory(s->apicbase & ~0xfff, 0x1000,
> + /* XXX: what if the base changes? */
> + cpu_register_physical_memory(MSI_ADDR_BASE, MSI_ADDR_SIZE,
> apic_io_memory);
+1
I think you have a point here. Your patch is in no way worse than what we had,
but we're currently not correctly handling the case of the base address changing.
I guess that is not common in normal APIC usage by OSes...
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: [Qemu-devel] [PATCHv3 08/13] qemu: add support for resizing regions
2009-06-05 10:23 ` [PATCHv3 08/13] qemu: add support for resizing regions Michael S. Tsirkin
@ 2009-06-09 17:36 ` Glauber Costa
2009-06-10 10:05 ` Michael S. Tsirkin
2009-06-10 10:46 ` Michael S. Tsirkin
0 siblings, 2 replies; 139+ messages in thread
From: Glauber Costa @ 2009-06-09 17:36 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
Anthony Liguori
On Fri, Jun 05, 2009 at 01:23:55PM +0300, Michael S. Tsirkin wrote:
> Make it possible to resize PCI regions. This will be used by virtio
> with MSI-X, where the region size depends on whether MSI-X is enabled,
> and can change across load/save.
>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> hw/pci.c | 54 ++++++++++++++++++++++++++++++++++++------------------
> hw/pci.h | 3 +++
> 2 files changed, 39 insertions(+), 18 deletions(-)
>
> diff --git a/hw/pci.c b/hw/pci.c
> index ed011b5..042a216 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
> @@ -392,6 +392,41 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
> *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
> }
>
> +static void pci_unmap_region(PCIDevice *d, PCIIORegion *r)
> +{
> + if (r->addr == -1)
> + return;
> + if (r->type & PCI_ADDRESS_SPACE_IO) {
> + int class;
> + /* NOTE: specific hack for IDE in PC case:
> + only one byte must be mapped. */
> + class = pci_get_word(d->config + PCI_CLASS_DEVICE);
> + if (class == 0x0101 && r->size == 4) {
> + isa_unassign_ioport(r->addr + 2, 1);
> + } else {
> + isa_unassign_ioport(r->addr, r->size);
> + }
> + } else {
> + cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
> + r->size,
> + IO_MEM_UNASSIGNED);
> + qemu_unregister_coalesced_mmio(r->addr, r->size);
> + }
> +}
> +
this is a good cleanup...
> +void pci_resize_io_region(PCIDevice *pci_dev, int region_num,
> + uint32_t size)
> +{
> +
> + PCIIORegion *r = &pci_dev->io_regions[region_num];
> + if (r->size == size)
> + return;
> + r->size = size;
> + pci_unmap_region(pci_dev, r);
> + r->addr = -1;
> + pci_update_mappings(pci_dev);
> +}
> +
but the only user of this one seems to be commented out, and later removed.
Why is this needed?
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: [Qemu-devel] [PATCHv3 12/13] qemu: virtio save/load bindings
2009-06-05 10:24 ` [PATCHv3 12/13] qemu: virtio save/load bindings Michael S. Tsirkin
@ 2009-06-09 17:45 ` Glauber Costa
2009-06-10 10:11 ` Michael S. Tsirkin
2009-06-10 11:33 ` Michael S. Tsirkin
0 siblings, 2 replies; 139+ messages in thread
From: Glauber Costa @ 2009-06-09 17:45 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
Anthony Liguori
The .save_config initializer is duplicated in virtio_pci_bindings.
> diff --git a/hw/virtio.h b/hw/virtio.h
> index 04a3c3d..ce05517 100644
> --- a/hw/virtio.h
> +++ b/hw/virtio.h
> @@ -72,6 +72,10 @@ typedef struct VirtQueueElement
>
> typedef struct {
> void (*notify)(void * opaque, uint16_t vector);
> + void (*save_config)(void * opaque, QEMUFile *f);
> + void (*save_queue)(void * opaque, int n, QEMUFile *f);
> + int (*load_config)(void * opaque, QEMUFile *f);
> + int (*load_queue)(void * opaque, int n, QEMUFile *f);
> } VirtIOBindings;
>
So, what's the overall effect on a virtual machine that gets migrated,
of a certain device not implementing one of those functions? Will it work?
Will it break?
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
2009-06-09 17:11 ` [Qemu-devel] " Glauber Costa
@ 2009-06-10 9:54 ` Michael S. Tsirkin
2009-06-10 14:55 ` Glauber Costa
0 siblings, 1 reply; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 9:54 UTC (permalink / raw)
To: Glauber Costa
Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
Anthony Liguori
On Tue, Jun 09, 2009 at 02:11:14PM -0300, Glauber Costa wrote:
> On Fri, Jun 05, 2009 at 01:23:15PM +0300, Michael S. Tsirkin wrote:
> > Add routines to manage PCI capability list. First user will be MSI-X.
> >
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> > hw/pci.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
> > hw/pci.h | 18 +++++++++++-
> > 2 files changed, 106 insertions(+), 10 deletions(-)
> >
> > diff --git a/hw/pci.c b/hw/pci.c
> > index 361d741..ed011b5 100644
> > --- a/hw/pci.c
> > +++ b/hw/pci.c
> > @@ -130,12 +130,13 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
> > int version = s->cap_present ? 3 : 2;
> > int i;
> >
> > - qemu_put_be32(f, version); /* PCI device version */
> > + /* PCI device version and capabilities */
> > + qemu_put_be32(f, version);
> > + if (version >= 3)
> > + qemu_put_be32(f, s->cap_present);
> > qemu_put_buffer(f, s->config, 256);
> > for (i = 0; i < 4; i++)
> > qemu_put_be32(f, s->irq_state[i]);
> > - if (version >= 3)
> > - qemu_put_be32(f, s->cap_present);
> > }
> What is it doing here?
> You should just do it right in the first patch, instead of doing in
> one way there, and fixing here.
>
> >
> > int pci_device_load(PCIDevice *s, QEMUFile *f)
> > @@ -146,12 +147,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> > version_id = qemu_get_be32(f);
> > if (version_id > 3)
> > return -EINVAL;
> > - qemu_get_buffer(f, s->config, 256);
> > - pci_update_mappings(s);
> > -
> > - if (version_id >= 2)
> > - for (i = 0; i < 4; i ++)
> > - s->irq_state[i] = qemu_get_be32(f);
> > if (version_id >= 3)
> > s->cap_present = qemu_get_be32(f);
> > else
> ditto.
> > @@ -160,6 +155,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> > if (s->cap_present & ~s->cap_supported)
> > return -EINVAL;
> >
> > + qemu_get_buffer(f, s->config, 256);
> > + pci_update_mappings(s);
> > +
> > + if (version_id >= 2)
> > + for (i = 0; i < 4; i ++)
> > + s->irq_state[i] = qemu_get_be32(f);
> > + /* Clear wmask and used bits for capabilities.
> > + Must be restored separately, since capabilities can
> > + be placed anywhere in config space. */
> > + memset(s->used, 0, PCI_CONFIG_SPACE_SIZE);
> > + for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> > + s->wmask[i] = 0xff;
> > return 0;
> > }
> Sorry, I don't exactly understand it. Although it can be anywhere, what do we actually
> lose by keeping it at the same place in config space?
We lose the ability to let user control the capabilities exposed
by the device.
And generally, I dislike arbitrary limitations. The PCI spec says the
capability can be anywhere, implementing a linked list of caps is simple
enough to not invent arbitrary restrictions.
> >
> > @@ -870,3 +877,76 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
> >
> > return (PCIDevice *)dev;
> > }
> > +
> > +static int pci_find_space(PCIDevice *pdev, uint8_t size)
> > +{
> > + int offset = PCI_CONFIG_HEADER_SIZE;
> > + int i;
> > + for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> > + if (pdev->used[i])
> > + offset = i + 1;
> > + else if (i - offset + 1 == size)
> > + return offset;
> > + return 0;
> > +}
> > +
> > +static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id,
> > + uint8_t *prev_p)
> > +{
> > + uint8_t next, prev;
> > +
> > + if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST))
> > + return 0;
> > +
> > + for (prev = PCI_CAPABILITY_LIST; (next = pdev->config[prev]);
> > + prev = next + PCI_CAP_LIST_NEXT)
> > + if (pdev->config[next + PCI_CAP_LIST_ID] == cap_id)
> > + break;
> > +
> > + *prev_p = prev;
> > + return next;
> > +}
> I'd prefer to do:
> if (prev_p)
> *prev_p = prev;
> so we don't have to always pass a prev_p pointer. You have yourself a user
> where you don't need it in this very patch.
Good idea.
> > +
> > +/* Reserve space and add capability to the linked list in pci config space */
> > +int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
> > +{
> > + uint8_t offset = pci_find_space(pdev, size);
> > + uint8_t *config = pdev->config + offset;
> > + if (!offset)
> > + return -ENOSPC;
> > + config[PCI_CAP_LIST_ID] = cap_id;
> > + config[PCI_CAP_LIST_NEXT] = pdev->config[PCI_CAPABILITY_LIST];
> > + pdev->config[PCI_CAPABILITY_LIST] = offset;
> > + pdev->config[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
> > + memset(pdev->used + offset, 0xFF, size);
> > + /* Make capability read-only by default */
> > + memset(pdev->wmask + offset, 0, size);
> > + return offset;
> > +}
> > +
> > +/* Unlink capability from the pci config space. */
> > +void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
> > +{
> > + uint8_t prev, offset = pci_find_capability_list(pdev, cap_id, &prev);
> > + if (!offset)
> > + return;
> > + pdev->config[prev] = pdev->config[offset + PCI_CAP_LIST_NEXT];
> > + /* Make capability writeable again */
> > + memset(pdev->wmask + offset, 0xff, size);
> > + memset(pdev->used + offset, 0, size);
> > +
> > + if (!pdev->config[PCI_CAPABILITY_LIST])
> > + pdev->config[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;
> > +}
> > +
> > +/* Reserve space for capability at a known offset (to call after load). */
> > +void pci_reserve_capability(PCIDevice *pdev, uint8_t offset, uint8_t size)
> > +{
> > + memset(pdev->used + offset, 0xff, size);
> > +}
> > +
> > +uint8_t pci_find_capability(PCIDevice *pdev, uint8_t cap_id)
> > +{
> > + uint8_t prev;
> > + return pci_find_capability_list(pdev, cap_id, &prev);
> > +}
> > diff --git a/hw/pci.h b/hw/pci.h
> > index 6f0803f..4838c59 100644
> > --- a/hw/pci.h
> > +++ b/hw/pci.h
> > @@ -123,6 +123,10 @@ typedef struct PCIIORegion {
> > #define PCI_MIN_GNT 0x3e /* 8 bits */
> > #define PCI_MAX_LAT 0x3f /* 8 bits */
> >
> > +/* Capability lists */
> > +#define PCI_CAP_LIST_ID 0 /* Capability ID */
> > +#define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */
> > +
> > #define PCI_REVISION 0x08 /* obsolete, use PCI_REVISION_ID */
> > #define PCI_SUBVENDOR_ID 0x2c /* obsolete, use PCI_SUBSYSTEM_VENDOR_ID */
> > #define PCI_SUBDEVICE_ID 0x2e /* obsolete, use PCI_SUBSYSTEM_ID */
> > @@ -130,7 +134,7 @@ typedef struct PCIIORegion {
> > /* Bits in the PCI Status Register (PCI 2.3 spec) */
> > #define PCI_STATUS_RESERVED1 0x007
> > #define PCI_STATUS_INT_STATUS 0x008
> > -#define PCI_STATUS_CAPABILITIES 0x010
> > +#define PCI_STATUS_CAP_LIST 0x010
> > #define PCI_STATUS_66MHZ 0x020
> > #define PCI_STATUS_RESERVED2 0x040
> > #define PCI_STATUS_FAST_BACK 0x080
> > @@ -160,6 +164,9 @@ struct PCIDevice {
> > /* Used to implement R/W bytes */
> > uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
> >
> > + /* Used to allocate config space for capabilities. */
> > + uint8_t used[PCI_CONFIG_SPACE_SIZE];
> > +
> > /* the following fields are read only */
> > PCIBus *bus;
> > int devfn;
> > @@ -194,6 +201,15 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
> > uint32_t size, int type,
> > PCIMapIORegionFunc *map_func);
> >
> > +int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
> > +
> > +void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
> > +
> > +void pci_reserve_capability(PCIDevice *pci_dev, uint8_t offset, uint8_t size);
> > +
> > +uint8_t pci_find_capability(PCIDevice *pci_dev, uint8_t cap_id);
> > +
> > +
> > uint32_t pci_default_read_config(PCIDevice *d,
> > uint32_t address, int len);
> > void pci_default_write_config(PCIDevice *d,
> > --
> > 1.6.3.1.56.g79e1.dirty
> >
> >
> >
--
MST
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: [Qemu-devel] [PATCHv3 05/13] qemu: MSI-X support functions
2009-06-09 17:26 ` [Qemu-devel] " Glauber Costa
@ 2009-06-10 9:58 ` Michael S. Tsirkin
0 siblings, 0 replies; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 9:58 UTC (permalink / raw)
To: Glauber Costa
Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
Anthony Liguori
On Tue, Jun 09, 2009 at 02:26:27PM -0300, Glauber Costa wrote:
> On Fri, Jun 05, 2009 at 01:23:31PM +0300, Michael S. Tsirkin wrote:
> > Add functions implementing MSI-X support. First user will be virtio-pci.
> > Note that platform must set a flag to declare MSI supported.
> > For PC this will be set by APIC.
> >
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> > Makefile.target | 2 +-
> > hw/msix.c | 423 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> > hw/msix.h | 35 +++++
> > hw/pci.h | 20 +++
> > 4 files changed, 479 insertions(+), 1 deletions(-)
> > create mode 100644 hw/msix.c
> > create mode 100644 hw/msix.h
> >
> > diff --git a/Makefile.target b/Makefile.target
> > index 664a1e3..87b2859 100644
> > --- a/Makefile.target
> > +++ b/Makefile.target
> > @@ -486,7 +486,7 @@ endif #CONFIG_BSD_USER
> > ifndef CONFIG_USER_ONLY
> >
> > OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o \
> > - gdbstub.o gdbstub-xml.o
> > + gdbstub.o gdbstub-xml.o msix.o
> > # virtio has to be here due to weird dependency between PCI and virtio-net.
> > # need to fix this properly
> > OBJS+=virtio-blk.o virtio-balloon.o virtio-net.o virtio-console.o
> > diff --git a/hw/msix.c b/hw/msix.c
> > new file mode 100644
> > index 0000000..1b5aec8
> > --- /dev/null
> > +++ b/hw/msix.c
> > @@ -0,0 +1,423 @@
> > +/*
> > + * MSI-X device support
> > + *
> > + * This module includes support for MSI-X in pci devices.
> > + *
> > + * Author: Michael S. Tsirkin <mst@redhat.com>
> > + *
> > + * Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2. See
> > + * the COPYING file in the top-level directory.
> > + */
> > +
> > +#include "hw.h"
> > +#include "msix.h"
> > +#include "pci.h"
> > +
> > +/* Declaration from linux/pci_regs.h */
> > +#define PCI_CAP_ID_MSIX 0x11 /* MSI-X */
> > +#define PCI_MSIX_FLAGS 2 /* Table at lower 11 bits */
> > +#define PCI_MSIX_FLAGS_QSIZE 0x7FF
> > +#define PCI_MSIX_FLAGS_ENABLE (1 << 15)
> > +#define PCI_MSIX_FLAGS_BIRMASK (7 << 0)
> > +
> > +/* MSI-X capability structure */
> > +#define MSIX_TABLE_OFFSET 4
> > +#define MSIX_PBA_OFFSET 8
> > +#define MSIX_CAP_LENGTH 12
> > +
> > +/* MSI enable bit is in byte 1 in FLAGS register */
> > +#define MSIX_ENABLE_OFFSET (PCI_MSIX_FLAGS + 1)
> > +#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
> > +
> > +/* MSI-X table format */
> > +#define MSIX_MSG_ADDR 0
> > +#define MSIX_MSG_UPPER_ADDR 4
> > +#define MSIX_MSG_DATA 8
> > +#define MSIX_VECTOR_CTRL 12
> > +#define MSIX_ENTRY_SIZE 16
> > +#define MSIX_VECTOR_MASK 0x1
> > +
> > +/* How much space does an MSIX table need. */
> > +/* The spec requires giving the table structure
> > + * a 4K aligned region all by itself. Align it to
> > + * target pages so that drivers can do passthrough
> > + * on the rest of the region. */
> > +#define MSIX_PAGE_SIZE TARGET_PAGE_ALIGN(0x1000)
> > +/* Reserve second half of the page for pending bits */
> > +#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
> > +#define MSIX_MAX_ENTRIES 32
> > +
> > +
> > +#ifdef MSIX_DEBUG
> > +#define DEBUG(fmt, ...) \
> > + do { \
> > + fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__); \
> > + } while (0)
> > +#else
> > +#define DEBUG(fmt, ...) do { } while(0)
> > +#endif
> > +
> > +/* Flag to globally disable MSI-X support */
> > +int msix_disable;
> > +
> > +/* Flag for interrupt controller to declare MSI-X support */
> > +int msix_supported;
> maybe better to make it static,
It's not read-only either.
> and provide msi_state() returning -1 for disabled,
> 0 for supported, etc...
Matter of taste, I prefer a set of binary flags rather than yet another enum:
msix_disable is controlled by user, msix_supported is a safety valve
for non-PC platforms. It's easier to keep them separate IMO.
> > +
> > +/* Add MSI-X capability to the config space for the device. */
> > +/* Given a bar and its size, add MSI-X table on top of it
> > + * and fill MSI-X capability in the config space.
> > + * Original bar size must be a power of 2 or 0.
> > + * New bar size is returned. */
> > +static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
> > + unsigned bar_nr, unsigned bar_size)
> > +{
> > + int config_offset;
> > + uint8_t *config;
> > + uint32_t new_size;
> > +
> > + if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
> > + return -EINVAL;
> > + if (bar_size > 0x80000000)
> > + return -ENOSPC;
> > +
> > + /* Add space for MSI-X structures */
> > + if (!bar_size)
> > + new_size = MSIX_PAGE_SIZE;
> > + else if (bar_size < MSIX_PAGE_SIZE) {
> > + bar_size = MSIX_PAGE_SIZE;
> > + new_size = MSIX_PAGE_SIZE * 2;
> > + } else
> > + new_size = bar_size * 2;
> > +
> > + pdev->msix_bar_size = new_size;
> > + config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
> > + if (config_offset < 0)
> > + return config_offset;
> > + config = pdev->config + config_offset;
> > +
> > + pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
> > + /* Table on top of BAR */
> > + pci_set_long(config + MSIX_TABLE_OFFSET, bar_size | bar_nr);
> > + /* Pending bits on top of that */
> > + pci_set_long(config + MSIX_PBA_OFFSET, (bar_size + MSIX_PAGE_PENDING) |
> > + bar_nr);
> > + pdev->msix_cap = config_offset;
> > + /* Make flags bit writeable. */
> > + pdev->wmask[config_offset + MSIX_ENABLE_OFFSET] |= MSIX_ENABLE_MASK;
> > + return 0;
> > +}
> > +
>
> > +
> > +/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
> > + * modified, it should be retrieved with msix_bar_size. */
> > +int msix_init(struct PCIDevice *dev, unsigned short nentries,
> > + unsigned bar_nr, unsigned bar_size)
> > +{
> > + int ret = -ENOMEM;
> > + /* Nothing to do if MSI is not supported by interrupt controller */
> > + if (!msix_supported)
> > + return -ENOTTY;
> > +
> > + if (nentries > MSIX_MAX_ENTRIES)
> > + return -EINVAL;
> > +
> > + dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
> > + sizeof *dev->msix_entry_used);
> > + if (!dev->msix_entry_used)
> > + goto err_used;
> no need to check. oom_checker will kill qemu if it fails.
>
> > +
> > + dev->msix_table_page = qemu_mallocz(MSIX_PAGE_SIZE);
> > + if (!dev->msix_table_page)
> > + goto err_page;
> ditto.
>
Good point.
--
MST
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: [Qemu-devel] [PATCHv3 07/13] qemu: minimal MSI/MSI-X implementation for PC
2009-06-09 17:33 ` [Qemu-devel] " Glauber Costa
@ 2009-06-10 9:59 ` Michael S. Tsirkin
0 siblings, 0 replies; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 9:59 UTC (permalink / raw)
To: Glauber Costa
Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
Anthony Liguori
On Tue, Jun 09, 2009 at 02:33:33PM -0300, Glauber Costa wrote:
> > env = cpu_single_env;
> > if (!env)
> > @@ -727,7 +762,6 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
> > printf("APIC write: %08x = %08x\n", (uint32_t)addr, val);
> > #endif
> >
> > - index = (addr >> 4) & 0xff;
> > switch(index) {
> > case 0x02:
> > s->id = (val >> 24);
> > @@ -911,6 +945,7 @@ int apic_init(CPUState *env)
> > s->cpu_env = env;
> >
> > apic_reset(s);
> > + msix_supported = 1;
> >
> > /* XXX: mapping more APICs at the same memory location */
> > if (apic_io_memory == 0) {
> > @@ -918,7 +953,8 @@ int apic_init(CPUState *env)
> > on the global memory bus. */
> > apic_io_memory = cpu_register_io_memory(0, apic_mem_read,
> > apic_mem_write, NULL);
> > - cpu_register_physical_memory(s->apicbase & ~0xfff, 0x1000,
> > + /* XXX: what if the base changes? */
> > + cpu_register_physical_memory(MSI_ADDR_BASE, MSI_ADDR_SIZE,
> > apic_io_memory);
> +1
>
> I think you have a point here. Your patch is in no way worse than what we had,
> but we're currently not handling correctly the case of base address changing.
Yep.
> Guess it is not common in normal apic usage for OSes...
--
MST
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: [Qemu-devel] [PATCHv3 08/13] qemu: add support for resizing regions
2009-06-09 17:36 ` [Qemu-devel] " Glauber Costa
@ 2009-06-10 10:05 ` Michael S. Tsirkin
2009-06-10 10:46 ` Michael S. Tsirkin
1 sibling, 0 replies; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 10:05 UTC (permalink / raw)
To: Glauber Costa
Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
Anthony Liguori
On Tue, Jun 09, 2009 at 02:36:21PM -0300, Glauber Costa wrote:
> On Fri, Jun 05, 2009 at 01:23:55PM +0300, Michael S. Tsirkin wrote:
> > Make it possible to resize PCI regions. This will be used by virtio
> > with MSI-X, where the region size depends on whether MSI-X is enabled,
> > and can change across load/save.
> >
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> > hw/pci.c | 54 ++++++++++++++++++++++++++++++++++++------------------
> > hw/pci.h | 3 +++
> > 2 files changed, 39 insertions(+), 18 deletions(-)
> >
> > diff --git a/hw/pci.c b/hw/pci.c
> > index ed011b5..042a216 100644
> > --- a/hw/pci.c
> > +++ b/hw/pci.c
> > @@ -392,6 +392,41 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
> > *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
> > }
> >
> > +static void pci_unmap_region(PCIDevice *d, PCIIORegion *r)
> > +{
> > + if (r->addr == -1)
> > + return;
> > + if (r->type & PCI_ADDRESS_SPACE_IO) {
> > + int class;
> > + /* NOTE: specific hack for IDE in PC case:
> > + only one byte must be mapped. */
> > + class = pci_get_word(d->config + PCI_CLASS_DEVICE);
> > + if (class == 0x0101 && r->size == 4) {
> > + isa_unassign_ioport(r->addr + 2, 1);
> > + } else {
> > + isa_unassign_ioport(r->addr, r->size);
> > + }
> > + } else {
> > + cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
> > + r->size,
> > + IO_MEM_UNASSIGNED);
> > + qemu_unregister_coalesced_mmio(r->addr, r->size);
> > + }
> > +}
> > +
> this is a good cleanup...
>
> > +void pci_resize_io_region(PCIDevice *pci_dev, int region_num,
> > + uint32_t size)
> > +{
> > +
> > + PCIIORegion *r = &pci_dev->io_regions[region_num];
> > + if (r->size == size)
> > + return;
> > + r->size = size;
> > + pci_unmap_region(pci_dev, r);
> > + r->addr = -1;
> > + pci_update_mappings(pci_dev);
> > +}
> > +
> but the only user of this one seem to be commented out, and later removed.
> Why is this needed?
>
Um, I think this needs to be called on load: virtio has a memory region
if and only if it has MSI-X.
--
MST
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: [Qemu-devel] [PATCHv3 12/13] qemu: virtio save/load bindings
2009-06-09 17:45 ` [Qemu-devel] " Glauber Costa
@ 2009-06-10 10:11 ` Michael S. Tsirkin
2009-06-10 11:33 ` Michael S. Tsirkin
1 sibling, 0 replies; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 10:11 UTC (permalink / raw)
To: Glauber Costa
Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
Anthony Liguori
On Tue, Jun 09, 2009 at 02:45:54PM -0300, Glauber Costa wrote:
> duplicated save config.
>
> > diff --git a/hw/virtio.h b/hw/virtio.h
> > index 04a3c3d..ce05517 100644
> > --- a/hw/virtio.h
> > +++ b/hw/virtio.h
> > @@ -72,6 +72,10 @@ typedef struct VirtQueueElement
> >
> > typedef struct {
> > void (*notify)(void * opaque, uint16_t vector);
> > + void (*save_config)(void * opaque, QEMUFile *f);
> > + void (*save_queue)(void * opaque, int n, QEMUFile *f);
> > + int (*load_config)(void * opaque, QEMUFile *f);
> > + int (*load_queue)(void * opaque, int n, QEMUFile *f);
> > } VirtIOBindings;
> >
> So, what's the overall effect on a virtual machine that gets migrated,
> of a certain device not implementing one of those functions?
Those are implemented by a transport (e.g. virtio_pci) not the device.
> Will it work? Will it break?
It will work - assuming there's nothing transport-specific you need to
save and load. If there is - this patch is not breaking anything
that isn't already broken ...
--
MST
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: [Qemu-devel] [PATCHv3 08/13] qemu: add support for resizing regions
2009-06-09 17:36 ` [Qemu-devel] " Glauber Costa
2009-06-10 10:05 ` Michael S. Tsirkin
@ 2009-06-10 10:46 ` Michael S. Tsirkin
1 sibling, 0 replies; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 10:46 UTC (permalink / raw)
To: Glauber Costa
Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
Anthony Liguori
On Tue, Jun 09, 2009 at 02:36:21PM -0300, Glauber Costa wrote:
> On Fri, Jun 05, 2009 at 01:23:55PM +0300, Michael S. Tsirkin wrote:
> > Make it possible to resize PCI regions. This will be used by virtio
> > with MSI-X, where the region size depends on whether MSI-X is enabled,
> > and can change across load/save.
> >
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> > hw/pci.c | 54 ++++++++++++++++++++++++++++++++++++------------------
> > hw/pci.h | 3 +++
> > 2 files changed, 39 insertions(+), 18 deletions(-)
> >
> > diff --git a/hw/pci.c b/hw/pci.c
> > index ed011b5..042a216 100644
> > --- a/hw/pci.c
> > +++ b/hw/pci.c
> > @@ -392,6 +392,41 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
> > *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
> > }
> >
> > +static void pci_unmap_region(PCIDevice *d, PCIIORegion *r)
> > +{
> > + if (r->addr == -1)
> > + return;
> > + if (r->type & PCI_ADDRESS_SPACE_IO) {
> > + int class;
> > + /* NOTE: specific hack for IDE in PC case:
> > + only one byte must be mapped. */
> > + class = pci_get_word(d->config + PCI_CLASS_DEVICE);
> > + if (class == 0x0101 && r->size == 4) {
> > + isa_unassign_ioport(r->addr + 2, 1);
> > + } else {
> > + isa_unassign_ioport(r->addr, r->size);
> > + }
> > + } else {
> > + cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
> > + r->size,
> > + IO_MEM_UNASSIGNED);
> > + qemu_unregister_coalesced_mmio(r->addr, r->size);
> > + }
> > +}
> > +
> this is a good cleanup...
>
> > +void pci_resize_io_region(PCIDevice *pci_dev, int region_num,
> > + uint32_t size)
> > +{
> > +
> > + PCIIORegion *r = &pci_dev->io_regions[region_num];
> > + if (r->size == size)
> > + return;
> > + r->size = size;
> > + pci_unmap_region(pci_dev, r);
> > + r->addr = -1;
> > + pci_update_mappings(pci_dev);
> > +}
> > +
> but the only user of this one seem to be commented out, and later removed.
> Why is this needed?
>
This was the missing bit:
Set correct size for msi-x memory region when loading the device.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 589fbb1..f657364 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -133,6 +133,8 @@ static int virtio_pci_load_config(void * opaque, QEMUFile *f)
return ret;
if (msix_present(&proxy->pci_dev))
qemu_get_be16s(f, &proxy->vdev->config_vector);
+
+ pci_resize_io_region(&proxy->pci_dev, 1, msix_bar_size(&proxy->pci_dev));
return 0;
}
--
MST
^ permalink raw reply related [flat|nested] 139+ messages in thread
* Re: [Qemu-devel] [PATCHv3 12/13] qemu: virtio save/load bindings
2009-06-09 17:45 ` [Qemu-devel] " Glauber Costa
2009-06-10 10:11 ` Michael S. Tsirkin
@ 2009-06-10 11:33 ` Michael S. Tsirkin
1 sibling, 0 replies; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 11:33 UTC (permalink / raw)
To: Glauber Costa
Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
Anthony Liguori
On Tue, Jun 09, 2009 at 02:45:54PM -0300, Glauber Costa wrote:
> duplicated save config.
Good catch. Fixed.
--
MST
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
2009-06-10 9:54 ` Michael S. Tsirkin
@ 2009-06-10 14:55 ` Glauber Costa
2009-06-10 15:01 ` Michael S. Tsirkin
0 siblings, 1 reply; 139+ messages in thread
From: Glauber Costa @ 2009-06-10 14:55 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
Anthony Liguori
On Wed, Jun 10, 2009 at 12:54:15PM +0300, Michael S. Tsirkin wrote:
> On Tue, Jun 09, 2009 at 02:11:14PM -0300, Glauber Costa wrote:
> > On Fri, Jun 05, 2009 at 01:23:15PM +0300, Michael S. Tsirkin wrote:
> > > Add routines to manage PCI capability list. First user will be MSI-X.
> > >
> > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > ---
> > > hw/pci.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
> > > hw/pci.h | 18 +++++++++++-
> > > 2 files changed, 106 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/hw/pci.c b/hw/pci.c
> > > index 361d741..ed011b5 100644
> > > --- a/hw/pci.c
> > > +++ b/hw/pci.c
> > > @@ -130,12 +130,13 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
> > > int version = s->cap_present ? 3 : 2;
> > > int i;
> > >
> > > - qemu_put_be32(f, version); /* PCI device version */
> > > + /* PCI device version and capabilities */
> > > + qemu_put_be32(f, version);
> > > + if (version >= 3)
> > > + qemu_put_be32(f, s->cap_present);
> > > qemu_put_buffer(f, s->config, 256);
> > > for (i = 0; i < 4; i++)
> > > qemu_put_be32(f, s->irq_state[i]);
> > > - if (version >= 3)
> > > - qemu_put_be32(f, s->cap_present);
> > > }
> > What is it doing here?
> > You should just do it right in the first patch, instead of doing in
> > one way there, and fixing here.
> >
> > >
> > > int pci_device_load(PCIDevice *s, QEMUFile *f)
> > > @@ -146,12 +147,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> > > version_id = qemu_get_be32(f);
> > > if (version_id > 3)
> > > return -EINVAL;
> > > - qemu_get_buffer(f, s->config, 256);
> > > - pci_update_mappings(s);
> > > -
> > > - if (version_id >= 2)
> > > - for (i = 0; i < 4; i ++)
> > > - s->irq_state[i] = qemu_get_be32(f);
> > > if (version_id >= 3)
> > > s->cap_present = qemu_get_be32(f);
> > > else
> > ditto.
> > > @@ -160,6 +155,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> > > if (s->cap_present & ~s->cap_supported)
> > > return -EINVAL;
> > >
> > > + qemu_get_buffer(f, s->config, 256);
> > > + pci_update_mappings(s);
> > > +
> > > + if (version_id >= 2)
> > > + for (i = 0; i < 4; i ++)
> > > + s->irq_state[i] = qemu_get_be32(f);
> > > + /* Clear wmask and used bits for capabilities.
> > > + Must be restored separately, since capabilities can
> > > + be placed anywhere in config space. */
> > > + memset(s->used, 0, PCI_CONFIG_SPACE_SIZE);
> > > + for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> > > + s->wmask[i] = 0xff;
> > > return 0;
> > > }
> > Sorry, I don't exactly understand it. Although it can be anywhere, what do we actually
> > lose by keeping it at the same place in config space?
>
> We lose the ability to let user control the capabilities exposed
> by the device.
>
> And generally, I dislike arbitrary limitations. The PCI spec says the
> capability can be anywhere, implementing a linked list of caps is simple
> > enough to not invent arbitrary restrictions.
yes, but this is migration time, right?
caps can be anywhere, but we don't expect it to change during machine execution
lifetime.
Or I am just confused by the name "pci_device_load" ?
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
2009-06-10 14:55 ` Glauber Costa
@ 2009-06-10 15:01 ` Michael S. Tsirkin
2009-06-10 15:24 ` Paul Brook
0 siblings, 1 reply; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 15:01 UTC (permalink / raw)
To: Glauber Costa
Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
Anthony Liguori
On Wed, Jun 10, 2009 at 11:55:40AM -0300, Glauber Costa wrote:
> On Wed, Jun 10, 2009 at 12:54:15PM +0300, Michael S. Tsirkin wrote:
> > On Tue, Jun 09, 2009 at 02:11:14PM -0300, Glauber Costa wrote:
> > > On Fri, Jun 05, 2009 at 01:23:15PM +0300, Michael S. Tsirkin wrote:
> > > > Add routines to manage PCI capability list. First user will be MSI-X.
> > > >
> > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > ---
> > > > hw/pci.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
> > > > hw/pci.h | 18 +++++++++++-
> > > > 2 files changed, 106 insertions(+), 10 deletions(-)
> > > >
> > > > diff --git a/hw/pci.c b/hw/pci.c
> > > > index 361d741..ed011b5 100644
> > > > --- a/hw/pci.c
> > > > +++ b/hw/pci.c
> > > > @@ -130,12 +130,13 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
> > > > int version = s->cap_present ? 3 : 2;
> > > > int i;
> > > >
> > > > - qemu_put_be32(f, version); /* PCI device version */
> > > > + /* PCI device version and capabilities */
> > > > + qemu_put_be32(f, version);
> > > > + if (version >= 3)
> > > > + qemu_put_be32(f, s->cap_present);
> > > > qemu_put_buffer(f, s->config, 256);
> > > > for (i = 0; i < 4; i++)
> > > > qemu_put_be32(f, s->irq_state[i]);
> > > > - if (version >= 3)
> > > > - qemu_put_be32(f, s->cap_present);
> > > > }
> > > What is it doing here?
> > > You should just do it right in the first patch, instead of doing in
> > > one way there, and fixing here.
> > >
> > > >
> > > > int pci_device_load(PCIDevice *s, QEMUFile *f)
> > > > @@ -146,12 +147,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> > > > version_id = qemu_get_be32(f);
> > > > if (version_id > 3)
> > > > return -EINVAL;
> > > > - qemu_get_buffer(f, s->config, 256);
> > > > - pci_update_mappings(s);
> > > > -
> > > > - if (version_id >= 2)
> > > > - for (i = 0; i < 4; i ++)
> > > > - s->irq_state[i] = qemu_get_be32(f);
> > > > if (version_id >= 3)
> > > > s->cap_present = qemu_get_be32(f);
> > > > else
> > > ditto.
> > > > @@ -160,6 +155,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> > > > if (s->cap_present & ~s->cap_supported)
> > > > return -EINVAL;
> > > >
> > > > + qemu_get_buffer(f, s->config, 256);
> > > > + pci_update_mappings(s);
> > > > +
> > > > + if (version_id >= 2)
> > > > + for (i = 0; i < 4; i ++)
> > > > + s->irq_state[i] = qemu_get_be32(f);
> > > > + /* Clear wmask and used bits for capabilities.
> > > > + Must be restored separately, since capabilities can
> > > > + be placed anywhere in config space. */
> > > > + memset(s->used, 0, PCI_CONFIG_SPACE_SIZE);
> > > > + for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> > > > + s->wmask[i] = 0xff;
> > > > return 0;
> > > > }
> > > Sorry, I don't exactly understand it. Although it can be anywhere, what do we actually
> > > lose by keeping it at the same place in config space?
> >
> > We lose the ability to let user control the capabilities exposed
> > by the device.
> >
> > And generally, I dislike arbitrary limitations. The PCI spec says the
> > capability can be anywhere, implementing a linked list of caps is simple
> > > enough to not invent arbitrary restrictions.
> yes, but this is migration time, right?
I think so, yes.
>
> caps can be anywhere, but we don't expect it to change during machine execution
> lifetime.
>
> Or I am just confused by the name "pci_device_load" ?
Right. So I want to load an image and it has capability X at offset Y.
wmask has to match. I don't want to assume that we never change Y
for the device without breaking old images, so I clear wmask here
and set it up again after looking up capabilities that I loaded.
Maybe this explanation should go into the comment above?
--
MST
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
2009-06-10 15:01 ` Michael S. Tsirkin
@ 2009-06-10 15:24 ` Paul Brook
2009-06-10 15:50 ` Michael S. Tsirkin
2009-06-10 17:43 ` Jamie Lokier
0 siblings, 2 replies; 139+ messages in thread
From: Paul Brook @ 2009-06-10 15:24 UTC (permalink / raw)
To: qemu-devel
Cc: Michael S. Tsirkin, Glauber Costa, Carsten Otte, kvm,
Rusty Russell, virtualization, Blue Swirl, Christian Borntraeger,
Avi Kivity
> > caps can be anywhere, but we don't expect it to change during machine
> > execution lifetime.
> >
> > Or I am just confused by the name "pci_device_load" ?
>
> Right. So I want to load an image and it has capability X at offset Y.
> wmask has to match. I don't want to assume that we never change Y
> for the device without breaking old images, so I clear wmask here
> and set it up again after looking up capabilities that I loaded.
We should not be loading state into a different device (or a similar device
with a different set of capabilities).
If you want to provide backwards compatibility then you should do that by
creating a device that is the same as the original. As I mentioned in my
earlier mail, loading a snapshot should never do anything that can not be
achieved through normal operation.
Paul
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
2009-06-10 15:24 ` Paul Brook
@ 2009-06-10 15:50 ` Michael S. Tsirkin
2009-06-10 17:43 ` Jamie Lokier
1 sibling, 0 replies; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 15:50 UTC (permalink / raw)
To: Paul Brook
Cc: qemu-devel, Glauber Costa, Carsten Otte, kvm, Rusty Russell,
virtualization, Blue Swirl, Christian Borntraeger, Avi Kivity
On Wed, Jun 10, 2009 at 04:24:28PM +0100, Paul Brook wrote:
> > > caps can be anywhere, but we don't expect it to change during machine
> > > execution lifetime.
> > >
> > > Or I am just confused by the name "pci_device_load" ?
> >
> > Right. So I want to load an image and it has capability X at offset Y.
> > wmask has to match. I don't want to assume that we never change Y
> > for the device without breaking old images, so I clear wmask here
> > and set it up again after looking up capabilities that I loaded.
>
> We should not be loading state into a different device (or a similar device
> with a different set of capabilities).
>
> If you want to provide backwards compatibility then you should do that by
> creating a device that is the same as the original. As I mentioned in my
> earlier mail, loading a snapshot should never do anything that can not be
> achieved through normal operation.
>
> Paul
Why shouldn't it? You don't load a snapshot while guest is running.
--
MST
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
2009-06-10 15:24 ` Paul Brook
2009-06-10 15:50 ` Michael S. Tsirkin
@ 2009-06-10 17:43 ` Jamie Lokier
2009-06-10 18:22 ` Michael S. Tsirkin
1 sibling, 1 reply; 139+ messages in thread
From: Jamie Lokier @ 2009-06-10 17:43 UTC (permalink / raw)
To: Paul Brook
Cc: qemu-devel, Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
Rusty Russell, virtualization, Blue Swirl, Christian Borntraeger,
Avi Kivity
Paul Brook wrote:
> > > caps can be anywhere, but we don't expect it to change during machine
> > > execution lifetime.
> > >
> > > Or I am just confused by the name "pci_device_load" ?
> >
> > Right. So I want to load an image and it has capability X at offset Y.
> > wmask has to match. I don't want to assume that we never change Y
> > for the device without breaking old images, so I clear wmask here
> > and set it up again after looking up capabilities that I loaded.
>
> We should not be loading state into a different device (or a similar device
> with a different set of capabilities).
>
> If you want to provide backwards compatibility then you should do that by
> creating a device that is the same as the original. As I mentioned in my
> earlier mail, loading a snapshot should never do anything that can not be
> achieved through normal operation.
If you can create a machine by restoring a snapshot which you can't
create by normally starting QEMU, then you'll soon have guests which
work fine from their snapshots, but which cannot be booted without a
snapshot because there's no way to boot the right machine for the guest.
Someone might even have guests like that for years without noticing,
because they always save and restore guest state using snapshots, then
one day they simply want to boot the guest from its disk image and
find there's no way to do it with any QEMU which runs on their host
platform.
I think the right long term answer to all this is a way to get QEMU to
dump its current machine configuration in glorious detail as a file
which can be reloaded as a machine configuration.
-- Jamie
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
2009-06-10 17:43 ` Jamie Lokier
@ 2009-06-10 18:22 ` Michael S. Tsirkin
2009-06-10 19:27 ` Jamie Lokier
0 siblings, 1 reply; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 18:22 UTC (permalink / raw)
To: Jamie Lokier
Cc: Paul Brook, qemu-devel, Carsten Otte, kvm, Glauber Costa,
Rusty Russell, virtualization, Blue Swirl, Christian Borntraeger,
Avi Kivity
On Wed, Jun 10, 2009 at 06:43:02PM +0100, Jamie Lokier wrote:
> Paul Brook wrote:
> > > > caps can be anywhere, but we don't expect it to change during machine
> > > > execution lifetime.
> > > >
> > > > Or I am just confused by the name "pci_device_load" ?
> > >
> > > Right. So I want to load an image and it has capability X at offset Y.
> > > wmask has to match. I don't want to assume that we never change Y
> > > for the device without breaking old images, so I clear wmask here
> > > and set it up again after looking up capabilities that I loaded.
> >
> > We should not be loading state into a different device (or a similar device
> > with a different set of capabilities).
> >
> > If you want to provide backwards compatibility then you should do that by
> > creating a device that is the same as the original. As I mentioned in my
> > earlier mail, loading a snapshot should never do anything that can not be
> > achieved through normal operation.
>
> If you can create a machine by restoring a snapshot which you can't
> create by normally starting QEMU, then you'll soon have guests which
> work fine from their snapshots, but which cannot be booted without a
> snapshot because there's no way to boot the right machine for the guest.
Yes. This clearly isn't what I'm building here. You *can* create a guest
without msi-x support by passing an appropriate flag.
> Someone might even have guests like that for years without noticing,
> because they always save and restore guest state using snapshots, then
> one day they simply want to boot the guest from its disk image and
> find there's no way to do it with any QEMU which runs on their host
> platform.
>
> I think the right long term answer to all this is a way to get QEMU to
> dump its current machine configuration in glorious detail as a file
> which can be reloaded as a machine configuration.
>
> -- Jamie
And then we'll have the same set of problems there.
--
MST
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
2009-06-10 18:22 ` Michael S. Tsirkin
@ 2009-06-10 19:27 ` Jamie Lokier
2009-06-12 8:43 ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Mark McLoughlin
0 siblings, 1 reply; 139+ messages in thread
From: Jamie Lokier @ 2009-06-10 19:27 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Paul Brook, qemu-devel, Carsten Otte, kvm, Glauber Costa,
Rusty Russell, virtualization, Blue Swirl, Christian Borntraeger,
Avi Kivity
Michael S. Tsirkin wrote:
> > I think the right long term answer to all this is a way to get QEMU to
> > dump its current machine configuration in glorious detail as a file
> > which can be reloaded as a machine configuration.
>
> And then we'll have the same set of problems there.
We will, and the solution will be the same: options to create devices
as they were in older versions of QEMU. It only needs to cover device
features which matter to guests, not every bug fix.
However with a machine configuration which is generated by QEMU,
there's less worry about proliferation of obscure options, compared
with the command line. You don't necessarily have to document every
backward-compatibility option in any detail, you just have to make
sure it's written and read properly, which is much the same thing as
the snapshot code does.
-- Jamie
^ permalink raw reply [flat|nested] 139+ messages in thread
* Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-10 19:27 ` Jamie Lokier
@ 2009-06-12 8:43 ` Mark McLoughlin
2009-06-12 13:59 ` Michael S. Tsirkin
` (2 more replies)
0 siblings, 3 replies; 139+ messages in thread
From: Mark McLoughlin @ 2009-06-12 8:43 UTC (permalink / raw)
To: Jamie Lokier
Cc: Michael S. Tsirkin, Carsten Otte, kvm, Glauber Costa,
Rusty Russell, qemu-devel, virtualization, Blue Swirl,
Christian Borntraeger, Paul Brook, Avi Kivity
On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> Michael S. Tsirkin wrote:
> > > I think the right long term answer to all this is a way to get QEMU to
> > > dump its current machine configuration in glorious detail as a file
> > > which can be reloaded as a machine configuration.
> >
> > And then we'll have the same set of problems there.
>
> We will, and the solution will be the same: options to create devices
> as they were in older versions of QEMU. It only needs to cover device
> features which matter to guests, not every bug fix.
>
> However with a machine configuration which is generated by QEMU,
> there's less worry about proliferation of obscure options, compared
> with the command line. You don't necessarily have to document every
> backward-compatibility option in any detail, you just have to make
> sure it's written and read properly, which is much the same thing as
> the snapshot code does.
This is a sensible plan, but I don't think we should mix these compat
options in with the VM manager supplied configuration.
There are two problems with that approach.
= Problem 1 - VM manager needs to parse qemu config =
Your proposal implies:
- VM manager supplies a basic configuration to qemu
- It then immediately asks qemu for a dump of the machine
configuration in all its glorious detail and retains that
config
- If the VM manager wishes to add a new device it needs to parse the
qemu config and add it, rather than just generate an entirely new
config
= Problem 2 - We can't predict the future =
If a VM manager supplies a configuration which is missing any given
option, qemu cannot tell the difference between:
- This is a basic config, the VM manager wants whatever the default
of the current qemu version is
- This is a complete config dumped using an old version of qemu, the
VM manager wants the old default
= Solution - Separate configuration from compat hints =
As I suggested before:
- Allow the VM manager to dump compat hints; this would be an opaque
file format, more like the savevm format than a config file
- Use defaults where compat hints are not available; e.g. if the VM
manager specifies a device config, but no compat hints are
supplied for it, then just use default values
- Make the config override compat hints; e.g. if there are compat
hints specified for a device not included in the machine config,
just ignore those hints
Cheers,
Mark.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-12 8:43 ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Mark McLoughlin
@ 2009-06-12 13:59 ` Michael S. Tsirkin
2009-06-12 14:48 ` Mark McLoughlin
2009-06-12 14:51 ` Anthony Liguori
2009-06-12 14:55 ` Anthony Liguori
2 siblings, 1 reply; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-12 13:59 UTC (permalink / raw)
To: Mark McLoughlin
Cc: Jamie Lokier, Carsten Otte, kvm, Glauber Costa, Rusty Russell,
qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
Paul Brook, Avi Kivity
On Fri, Jun 12, 2009 at 09:43:29AM +0100, Mark McLoughlin wrote:
> = Solution - Separate configuration from compat hints =
>
> As I suggested before:
>
> - Allow the VM manager to dump compat hints; this would be an opaque
> file format, more like the savevm format than a config file
Why make it "like the savevm" format then?
If they are opaque anyway, compat hints could be part of savevm format.
> - Use defaults where compat hints are not available; e.g. if the VM
> manager specifies a device config, but no compat hints are
> supplied for it, then just use default values
>
> - Make the config override compat hints; e.g. if there are compat
> hints specified for a device not included in the machine config,
> just ignore those hints
>
> Cheers,
> Mark.
If compat hints are opaque and only editable by qemu, we can get into a
situation where one can't create a specific setup with a new qemu.
--
MST
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-12 13:59 ` Michael S. Tsirkin
@ 2009-06-12 14:48 ` Mark McLoughlin
0 siblings, 0 replies; 139+ messages in thread
From: Mark McLoughlin @ 2009-06-12 14:48 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Jamie Lokier, Carsten Otte, kvm, Glauber Costa, Rusty Russell,
qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
Paul Brook, Avi Kivity
On Fri, 2009-06-12 at 16:59 +0300, Michael S. Tsirkin wrote:
> On Fri, Jun 12, 2009 at 09:43:29AM +0100, Mark McLoughlin wrote:
> > = Solution - Separate configuration from compat hints =
> >
> > As I suggested before:
> >
> > - Allow the VM manager to dump compat hints; this would be an opaque
> > file format, more like the savevm format than a config file
>
> Why make it "like the savevm" format then?
> If they are opaque anyway, compat hints could be part of savevm format.
So a "savevm --only-compat-hints" command? It might make sense, since we
would want the compat hints with savevm too.
> > - Use defaults where compat hints are not available; e.g. if the VM
> > manager specifies a device config, but no compat hints are
> > supplied for it, then just use default values
> >
> > - Make the config override compat hints; e.g. if there are compat
> > hints specified for a device not included in the machine config,
> > just ignore those hints
> >
> > Cheers,
> > Mark.
>
> If compat hints are opaque and only editable by qemu, we can get into a
> situation where one can't create a specific setup with a new qemu.
An example?
Cheers,
Mark.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-12 8:43 ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Mark McLoughlin
2009-06-12 13:59 ` Michael S. Tsirkin
@ 2009-06-12 14:51 ` Anthony Liguori
2009-06-12 15:41 ` Mark McLoughlin
2009-06-14 7:55 ` Avi Kivity
2009-06-12 14:55 ` Anthony Liguori
2 siblings, 2 replies; 139+ messages in thread
From: Anthony Liguori @ 2009-06-12 14:51 UTC (permalink / raw)
To: Mark McLoughlin
Cc: Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity
Mark McLoughlin wrote:
> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
>
>> Michael S. Tsirkin wrote:
>>
>>>> I think the right long term answer to all this is a way to get QEMU to
>>>> dump its current machine configuration in glorious detail as a file
>>>> which can be reloaded as a machine configuration.
>>>>
>>> And then we'll have the same set of problems there.
>>>
>> We will, and the solution will be the same: options to create devices
>> as they were in older versions of QEMU. It only needs to cover device
>> features which matter to guests, not every bug fix.
>>
>> However with a machine configuration which is generated by QEMU,
>> there's less worry about proliferation of obscure options, compared
>> with the command line. You don't necessarily have to document every
>> backward-compatibility option in any detail, you just have to make
>> sure it's written and read properly, which is much the same thing as
>> the snapshot code does.
>>
>
> This is a sensible plan, but I don't think we should mix these compat
> options in with the VM manager supplied configuration.
>
> There are two problems with that approach.
>
> = Problem 1 - VM manager needs to parse qemu config =
>
> Your proposal implies:
>
> - VM manager supplies a basic configuration to qemu
>
> - It then immediately asks qemu for a dump of the machine
> configuration in all its glorious detail and retains that
> config
>
> - If the VM manager wishes to add a new device it needs to parse the
> qemu config and add it, rather than just generate an entirely new
> config
>
What's the problem with parsing the device config and modifying it? Is
it just complexity?
If we provided a mechanism to simplify manipulating a device config,
would that eliminate the concern here?
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-12 8:43 ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Mark McLoughlin
2009-06-12 13:59 ` Michael S. Tsirkin
2009-06-12 14:51 ` Anthony Liguori
@ 2009-06-12 14:55 ` Anthony Liguori
2009-06-12 15:53 ` Mark McLoughlin
2 siblings, 1 reply; 139+ messages in thread
From: Anthony Liguori @ 2009-06-12 14:55 UTC (permalink / raw)
To: Mark McLoughlin
Cc: Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity
Mark McLoughlin wrote:
> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
>
> = Solution - Separate configuration from compat hints =
>
> As I suggested before:
>
> - Allow the VM manager to dump compat hints; this would be an opaque
> file format, more like the savevm format than a config file
>
How is compat hints different from a device tree?
In my mind, that's what compat hints is. I don't see another sane way
to implement it.
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-12 14:51 ` Anthony Liguori
@ 2009-06-12 15:41 ` Mark McLoughlin
2009-06-12 16:11 ` Anthony Liguori
2009-06-14 7:55 ` Avi Kivity
1 sibling, 1 reply; 139+ messages in thread
From: Mark McLoughlin @ 2009-06-12 15:41 UTC (permalink / raw)
To: Anthony Liguori
Cc: Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity
On Fri, 2009-06-12 at 09:51 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> >
> >> Michael S. Tsirkin wrote:
> >>
> >>>> I think the right long term answer to all this is a way to get QEMU to
> >>>> dump its current machine configuration in glorious detail as a file
> >>>> which can be reloaded as a machine configuration.
> >>>>
> >>> And then we'll have the same set of problems there.
> >>>
> >> We will, and the solution will be the same: options to create devices
> >> as they were in older versions of QEMU. It only needs to cover device
> >> features which matter to guests, not every bug fix.
> >>
> >> However with a machine configuration which is generated by QEMU,
> >> there's less worry about proliferation of obscure options, compared
> >> with the command line. You don't necessarily have to document every
> >> backward-compatibility option in any detail, you just have to make
> >> sure it's written and read properly, which is much the same thing as
> >> the snapshot code does.
> >>
> >
> > This is a sensible plan, but I don't think we should mix these compat
> > options in with the VM manager supplied configuration.
> >
> > There are two problems with that approach.
> >
> > = Problem 1 - VM manager needs to parse qemu config =
> >
> > Your proposal implies:
> >
> > - VM manager supplies a basic configuration to qemu
> >
> > - It then immediately asks qemu for a dump of the machine
> > configuration in all its glorious detail and retains that
> > config
> >
> > - If the VM manager wishes to add a new device it needs to parse the
> > qemu config and add it, rather than just generate an entirely new
> > config
> >
>
> What's the problem with parsing the device config and modifying it? Is
> it just complexity?
Yes, complexity is the issue.
> If we provided a mechanism to simplify manipulating a device config,
> would that eliminate the concern here?
In libvirt's case, a lot of the complexity would come from needing to
figure out what to change.
i.e. libvirt produces a qemu configuration (currently a command line)
from a guest XML's configuration; with this idea, libvirt would probably
compare the old XML config to the new XML config, and then apply those
differences to the qemu configuration.
Cheers,
Mark.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-12 14:55 ` Anthony Liguori
@ 2009-06-12 15:53 ` Mark McLoughlin
2009-06-12 16:12 ` Anthony Liguori
2009-06-14 9:34 ` Michael S. Tsirkin
0 siblings, 2 replies; 139+ messages in thread
From: Mark McLoughlin @ 2009-06-12 15:53 UTC (permalink / raw)
To: Anthony Liguori
Cc: Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity
On Fri, 2009-06-12 at 09:55 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> >
> > = Solution - Separate configuration from compat hints =
> >
> > As I suggested before:
> >
> > - Allow the VM manager to dump compat hints; this would be an opaque
> > file format, more like the savevm format than a config file
> >
>
> How is compat hints different from a device tree?
>
> In my mind, that's what compat hints is. I don't see another sane way
> to implement it.
A device tree with a different purpose than a config file.
In its simplest form it could be a device tree with a version number for
each device[1].
The other obvious piece to add to it would be PCI addresses, so that
even if you remove a device, the addresses assigned to existing devices
don't change.
Cheers,
Mark.
[1] - Adding such a per-device version number to the config file would
solve problem (2)
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-12 15:41 ` Mark McLoughlin
@ 2009-06-12 16:11 ` Anthony Liguori
2009-06-12 16:48 ` Mark McLoughlin
0 siblings, 1 reply; 139+ messages in thread
From: Anthony Liguori @ 2009-06-12 16:11 UTC (permalink / raw)
To: Mark McLoughlin
Cc: Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity
Mark McLoughlin wrote:
> On Fri, 2009-06-12 at 09:51 -0500, Anthony Liguori wrote:
>
>> Mark McLoughlin wrote:
>>
>>> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
>>>
>>>
>>>> Michael S. Tsirkin wrote:
>>>>
>>>>
>>>>>> I think the right long term answer to all this is a way to get QEMU to
>>>>>> dump its current machine configuration in glorious detail as a file
>>>>>> which can be reloaded as a machine configuration.
>>>>>>
>>>>>>
>>>>> And then we'll have the same set of problems there.
>>>>>
>>>>>
>>>> We will, and the solution will be the same: options to create devices
>>>> as they were in older versions of QEMU. It only needs to cover device
>>>> features which matter to guests, not every bug fix.
>>>>
>>>> However with a machine configuration which is generated by QEMU,
>>>> there's less worry about proliferation of obscure options, compared
>>>> with the command line. You don't necessarily have to document every
>>>> backward-compatibility option in any detail, you just have to make
>>>> sure it's written and read properly, which is much the same thing as
>>>> the snapshot code does.
>>>>
>>>>
>>> This is a sensible plan, but I don't think we should mix these compat
>>> options in with the VM manager supplied configuration.
>>>
>>> There are two problems with that approach.
>>>
>>> = Problem 1 - VM manager needs to parse qemu config =
>>>
>>> Your proposal implies:
>>>
>>> - VM manager supplies a basic configuration to qemu
>>>
>>> - It then immediately asks qemu for a dump of the machine
>>> configuration in all its glorious detail and retains that
>>> config
>>>
>>> - If the VM manager wishes to add a new device it needs to parse the
>>> qemu config and add it, rather than just generate an entirely new
>>> config
>>>
>>>
>> What's the problem with parsing the device config and modifying it? Is
>> it just complexity?
>>
>
> Yes, complexity is the issue.
>
>
>> If we provided a mechanism to simplify manipulating a device config,
>> would that eliminate the concern here?
>>
>
> In libvirt's case, a lot of the complexity would come from needing to
> figure out what to change.
>
Right, libvirt wants to be able to easily say "add a scsi block device
to this VM". The way I see this working is that there would be a
default pc.dtc. We would still have a -drive file=foo.img,if=scsi
option that would really just be a wrapper around first searching for an
existing LSI controller, if one exists, attaching the lun, if not,
create one, etc.
libvirt could continue to use this sort of interface. However, as it
wants to do more advanced things, it may have to dive into the device
tree itself.
On live migration, QEMU will save a copy of the device tree somewhere
and libvirt needs to keep track of it. It can treat it as opaque. -M
/path/to/foo.dtc -drive file=foo.img,if=scsi should continue working as
expected IMHO.
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-12 15:53 ` Mark McLoughlin
@ 2009-06-12 16:12 ` Anthony Liguori
2009-06-12 16:48 ` Mark McLoughlin
2009-06-14 9:34 ` Michael S. Tsirkin
1 sibling, 1 reply; 139+ messages in thread
From: Anthony Liguori @ 2009-06-12 16:12 UTC (permalink / raw)
To: Mark McLoughlin
Cc: Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity
Mark McLoughlin wrote:
> On Fri, 2009-06-12 at 09:55 -0500, Anthony Liguori wrote:
>
>> Mark McLoughlin wrote:
>>
>>> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
>>>
>>> = Solution - Separate configuration from compat hints =
>>>
>>> As I suggested before:
>>>
>>> - Allow the VM manager to dump compat hints; this would be an opaque
>>> file format, more like the savevm format than a config file
>>>
>>>
>> How is compat hints different from a device tree?
>>
>> In my mind, that's what compat hints is. I don't see another sane way
>> to implement it.
>>
>
> A device tree with a different purpose than a config file.
>
> In its simplest form it could be a device tree with a version number for
> each device[1].
>
I think the point is that you don't need version numbers if you have a
proper device tree. NB the device tree contains no host configuration
information.
Regards,
Anthony Liguori
> The other obvious piece to add to it would be PCI addresses, so that
> even if you remove a device, the addresses assigned to existing devices
> don't change.
>
> Cheers,
> Mark.
>
> [1] - Adding such a per-device version number to the config file would
> solve problem (2)
>
>
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-12 16:11 ` Anthony Liguori
@ 2009-06-12 16:48 ` Mark McLoughlin
2009-06-12 17:00 ` Anthony Liguori
2009-06-14 9:50 ` Michael S. Tsirkin
0 siblings, 2 replies; 139+ messages in thread
From: Mark McLoughlin @ 2009-06-12 16:48 UTC (permalink / raw)
To: Anthony Liguori
Cc: Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity
On Fri, 2009-06-12 at 11:11 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > On Fri, 2009-06-12 at 09:51 -0500, Anthony Liguori wrote:
> >
> >> Mark McLoughlin wrote:
> >>
> >>> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> >>>
> >>>
> >>>> Michael S. Tsirkin wrote:
> >>>>
> >>>>
> >>>>>> I think the right long term answer to all this is a way to get QEMU to
> >>>>>> dump its current machine configuration in glorious detail as a file
> >>>>>> which can be reloaded as a machine configuration.
> >>>>>>
> >>>>>>
> >>>>> And then we'll have the same set of problems there.
> >>>>>
> >>>>>
> >>>> We will, and the solution will be the same: options to create devices
> >>>> as they were in older versions of QEMU. It only needs to cover device
> >>>> features which matter to guests, not every bug fix.
> >>>>
> >>>> However with a machine configuration which is generated by QEMU,
> >>>> there's less worry about proliferation of obscure options, compared
> >>>> with the command line. You don't necessarily have to document every
> >>>> backward-compatibility option in any detail, you just have to make
> >>>> sure it's written and read properly, which is much the same thing as
> >>>> the snapshot code does.
> >>>>
> >>>>
> >>> This is a sensible plan, but I don't think we should mix these compat
> >>> options in with the VM manager supplied configuration.
> >>>
> >>> There are two problems with that approach.
> >>>
> >>> = Problem 1 - VM manager needs to parse qemu config =
> >>>
> >>> Your proposal implies:
> >>>
> >>> - VM manager supplies a basic configuration to qemu
> >>>
> >>> - It then immediately asks qemu for a dump of the machine
> >>> configuration in all its glorious detail and retains that
> >>> config
> >>>
> >>> - If the VM manager wishes to add a new device it needs to parse the
> >>> qemu config and add it, rather than just generate an entirely new
> >>> config
> >>>
> >>>
> >> What's the problem with parsing the device config and modifying it? Is
> >> it just complexity?
> >>
> >
> > Yes, complexity is the issue.
> >
> >
> >> If we provided a mechanism to simplify manipulating a device config,
> >> would that eliminate the concern here?
> >>
> >
> > In libvirt's case, a lot of the complexity would come from needing to
> > figure out what to change.
> >
>
> Right, libvirt wants to be able to easily say "add a scsi block device
> to this VM". The way I see this working is that there would be a
> default pc.dtc. We would still have a -drive file=foo.img,if=scsi
> option that would really just be a wrapper around first searching for an
> existing LSI controller, if one exists, attaching the lun, if not,
> create one, etc.
>
> libvirt could continue to use this sort of interface. However, as it
> wants to do more advanced things, it may have to dive into the device
> tree itself.
>
> On live migration, QEMU will save a copy of the device tree somewhere
> and libvirt needs to keep track of it. It can treat it as opaque. -M
> /path/to/foo.dtc -drive file=foo.img,if=scsi should continue working as
> expected IMHO.
So, when libvirt creates a guest for the first time, it makes a copy of
the device tree and continues to use that even if qemu is upgraded.
That's enough to ensure compat is retained for all built-in devices.
However, in order to retain compat for that SCSI device (e.g. ensuring
the PCI address doesn't change as other devices are added and removed),
we're back to the same problem ... either:
1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure
out what address to use, libvirt would need to query qemu for what
address was originally allocated to device or it would do all the
PCI address allocation itself ... or:
2) Don't use the command line, instead get a dump of the entire
device tree (including the SCSI device) - if the device is to be
removed or modified in future, libvirt would need to modify the
device tree
The basic problem would be that the command line config would have very
limited ability to override the device tree config.
Cheers,
Mark.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-12 16:12 ` Anthony Liguori
@ 2009-06-12 16:48 ` Mark McLoughlin
2009-06-14 7:58 ` Avi Kivity
0 siblings, 1 reply; 139+ messages in thread
From: Mark McLoughlin @ 2009-06-12 16:48 UTC (permalink / raw)
To: Anthony Liguori
Cc: Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity
On Fri, 2009-06-12 at 11:12 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > On Fri, 2009-06-12 at 09:55 -0500, Anthony Liguori wrote:
> >
> >> Mark McLoughlin wrote:
> >>
> >>> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> >>>
> >>> = Solution - Separate configuration from compat hints =
> >>>
> >>> As I suggested before:
> >>>
> >>> - Allow the VM manager to dump compat hints; this would be an opaque
> >>> file format, more like the savevm format than a config file
> >>>
> >>>
> >> How is compat hints different from a device tree?
> >>
> >> In my mind, that's what compat hints is. I don't see another sane way
> >> to implement it.
> >>
> >
> > A device tree with a different purpose than a config file.
> >
> > In its simplest form it could be a device tree with a version number for
> > each device[1].
> >
>
> I think the point is that you don't need version numbers if you have a
> proper device tree.
How do you add a new attribute to the device tree and, when a supplied
device tree lacks said attribute, distinguish between a device tree
from an old version of qemu (i.e. use the old default) and a partial
device tree from the VM manager (i.e. use the new default) ?
> NB the device tree contains no host configuration information.
So, it wouldn't e.g. include the path to the image file for a block
device? That would always be specified on the command line?
Cheers,
Mark.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-12 16:48 ` Mark McLoughlin
@ 2009-06-12 17:00 ` Anthony Liguori
2009-06-12 17:31 ` Mark McLoughlin
2009-06-14 9:50 ` Michael S. Tsirkin
1 sibling, 1 reply; 139+ messages in thread
From: Anthony Liguori @ 2009-06-12 17:00 UTC (permalink / raw)
To: Mark McLoughlin
Cc: Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity
Mark McLoughlin wrote:
> So, when libvirt creates a guest for the first time, it makes a copy of
> the device tree and continues to use that even if qemu is upgraded.
> That's enough to ensure compat is retained for all built-in devices.
>
> However, in order to retain compat for that SCSI device (e.g. ensuring
> the PCI address doesn't change as other devices are added and removed),
> we're back to the same problem ... either:
>
> 1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure
> out what address to use, libvirt would need to query qemu for what
> address was originally allocated to device or it would do all the
> PCI address allocation itself ... or:
>
> 2) Don't use the command line, instead get a dump of the entire
> device tree (including the SCSI device) - if the device is to be
> removed or modified in future, libvirt would need to modify the
> device tree
>
> The basic problem would be that the command line config would have very
> limited ability to override the device tree config.
>
After libvirt has done -drive file=foo... it should dump the machine
config and use that from then on.
To combine into a single thread...
> How do you add a new attribute to the device tree and, when a supplied
> device tree lacks said attribute, distinguish between a device tree
> from an old version of qemu (i.e. use the old default) and a partial
> device tree from the VM manager (i.e. use the new default) ?
>
Please define "attribute". I don't follow what you're asking.
>> NB the device tree contains no host configuration information.
>>
>
> So, it wouldn't e.g. include the path to the image file for a block
> device? That would always be specified on the command line?
>
No, the IDE definition would contain some sort of symbolic node name. A
separate mechanism (either command line or host config file) would then
link an image file to the symbolic name.
libvirt should really never worry about the machine config file for
normal things unless it needs to change what devices are exposed to a guest.
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-12 17:00 ` Anthony Liguori
@ 2009-06-12 17:31 ` Mark McLoughlin
2009-06-12 17:44 ` Blue Swirl
2009-06-16 18:38 ` Jamie Lokier
0 siblings, 2 replies; 139+ messages in thread
From: Mark McLoughlin @ 2009-06-12 17:31 UTC (permalink / raw)
To: Anthony Liguori
Cc: Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity
On Fri, 2009-06-12 at 12:00 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > So, when libvirt creates a guest for the first time, it makes a copy of
> > the device tree and continues to use that even if qemu is upgraded.
> > That's enough to ensure compat is retained for all built-in devices.
> >
> > However, in order to retain compat for that SCSI device (e.g. ensuring
> > the PCI address doesn't change as other devices are added and removed),
> > we're back to the same problem ... either:
> >
> > 1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure
> > out what address to use, libvirt would need to query qemu for what
> > address was originally allocated to device or it would do all the
> > PCI address allocation itself ... or:
> >
> > 2) Don't use the command line, instead get a dump of the entire
> > device tree (including the SCSI device) - if the device is to be
> > removed or modified in future, libvirt would need to modify the
> > device tree
> >
> > The basic problem would be that the command line config would have very
> > limited ability to override the device tree config.
> >
>
> After libvirt has done -drive file=foo... it should dump the machine
> config and use that from then on.
Right - libvirt then wouldn't be able to avoid the complexity of merging
any future changes into the dumped machine config.
> To combine into a single thread...
> > How do you add a new attribute to the device tree and, when a supplied
> > device tree lacking said attribute, distinguish between a device tree
> > from an old version of qemu (i.e. use the old default) and a partial
> > device tree from the VM manager (i.e. use the new default) ?
> >
>
> Please define "attribute". I don't follow what you're asking.
e.g. a per-device "enable MSI support" flag.
If qemu is supplied with a device tree that lacks that flag, does it
enable or disable MSI?
Enable by default is bad - it could be a device tree dumped from an old
version of qemu, so compat would be broken.
Disable by default is bad - it could be a simple device tree supplied by
the user, and the latest features are wanted.
Maybe we want a per-device "this is a complete device description" flag
and if anything is missing from a supposedly complete description, the
old defaults would be used. A config dumped from qemu would have this
flag set, a config generated by libvirt would not have the flag.
> >> NB the device tree contains no host configuration information.
> >>
> >
> > So, it wouldn't e.g. include the path to the image file for a block
> > device? That would always be specified on the command line?
> >
>
> No, the IDE definition would contain some sort of symbolic node name. A
> separate mechanism (either command line or host config file) would then
> link an image file to the symbolic name.
Okay.
> libvirt should really never worry about the machine config file for
> normal things unless it needs to change what devices are exposed to a guest.
But changing devices *is* normal ... e.g. removing a block device.
Writing out a device tree is not a problem for libvirt (or any other
management tool); it's the need to merge changes into an existing
device tree where the real complexity would lie.
Cheers,
Mark.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-12 17:31 ` Mark McLoughlin
@ 2009-06-12 17:44 ` Blue Swirl
2009-06-12 17:55 ` Mark McLoughlin
2009-06-16 18:38 ` Jamie Lokier
1 sibling, 1 reply; 139+ messages in thread
From: Blue Swirl @ 2009-06-12 17:44 UTC (permalink / raw)
To: Mark McLoughlin
Cc: Anthony Liguori, Jamie Lokier, Michael S. Tsirkin, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Christian Borntraeger, Paul Brook, Avi Kivity
On 6/12/09, Mark McLoughlin <markmc@redhat.com> wrote:
> On Fri, 2009-06-12 at 12:00 -0500, Anthony Liguori wrote:
> > Mark McLoughlin wrote:
> > > So, when libvirt creates a guest for the first time, it makes a copy of
> > > the device tree and continues to use that even if qemu is upgraded.
> > > That's enough to ensure compat is retained for all built-in devices.
> > >
> > > However, in order to retain compat for that SCSI device (e.g. ensuring
> > > the PCI address doesn't change as other devices are added and removed),
> > > we're back to the same problem ... either:
> > >
> > > 1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure
> > > out what address to use, libvirt would need to query qemu for what
> > > address was originally allocated to device or it would do all the
> > > PCI address allocation itself ... or:
> > >
> > > 2) Don't use the command line, instead get a dump of the entire
> > > device tree (including the SCSI device) - if the device is to be
> > > removed or modified in future, libvirt would need to modify the
> > > device tree
> > >
> > > The basic problem would be that the command line config would have very
> > > limited ability to override the device tree config.
> > >
> >
> > After libvirt has done -drive file=foo... it should dump the machine
> > config and use that from then on.
>
> Right - libvirt then wouldn't be able to avoid the complexity of merging
> any future changes into the dumped machine config.
>
> > To combine into a single thread...
> > > How do you add a new attribute to the device tree and, when a supplied
> > > device tree lacking said attribute, distinguish between a device tree
> > > from an old version of qemu (i.e. use the old default) and a partial
> > > device tree from the VM manager (i.e. use the new default) ?
> > >
> >
> > Please define "attribute". I don't follow what you're asking.
>
> e.g. a per-device "enable MSI support" flag.
>
> If qemu is supplied with a device tree that lacks that flag, does it
> enable or disable MSI?
>
> Enable by default is bad - it could be a device tree dumped from an old
> version of qemu, so compat would be broken.
>
> Disable by default is bad - it could be a simple device tree supplied by
> the user, and the latest features are wanted.
>
> Maybe we want a per-device "this is a complete device description" flag
> and if anything is missing from a supposedly complete description, the
> old defaults would be used. A config dumped from qemu would have this
> flag set, a config generated by libvirt would not have the flag.
If the device has different behavior or different properties from
guest perspective compared to the old device, it should get a new
device type so that you could specify in the device tree either the
old device or the new one. Flags won't help in the long term.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-12 17:44 ` Blue Swirl
@ 2009-06-12 17:55 ` Mark McLoughlin
0 siblings, 0 replies; 139+ messages in thread
From: Mark McLoughlin @ 2009-06-12 17:55 UTC (permalink / raw)
To: Blue Swirl
Cc: Anthony Liguori, Jamie Lokier, Michael S. Tsirkin, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Christian Borntraeger, Paul Brook, Avi Kivity
On Fri, 2009-06-12 at 20:44 +0300, Blue Swirl wrote:
> If the device has different behavior or different properties from
> guest perspective compared to the old device, it should get a new
> device type so that you could specify in the device tree either the
> old device or the new one.
Yes, that works - it's analogous to a device (type, version) pair.
Cheers,
Mark.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-12 14:51 ` Anthony Liguori
2009-06-12 15:41 ` Mark McLoughlin
@ 2009-06-14 7:55 ` Avi Kivity
1 sibling, 0 replies; 139+ messages in thread
From: Avi Kivity @ 2009-06-14 7:55 UTC (permalink / raw)
To: Anthony Liguori
Cc: Mark McLoughlin, Jamie Lokier, Michael S. Tsirkin, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Anthony Liguori wrote:
>
> What's the problem with parsing the device config and modifying it?
> Is it just complexity?
Two-way modification. Management wants to store the configuration in
their database and tell the hypervisor what the machine looks like. If
qemu also tells management what the machine looks like, we can easily
get conflicts.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-12 16:48 ` Mark McLoughlin
@ 2009-06-14 7:58 ` Avi Kivity
2009-06-15 5:32 ` Configuration vs. compat hints Markus Armbruster
2009-06-15 9:09 ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Mark McLoughlin
0 siblings, 2 replies; 139+ messages in thread
From: Avi Kivity @ 2009-06-14 7:58 UTC (permalink / raw)
To: Mark McLoughlin
Cc: Anthony Liguori, Jamie Lokier, Michael S. Tsirkin, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Mark McLoughlin wrote:
>> I think the point is that you don't need version numbers if you have a
>> proper device tree.
>>
>
> How do you add a new attribute to the device tree and, when a supplied
> device tree lacking said attribute, distinguish between a device tree
> from an old version of qemu (i.e. use the old default) and a partial
> device tree from the VM manager (i.e. use the new default) ?
>
-baseline 0.10
>
>> NB the device tree contains no host configuration information.
>>
>
> So, it wouldn't e.g. include the path to the image file for a block
> device? That would always be specified on the command line?
>
Or in a different file. I agree splitting host and guest configuration
is a must-have, this ensures portability of virtual machines across
hosts and time.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-12 15:53 ` Mark McLoughlin
2009-06-12 16:12 ` Anthony Liguori
@ 2009-06-14 9:34 ` Michael S. Tsirkin
2009-06-14 9:37 ` Avi Kivity
2009-06-15 9:02 ` Mark McLoughlin
1 sibling, 2 replies; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-14 9:34 UTC (permalink / raw)
To: Mark McLoughlin
Cc: Anthony Liguori, Jamie Lokier, Carsten Otte, kvm, Glauber Costa,
Rusty Russell, qemu-devel, virtualization, Blue Swirl,
Christian Borntraeger, Paul Brook, Avi Kivity
On Fri, Jun 12, 2009 at 04:53:27PM +0100, Mark McLoughlin wrote:
> On Fri, 2009-06-12 at 09:55 -0500, Anthony Liguori wrote:
> > Mark McLoughlin wrote:
> > > On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> > >
> > > = Solution - Separate configuration from compat hints =
> > >
> > > As I suggested before:
> > >
> > > - Allow the VM manager to dump compat hints; this would be an opaque
> > > file format, more like the savevm format than a config file
> > >
> >
> > How is compat hints different from a device tree?
> >
> > In my mind, that's what compat hints is. I don't see another sane way
> > to implement it.
>
> A device tree with a different purpose than a config file.
>
> In its simplest form it could be a device tree with a version number for
> each device[1].
>
> The other obvious piece to add to it would be PCI addresses, so that
> even if you remove a device, the addresses assigned to existing devices
> don't change.
Could you clarify this requirement please?
If we want to remove a device from under a running guest, you need
hotplug. So we can't just remove several lines from the config and hope
that it'll work simply because the PCI address is stable.
OTOH, if you reboot the guest, it's ok for addresses to change.
> Cheers,
> Mark.
>
> [1] - Adding such a per-device version number to the config file would
> solve problem (2)
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-14 9:34 ` Michael S. Tsirkin
@ 2009-06-14 9:37 ` Avi Kivity
2009-06-14 9:47 ` Michael S. Tsirkin
2009-06-15 9:02 ` Mark McLoughlin
1 sibling, 1 reply; 139+ messages in thread
From: Avi Kivity @ 2009-06-14 9:37 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Mark McLoughlin, Anthony Liguori, Jamie Lokier, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Michael S. Tsirkin wrote:
>
> If we want to remove a device from under a running guest, you need
> hotplug. So we can't just remove several lines from the config and hope
> that it'll work simply because the PCI address is stable.
>
Why not?
> OTOH, if you reboot the guest, it's ok for addresses to change.
>
No, it's not. Some guests depend on addressing for their configuration
(for example older Linux guests will swap eth0/eth1 if you swap their
slots).
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-14 9:37 ` Avi Kivity
@ 2009-06-14 9:47 ` Michael S. Tsirkin
2009-06-15 9:38 ` Avi Kivity
0 siblings, 1 reply; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-14 9:47 UTC (permalink / raw)
To: Avi Kivity
Cc: Mark McLoughlin, Anthony Liguori, Jamie Lokier, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On Sun, Jun 14, 2009 at 12:37:13PM +0300, Avi Kivity wrote:
> Michael S. Tsirkin wrote:
>>
>> If we want to remove a device from under a running guest, you need
>> hotplug. So we can't just remove several lines from the config and hope
>> that it'll work simply because the PCI address is stable.
>>
>
> Why not?
E.g. configuration cycles address a specific bus/slot.
You need cooperation from guest if you want to move
a device.
>> OTOH, if you reboot the guest, it's ok for addresses to change.
>>
>
> No, it's not. Some guests depend on addressing for their configuration
> (for example older Linux guests will swap eth0/eth1 if you swap their
> slots).
Ah, I misunderstood what's meant by the address. I agree that it's
useful to be able to control device's placement on the bus.
--
MST
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-12 16:48 ` Mark McLoughlin
2009-06-12 17:00 ` Anthony Liguori
@ 2009-06-14 9:50 ` Michael S. Tsirkin
2009-06-15 9:08 ` Mark McLoughlin
2009-06-15 9:43 ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Avi Kivity
1 sibling, 2 replies; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-14 9:50 UTC (permalink / raw)
To: Mark McLoughlin
Cc: Anthony Liguori, Jamie Lokier, Carsten Otte, kvm, Glauber Costa,
Rusty Russell, qemu-devel, virtualization, Blue Swirl,
Christian Borntraeger, Paul Brook, Avi Kivity
On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:
> However, in order to retain compat for that SCSI device (e.g. ensuring
> the PCI address doesn't change as other devices are added and removed),
> we're back to the same problem ... either:
>
> 1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure
> out what address to use, libvirt would need to query qemu for what
> address was originally allocated to device or it would do all the
> PCI address allocation itself ...
This last option makes sense to me: in a real world the user has
control over where he places the device on the bus, so why
not with qemu?
--
MST
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints
2009-06-14 7:58 ` Avi Kivity
@ 2009-06-15 5:32 ` Markus Armbruster
2009-06-15 9:09 ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Mark McLoughlin
1 sibling, 0 replies; 139+ messages in thread
From: Markus Armbruster @ 2009-06-15 5:32 UTC (permalink / raw)
To: Avi Kivity
Cc: Mark McLoughlin, Carsten Otte, kvm, Michael S. Tsirkin,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Avi Kivity <avi@redhat.com> writes:
> Mark McLoughlin wrote:
[...]
>>> NB the device tree contains no host configuration information.
>>>
>>
>> So, it wouldn't e.g. include the path to the image file for a block
>> device? That would always be specified on the command line?
>>
>
> Or in a different file. I agree splitting host and guest
> configuration is a must-have, this ensures portability of virtual
> machines across hosts and time.
Splitting into two separate sections should suffice, they could live in
the same file for convenience.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-14 9:34 ` Michael S. Tsirkin
2009-06-14 9:37 ` Avi Kivity
@ 2009-06-15 9:02 ` Mark McLoughlin
1 sibling, 0 replies; 139+ messages in thread
From: Mark McLoughlin @ 2009-06-15 9:02 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Anthony Liguori, Jamie Lokier, Carsten Otte, kvm, Glauber Costa,
Rusty Russell, qemu-devel, virtualization, Blue Swirl,
Christian Borntraeger, Paul Brook, Avi Kivity
On Sun, 2009-06-14 at 12:34 +0300, Michael S. Tsirkin wrote:
> On Fri, Jun 12, 2009 at 04:53:27PM +0100, Mark McLoughlin wrote:
> > The other obvious piece to add to it would be PCI addresses, so that
> > even if you remove a device, the addresses assigned to existing devices
> > don't change.
>
> Could you clarify this requirement please?
Avi clarified, but I've written it up here too:
https://fedoraproject.org/wiki/Features/KVM_Stable_PCI_Addresses
Cheers,
Mark.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-14 9:50 ` Michael S. Tsirkin
@ 2009-06-15 9:08 ` Mark McLoughlin
2009-06-15 9:27 ` Avi Kivity
2009-06-15 9:43 ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Avi Kivity
1 sibling, 1 reply; 139+ messages in thread
From: Mark McLoughlin @ 2009-06-15 9:08 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Anthony Liguori, Jamie Lokier, Carsten Otte, kvm, Glauber Costa,
Rusty Russell, qemu-devel, virtualization, Blue Swirl,
Christian Borntraeger, Paul Brook, Avi Kivity
On Sun, 2009-06-14 at 12:50 +0300, Michael S. Tsirkin wrote:
> On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:
> > However, in order to retain compat for that SCSI device (e.g. ensuring
> > the PCI address doesn't change as other devices are added and removed),
> > we're back to the same problem ... either:
> >
> > 1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure
> > out what address to use, libvirt would need to query qemu for what
> > address was originally allocated to device or it would do all the
> > PCI address allocation itself ...
>
> This last option makes sense to me: in a real world the user has
> control over where he places the device on the bus, so why
> not with qemu?
Yep, most people seem to agree that it makes sense to allow this, but
some believe it should only be via a machine description file, not the
command line.
However, the first problem is that it isn't a solution to the guest ABI
problem more generally.
And the second problem is that for e.g. libvirt to use it, it would have
to be possible to query qemu for what PCI slots were assigned to the
devices - libvirt would need to be able to parse 'info pci' and match
the devices listed with the devices specified on the command line.
Again, details written up here:
https://fedoraproject.org/wiki/Features/KVM_Stable_PCI_Addresses
Cheers,
Mark.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-14 7:58 ` Avi Kivity
2009-06-15 5:32 ` Configuration vs. compat hints Markus Armbruster
@ 2009-06-15 9:09 ` Mark McLoughlin
2009-06-15 11:32 ` Avi Kivity
1 sibling, 1 reply; 139+ messages in thread
From: Mark McLoughlin @ 2009-06-15 9:09 UTC (permalink / raw)
To: Avi Kivity
Cc: Anthony Liguori, Jamie Lokier, Michael S. Tsirkin, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On Sun, 2009-06-14 at 10:58 +0300, Avi Kivity wrote:
> Mark McLoughlin wrote:
>
>
>
> >> I think the point is that you don't need version numbers if you have a
> >> proper device tree.
> >>
> >
> > How do you add a new attribute to the device tree and, when a supplied
> > device tree lacking said attribute, distinguish between a device tree
> > from an old version of qemu (i.e. use the old default) and a partial
> > device tree from the VM manager (i.e. use the new default) ?
> >
>
> -baseline 0.10
That's a version number :-)
(I was responding to Anthony's "you don't need a version number")
Cheers,
Mark.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 9:08 ` Mark McLoughlin
@ 2009-06-15 9:27 ` Avi Kivity
2009-06-15 10:32 ` Michael S. Tsirkin
` (2 more replies)
0 siblings, 3 replies; 139+ messages in thread
From: Avi Kivity @ 2009-06-15 9:27 UTC (permalink / raw)
To: Mark McLoughlin
Cc: Michael S. Tsirkin, Anthony Liguori, Jamie Lokier, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On 06/15/2009 12:08 PM, Mark McLoughlin wrote:
>> This last option makes sense to me: in a real world the user has
>> control over where he places the device on the bus, so why
>> not with qemu?
>>
>
> Yep, most people seem to agree that it makes sense to allow this, but
> some believe it should only be via a machine description file, not the
> command line.
>
I don't understand this opposition. It's clear a machine config file is
a long way in our future. It's also clear lack of stable PCI addresses
hurts us now.
> However, the first problem is that it isn't a solution to the guest ABI
> problem more generally.
>
pci_addr was never meant to bring world peace, just stable PCI
addresses. The other issues should be addressed separately.
> And the second problem is that for e.g. libvirt to use it, it would have
> to be possible to query qemu for what PCI slots were assigned to the
> devices - libvirt would need to be able to parse 'info pci' and match
> the devices listed with the devices specified on the command line.
>
If all devices (including vga, ide) are set up with pci_addr, then this
is unneeded. You do need to export available slot numbers from qemu.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-14 9:47 ` Michael S. Tsirkin
@ 2009-06-15 9:38 ` Avi Kivity
0 siblings, 0 replies; 139+ messages in thread
From: Avi Kivity @ 2009-06-15 9:38 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Mark McLoughlin, Anthony Liguori, Jamie Lokier, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On 06/14/2009 12:47 PM, Michael S. Tsirkin wrote:
> Michael S. Tsirkin wrote:
>>> If we want to remove a device from under a running guest, you need
>>> hotplug. So we can't just remove several lines from the config and hope
>>> that it'll work simply because the PCI address is stable.
>>>
>>>
>> Why not?
>>
>
> E.g. configuration cycles address a specific bus/slot.
> You need cooperation from guest if you want to move
> a device.
>
By "remove several lines from the config" I understood the guest needs
to be restarted. Of course if you don't restart the guest you need true
hotplug.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-14 9:50 ` Michael S. Tsirkin
2009-06-15 9:08 ` Mark McLoughlin
@ 2009-06-15 9:43 ` Avi Kivity
2009-06-15 10:29 ` Michael S. Tsirkin
2009-06-15 12:45 ` Anthony Liguori
1 sibling, 2 replies; 139+ messages in thread
From: Avi Kivity @ 2009-06-15 9:43 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Mark McLoughlin, Anthony Liguori, Jamie Lokier, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On 06/14/2009 12:50 PM, Michael S. Tsirkin wrote:
> On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:
>
>> However, in order to retain compat for that SCSI device (e.g. ensuring
>> the PCI address doesn't change as other devices are added and removed),
>> we're back to the same problem ... either:
>>
>> 1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure
>> out what address to use, libvirt would need to query qemu for what
>> address was originally allocated to device or it would do all the
>> PCI address allocation itself ...
>>
>
> This last option makes sense to me: in a real world the user has
> control over where he places the device on the bus, so why
> not with qemu?
>
Yes, the user builds the machine using the command line and monitor (or,
in 2017, the machine configuration file), then turns on the power.
Command line options are the parts lying around when we start.
btw, -drive needs to be separated:
-controller type=lsi1234,pci_addr=foobar,name=blah
-drive file=foo.img,controller=blah,index=0
-drive file=bar.img,controller=blah,index=1
Drives do not have PCI addresses.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 9:43 ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Avi Kivity
@ 2009-06-15 10:29 ` Michael S. Tsirkin
2009-06-15 12:45 ` Anthony Liguori
1 sibling, 0 replies; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 10:29 UTC (permalink / raw)
To: Avi Kivity
Cc: Mark McLoughlin, Anthony Liguori, Jamie Lokier, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On Mon, Jun 15, 2009 at 12:43:48PM +0300, Avi Kivity wrote:
> On 06/14/2009 12:50 PM, Michael S. Tsirkin wrote:
>> On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:
>>
>>> However, in order to retain compat for that SCSI device (e.g. ensuring
>>> the PCI address doesn't change as other devices are added and removed),
>>> we're back to the same problem ... either:
>>>
>>> 1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure
>>> out what address to use, libvirt would need to query qemu for what
>>> address was originally allocated to device or it would do all the
>>> PCI address allocation itself ...
>>>
>>
>> This last option makes sense to me: in a real world the user has
>> control over where he places the device on the bus, so why
>> not with qemu?
>>
>
> Yes, the user builds the machine using the command line and monitor (or,
> in 2017, the machine configuration file), then turns on the power.
> Command line options are the parts lying around when we start.
>
> btw, -drive needs to be separated:
>
> -controller type=lsi1234,pci_addr=foobar,name=blah
> -drive file=foo.img,controller=blah,index=0
> -drive file=bar.img,controller=blah,index=1
>
> Drives do not have PCI addresses.
Maybe we need a generic 'bus options' flag.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 9:27 ` Avi Kivity
@ 2009-06-15 10:32 ` Michael S. Tsirkin
2009-06-15 10:44 ` Gleb Natapov
2009-06-15 11:27 ` Avi Kivity
2009-06-15 11:35 ` Configuration vs. compat hints Markus Armbruster
2009-06-15 12:41 ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Anthony Liguori
2 siblings, 2 replies; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 10:32 UTC (permalink / raw)
To: Avi Kivity
Cc: Mark McLoughlin, Anthony Liguori, Jamie Lokier, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On Mon, Jun 15, 2009 at 12:27:08PM +0300, Avi Kivity wrote:
> On 06/15/2009 12:08 PM, Mark McLoughlin wrote:
>>> This last option makes sense to me: in a real world the user has
>>> control over where he places the device on the bus, so why
>>> not with qemu?
>>>
>>
>> Yep, most people seem to agree that it makes sense to allow this, but
>> some believe it should only be via a machine description file, not the
>> command line.
>>
>
> I don't understand this opposition. It's clear a machine config file is
> a long way in our future. It's also clear lack of stable PCI addresses
> hurts us now.
>
>> However, the first problem is that it isn't a solution to the guest ABI
>> problem more generally.
>>
>
> pci_addr was never meant to bring world peace, just stable PCI
> addresses. The other issues should be addressed separately.
>
>> And the second problem is that for e.g. libvirt to use it, it would have
>> to be possible to query qemu for what PCI slots were assigned to the
>> devices - libvirt would need to be able to parse 'info pci' and match
>> the devices listed with the devices specified on the command line.
>>
>
> If all devices (including vga, ide) are set up with pci_addr, then this
> is unneeded.
Right. I think it could be an all-or-nothing approach.
> You do need to export available slot numbers from qemu.
Why would a slot be unavailable?
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 10:32 ` Michael S. Tsirkin
@ 2009-06-15 10:44 ` Gleb Natapov
2009-06-15 10:46 ` Michael S. Tsirkin
2009-06-15 11:27 ` Avi Kivity
1 sibling, 1 reply; 139+ messages in thread
From: Gleb Natapov @ 2009-06-15 10:44 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Avi Kivity, Mark McLoughlin, Anthony Liguori, Jamie Lokier,
Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
virtualization, Blue Swirl, Christian Borntraeger, Paul Brook
On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > You do need to export available slot numbers from qemu.
>
> Why would a slot be unavailable?
>
Because it does not exist?
--
Gleb.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 10:44 ` Gleb Natapov
@ 2009-06-15 10:46 ` Michael S. Tsirkin
2009-06-15 10:52 ` Gleb Natapov
0 siblings, 1 reply; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 10:46 UTC (permalink / raw)
To: Gleb Natapov
Cc: Avi Kivity, Mark McLoughlin, Anthony Liguori, Jamie Lokier,
Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
virtualization, Blue Swirl, Christian Borntraeger, Paul Brook
On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
> On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > > You do need to export available slot numbers from qemu.
> >
> > Why would a slot be unavailable?
> >
> Because it does not exist?
We can create a slot with any number, can't we?
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 10:46 ` Michael S. Tsirkin
@ 2009-06-15 10:52 ` Gleb Natapov
2009-06-15 11:07 ` Michael S. Tsirkin
0 siblings, 1 reply; 139+ messages in thread
From: Gleb Natapov @ 2009-06-15 10:52 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Avi Kivity, Mark McLoughlin, Anthony Liguori, Jamie Lokier,
Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
virtualization, Blue Swirl, Christian Borntraeger, Paul Brook
On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote:
> On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
> > On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > > > You do need to export available slot numbers from qemu.
> > >
> > > Why would a slot be unavailable?
> > >
> > Because it does not exist?
>
> We can create a slot with any number, can't we?
What do you mean? If the mobo has 4 slots you can't create a fifth.
KVM describes 32 slots in the BIOS.
--
Gleb.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 10:52 ` Gleb Natapov
@ 2009-06-15 11:07 ` Michael S. Tsirkin
2009-06-15 11:14 ` Gleb Natapov
0 siblings, 1 reply; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 11:07 UTC (permalink / raw)
To: Gleb Natapov
Cc: Avi Kivity, Mark McLoughlin, Anthony Liguori, Jamie Lokier,
Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
virtualization, Blue Swirl, Christian Borntraeger, Paul Brook
On Mon, Jun 15, 2009 at 01:52:13PM +0300, Gleb Natapov wrote:
> On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote:
> > On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
> > > On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > > > > You do need to export available slot numbers from qemu.
> > > >
> > > > Why would a slot be unavailable?
> > > >
> > > Because it does not exist?
> >
> > We can create a slot with any number, can't we?
> What do you mean? If the mobo has 4 slots you can't create fifth.
> KVM describes 32 slots in the BIOS.
Do you mean the KVM kernel module here? I don't know much about the
BIOS. Can't qemu control the number of slots declared?
--
MST
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 11:07 ` Michael S. Tsirkin
@ 2009-06-15 11:14 ` Gleb Natapov
2009-06-15 11:34 ` Michael S. Tsirkin
0 siblings, 1 reply; 139+ messages in thread
From: Gleb Natapov @ 2009-06-15 11:14 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Avi Kivity, Mark McLoughlin, Anthony Liguori, Jamie Lokier,
Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
virtualization, Blue Swirl, Christian Borntraeger, Paul Brook
On Mon, Jun 15, 2009 at 02:07:53PM +0300, Michael S. Tsirkin wrote:
> On Mon, Jun 15, 2009 at 01:52:13PM +0300, Gleb Natapov wrote:
> > On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote:
> > > On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
> > > > On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > > > > > You do need to export available slot numbers from qemu.
> > > > >
> > > > > Why would a slot be unavailable?
> > > > >
> > > > Because it does not exist?
> > >
> > > We can create a slot with any number, can't we?
> > What do you mean? If the mobo has 4 slots you can't create fifth.
> > KVM describes 32 slots in the BIOS.
>
> Do you mean the KVM kernel module here? I don't know much about the
No I don't mean KVM kernel module here.
> BIOS. Can't qemu control the number of slots declared?
>
Qemu represents HW, BIOS drives this HW. They should be in sync on such
important issues as pci slot configuration. Even if QEMU can control
the number of slots declared (which it can't easily do), it will be able
to do it only on startup (before BIOS runs). The way to have a dynamic
number of slots may be pci bridge emulation. Not sure what is needed
from the BIOS for that.
--
Gleb.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 10:32 ` Michael S. Tsirkin
2009-06-15 10:44 ` Gleb Natapov
@ 2009-06-15 11:27 ` Avi Kivity
2009-06-15 11:48 ` Michael S. Tsirkin
1 sibling, 1 reply; 139+ messages in thread
From: Avi Kivity @ 2009-06-15 11:27 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Mark McLoughlin, Anthony Liguori, Jamie Lokier, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On 06/15/2009 01:32 PM, Michael S. Tsirkin wrote:
>> You do need to export available slot numbers from qemu.
>>
>
> Why would a slot be unavailable?
>
A slot needs to be configured in ACPI, and not be taken by onboard chips
(piix takes slot 0, for example).
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 9:09 ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Mark McLoughlin
@ 2009-06-15 11:32 ` Avi Kivity
2009-06-15 12:48 ` Anthony Liguori
0 siblings, 1 reply; 139+ messages in thread
From: Avi Kivity @ 2009-06-15 11:32 UTC (permalink / raw)
To: Mark McLoughlin
Cc: Anthony Liguori, Jamie Lokier, Michael S. Tsirkin, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On 06/15/2009 12:09 PM, Mark McLoughlin wrote:
>>>> I think the point is that you don't need version numbers if you have a
>>>> proper device tree.
>>>>
>>>>
>>> How do you add a new attribute to the device tree and, when supplied a
>>> device tree lacking said attribute, distinguish between a device tree
>>> from an old version of qemu (i.e. use the old default) and a partial
>>> device tree from the VM manager (i.e. use the new default) ?
>>>
>>>
>> -baseline 0.10
>>
>
> That's a version number :-)
>
> (I was responding to Anthony's "you don't need a version number")
>
If you want to prevent incompatibilities, you need to make everything
new (potentially including bugfixes) non-default. Eventually the
default configuration becomes increasingly unusable and you need a new
baseline. You must still be able to fall back to the old baseline for
older guests. I don't think games with configuration files can hide that.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 11:14 ` Gleb Natapov
@ 2009-06-15 11:34 ` Michael S. Tsirkin
0 siblings, 0 replies; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 11:34 UTC (permalink / raw)
To: Gleb Natapov
Cc: Avi Kivity, Mark McLoughlin, Anthony Liguori, Jamie Lokier,
Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
virtualization, Blue Swirl, Christian Borntraeger, Paul Brook
On Mon, Jun 15, 2009 at 02:14:15PM +0300, Gleb Natapov wrote:
> On Mon, Jun 15, 2009 at 02:07:53PM +0300, Michael S. Tsirkin wrote:
> > On Mon, Jun 15, 2009 at 01:52:13PM +0300, Gleb Natapov wrote:
> > > On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote:
> > > > On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
> > > > > On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > > > > > > You do need to export available slot numbers from qemu.
> > > > > >
> > > > > > Why would a slot be unavailable?
> > > > > >
> > > > > Because it does not exist?
> > > >
> > > > We can create a slot with any number, can't we?
> > > What do you mean? If the mobo has 4 slots you can't create fifth.
> > > KVM describes 32 slots in the BIOS.
> >
> > Do you mean the KVM kernel module here? I don't know much about the
> No I don't mean KVM kernel module here.
>
> > BIOS. Can't qemu control the number of slots declared?
> >
> Qemu represents HW, BIOS drives this HW. They should be in sync on such
> important issues like pci slot configuration.
As a simple solution, let's stick to 32 slots per bus. That's the
maximum that the PCI spec allows, anyway.
> Even if QEMU can control the number of slots declared (which it can't
> easily do), it will be able to do it only on startup (before BIOS
> runs).
That's OK - this is when the machine description is read.
> The way to have dynamic
> number of slots may be pci bridge emulation. Not sure what is needed
> from BIOS for that.
Since bridge can be hot-plugged, probably nothing?
But we don't necessarily need a dynamic number of slots IMO.
>
> --
> Gleb.
--
MST
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints
2009-06-15 9:27 ` Avi Kivity
2009-06-15 10:32 ` Michael S. Tsirkin
@ 2009-06-15 11:35 ` Markus Armbruster
2009-06-15 11:43 ` Avi Kivity
2009-06-15 12:41 ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Anthony Liguori
2 siblings, 1 reply; 139+ messages in thread
From: Markus Armbruster @ 2009-06-15 11:35 UTC (permalink / raw)
To: Avi Kivity
Cc: Mark McLoughlin, Carsten Otte, kvm, Michael S. Tsirkin,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Avi Kivity <avi@redhat.com> writes:
> On 06/15/2009 12:08 PM, Mark McLoughlin wrote:
>>> This last option makes sense to me: in a real world the user has
>>> control over where he places the device on the bus, so why
>>> not with qemu?
>>>
>>
>> Yep, most people seem to agree that it makes sense to allow this, but
>> some believe it should only be via a machine description file, not the
>> command line.
>>
>
> I don't understand this opposition. It's clear a machine config file
> is a long way in our future. It's also clear lack of stable PCI
> addresses hurts us now.
Correct.
>> However, the first problem is that it isn't a solution to the guest ABI
>> problem more generally.
>>
>
> pci_addr was never meant to bring world peace, just stable PCI
> addresses. The other issues should be addressed separately.
>
>> And the second problem is that for e.g. libvirt to use it, it would have
>> to be possible to query qemu for what PCI slots were assigned to the
>> devices - libvirt would need to be able to parse 'info pci' and match
>> the devices listed with the devices specified on the command line.
>>
>
> If all devices (including vga, ide) are set up with pci_addr, then
> this is unneeded. You do need to export available slot numbers from
> qemu.
Not really. QEMU gives just the host bridge a fixed slot[*]. All the
other slots are available.
The real problem is devices that get implicitly added, like the SCSI
controller. Those devices get their slots auto-assigned, which can
interfere with slot numbers chosen by the user. We need a way to avoid
that, as you suggested elsewhere in this thread.
[*] There's an exception or two for oddball targets.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints
2009-06-15 11:35 ` Configuration vs. compat hints Markus Armbruster
@ 2009-06-15 11:43 ` Avi Kivity
2009-06-15 11:59 ` Stefano Stabellini
` (2 more replies)
0 siblings, 3 replies; 139+ messages in thread
From: Avi Kivity @ 2009-06-15 11:43 UTC (permalink / raw)
To: Markus Armbruster
Cc: Mark McLoughlin, Carsten Otte, kvm, Michael S. Tsirkin,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook, Anthony Liguori,
qemu-devel
(adding cc)
On 06/15/2009 02:35 PM, Markus Armbruster wrote:
> Not really. QEMU gives just the host bridge a fixed slot[*]. All the
> other slots are available.
>
qemu needs to export these two bits of information: the first free slot
and the number of slots.
More generally, which slots are open. We can assume 1:31, but that's
unlovely.
> The real problem is devices that get implicitly added, like the SCSI
> controller. Those devices get their slots auto-assigned, which can
> interfere with slot numbers chosen by the user. We need a way to avoid
> that, as you suggested elsewhere in this thread.
>
Paul/Anthony, can we have -vga pci_addr=, -usb-controller pci_addr=, and
-drive pci_addr= (and later, -disk-controller)? Stalling while waiting
for the ultimate config file is only generating pain and out-of-tree
patches.
(I'd be quite happy constructing the entire machine config on the
command line, but I realize it's just me)
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 11:27 ` Avi Kivity
@ 2009-06-15 11:48 ` Michael S. Tsirkin
2009-06-15 11:56 ` Avi Kivity
0 siblings, 1 reply; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 11:48 UTC (permalink / raw)
To: Avi Kivity
Cc: Mark McLoughlin, Anthony Liguori, Jamie Lokier, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On Mon, Jun 15, 2009 at 02:27:14PM +0300, Avi Kivity wrote:
> On 06/15/2009 01:32 PM, Michael S. Tsirkin wrote:
>>> You do need to export available slot numbers from qemu.
>>>
>>
>> Why would a slot be unavailable?
>>
>
> A slot needs to be configured in ACPI,
Can we configure all possible 32 slots?
> and not be taken by onboard chips
> (piix takes slot 0, for example).
piix is the root complex, isn't it? Are there other examples? If not,
we could teach management about the root complex being special ...
> --
> error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 11:48 ` Michael S. Tsirkin
@ 2009-06-15 11:56 ` Avi Kivity
2009-06-15 12:41 ` Michael S. Tsirkin
0 siblings, 1 reply; 139+ messages in thread
From: Avi Kivity @ 2009-06-15 11:56 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Mark McLoughlin, Anthony Liguori, Jamie Lokier, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On 06/15/2009 02:48 PM, Michael S. Tsirkin wrote:
>> A slot needs to be configured in ACPI,
>>
>
> Can we configure all possible 32 slots?
>
That's what we do. But one is always taken. In the future, perhaps more.
>> and not be taken by onboard chips
>> (piix takes slot 0, for example).
>>
>
> piix is the root complex, isn't it? Are there other examples? If not,
> we could teach management about the root complex being special ...
>
We should just tell the user which slots are open.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Re: Configuration vs. compat hints
2009-06-15 11:43 ` Avi Kivity
@ 2009-06-15 11:59 ` Stefano Stabellini
2009-06-15 12:41 ` [Qemu-devel] " Markus Armbruster
2009-06-15 14:23 ` Javier Guerra
2 siblings, 0 replies; 139+ messages in thread
From: Stefano Stabellini @ 2009-06-15 11:59 UTC (permalink / raw)
To: Avi Kivity
Cc: Mark McLoughlin, kvm@vger.kernel.org, Michael S. Tsirkin,
Glauber Costa, Russell, Markus Armbruster, qemu-devel, Blue Swirl,
Christian Borntraeger, Rusty, Brook, Paul,
virtualization@lists.linux-foundation.org, Carsten Otte
Avi Kivity wrote:
> (I'd be quite happy constructing the entire machine config on the
> command line, but I realize it's just me)
>
It is not just you.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 9:27 ` Avi Kivity
2009-06-15 10:32 ` Michael S. Tsirkin
2009-06-15 11:35 ` Configuration vs. compat hints Markus Armbruster
@ 2009-06-15 12:41 ` Anthony Liguori
2009-06-15 12:55 ` Avi Kivity
2009-06-15 13:04 ` Configuration vs. compat hints Markus Armbruster
2 siblings, 2 replies; 139+ messages in thread
From: Anthony Liguori @ 2009-06-15 12:41 UTC (permalink / raw)
To: Avi Kivity
Cc: Mark McLoughlin, Michael S. Tsirkin, Jamie Lokier, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Avi Kivity wrote:
> On 06/15/2009 12:08 PM, Mark McLoughlin wrote:
>>> This last option makes sense to me: in a real world the user has
>>> control over where he places the device on the bus, so why
>>> not with qemu?
>>>
>>
>> Yep, most people seem to agree that it makes sense to allow this, but
>> some believe it should only be via a machine description file, not the
>> command line.
>>
>
> I don't understand this opposition. It's clear a machine config file
> is a long way in our future. It's also clear lack of stable PCI
> addresses hurts us now.
Is there opposition? I don't ever recall seeing a patch...
I think it's a perfectly fine idea.
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: [Qemu-devel] Re: Configuration vs. compat hints
2009-06-15 11:43 ` Avi Kivity
2009-06-15 11:59 ` Stefano Stabellini
@ 2009-06-15 12:41 ` Markus Armbruster
2009-06-15 12:50 ` Anthony Liguori
2009-06-15 14:23 ` Javier Guerra
2 siblings, 1 reply; 139+ messages in thread
From: Markus Armbruster @ 2009-06-15 12:41 UTC (permalink / raw)
To: Avi Kivity
Cc: Mark McLoughlin, kvm, Michael S. Tsirkin, Glauber Costa,
Rusty Russell, qemu-devel, virtualization, Blue Swirl,
Christian Borntraeger, Paul Brook, Carsten Otte
Avi Kivity <avi@redhat.com> writes:
> (adding cc)
>
> On 06/15/2009 02:35 PM, Markus Armbruster wrote:
>> Not really. QEMU gives just the host bridge a fixed slot[*]. All the
>> other slots are available.
>>
>
> qemu needs to export these two bits of information: the first free
> slot and the number of slots.
>
> More generally, which slots are open. We can assume 1:31, but that's
> unlovely.
Point.
>> The real problem is devices that get implicitly added, like the SCSI
>> controller. Those devices get their slots auto-assigned, which can
>> interfere with slot numbers chosen by the user. We need a way to avoid
>> that, as you suggested elsewhere in this thread.
>>
>
> Paul/Anthony, can we have -vga pci_addr=, -usb-controller pci_addr=,
> and -drive pci_addr= (and later, -disk-controller)? Stalling while
> waiting for the ultimate config file is only generating pain and
> out-of-tree patches.
Yup.
I got bit-rotten patches for pci_addr=, and I can unrot them if they're
wanted.
> (I'd be quite happy constructing the entire machine config on the
> command line, but I realize it's just me)
Ha, .bash_history as config file...
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 11:56 ` Avi Kivity
@ 2009-06-15 12:41 ` Michael S. Tsirkin
2009-06-15 12:50 ` Avi Kivity
0 siblings, 1 reply; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 12:41 UTC (permalink / raw)
To: Avi Kivity
Cc: Mark McLoughlin, Anthony Liguori, Jamie Lokier, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On Mon, Jun 15, 2009 at 02:56:42PM +0300, Avi Kivity wrote:
> On 06/15/2009 02:48 PM, Michael S. Tsirkin wrote:
>>> A slot needs to be configured in ACPI,
>>>
>>
>> Can we configure all possible 32 slots?
>>
>
> That's what we do. But one is always taken. In the future, perhaps more.
>
>>> and not be taken by onboard chips
>>> (piix takes slot 0, for example).
>>>
>>
>> piix is the root complex, isn't it? Are there other examples? If not,
>> we could teach management about the root complex being special ...
>>
>
> We should just tell the user which slots are open.
This might be tricky if the config is passed in with the command line
flags.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 9:43 ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Avi Kivity
2009-06-15 10:29 ` Michael S. Tsirkin
@ 2009-06-15 12:45 ` Anthony Liguori
2009-06-15 13:03 ` Avi Kivity
2009-06-15 13:17 ` Gerd Hoffmann
1 sibling, 2 replies; 139+ messages in thread
From: Anthony Liguori @ 2009-06-15 12:45 UTC (permalink / raw)
To: Avi Kivity
Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Avi Kivity wrote:
> On 06/14/2009 12:50 PM, Michael S. Tsirkin wrote:
>> On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:
>>
>>> However, in order to retain compat for that SCSI device (e.g. ensuring
>>> the PCI address doesn't change as other devices are added and removed),
>>> we're back to the same problem ... either:
>>>
>>> 1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to
>>> figure
>>> out what address to use, libvirt would need to query qemu for
>>> what
>>> address was originally allocated to device or it would do all the
>>> PCI address allocation itself ...
>>>
>>
>> This last option makes sense to me: in a real world the user has
>> control over where he places the device on the bus, so why
>> not with qemu?
>>
>
> Yes, the user builds the machine using the command line and monitor
> (or, in 2017, the machine configuration file),
Considering pbrook just posted a machine config for arm, I think it
would be rather sad if pc wasn't converted to it by 2017...
> then turns on the power. Command line options are the parts lying
> around when we start.
>
> btw, -drive needs to be separated:
>
> -controller type=lsi1234,pci_addr=foobar,name=blah
> -drive file=foo.img,controller=blah,index=0
> -drive file=bar.img,controller=blah,index=1
>
> Drives do not have pci addresses.
Drives don't have indexes and buses but we specify them on the -drive
line. -drive is convenient syntax. It stops being convenient when you
force it to be two options.
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 11:32 ` Avi Kivity
@ 2009-06-15 12:48 ` Anthony Liguori
2009-06-15 13:12 ` Avi Kivity
2009-06-15 14:00 ` Mark McLoughlin
0 siblings, 2 replies; 139+ messages in thread
From: Anthony Liguori @ 2009-06-15 12:48 UTC (permalink / raw)
To: Avi Kivity
Cc: Mark McLoughlin, Jamie Lokier, Michael S. Tsirkin, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Avi Kivity wrote:
> On 06/15/2009 12:09 PM, Mark McLoughlin wrote:
>>>>> I think the point is that you don't need version numbers if you
>>>>> have a
>>>>> proper device tree.
>>>>>
>>>>>
>>>> How do you add a new attribute to the device tree and, when supplied a
>>>> device tree lacking said attribute, distinguish between a device tree
>>>> from an old version of qemu (i.e. use the old default) and a partial
>>>> device tree from the VM manager (i.e. use the new default) ?
>>>>
>>>>
>>> -baseline 0.10
>>>
>>
>> That's a version number :-)
>>
>> (I was responding to Anthony's "you don't need a version number")
>>
>
> If you want to prevent incompatibilities, you need to make everything
> new (potentially including bugfixes) non-default. Eventually the
> default configuration becomes increasingly unusable and you need a new
> baseline. You must still be able to fall back to the old baseline for
> older guests. I don't think games with configuration files can hide
> that.
-M pc1
-M pc2
etc.
This is pretty easy to maintain with config files.
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 12:41 ` Michael S. Tsirkin
@ 2009-06-15 12:50 ` Avi Kivity
2009-06-15 12:52 ` Anthony Liguori
0 siblings, 1 reply; 139+ messages in thread
From: Avi Kivity @ 2009-06-15 12:50 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Mark McLoughlin, Anthony Liguori, Jamie Lokier, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:
> We should just tell the user which slots are open.
>
>
> This might be tricky if the config is passed in with the command line
> flags.
>
qemu -show-available-pci-slots
(the qemu equivalent to KVM_CHECK_EXTENSION)
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: [Qemu-devel] Re: Configuration vs. compat hints
2009-06-15 12:41 ` [Qemu-devel] " Markus Armbruster
@ 2009-06-15 12:50 ` Anthony Liguori
0 siblings, 0 replies; 139+ messages in thread
From: Anthony Liguori @ 2009-06-15 12:50 UTC (permalink / raw)
To: Markus Armbruster
Cc: Avi Kivity, Mark McLoughlin, kvm, Michael S. Tsirkin,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook, Carsten Otte
Markus Armbruster wrote:
> Avi Kivity <avi@redhat.com> writes:
>
>
>> Paul/Anthony, can we have -vga pci_addr=, -usb-controller pci_addr=,
>> and -drive pci_addr= (and later, -disk-controller)? Stalling while
>> waiting for the ultimate config file is only generating pain and
>> out-of-tree patches.
>>
>
> Yup.
>
> I got bit-rotten patches for pci_addr=, and I can unrot them if they're
> wanted.
>
Yes, would be good to have patches on the list to discuss. In
principle, I have no objection to this.
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 12:50 ` Avi Kivity
@ 2009-06-15 12:52 ` Anthony Liguori
2009-06-15 13:09 ` Avi Kivity
0 siblings, 1 reply; 139+ messages in thread
From: Anthony Liguori @ 2009-06-15 12:52 UTC (permalink / raw)
To: Avi Kivity
Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Avi Kivity wrote:
> On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:
>> We should just tell the user which slots are open.
>>
>> This might be tricky if the config is passed in with the command line
>> flags.
>>
>
> qemu -show-available-pci-slots
Why does the user care?
Let QEMU allocate the PCI slot, then query it to see what slot it
assigned and remember that.
It's not a good idea to have management applications attempt to do PCI
slot allocation. For instance, one day we may decide to make virtio
devices multi-function.
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 12:41 ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Anthony Liguori
@ 2009-06-15 12:55 ` Avi Kivity
2009-06-15 13:04 ` Configuration vs. compat hints Markus Armbruster
1 sibling, 0 replies; 139+ messages in thread
From: Avi Kivity @ 2009-06-15 12:55 UTC (permalink / raw)
To: Anthony Liguori
Cc: Mark McLoughlin, Michael S. Tsirkin, Jamie Lokier, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On 06/15/2009 03:41 PM, Anthony Liguori wrote:
>>> Yep, most people seem to agree that it makes sense to allow this, but
>>> some believe it should only be via a machine description file, not the
>>> command line.
>>
>> I don't understand this opposition. It's clear a machine config file
>> is a long way in our future. It's also clear lack of stable PCI
>> addresses hurts us now.
>
>
> Is there opposition? I don't ever recall seeing a patch...
Izik Eidus posted a patch (using a different syntax) in November 2007.
>
> I think it's a perfectly fine idea.
Good.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 12:45 ` Anthony Liguori
@ 2009-06-15 13:03 ` Avi Kivity
2009-06-15 13:20 ` Anthony Liguori
2009-06-15 13:17 ` Gerd Hoffmann
1 sibling, 1 reply; 139+ messages in thread
From: Avi Kivity @ 2009-06-15 13:03 UTC (permalink / raw)
To: Anthony Liguori
Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On 06/15/2009 03:45 PM, Anthony Liguori wrote:
>>> This last option makes sense to me: in a real world the user has
>>> control over where he places the device on the bus, so why
>>> not with qemu?
>>
>> Yes, the user builds the machine using the command line and monitor
>> (or, in 2017, the machine configuration file),
>
>
> Considering pbrook just posted a machine config for arm, I think it
> would be rather sad if pc wasn't converted to it by 2017...
I'd be sad too, but not surprised.
>> then turns on the power. Command line options are the parts lying
>> around when we start.
>>
>> btw, -drive needs to be separated:
>>
>> -controller type=lsi1234,pci_addr=foobar,name=blah
>> -drive file=foo.img,controller=blah,index=0
>> -drive file=bar.img,controller=blah,index=1
>>
>> Drives do not have pci addresses.
>
> Drives don't have indexes and buses but we specify it on the -drive
> line.
Drives do have indexes. On old parallel scsi drives you set the index
by clicking a button on the back of the drive to cycle through scsi
addresses 0-7. An IDE drive's index is determined by the cable
(master/slave). A SATA drive's index is determined by which header on
the motherboard the drive connects to.
If by bus you mean the if= parameter, then drives certainly do have
buses. Just try connecting the scsi drive from the previous paragraph
to a USB port.
> -drive is convenient syntax. It stops being convenient when you force
> it to be two options.
controller= defaults to some builtin thing which autoinstantiates when
necessary, so the old -drive syntax works.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints
2009-06-15 12:41 ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Anthony Liguori
2009-06-15 12:55 ` Avi Kivity
@ 2009-06-15 13:04 ` Markus Armbruster
1 sibling, 0 replies; 139+ messages in thread
From: Markus Armbruster @ 2009-06-15 13:04 UTC (permalink / raw)
To: Anthony Liguori
Cc: Avi Kivity, Carsten Otte, Rusty Russell, kvm, Mark McLoughlin,
Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Anthony Liguori <anthony@codemonkey.ws> writes:
> Avi Kivity wrote:
>> On 06/15/2009 12:08 PM, Mark McLoughlin wrote:
>>>> This last option makes sense to me: in a real world the user has
>>>> control over where he places the device on the bus, so why
>>>> not with qemu?
>>>>
>>>
>>> Yep, most people seem to agree that it makes sense to allow this, but
>>> some believe it should only be via a machine description file, not the
>>> command line.
>>>
>>
>> I don't understand this opposition. It's clear a machine config
>> file is a long way in our future. It's also clear lack of stable
>> PCI addresses hurts us now.
>
> Is there opposition? I don't ever recall seeing a patch...
http://www.archivum.info/qemu-devel@nongnu.org/2009-01/msg01458.html
> I think it's a perfectly fine idea.
Off to dust off my patch series.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 12:52 ` Anthony Liguori
@ 2009-06-15 13:09 ` Avi Kivity
2009-06-15 13:23 ` Anthony Liguori
0 siblings, 1 reply; 139+ messages in thread
From: Avi Kivity @ 2009-06-15 13:09 UTC (permalink / raw)
To: Anthony Liguori
Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On 06/15/2009 03:52 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>> On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:
>>> We should just tell the user which slots are open.
>>> This might be tricky if the config is passed in with the command line
>>> flags.
>>
>> qemu -show-available-pci-slots
>
> Why does the user care?
>
> Let QEMU allocate the PCI slot, then query it to see what slot it
> assigned and remember that.
It's a roundabout way of doing things.
As an example, if you try to fit too many devices into the machine, you
have to try to add all devices and watch for a qemu error. If you know
in advance how many slots you have, you never enter into that situation
(and you need to show the limit to the user anyway).
>
> It's not a good idea to have management applications attempt to do PCI
> slot allocation. For instance, one day we may decide to make virtio
> devices multi-function.
Non-virtio, as well. But we can't make that the default, so the user
will have to specify this anyway.
Given that you can't hotunplug individual functions, the user will have
to specify exactly how functions are aggregated into devices. My
recommendation would be for a GUI to allow the user to select a 'quad
port virtio NIC' or 'dual port virtio scsi controller' rather than
trying to do anything automatic.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 12:48 ` Anthony Liguori
@ 2009-06-15 13:12 ` Avi Kivity
2009-06-15 13:24 ` Anthony Liguori
2009-06-15 14:00 ` Mark McLoughlin
1 sibling, 1 reply; 139+ messages in thread
From: Avi Kivity @ 2009-06-15 13:12 UTC (permalink / raw)
To: Anthony Liguori
Cc: Mark McLoughlin, Jamie Lokier, Michael S. Tsirkin, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On 06/15/2009 03:48 PM, Anthony Liguori wrote:
>>>>> How do you add a new attribute to the device tree and, when a supplied
>>>>> device tree lacking said attribute, distinguish between a device tree
>>>>> from an old version of qemu (i.e. use the old default) and a partial
>>>>> device tree from the VM manager (i.e. use the new default) ?
>>>>>
>>>> -baseline 0.10
>>>
>>> That's a version number :-)
>>>
>>> (I was responding to Anthony's "you don't need a version number")
>>
>> If you want to prevent incompatibilities, you need to make everything
>> new (potentially including bugfixes) non-default. Eventually the
>> default configuration becomes increasingly unusable and you need a
>> new baseline. You must still be able to fall back to the old
>> baseline for older guests. I don't think games with configuration
>> files can hide that.
>
> -M pc1
> -M pc2
Certainly preferable to -baseline.
> This is pretty easy to maintain with config files.
Let's not tie the two together.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 12:45 ` Anthony Liguori
2009-06-15 13:03 ` Avi Kivity
@ 2009-06-15 13:17 ` Gerd Hoffmann
1 sibling, 0 replies; 139+ messages in thread
From: Gerd Hoffmann @ 2009-06-15 13:17 UTC (permalink / raw)
To: Anthony Liguori
Cc: Avi Kivity, Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier,
Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
virtualization, Blue Swirl, Christian Borntraeger, Paul Brook
Hi,
>> Yes, the user build the machine using the command line and monitor
>> (or, in 2017, the machine configuration file),
>
> Considering pbrook just posted a machine config for arm, I think it
> would be rather sad if pc wasn't converted to it by 2017...
It shouldn't last until 2017, but the process isn't that trivial.
Some qemu code / control flow needs serious restructuring until it is in a
state that creating the devices from a fdt can actually work.
> Drives don't have indexes and buses but we specify it on the -drive
> line. -drive is convenient syntax. It stops being convenient when you
> force it to be two options.
One more issue: -drive also mixes host and guest configuration. These
must be separated too.
cheers,
Gerd
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 13:03 ` Avi Kivity
@ 2009-06-15 13:20 ` Anthony Liguori
2009-06-15 13:35 ` Avi Kivity
0 siblings, 1 reply; 139+ messages in thread
From: Anthony Liguori @ 2009-06-15 13:20 UTC (permalink / raw)
To: Avi Kivity
Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Avi Kivity wrote:
> On 06/15/2009 03:45 PM, Anthony Liguori wrote:
>>>> This last option makes sense to me: in a real world the user has
>>>> control over where he places the device on the bus, so why
>>>> not with qemu?
>>>
>>> Yes, the user build the machine using the command line and monitor
>>> (or, in 2017, the machine configuration file),
>>
>>
>> Considering pbrook just posted a machine config for arm, I think it
>> would be rather sad if pc wasn't converted to it by 2017...
>
> I'd be sad too, but not surprised.
>
>>> then turns on the power. Command line options are the parts lying
>>> around when we start.
>>>
>>> btw, -drive needs to be separated:
>>>
>>> -controller type=lsi1234,pci_addr=foobar,name=blah
>>> -drive file=foo.img,controller=blah,index=0
>>> -drive file=bar.img,controller=blah,index=1
>>>
>>> Drives do not have pci addresses.
>>
>> Drives don't have indexes and buses but we specify it on the -drive
>> line.
>
> Drives do have indexes. On old parallel scsi drives you set the index
> by clicking a button on the back of the drive to cycle through scsi
> addresses 0-7. An IDE drive's index is determined by the cable
> (master/slave). A SATA drive's index is determined by which header on
> the motherboard the drive connects to.
It's not at all that simple. SCSI has a hierarchical address mechanism
with 0-7 targets but then potentially multiple LUNs per target. Today,
we always emulate a single LUN per target but if we ever wanted to
support more than 7 disks on a SCSI controller, we would have to add
multiple LUN support too. So the current linear unit= parameter is
actually pretty broken for SCSI.
For IDE, it's a combination of bus, slot, and master/slave. For virtio,
it's just a PCI address. What we really need is something that is more
opaque and controller specific. For instance, if we were going to do
controllers...
-controller type=lsi1234,pci_addr=foobar,name=blah
-controller-disk controller=blah,target=0,lun=1,name=sda
-controller type=ide,pci_addr=barfoo,name=ide
-controller-disk controller=ide,slot=secondary,cable=slave,name=hdd
-drive file=foo.img,controller-disk=sda
-drive file=bar.img,controller-disk=hdd
And having "-hdd file=foo.img" be short-hand for "-drive
file=%s,controller-disk=%s".
>
>
> If by bus you mean the if= parameter, then drives certainly do have
> buses. Just try connecting the scsi drive from the previous paragraph
> to a USB port.
No, I meant drive file=foo.img,bus=3. If that doesn't seem obvious what
it should do to you that's because it isn't at all obvious :-) It ends
up skipping a predefined number of locations in the drive table. This
is pretty broken fundamentally because it assumes controllers always
support a fixed number of devices. Nothing really respects bus_id
though so in practice, I assume it's almost universally broken.
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 13:09 ` Avi Kivity
@ 2009-06-15 13:23 ` Anthony Liguori
2009-06-15 13:42 ` Avi Kivity
0 siblings, 1 reply; 139+ messages in thread
From: Anthony Liguori @ 2009-06-15 13:23 UTC (permalink / raw)
To: Avi Kivity
Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Avi Kivity wrote:
> On 06/15/2009 03:52 PM, Anthony Liguori wrote:
>> Avi Kivity wrote:
>>> On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:
>>>> We should just tell the user which slots are open.
>>>> This might be tricky if the config is passed in with the command
>>>> line
>>>> flags.
>>>
>>> qemu -show-available-pci-slots
>>
>> Why does the user care?
>>
>> Let QEMU allocate the PCI slot, then query it to see what slot it
>> assigned and remember that.
>
> It's a roundabout way of doing things.
Having libvirt do PCI slot allocation scares me. It assumes we can
return a whitelist of available slots, and then let libvirt just
randomly assign things. There's knowledge though in slot assignment
that's board-specific. For instance, depending on how many LNK lines
you have, you may want to put things in slots in such a way to optimize
interrupt balancing or something like that.
Some platforms have quirks about expecting a particular slot to have a
particular device. It's still an optional device but it has to be in
that slot. You can't really express that via an available slot list.
> Non-virtio, as well. But we can't make that the default, so the user
> will have to specify this anyway.
>
> Given that you can't hotunplug individual functions, the user will
> have to specify exactly how functions are aggregated into devices. My
> recommendation would be for a GUI to allow the user to select a 'quad
> port virtio NIC' or 'dual port virtio scsi controller' rather than
> trying to do anything automatic.
Yeah, I haven't thought much about that.
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 13:12 ` Avi Kivity
@ 2009-06-15 13:24 ` Anthony Liguori
2009-06-15 13:43 ` Avi Kivity
0 siblings, 1 reply; 139+ messages in thread
From: Anthony Liguori @ 2009-06-15 13:24 UTC (permalink / raw)
To: Avi Kivity
Cc: Mark McLoughlin, Jamie Lokier, Michael S. Tsirkin, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Avi Kivity wrote:
>
> Certainly preferable to -baseline.
>
>> This is pretty easy to maintain with config files.
>
> Let's not tie the two together.
I mentioned it because it suggests a good transition. We at least have
to think through how things map to the post-config file world regardless
of whether that's a few months from now or a decade :-)
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 13:20 ` Anthony Liguori
@ 2009-06-15 13:35 ` Avi Kivity
2009-06-15 13:45 ` Anthony Liguori
0 siblings, 1 reply; 139+ messages in thread
From: Avi Kivity @ 2009-06-15 13:35 UTC (permalink / raw)
To: Anthony Liguori
Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On 06/15/2009 04:20 PM, Anthony Liguori wrote:
>>
>>>> then turns on the power. Command line options are the parts lying
>>>> around when we start.
>>>>
>>>> btw, -drive needs to be separated:
>>>>
>>>> -controller type=lsi1234,pci_addr=foobar,name=blah
>>>> -drive file=foo.img,controller=blah,index=0
>>>> -drive file=bar.img,controller=blah,index=1
>>>>
>>>> Drives do not have pci addresses.
>>>
>>> Drives don't have indexes and buses but we specify it on the -drive
>>> line.
>>
>> Drives do have indexes. On old parallel scsi drives you set the
>> index by clicking a button on the back of the drive to cycle through
>> scsi addresses 0-7. An IDE drive's index is determined by the cable
>> (master/slave). A SATA drive's index is determined by which header
>> on the motherboard the drive connects to.
>
>
> It's not at all that simple. SCSI has a hierarchical address
> mechanism with 0-7 targets but then potentially multiple LUNs per
> target. Today, we always emulate a single LUN per target but if we
> ever wanted to support more than 7 disks on a SCSI controller, we
> would have to add multiple LUN support too. So the current linear
> unit= parameter is actually pretty broken for SCSI.
Well, another level in the hierarchy, but I don't think it materially
changes things.
>
> For IDE, it's a combination of bus, slot, and master/slave. For
> virtio, it's just a PCI address. What we really need is something
> that is more opaque and controller specific.
virtio also has a bus (did you mean the pci bus for IDE?), master/slave
is the index. virtio doesn't have index, but IMO that was a mistake and
we should have designed it as a disk controller in the first place.
> For instance, if we were going to do controllers...
>
> -controller type=lsi1234,pci_addr=foobar,name=blah
> -controller-disk controller=blah,target=0,lun=1,name=sda
>
> -controller type=ide,pci_addr=barfoo,name=ide
> -controller-disk controller=ide,slot=secondary,cable=slave,name=hdd
>
> -drive file=foo.img,controller-disk=sda
> -drive file=bar.img,controller-disk=hdd
>
> And having "-hdd file=foo.img" be short-hand for "-drive
> file=%s,controller-disk=%s".
Yeah.
>>
>>
>> If by bus you mean the if= parameter, then drives certainly do have
>> buses. Just try connecting the scsi drive from the previous
>> paragraph to a USB port.
>
> No, I meant drive file=foo.img,bus=3. If that doesn't seem obvious
> what it should do to you that's because it isn't at all obvious :-)
> It ends up skipping a predefined number of locations in the drive
> table. This is pretty broken fundamentally because it assumes
> controllers always support a fixed number of devices. Nothing really
> respects bus_id though so in practice, I assume it's almost
> universally broken.
Isn't the drive table something totally internal? And how does bus=
relate to it?
Confused.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 13:23 ` Anthony Liguori
@ 2009-06-15 13:42 ` Avi Kivity
2009-06-15 13:51 ` Anthony Liguori
0 siblings, 1 reply; 139+ messages in thread
From: Avi Kivity @ 2009-06-15 13:42 UTC (permalink / raw)
To: Anthony Liguori
Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On 06/15/2009 04:23 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>> On 06/15/2009 03:52 PM, Anthony Liguori wrote:
>>> Avi Kivity wrote:
>>>> On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:
>>>>> We should just tell the user which slots are open.
>>>>> This might be tricky if the config is passed in with the command
>>>>> line
>>>>> flags.
>>>>
>>>> qemu -show-available-pci-slots
>>>
>>> Why does the user care?
>>>
>>> Let QEMU allocate the PCI slot, then query it to see what slot it
>>> assigned and remember that.
>>
>> It's a roundabout way of doing things.
>
> Having libvirt do PCI slot allocation scares me. It assumes we can
> return a whitelist of available slots, and then let libvirt just
> randomly assign things. There's knowledge though in slot assignment
> that's board-specific. For instance, depending on how many LNK lines
> you have, you may want to put things in slots in such a way to
> optimize interrupt balancing or something like that.
How would qemu know which slots to optimize for?
In practice, I don't see that as a real problem. We should (a) add an
ioapic and four more pci links (b) recommend that slots be assigned in
ascending order, and everything works.
I don't see your concern about libvirt allocating slots. If a human can
plug a card into a slot, so can libvirt. Doing an interactive
back-and-forth (equivalent to plugging a card while blindfolded, then
looking to see which slot we hit) is certainly more difficult.
> Some platforms have quirks about expecting a particular slot to have a
> particular device. It's still an optional device but it has to be in
> that slot. You can't really express that via an available slot list.
I'll be surprised if we ever measure different dma speeds on different
slots in the qemu virtual pci bus. If we do, we'll find a way to
express them:
$ qemu -print-pci
slot 0:01: available 33MHz
slot 0:02: available 33MHz
slot 0:03: available 66MHz
I feel a little silly typing this.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 13:24 ` Anthony Liguori
@ 2009-06-15 13:43 ` Avi Kivity
0 siblings, 0 replies; 139+ messages in thread
From: Avi Kivity @ 2009-06-15 13:43 UTC (permalink / raw)
To: Anthony Liguori
Cc: Mark McLoughlin, Jamie Lokier, Michael S. Tsirkin, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On 06/15/2009 04:24 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>>
>> Certainly preferable to -baseline.
>>
>>> This is pretty easy to maintain with config files.
>>
>> Let's not tie the two together.
>
> I mentioned it because it suggests a good transition. We at least
> have to think through how things map to the post-config file world
> regardless of whether that's a few months from now or a decade :-)
Sure, it's good both from the transitional point of view and in its own
right.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 13:35 ` Avi Kivity
@ 2009-06-15 13:45 ` Anthony Liguori
2009-06-15 13:54 ` Avi Kivity
0 siblings, 1 reply; 139+ messages in thread
From: Anthony Liguori @ 2009-06-15 13:45 UTC (permalink / raw)
To: Avi Kivity
Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Avi Kivity wrote:
> On 06/15/2009 04:20 PM, Anthony Liguori wrote:
>> It's not at all that simple. SCSI has a hierarchical address
>> mechanism with 0-7 targets but then potentially multiple LUNs per
>> target. Today, we always emulate a single LUN per target but if we
>> ever wanted to support more than 7 disks on a SCSI controller, we
>> would have to add multiple LUN support too. So the current linear
>> unit= parameter is actually pretty broken for SCSI.
>
> Well, another level in the hierarchy, but I don't think it materially
> changes things.
Depends on whether you expect to say index=0,lun=3 or index=3. If you
mean the latter, then it's quite conceivable that each target supports
less than the maximum number of LUNs. This makes things pretty
confusing to the user because they have to know that in the current
implementation, index=0 is valid, index=1 isn't, but index=8 is.
>> No, I meant drive file=foo.img,bus=3. If that doesn't seem obvious
>> what it should do to you that's because it isn't at all obvious :-)
>> It ends up skipping a predefined number of locations in the drive
>> table. This is pretty broken fundamentally because it assumes
>> controllers always support a fixed number of devices. Nothing really
>> respects bus_id though so in practice, I assume it's almost
>> universally broken.
>
> Isn't the drive table something totally internal? And how does bus=
> relate to it?
The reality of unit=X,bus=Y,if=Z is that they expand to:
drive_table_index=Y*max_devs[Z] + X
Where max_devs = {"ide":4, "scsi": 7, *:0}
How drive_table_index is interpreted is "if" specific. For if=scsi,
each lsi device gets a base drive table index that starts at bus_index *
7. For virtio, the first empty spot in drive_table results in no more
drives being created.
It's broken by design.
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 13:42 ` Avi Kivity
@ 2009-06-15 13:51 ` Anthony Liguori
2009-06-15 14:06 ` Dor Laor
0 siblings, 1 reply; 139+ messages in thread
From: Anthony Liguori @ 2009-06-15 13:51 UTC (permalink / raw)
To: Avi Kivity
Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Avi Kivity wrote:
> On 06/15/2009 04:23 PM, Anthony Liguori wrote:
>
> How would qemu know which slots to optimize for?
>
> In practice, I don't see that as a real problem. We should (a) add an
> ioapic and four more pci links (b) recommend that slots be assigned in
> ascending order, and everything works.
>
> I don't see your concern about libvirt allocating slots. If a human
> can plug a card into a slot, so can libvirt. Doing an interactive
> back-and-forth (equivalent to plugging a card while blindfolded, then
> looking to see which slot we hit) is certainly more difficult.
Let's take a concrete example because I think you missed my point. For
the r2d board, if you have 1 on-board NIC, it has to go in slot 2.
Additional NICs can go in any slot, but the primary on-board NIC is
expected to live in slot 2. It's possible to not have that on-board NIC.
If you let QEMU allocate which PCI slot a device goes in, we can hide
this detail from libvirt. If you have libvirt do PCI slot allocation by
default, it has to know about this restriction in the r2d board unless
you have a clever way to express this sort of information.
Once QEMU has allocated a device to a slot, libvirt can do a good job
maintaining that relationship.
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 13:45 ` Anthony Liguori
@ 2009-06-15 13:54 ` Avi Kivity
2009-06-15 15:07 ` Anthony Liguori
0 siblings, 1 reply; 139+ messages in thread
From: Avi Kivity @ 2009-06-15 13:54 UTC (permalink / raw)
To: Anthony Liguori
Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On 06/15/2009 04:45 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>> On 06/15/2009 04:20 PM, Anthony Liguori wrote:
>>> It's not at all that simple. SCSI has a hierarchical address
>>> mechanism with 0-7 targets but then potentially multiple LUNs per
>>> target. Today, we always emulate a single LUN per target but if we
>>> ever wanted to support more than 7 disks on a SCSI controller, we
>>> would have to add multiple LUN support too. So the current linear
>>> unit= parameter is actually pretty broken for SCSI.
>>
>> Well, another level in the hierarchy, but I don't think it materially
>> changes things.
>
> Depends on whether you expect to say index=0,lun=3 or index=3. If you
> mean the latter, then it's quite conceivable that each target supports
> less than the maximum number of LUNs. This makes things pretty
> confusing to the user because they have to know that in the current
> implementation, index=0 is valid, index=1 isn't, but index=8 is.
I'd object to any implicit addressing rules. If we have to say
target=2,lun=7,street=8,city=9,state=99,zip=12345 instead of
index=8345345235 so be it.
>>> No, I meant drive file=foo.img,bus=3. If that doesn't seem obvious
>>> what it should do to you that's because it isn't at all obvious :-)
>>> It ends up skipping a predefined number of locations in the drive
>>> table. This is pretty broken fundamentally because it assumes
>>> controllers always support a fixed number of devices. Nothing
>>> really respects bus_id though so in practice, I assume it's almost
>>> universally broken.
>>
>> Isn't the drive table something totally internal? And how does bus=
>> relate to it?
>
> The reality of unit=X,bus=Y,if=Z is that they expand to:
>
> drive_table_index=Y*max_devs[Z] + X
>
> Where max_devs = {"ide":4, "scsi": 7, *:0}
>
> How drive_table_index is interpreted is "if" specific. For if=scsi,
> each lsi device gets a base drive table index that starts at bus_index
> * 7. For virtio, the first empty spot in drive_table results in no
> more drives being created.
>
> It's broken by design.
Agreed. Pity that it's exposed to the poor users.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 12:48 ` Anthony Liguori
2009-06-15 13:12 ` Avi Kivity
@ 2009-06-15 14:00 ` Mark McLoughlin
2009-06-15 14:20 ` Anthony Liguori
1 sibling, 1 reply; 139+ messages in thread
From: Mark McLoughlin @ 2009-06-15 14:00 UTC (permalink / raw)
To: Anthony Liguori
Cc: Avi Kivity, Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On Mon, 2009-06-15 at 07:48 -0500, Anthony Liguori wrote:
> Avi Kivity wrote:
> > On 06/15/2009 12:09 PM, Mark McLoughlin wrote:
> >>>>> I think the point is that you don't need version numbers if you
> >>>>> have a
> >>>>> proper device tree.
> >>>>>
> >>>>>
> >>>> How do you add a new attribute to the device tree and, when a supplied
> >>>> device tree lacking said attribute, distinguish between a device tree
> >>>> from an old version of qemu (i.e. use the old default) and a partial
> >>>> device tree from the VM manager (i.e. use the new default) ?
> >>>>
> >>>>
> >>> -baseline 0.10
> >>>
> >>
> >> That's a version number :-)
> >>
> >> (I was responding to Anthony's "you don't need a version number")
> >>
> >
> > If you want to prevent incompatibilities, you need to make everything
> > new (potentially including bugfixes) non-default.
No need to punish new guests in order to maintain compatibility for old
guests.
> > Eventually the
> > default configuration becomes increasingly unusable and you need a new
> > baseline. You must still be able to fall back to the old baseline for
> > older guests. I don't think games with configuration files can hide
> > that.
>
> -M pc1
> -M pc2
>
> etc.
>
> This is pretty easy to maintain with config files.
I think this would be reasonable, but it is essentially just a version
number which you objected to on the basis that it would make
cherry-picking harder for distros.
One thing that would be nice with this '-M pc1' thing would be to retain
'-M pc' as a symlink to the latest version. We'd also need a way to read
the symlink too, so that you can query what the current latest version
is and use that in future.
How would this machine type version relate to e.g. changing the default
PCI class of virtio-blk? Would we bump the version number of all machine
types that can use virtio-blk?
A per-device version number is a workable alternative, but only with a
saveabi type file IMHO.
I've tried to summarise the options here:
https://fedoraproject.org/wiki/Features/KVM_Stable_Guest_ABI
Cheers,
Mark.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 13:51 ` Anthony Liguori
@ 2009-06-15 14:06 ` Dor Laor
2009-06-15 14:24 ` Anthony Liguori
0 siblings, 1 reply; 139+ messages in thread
From: Dor Laor @ 2009-06-15 14:06 UTC (permalink / raw)
To: Anthony Liguori
Cc: Avi Kivity, Carsten Otte, Rusty Russell, kvm, Mark McLoughlin,
Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Anthony Liguori wrote:
> Avi Kivity wrote:
>> On 06/15/2009 04:23 PM, Anthony Liguori wrote:
>>
>> How would qemu know which slots to optimize for?
>>
>> In practice, I don't see that as a real problem. We should (a) add
>> an ioapic and four more pci links (b) recommend that slots be
>> assigned in ascending order, and everything works.
>>
>> I don't see your concern about libvirt allocating slots. If a human
>> can plug a card into a slot, so can libvirt. Doing an interactive
>> back-and-forth (equivalent to plugging a card while blindfolded, then
>> looking to see which slot we hit) is certainly more difficult.
>
> Let's take a concrete example because I think you missed my point.
> For the r2d board, if you have 1 on-board NIC, it has to go in slot
> 2. Additional NICs can go in any slot, but the primary on-board NIC
> is expected to live in slot 2. It's possible to not have that
> on-board NIC.
Libvirt does not support r2d. I hope it won't start to support it.
We can have default values for these types of devices or something like
pci_addr=auto.
>
> If you let QEMU allocate which PCI slot a device goes in, we can hide
> this detail from libvirt. If you have libvirt do PCI slot allocation
> by default, it has to know about this restriction in the r2d board
> unless you have a clever way to express this sort of information.
>
> Once QEMU has allocated a device to a slot, libvirt can do a good job
> maintaining that relationship.
>
The end user should have a mechanism to control device slot positioning.
For example, if you have several pci devices, some
get a high rate of interrupts and some do not; if you want to optimize your
guest you should isolate the high-rate 'interesting' devices.
This is something the user will need to do. I agree that the default
behavior might be 'auto'
Also, while moving from one qemu version to another, you'll need to
represent the older behavior. -qemu-0.10 is not good enough
since there will be multiple versions in the future with multiple
distributions setting their defaults.
> Regards,
>
> Anthony Liguori
>
>
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 14:00 ` Mark McLoughlin
@ 2009-06-15 14:20 ` Anthony Liguori
2009-06-15 14:34 ` Michael S. Tsirkin
0 siblings, 1 reply; 139+ messages in thread
From: Anthony Liguori @ 2009-06-15 14:20 UTC (permalink / raw)
To: Mark McLoughlin
Cc: Avi Kivity, Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Mark McLoughlin wrote:
> On Mon, 2009-06-15 at 07:48 -0500, Anthony Liguori wrote:
>
>>> Eventually the
>>> default configuration becomes increasingly unusable and you need a new
>>> baseline. You must still be able to fall back to the old baseline for
>>> older guests. I don't think games with configuration files can hide
>>> that.
>>>
>> -M pc1
>> -M pc2
>>
>> etc.
>>
>> This is pretty easy to maintain with config files.
>>
>
> I think this would be reasonable, but it is essentially just a version
> number which you objected to on the basis that it would make
> cherry-picking harder for distros.
>
It doesn't have to be pc1, pc2. It could be pc-with-usb or
pc-with-balloon. If a distro cherry picks in such a way that their pc
is not a standard QEMU pc, they would add a new PC type that's specific
to their distro.
> One thing that would be nice with this '-M pc1' thing would be to retain
> '-M pc' as a symlink to the latest version. We'd also need a way to read
> the symlink too, so that you can query what the current latest version
> is and use that in future.
>
Another option is an explicit -M default which always uses the default
machine for the architecture. Likewise, we would need a way to query
what the default machine was for an architecture.
> How would this machine type version relate to e.g. changing the default
> PCI class of virtio-blk? Would we bump the version number of all machine
> types can use virtio-blk?
>
You would introduce a new machine type. For instance,
pc-virtio-class-other. The names don't have to look like that, I'm just
doing that to make a point. This may mean that you end up with dozens
of machine types but you preserve compatibility, which is a good thing.
Of course, the flip side is that you make preserving the machine config
the duty of the user and we don't maintain compatible machine types.
This won't work without a proper config file though so for now, we're
stuck maintaining machine types.
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints
2009-06-15 11:43 ` Avi Kivity
2009-06-15 11:59 ` Stefano Stabellini
2009-06-15 12:41 ` [Qemu-devel] " Markus Armbruster
@ 2009-06-15 14:23 ` Javier Guerra
2 siblings, 0 replies; 139+ messages in thread
From: Javier Guerra @ 2009-06-15 14:23 UTC (permalink / raw)
To: Avi Kivity
Cc: Markus Armbruster, Mark McLoughlin, Carsten Otte, kvm,
Michael S. Tsirkin, Glauber Costa, Rusty Russell, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook, Anthony Liguori,
qemu-devel
On Mon, Jun 15, 2009 at 6:43 AM, Avi Kivity<avi@redhat.com> wrote:
> (I'd be quite happy constructing the entire machine config on the command
> line, but I realize it's just me)
as a user-only (well, i'm a developer, but don't meddle in kernel
affairs since 0.99pl9); I also like that kvm is totally CLI-managed.
but migration-wise, i think it could be nicer if the 'origin' process
could send the config to the 'target' one. IOW: the -incoming flag
shouldn't need any other parameter, and the 'migrate' command should
send the whole hardware description before the CPU state, and fail
with a 'can't comply' message if the target complains.
of course, that's a simplification. for example, the 'target' process
should be able to respect some parameters, mostly the 'external'
descriptions, like storage pathnames, or '-net tap' ones.
--
Javier
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 14:06 ` Dor Laor
@ 2009-06-15 14:24 ` Anthony Liguori
2009-06-15 14:37 ` Michael S. Tsirkin
2009-06-15 15:05 ` Avi Kivity
0 siblings, 2 replies; 139+ messages in thread
From: Anthony Liguori @ 2009-06-15 14:24 UTC (permalink / raw)
To: dlaor
Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa, qemu-devel,
virtualization, Blue Swirl, Christian Borntraeger,
Michael S. Tsirkin, Avi Kivity, Paul Brook
Dor Laor wrote:
> Libvirt does not support r2d. I hope it won't start to support it.
It supports mips, sparc, and ppc machines now. I don't see why it
wouldn't support r2d. For ppcemb, I expect this same problem to occur.
This sort of restriction is going to be common with embedded boards.
> We can have default values for these types of devices or something
> like pci_addr=auto.
Why wouldn't libvirt always use pci_addr=auto? If the only argument for
having libvirt do pci slot allocation is error messages, can't we find a
nice way to allow libvirt to create friendly error messages when QEMU fails?
>> If you let QEMU allocate which PCI slot a device goes in, we can hide
>> this detail from libvirt. If you have libvirt do PCI slot allocation
>> by default, it has to know about this restriction in the r2d board
>> unless you have a clever way to express this sort of information.
>>
>> Once QEMU has allocated a device to a slot, libvirt can do a good job
>> maintaining that relationship.
>>
>
> The end user should have a mechanism to control device slot
> positioning. For example, if you have several pci devices, some
> get high rate of interrupts and some not, if you want to optimize you
> guest you should isolate the high rate 'interesting' devices.
> This is something the user will need to do. I agree that the default
> behavior might be 'auto'
I'm not at all arguing against pci_addr. I'm arguing about how libvirt
should use it with respect to the "genesis" use-case where libvirt has
no specific reason to choose one PCI slot over another. In that case,
I'm merely advocating that we want to let QEMU make the decision.
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 14:20 ` Anthony Liguori
@ 2009-06-15 14:34 ` Michael S. Tsirkin
2009-06-15 15:11 ` Anthony Liguori
0 siblings, 1 reply; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 14:34 UTC (permalink / raw)
To: Anthony Liguori
Cc: Mark McLoughlin, Avi Kivity, Jamie Lokier, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On Mon, Jun 15, 2009 at 09:20:00AM -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
>> On Mon, 2009-06-15 at 07:48 -0500, Anthony Liguori wrote:
>>
>>>> Eventually the default configuration becomes increasingly unusable
>>>> and you need a new baseline. You must still be able to fall back
>>>> to the old baseline for older guests. I don't think games with
>>>> configuration files can hide that.
>>>>
>>> -M pc1
>>> -M pc2
>>>
>>> etc.
>>>
>>> This is pretty easy to maintain with config files.
>>>
>>
>> I think this would be reasonable, but it is essentially just a version
>> number which you objected to on the basis that it would make
>> cherry-picking harder for distros.
>>
>
> It doesn't have to be pc1, pc2. It could be pc-with-usb or
> pc-with-balloon. If a distro cherry picks in such a way that their pc
> is not a standard QEMU pc, they would add a new PC type that's specific
> to their distro.
>
>> One thing that would be nice with this '-M pc1' thing would be to retain
>> '-M pc' as a symlink to the latest version. We'd also need a way to read
>> the symlink too, so that you can query what the current latest version
>> is and use that in future.
>>
>
> Another option is an explicit -M default which always uses the default
> machine for the architecture. Likewise, we would need a way to query
> what the default machine was for an architecture.
>
>> How would this machine type version relate to e.g. changing the default
>> PCI class of virtio-blk? Would we bump the version number of all machine
>> types can use virtio-blk?
>>
> You would introduce a new machine type. For instance,
> pc-virtio-class-other. The names don't have to look like that, I'm just
> doing that to make a point. This may mean that you end up with dozens
> of machine types but you preserve compatibility, which is a good thing.
And then pc-virtio-class-other-with-balloon-without-usb? Wouldn't it be
more straightforward to have capability bits which can be switched on
and off independently rather than trying to fit unrelated features into
a machine type? IMO it only seems more work at first, and QA gets a bit
nervous that they can't exhaustively test all options. But in the long
run it simplifies things as you don't have to set policy and invent
silly names.
> Of course, the flip side is that you make preserving the machine config
> the duty of the user and we don't maintain compatible machine types.
> This won't work without a proper config file though so for now, we're
> stuck maintaining machine types.
>
> Regards,
>
> Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 14:24 ` Anthony Liguori
@ 2009-06-15 14:37 ` Michael S. Tsirkin
2009-06-15 15:03 ` Anthony Liguori
2009-06-15 15:05 ` Avi Kivity
1 sibling, 1 reply; 139+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 14:37 UTC (permalink / raw)
To: Anthony Liguori
Cc: dlaor, Avi Kivity, Carsten Otte, Rusty Russell, kvm,
Mark McLoughlin, Glauber Costa, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On Mon, Jun 15, 2009 at 09:24:32AM -0500, Anthony Liguori wrote:
> Dor Laor wrote:
>> Libvirt does not support r2d. I hope it won't start to support it.
>
> It supports mips, sparc, and ppc machines now. I don't see why it
> wouldn't support r2d. For ppcemb, I expect this same problem to occur.
> This sort of restriction is going to be common with embedded boards.
>
>> We can have default values for these types of devices or something
>> like pci_addr=auto.
>
> Why wouldn't libvirt always use pci_addr=auto? If the only argument for
> having libvirt do pci slot allocation is error messages, can't we find a
> nice way to allow libvirt to create friendly error messages when QEMU
> fails?
>
>>> If you let QEMU allocate which PCI slot a device goes in, we can hide
>>> this detail from libvirt. If you have libvirt do PCI slot allocation
>>> by default, it has to know about this restriction in the r2d board
>>> unless you have a clever way to express this sort of information.
>>>
>>> Once QEMU has allocated a device to a slot, libvirt can do a good job
>>> maintaining that relationship.
>>>
>>
>> The end user should have a mechanism to control device slot
>> positioning. For example, if you have several pci devices, some
>> get high rate of interrupts and some not, if you want to optimize you
>> guest you should isolate the high rate 'interesting' devices.
>> This is something the user will need to do. I agree that the default
>> behavior might be 'auto'
>
> I'm not at all arguing against pci_addr. I'm arguing about how libvirt
> should use it with respect to the "genesis" use-case where libvirt has
> no specific reason to choose one PCI slot over another. In that case,
> I'm merely advocating that we want to let QEMU make the decision.
The allocation code could be moved out into a library, and libvirt could
link with it (ducks).
> Regards,
>
> Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 14:37 ` Michael S. Tsirkin
@ 2009-06-15 15:03 ` Anthony Liguori
2009-06-15 15:08 ` Daniel P. Berrange
0 siblings, 1 reply; 139+ messages in thread
From: Anthony Liguori @ 2009-06-15 15:03 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: dlaor, Avi Kivity, Carsten Otte, Rusty Russell, kvm,
Mark McLoughlin, Glauber Costa, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Michael S. Tsirkin wrote:
>> I'm not at all arguing against pci_addr. I'm arguing about how libvirt
>> should use it with respect to the "genesis" use-case where libvirt has
>> no specific reason to choose one PCI slot over another. In that case,
>> I'm merely advocating that we want to let QEMU make the decision.
>>
>
> The allocation code could be moved out into a library, and libvirt could
> link with it (ducks).
>
Why does libvirt want to do allocation?
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 14:24 ` Anthony Liguori
2009-06-15 14:37 ` Michael S. Tsirkin
@ 2009-06-15 15:05 ` Avi Kivity
2009-06-15 15:11 ` Anthony Liguori
2009-06-15 16:27 ` Mark McLoughlin
1 sibling, 2 replies; 139+ messages in thread
From: Avi Kivity @ 2009-06-15 15:05 UTC (permalink / raw)
To: Anthony Liguori
Cc: dlaor, Carsten Otte, Rusty Russell, kvm, Mark McLoughlin,
Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On 06/15/2009 05:24 PM, Anthony Liguori wrote:
> Dor Laor wrote:
>> Libvirt does not support r2d. I hope it won't start to support it.
>
> It supports mips, sparc, and ppc machines now. I don't see why it
> wouldn't support r2d. For ppcemb, I expect this same problem to
> occur. This sort of restriction is going to be common with embedded
> boards.
I expect these restrictions will have to be known by the management
application. Otherwise the users will try invalid configurations only
to receive errors when they launch them. GUIs exist to guide users, not
as an inefficient means of trial-and-error.
>
>> We can have default values for these types of devices or something
>> like pci_addr=auto.
>
> Why wouldn't libvirt always use pci_addr=auto? If the only argument
> for having libvirt do pci slot allocation is error messages, can't we
> find a nice way to allow libvirt to create friendly error messages
> when QEMU fails?
Error messages are not the only argument for pushing slot allocation to
management. See my previous messages on the topic.
>>> If you let QEMU allocate which PCI slot a device goes in, we can
>>> hide this detail from libvirt. If you have libvirt do PCI slot
>>> allocation by default, it has to know about this restriction in the
>>> r2d board unless you have a clever way to express this sort of
>>> information.
>>>
>>> Once QEMU has allocated a device to a slot, libvirt can do a good
>>> job maintaining that relationship.
>>>
>>
>> The end user should have a mechanism to control device slot
>> positioning. For example, if you have several pci devices, some
>> get high rate of interrupts and some not, if you want to optimize you
>> guest you should isolate the high rate 'interesting' devices.
>> This is something the user will need to do. I agree that the default
>> behavior might be 'auto'
>
> I'm not at all arguing against pci_addr. I'm arguing about how
> libvirt should use it with respect to the "genesis" use-case where
> libvirt has no specific reason to choose one PCI slot over another.
> In that case, I'm merely advocating that we want to let QEMU make the
> decision.
However this may end up, isn't it offtopic? Whatever we do we have to
support both pci_addr= and default placement, so we can push this
discussion to libvirt-devel and bid them godspeed.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 13:54 ` Avi Kivity
@ 2009-06-15 15:07 ` Anthony Liguori
2009-06-15 15:11 ` Avi Kivity
0 siblings, 1 reply; 139+ messages in thread
From: Anthony Liguori @ 2009-06-15 15:07 UTC (permalink / raw)
To: Avi Kivity
Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Avi Kivity wrote:
>
> I'd object to any implicit addressing rules. If we have to say
> target=2,lun=7,street=8,city=9,state=99,zip=12345 instead of
> index=8345345235 so be it.
The next observation is that while we expand the SCSI addressing, the
current proposal flattens the PCI hierarchy (i.e. pci_addr=00:01.0).
An alternative would be to either always expand or always flatten
addressing. I think the latter has a lot of merit. Consider:
-controller type=lsi1234,addr=00:01,name=blah
-controller-disk controller=blah,addr=00:01,name=sda
-controller type=ide,addr=00.02,name=ide
-controller-disk controller=ide,addr=3,name=hdd
-drive file=foo.img,controller-disk=sda
-drive file=bar.img,controller-disk=hdd
This means that addr's format depends on the parent device node which is
a bit less explicit than the previous example. However, it is much more
consistent and easier to implement. Basically, when adding a device to
its parent, you hand the parent the "addr" field and that lets you say
where you want to sit on the bus.
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 15:03 ` Anthony Liguori
@ 2009-06-15 15:08 ` Daniel P. Berrange
2009-06-15 15:12 ` Dor Laor
0 siblings, 1 reply; 139+ messages in thread
From: Daniel P. Berrange @ 2009-06-15 15:08 UTC (permalink / raw)
To: Anthony Liguori
Cc: Michael S. Tsirkin, dlaor, Avi Kivity, Carsten Otte,
Rusty Russell, kvm, Mark McLoughlin, Glauber Costa, qemu-devel,
virtualization, Blue Swirl, Christian Borntraeger, Paul Brook
On Mon, Jun 15, 2009 at 10:03:22AM -0500, Anthony Liguori wrote:
> Michael S. Tsirkin wrote:
> >>I'm not at all arguing against pci_addr. I'm arguing about how libvirt
> >>should use it with respect to the "genesis" use-case where libvirt has
> >>no specific reason to choose one PCI slot over another. In that case,
> >>I'm merely advocating that we want to let QEMU make the decision.
> >>
> >
> >The allocation code could be moved out into a library, and libvirt could
> >link with it (ducks).
> >
>
> Why does libvirt want to do allocation?
It doesn't want to. As Mark said, libvirt just wants to be able to ensure
a stable guest ABI, of which stable PCI addresses is one aspect. This does
not imply libvirt wants to allocate the PCI addresses, just that it wants
a way to keep them stable. All else being equal I'd rather libvirt wasn't
in the PCI address allocation business.
Regards,
Daniel
--
|: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :|
|: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :|
|: http://autobuild.org -o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 14:34 ` Michael S. Tsirkin
@ 2009-06-15 15:11 ` Anthony Liguori
0 siblings, 0 replies; 139+ messages in thread
From: Anthony Liguori @ 2009-06-15 15:11 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Mark McLoughlin, Avi Kivity, Jamie Lokier, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Michael S. Tsirkin wrote:
> And then pc-virtio-class-other-with-balloon-without-usb? Wouldn't it be
> more straightforward to have capability bits which can be switched on
> and off independently rather than trying to fit unrelated features into
> a machine type? IMO it only seems more work at first, and QA gets a bit
> nervious that they can't exhaustively test all options. But in the long
> run it simplifies things as you don't have to set policy and invent
> silly names.
>
We're strictly talking about default machine configs. That has nothing
to do with capabilities. You still need to know what the default set of
enabled capabilities were and keep track of that. All that I'm
suggesting is that we use the machine name to collapse the default set
of capabilities into something that libvirt can track.
The advantage of using something more opaque like that is that it
simplifies things for management tools as they don't have to keep track
of "capabilities" that we're adding. Heck, you could even do:
pc-00000034
Where "pc-%08x" % (capabilities) :-)
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 15:07 ` Anthony Liguori
@ 2009-06-15 15:11 ` Avi Kivity
2009-06-15 15:20 ` Anthony Liguori
0 siblings, 1 reply; 139+ messages in thread
From: Avi Kivity @ 2009-06-15 15:11 UTC (permalink / raw)
To: Anthony Liguori
Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On 06/15/2009 06:07 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>>
>> I'd object to any implicit addressing rules. If we have to say
>> target=2,lun=7,street=8,city=9,state=99,zip=12345 instead of
>> index=8345345235 so be it.
>
> The next observation is that while we expand the SCSI addressing, the
> current propose flattens the PCI hierarchy (i.e. pci_addr=00:01.0).
>
> An alternative would be to either always expand or always flatten
> addressing. I think the later has a lot of merit. Consider:
>
> -controller type=lsi1234,addr=00:01,name=blah
> -controller-disk controller=blah,addr=00:01,name=sda
>
> -controller type=ide,addr=00.02,name=ide
> -controller-disk controller=ide,addr=3,name=hdd
>
> -drive file=foo.img,controller-disk=sda
> -drive file=bar.img,controller-disk=hdd
>
> This means that addr's format depends on the parent device node which
> is a bit less explicit than the previous example. However, it is much
> more consistent and easier to implement. Basically, when adding a
> device to it's parent, you hand the parent the "addr" field and that
> lets you say where you want to sit on the bus.
I would prefer explicit names (pci_addr, lun, etc.) but would be okay
with generic names too.
There's value in sticking to well-understood names and address formats.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 15:05 ` Avi Kivity
@ 2009-06-15 15:11 ` Anthony Liguori
2009-06-15 16:27 ` Mark McLoughlin
1 sibling, 0 replies; 139+ messages in thread
From: Anthony Liguori @ 2009-06-15 15:11 UTC (permalink / raw)
To: Avi Kivity
Cc: dlaor, Carsten Otte, Rusty Russell, kvm, Mark McLoughlin,
Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Avi Kivity wrote:
> However this may end up, isn't it offtopic? Whatever we do we have to
> support both pci_addr= and default placement, so we can push this
> discussion to livirt-devel and bid them godspeed.
I'm not sure how we got here but yeah, let's table this part of the
discussion.
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 15:08 ` Daniel P. Berrange
@ 2009-06-15 15:12 ` Dor Laor
2009-06-15 15:15 ` Avi Kivity
2009-06-15 16:27 ` Mark McLoughlin
0 siblings, 2 replies; 139+ messages in thread
From: Dor Laor @ 2009-06-15 15:12 UTC (permalink / raw)
To: Daniel P. Berrange
Cc: Anthony Liguori, Michael S. Tsirkin, Avi Kivity, Carsten Otte,
Rusty Russell, kvm, Mark McLoughlin, Glauber Costa, qemu-devel,
virtualization, Blue Swirl, Christian Borntraeger, Paul Brook
Daniel P. Berrange wrote:
> On Mon, Jun 15, 2009 at 10:03:22AM -0500, Anthony Liguori wrote:
>
>> Michael S. Tsirkin wrote:
>>
>>>> I'm not at all arguing against pci_addr. I'm arguing about how libvirt
>>>> should use it with respect to the "genesis" use-case where libvirt has
>>>> no specific reason to choose one PCI slot over another. In that case,
>>>> I'm merely advocating that we want to let QEMU make the decision.
>>>>
>>>>
>>> The allocation code could be moved out into a library, and libvirt could
>>> link with it (ducks).
>>>
>>>
>> Why does libvirt want to do allocation?
>>
>
> It doesn't want to. As Mark said, libvirt just wants to be able to ensure
> a stable guest ABI, of which stable PCI addresses is one aspect. This does
> not imply libvirt wants to allocate the PCI addresses, just that it wants
> a way to keep them stable. All else being equal I'd rather libvirt wasn't
> in the PCI address allocation business.
>
It's not about what libvirt wants. It's about what will serve the end
user the most.
Apart from a stable guest ABI, end users need to have the option to control
the slot for
their devices, just like they have for physical machines. It's not a
theoretical discussion;
limiting issues with shared irq is one real life example.
Thanks, dor
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 15:12 ` Dor Laor
@ 2009-06-15 15:15 ` Avi Kivity
2009-06-16 18:32 ` Jamie Lokier
2009-06-15 16:27 ` Mark McLoughlin
1 sibling, 1 reply; 139+ messages in thread
From: Avi Kivity @ 2009-06-15 15:15 UTC (permalink / raw)
To: dlaor
Cc: Daniel P. Berrange, Anthony Liguori, Michael S. Tsirkin,
Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
Paul Brook
On 06/15/2009 06:12 PM, Dor Laor wrote:
>> It doesn't want to. As Mark said, libvirt just wants to be able to
>> ensure
>> a stable guest ABI, of which stable PCI addresses is one aspect. This
>> does
>> not imply libvirt wants to allocate the PCI addresses, just that it
>> wants
>> a way to keep them stable. All else being equal I'd rather libvirt
>> wasn't
>> in the PCI address allocation business.
>
>
> It's not about what libvirt wants. It's about what will serve the end
> user the most.
> Apart for stable guest ABI, end users need to have the option to
> control the slot for
> their devices. Just like them have for physical machines. It's not
> theoretical discussion,
> limiting issues with shared irq is one real life example.
>
Another issue is enumeration. Guests will present their devices in the
order they find them on the pci bus (of course enumeration is guest
specific). So if I have 2 virtio controllers the only way I can
distinguish between them is using their pci slots.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 15:11 ` Avi Kivity
@ 2009-06-15 15:20 ` Anthony Liguori
2009-06-15 15:26 ` Avi Kivity
0 siblings, 1 reply; 139+ messages in thread
From: Anthony Liguori @ 2009-06-15 15:20 UTC (permalink / raw)
To: Avi Kivity
Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Avi Kivity wrote:
> I would prefer explicit names (pci_addr, lun, etc.) but would be okay
> with generic names too.
I think having a generic address has a lot of value in terms of code
implementation. Otherwise, the valid options for -drive become
context-sensitive which is going to be annoying and error-prone. Some
sanity could be added by using addressing prefixes like addr=pci:00:01.0
or addr=scsi:0.3 but I'll leave that up to whoever takes this on.
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 15:20 ` Anthony Liguori
@ 2009-06-15 15:26 ` Avi Kivity
0 siblings, 0 replies; 139+ messages in thread
From: Avi Kivity @ 2009-06-15 15:26 UTC (permalink / raw)
To: Anthony Liguori
Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On 06/15/2009 06:20 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>> I would prefer explicit names (pci_addr, lun, etc.) but would be okay
>> with generic names too.
>
> I think having a generic address has a lot of value in terms of code
> implementation. Otherwise, the valid options for -drive become
> context-sensitive which is going to be annoying and error-prone. Some
> sanity could be added by using addressing prefixes like
> addr=pci:00:01.0 or addr=scsi:0.3 but I'll leave that up to whoever
> takes this on.
The code problems are easily solved by adding another level of
indirection. User confusion problems are only aggravated by additional
abstraction, though ("what do I put in addr=, here?").
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 15:05 ` Avi Kivity
2009-06-15 15:11 ` Anthony Liguori
@ 2009-06-15 16:27 ` Mark McLoughlin
2009-06-15 17:09 ` Avi Kivity
2009-06-15 18:12 ` Anthony Liguori
1 sibling, 2 replies; 139+ messages in thread
From: Mark McLoughlin @ 2009-06-15 16:27 UTC (permalink / raw)
To: Avi Kivity
Cc: Anthony Liguori, dlaor, Carsten Otte, Rusty Russell, kvm,
Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On Mon, 2009-06-15 at 18:05 +0300, Avi Kivity wrote:
> On 06/15/2009 05:24 PM, Anthony Liguori wrote:
> > Dor Laor wrote:
> >> Libvirt does not support r2d. I hope it won't start to support it.
> >
> > It supports mips, sparc, and ppc machines now. I don't see why it
> > wouldn't support r2d. For ppcemb, I expect this same problem to
> > occur. This sort of restriction is going to be common with embedded
> > boards.
>
> I expect these restrictions will have to be known by the management
> application. Otherwise the users will try invalid configurations only
> to receive errors when they launch them. GUIs exist to guide users, not
> as an inefficient means of trial-and-error.
So long as the restrictions would be known to the management app via
some "what slots are available" mechanism in qemu, that sounds fine.
> > I'm not at all arguing against pci_addr. I'm arguing about how
> > libvirt should use it with respect to the "genesis" use-case where
> > libvirt has no specific reason to choose one PCI slot over another.
> > In that case, I'm merely advocating that we want to let QEMU make the
> > decision.
>
> However this may end up, isn't it offtopic? Whatever we do we have to
> support both pci_addr= and default placement, so we can push this
> discussion to livirt-devel and bid them godspeed.
Presumably you're not proposing that qemu-devel completely ignore the
typical requirements of management apps?
You can push the discussion to libvirt-devel, and the conclusion would
most likely be:
"We can do slot allocation if you provide us with a way to query free
slots, or we can use qemu's default allocation if you provide us a
way to query the allocation.
We'd prefer the default allocation problem, but we don't really
care. Both require about the same amount of work for us."
libvirt was only mentioned in this thread as a concrete example of how
the suggested solutions would actually be used by management apps.
Cheers,
Mark.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 15:12 ` Dor Laor
2009-06-15 15:15 ` Avi Kivity
@ 2009-06-15 16:27 ` Mark McLoughlin
2009-06-15 17:13 ` Avi Kivity
1 sibling, 1 reply; 139+ messages in thread
From: Mark McLoughlin @ 2009-06-15 16:27 UTC (permalink / raw)
To: dlaor
Cc: Daniel P. Berrange, Anthony Liguori, Michael S. Tsirkin,
Avi Kivity, Carsten Otte, Rusty Russell, kvm, Glauber Costa,
qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
Paul Brook
On Mon, 2009-06-15 at 18:12 +0300, Dor Laor wrote:
> > It doesn't want to. As Mark said, libvirt just wants to be able to ensure
> > a stable guest ABI, of which stable PCI addresses is one aspect. This does
> > not imply libvirt wants to allocate the PCI addresses, just that it wants
> > a way to keep them stable. All else being equal I'd rather libvirt wasn't
> > in the PCI address allocation business.
> >
>
> It's not about what libvirt wants. It's about what will serve the end
> user the most.
Absolutely. And not just about what most helps end users of libvirt
based management apps, but also any app managing qemu.
> Apart from stable guest ABI, end users need to have the option to
> control the slot for their devices, just like they have for physical
> machines. It's not a theoretical discussion; limiting issues with shared
> irq is one real life example.
Providing end users with the *option* to choose PCI slots sounds like a
fine feature request for any management app.
Requiring all management apps to force end users to explicitly choose
PCI slots in order for slots to be stable is not so reasonable.
Cheers,
Mark.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 16:27 ` Mark McLoughlin
@ 2009-06-15 17:09 ` Avi Kivity
2009-06-15 18:12 ` Anthony Liguori
1 sibling, 0 replies; 139+ messages in thread
From: Avi Kivity @ 2009-06-15 17:09 UTC (permalink / raw)
To: Mark McLoughlin
Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa, qemu-devel,
virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
Anthony Liguori
[-- Attachment #1.1: Type: text/plain, Size: 1669 bytes --]
On 06/15/2009 07:27 PM, Mark McLoughlin wrote:
>> However this may end up, isn't it offtopic? Whatever we do we have to
>> support both pci_addr= and default placement, so we can push this
>> discussion to libvirt-devel and bid them godspeed.
>>
>
> Presumably you're not proposing that qemu-devel completely ignore the
> typical requirements of management apps?
>
We propose to allow both qemu-allocated slots and user-allocated slots,
so we're only ignoring the actual decision by the management tool
providers, not their requirements.
> You can push the discussion to libvirt-devel, and the conclusion would
> most likely be:
>
> "We can do slot allocation if you provide us with a way to query free
> slots, or we can use qemu's default allocation if you provide us a
> way to query the allocation.
>
> We'd prefer the default allocation problem, but we don't really
> care. Both require about the same amount of work for us."
>
Well, they'll find out if they try default allocation. It's traditional
to try all the complicated solutions before trying the simplest one, so
I guess we'll just have to let them.
> libvirt was only mentioned in this thread as a concrete example of how
> the suggested solutions would actually be used by management apps.
>
True, others will wind up doing things differently. In fact, I'm a
little surprised that libvirt is involved, since the place to do
inventory is in the management app itself (it's true that libvirt also
maintains its own database, so the line is blurred).
--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
[-- Attachment #1.2: Type: text/html, Size: 2316 bytes --]
[-- Attachment #2: Type: text/plain, Size: 184 bytes --]
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 16:27 ` Mark McLoughlin
@ 2009-06-15 17:13 ` Avi Kivity
0 siblings, 0 replies; 139+ messages in thread
From: Avi Kivity @ 2009-06-15 17:13 UTC (permalink / raw)
To: Mark McLoughlin
Cc: Carsten Otte, Daniel P. Berrange, kvm, Michael S. Tsirkin,
Glauber Costa, qemu-devel, virtualization, Blue Swirl,
Christian Borntraeger, Paul Brook, Anthony Liguori
[-- Attachment #1.1: Type: text/plain, Size: 749 bytes --]
On 06/15/2009 07:27 PM, Mark McLoughlin wrote:
>
> Providing end users with the *option* to choose PCI slots sounds like a
> fine feature request for any management app.
>
> Requiring all management apps to force end users to explicitly choose
> PCI slots in order for slots to be stable is not so reasonable.
>
Think any installer's partitioning utility. It will provide a default
placement and try to hide it from you. If you ask, it will let you
place the partitions yourself.
The management app is the end user's agent. When we push something
there, we allow it to choose something, or push the decision further up
to the user.
--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
[-- Attachment #1.2: Type: text/html, Size: 1135 bytes --]
[-- Attachment #2: Type: text/plain, Size: 184 bytes --]
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 16:27 ` Mark McLoughlin
2009-06-15 17:09 ` Avi Kivity
@ 2009-06-15 18:12 ` Anthony Liguori
2009-06-15 18:21 ` Avi Kivity
2009-06-16 12:14 ` Mark McLoughlin
1 sibling, 2 replies; 139+ messages in thread
From: Anthony Liguori @ 2009-06-15 18:12 UTC (permalink / raw)
To: Mark McLoughlin
Cc: Avi Kivity, dlaor, Carsten Otte, Rusty Russell, kvm,
Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Mark McLoughlin wrote:
> So long as the restrictions would be known to the management app via
> some "what slots are available" mechanism in qemu, that sounds fine.
>
I'm not sure a "what slots are available" mechanism is as straight
forward as has been claimed. It doesn't matter though because it's
orthogonal to the current proposal.
>>> I'm not at all arguing against pci_addr. I'm arguing about how
>>> libvirt should use it with respect to the "genesis" use-case where
>>> libvirt has no specific reason to choose one PCI slot over another.
>>> In that case, I'm merely advocating that we want to let QEMU make the
>>> decision.
>>>
>> However this may end up, isn't it offtopic? Whatever we do we have to
>> support both pci_addr= and default placement, so we can push this
>> discussion to libvirt-devel and bid them godspeed.
>>
>
> Presumably you're not proposing that qemu-devel completely ignore the
> typical requirements of management apps?
>
This is a happy case where the current proposals allow both usages to
occur. Which one libvirt chooses is up to it.
To summarize, I think we have:
1) Introduce addressing to all host device configurations
- Either in the canonical form "pci_addr=bus:dev.fn or target=3,lun=1"
or in flattened form "addr=bus:dev.fn or addr=target.lun". I prefer the
latter form but I think either would be acceptable.
2) Whenever the default machine type changes in a guest-visible way,
introduce a new machine type
- Use explicit versions in name: pc-v1, pc-v2 or use more descriptive
names pc-with-usb
- Easily transitions to device config files
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 18:12 ` Anthony Liguori
@ 2009-06-15 18:21 ` Avi Kivity
2009-06-15 18:24 ` Anthony Liguori
2009-06-15 18:44 ` Blue Swirl
2009-06-16 12:14 ` Mark McLoughlin
1 sibling, 2 replies; 139+ messages in thread
From: Avi Kivity @ 2009-06-15 18:21 UTC (permalink / raw)
To: Anthony Liguori
Cc: Mark McLoughlin, dlaor, Carsten Otte, Rusty Russell, kvm,
Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On 06/15/2009 09:12 PM, Anthony Liguori wrote:
>
> 2) Whenever the default machine type changes in a guest-visible way,
> introduce a new machine type
s/whenever/qemu stable release/
> - Use explicit versions in name: pc-v1, pc-v2
pc-qemu-0.10?
This is similar to a hardware vendor's model number (though they tend to
change components without changing model numbers, those naughty vendors)
> or use more descriptive names pc-with-usb
> - Easily transitions to device config files
Combinatorial explosion. Just use -usb.
--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 18:21 ` Avi Kivity
@ 2009-06-15 18:24 ` Anthony Liguori
2009-06-15 18:44 ` Blue Swirl
1 sibling, 0 replies; 139+ messages in thread
From: Anthony Liguori @ 2009-06-15 18:24 UTC (permalink / raw)
To: Avi Kivity
Cc: Mark McLoughlin, dlaor, Carsten Otte, Rusty Russell, kvm,
Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Avi Kivity wrote:
> On 06/15/2009 09:12 PM, Anthony Liguori wrote:
>>
>> 2) Whenever the default machine type changes in a guest-visible way,
>> introduce a new machine type
>
> s/whenever/qemu stable release/
>
>> - Use explicit versions in name: pc-v1, pc-v2
>
> pc-qemu-0.10?
>
> This is similar to a hardware vendor's model number (though they tend
> to change components without changing model numbers, those naughty
> vendors)
Yup, that makes a whole lot of sense.
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 18:21 ` Avi Kivity
2009-06-15 18:24 ` Anthony Liguori
@ 2009-06-15 18:44 ` Blue Swirl
2009-06-16 8:56 ` Avi Kivity
1 sibling, 1 reply; 139+ messages in thread
From: Blue Swirl @ 2009-06-15 18:44 UTC (permalink / raw)
To: Avi Kivity
Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, qemu-devel,
virtualization, Christian Borntraeger, Michael S. Tsirkin,
Paul Brook, Anthony Liguori
On 6/15/09, Avi Kivity <avi@redhat.com> wrote:
> On 06/15/2009 09:12 PM, Anthony Liguori wrote:
>
> >
> > 2) Whenever the default machine type changes in a guest-visible way,
> introduce a new machine type
> >
>
> s/whenever/qemu stable release/
>
>
> > - Use explicit versions in name: pc-v1, pc-v2
> >
>
> pc-qemu-0.10?
pc-2009.06? Or given the hardware, should that be pc-1997?
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 18:44 ` Blue Swirl
@ 2009-06-16 8:56 ` Avi Kivity
0 siblings, 0 replies; 139+ messages in thread
From: Avi Kivity @ 2009-06-16 8:56 UTC (permalink / raw)
To: Blue Swirl
Cc: Anthony Liguori, Mark McLoughlin, dlaor, Carsten Otte,
Rusty Russell, kvm, Glauber Costa, Michael S. Tsirkin, qemu-devel,
virtualization, Christian Borntraeger, Paul Brook
On 06/15/2009 09:44 PM, Blue Swirl wrote:
>> pc-qemu-0.10?
>>
>
> pc-2009.06? Or given the hardware, should that be pc-1997?
>
pc-qemu-0.10 has the obvious benefit of allowing people to immediately
know what's the oldest version of qemu that supports it.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 18:12 ` Anthony Liguori
2009-06-15 18:21 ` Avi Kivity
@ 2009-06-16 12:14 ` Mark McLoughlin
2009-06-16 12:28 ` Avi Kivity
1 sibling, 1 reply; 139+ messages in thread
From: Mark McLoughlin @ 2009-06-16 12:14 UTC (permalink / raw)
To: Anthony Liguori
Cc: Avi Kivity, dlaor, Carsten Otte, Rusty Russell, kvm,
Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On Mon, 2009-06-15 at 13:12 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > So long as the restrictions would be known to the management app via
> > some "what slots are available" mechanism in qemu, that sounds fine.
> >
>
> I'm not sure a "what slots are available" mechanism is as straight
> forward as has been claimed.
If qemu can't provide that information, then the management app does not
have sufficient information to do the slot allocation itself. In which
case, it must leave it up to qemu to do it.
> It doesn't matter though because it's orthogonal to the current proposal.
It is not orthogonal to solving the actual problem at hand, though -
i.e. how to allow management apps to provide stable PCI addresses.
> >>> I'm not at all arguing against pci_addr. I'm arguing about how
> >>> libvirt should use it with respect to the "genesis" use-case where
> >>> libvirt has no specific reason to choose one PCI slot over another.
> >>> In that case, I'm merely advocating that we want to let QEMU make the
> >>> decision.
> >>>
> >> However this may end up, isn't it offtopic? Whatever we do we have to
> >> support both pci_addr= and default placement, so we can push this
> >> discussion to libvirt-devel and bid them godspeed.
> >>
> >
> > Presumably you're not proposing that qemu-devel completely ignore the
> > typical requirements of management apps?
> >
>
> This is a happy case where the current proposals allow both usages to
> occur. Which one libvirt chooses is up to it.
>
> To summarize, I think we have:
>
> 1) Introduce addressing to all host device configurations
> - Either in the canonical form "pci_addr=bus:dev.fn or target=3,lun=1"
> or in flattened form "addr=bus:dev.fn or addr=target.lun". I prefer the
> latter form but I think either would be acceptable.
That helps, but it's not enough on its own.
The management app needs to figure out what addresses to pass either by:
a) Initially allowing qemu to do the address allocation, and
thereafter using those addresses - this requires some way to query
the addresses of devices
or b) Doing the initial address allocation itself - this requires some
way to query what slots are available.
> 2) Whenever the default machine type changes in a guest-visible way,
> introduce a new machine type
> - Use explicit versions in name: pc-v1, pc-v2 or use more descriptive
> names pc-with-usb
> - Easily transitions to device config files
To be clear - you're not proposing this is a solution to the "stable PCI
addresses" problem, are you? The main requirement is for the addresses
to stay stable even if the user adds/removes other devices.
This is a fine solution to the "stable guest ABI" problem ... assuming
there's some way of querying the current default machine type.
Cheers,
Mark.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-16 12:14 ` Mark McLoughlin
@ 2009-06-16 12:28 ` Avi Kivity
2009-06-16 12:39 ` Mark McLoughlin
0 siblings, 1 reply; 139+ messages in thread
From: Avi Kivity @ 2009-06-16 12:28 UTC (permalink / raw)
To: Mark McLoughlin
Cc: Anthony Liguori, dlaor, Carsten Otte, Rusty Russell, kvm,
Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On 06/16/2009 03:14 PM, Mark McLoughlin wrote:
> On Mon, 2009-06-15 at 13:12 -0500, Anthony Liguori wrote:
>
>> Mark McLoughlin wrote:
>>
>>> So long as the restrictions would be known to the management app via
>>> some "what slots are available" mechanism in qemu, that sounds fine.
>>>
>>>
>> I'm not sure a "what slots are available" mechanism is as straight
>> forward as has been claimed.
>>
>
> If qemu can't provide that information, then the management app does not
> have sufficient information to do the slot allocation itself. In which
> case, it must leave it up to qemu to do it.
>
A given -M machine will have well-known open slots (since it's an ABI),
same as it has rtl8139 and ne2000 cards. Worst case we hardcode those
numbers (gasp, faint).
>> It doesn't matter though because it's orthogonal to the current proposal.
>>
>
> It is not orthogonal to solving the actual problem at hand, though -
> i.e. how to allow management apps to provide stable PCI addresses.
>
It's part of the solution, but hardly the most difficult part.
> This is a fine solution to the "stable guest ABI" problem ... assuming
> there's some way of querying the current default machine type.
>
$ qemu -print-default-machine
or maybe
$ qemu -show default-machine
$ qemu -show pci-bus
$ qemu -show me a way out
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-16 12:28 ` Avi Kivity
@ 2009-06-16 12:39 ` Mark McLoughlin
2009-06-16 12:51 ` Avi Kivity
2009-06-16 18:44 ` Jamie Lokier
0 siblings, 2 replies; 139+ messages in thread
From: Mark McLoughlin @ 2009-06-16 12:39 UTC (permalink / raw)
To: Avi Kivity
Cc: Anthony Liguori, dlaor, Carsten Otte, Rusty Russell, kvm,
Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On Tue, 2009-06-16 at 15:28 +0300, Avi Kivity wrote:
> On 06/16/2009 03:14 PM, Mark McLoughlin wrote:
> > On Mon, 2009-06-15 at 13:12 -0500, Anthony Liguori wrote:
> >
> >> Mark McLoughlin wrote:
> >>
> >>> So long as the restrictions would be known to the management app via
> >>> some "what slots are available" mechanism in qemu, that sounds fine.
> >>>
> >>>
> >> I'm not sure a "what slots are available" mechanism is as straight
> >> forward as has been claimed.
> >>
> >
> > If qemu can't provide that information, then the management app does not
> > have sufficient information to do the slot allocation itself. In which
> > case, it must leave it up to qemu to do it.
> >
>
> A given -M machine will have well-known open slots (since it's an ABI),
> same as it has rtl8139 and ne2000 cards.
If they're so obviously well-known, I don't see how the query mechanism
would not be straightforward, which is the comment I was replying to.
> Worst case we hardcode those numbers (gasp, faint).
Maybe we can just add the open slots to the -help output. That'd be nice
and clean.
> >> It doesn't matter though because it's orthogonal to the current proposal.
> >>
> >
> > It is not orthogonal to solving the actual problem at hand, though -
> > i.e. how to allow management apps to provide stable PCI addresses.
> >
>
> It's part of the solution, but hardly the most difficult part.
Agree.
> > This is a fine solution to the "stable guest ABI" problem ... assuming
> > there's some way of querying the current default machine type.
> >
>
> $ qemu -print-default-machine
Or:
$ readlink /usr/share/qemu/machine-types/pc.dt
Cheers,
Mark.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-16 12:39 ` Mark McLoughlin
@ 2009-06-16 12:51 ` Avi Kivity
2009-06-16 18:44 ` Jamie Lokier
1 sibling, 0 replies; 139+ messages in thread
From: Avi Kivity @ 2009-06-16 12:51 UTC (permalink / raw)
To: Mark McLoughlin
Cc: Anthony Liguori, dlaor, Carsten Otte, Rusty Russell, kvm,
Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On 06/16/2009 03:39 PM, Mark McLoughlin wrote:
>> Worst case we hardcode those numbers (gasp, faint).
>>
>
> Maybe we can just add the open slots to the -help output. That'd be nice
> and clean.
>
Yeah, there's precedent too.
> Or:
>
> $ readlink /usr/share/qemu/machine-types/pc.dt
>
>
That works if you have exactly one qemu installed. It's best if qemu
itself is the entry point (qemu -print-device-tree).
Though I wouldn't want to inflict it upon the management application
writers.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-15 15:15 ` Avi Kivity
@ 2009-06-16 18:32 ` Jamie Lokier
2009-06-17 6:38 ` Avi Kivity
0 siblings, 1 reply; 139+ messages in thread
From: Jamie Lokier @ 2009-06-16 18:32 UTC (permalink / raw)
To: Avi Kivity
Cc: dlaor, Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
Rusty Russell, qemu-devel, virtualization, Blue Swirl,
Christian Borntraeger, Paul Brook, Mark McLoughlin
Avi Kivity wrote:
> Another issue is enumeration. Guests will present their devices in the
> order they find them on the pci bus (of course enumeration is guest
> specific). So if I have 2 virtio controllers the only way I can
> distinguish between them is using their pci slots.
virtio controllers really should have a user-suppliable string or UUID
to identify them to the guest. Don't they?
-- Jamie
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-12 17:31 ` Mark McLoughlin
2009-06-12 17:44 ` Blue Swirl
@ 2009-06-16 18:38 ` Jamie Lokier
1 sibling, 0 replies; 139+ messages in thread
From: Jamie Lokier @ 2009-06-16 18:38 UTC (permalink / raw)
To: Mark McLoughlin
Cc: Anthony Liguori, Michael S. Tsirkin, Carsten Otte, kvm,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity
Mark McLoughlin wrote:
> > After libvirt has done -drive file=foo... it should dump the machine
> > config and use that from then on.
>
> Right - libvirt then wouldn't be able to avoid the complexity of merging
> any future changes into the dumped machine config.
As long as qemu can accept a machine config _and_ -drive file=foo (and
monitor commands to add/remove devices), libvirt could merge by simply
calling qemu with whatever additional command line options or monitor
commands modify the config, then dump the new config.
That way, virtio would not have to deal with that complexity. It
would be written in one place: qemu.
Or better, a utility: qemu-machine-config.
-- Jamie
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-16 12:39 ` Mark McLoughlin
2009-06-16 12:51 ` Avi Kivity
@ 2009-06-16 18:44 ` Jamie Lokier
2009-06-17 8:33 ` Mark McLoughlin
1 sibling, 1 reply; 139+ messages in thread
From: Jamie Lokier @ 2009-06-16 18:44 UTC (permalink / raw)
To: Mark McLoughlin
Cc: Avi Kivity, Carsten Otte, dlaor, kvm, Michael S. Tsirkin,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Mark McLoughlin wrote:
> > Worst case we hardcode those numbers (gasp, faint).
>
> Maybe we can just add the open slots to the -help output. That'd be nice
> and clean.
Make them part of the machine configuration.
After all, they are part of the machine configuration, and ACPI, BIOS
etc. need to know about all the machine slots anyway.
Having said that, I prefer the idea that slot allocation is handled
either in Qemu, or in a separate utility called qemu-machine-config
(for working with machine configs), or in a library
libqemu-machine-config.so.
I particularly don't like the idea of arcane machine-dependent slot
allocation knowledge living in libvirt, because it needs to be in Qemu
anyway for non-libvirt users. No point in having two implementations
of something tricky and likely to have machine quirks, if one will do.
-- Jamie
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-16 18:32 ` Jamie Lokier
@ 2009-06-17 6:38 ` Avi Kivity
2009-06-17 11:51 ` Jamie Lokier
0 siblings, 1 reply; 139+ messages in thread
From: Avi Kivity @ 2009-06-17 6:38 UTC (permalink / raw)
To: Jamie Lokier
Cc: dlaor, Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
Rusty Russell, qemu-devel, virtualization, Blue Swirl,
Christian Borntraeger, Paul Brook, Mark McLoughlin
On 06/16/2009 09:32 PM, Jamie Lokier wrote:
> Avi Kivity wrote:
>
>> Another issue is enumeration. Guests will present their devices in the
>> order they find them on the pci bus (of course enumeration is guest
>> specific). So if I have 2 virtio controllers the only way I can
>> distinguish between them is using their pci slots.
>>
>
> virtio controllers really should have a user-suppliable string or UUID
> to identify them to the guest. Don't they?
>
virtio controllers don't exist. When they do, they may have a UUID or
not, but in either case guest infrastructure is in place for reporting
the PCI slot, not the UUID.
virtio disks do have a UUID. I don't think older versions of Windows
will use it though, so if you reorder your slots you'll see your drive
letters change. Same with Linux if you don't use udev by-uuid rules.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-16 18:44 ` Jamie Lokier
@ 2009-06-17 8:33 ` Mark McLoughlin
2009-06-17 9:03 ` Avi Kivity
0 siblings, 1 reply; 139+ messages in thread
From: Mark McLoughlin @ 2009-06-17 8:33 UTC (permalink / raw)
To: Jamie Lokier
Cc: Avi Kivity, Carsten Otte, dlaor, kvm, Michael S. Tsirkin,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On Tue, 2009-06-16 at 19:44 +0100, Jamie Lokier wrote:
> Mark McLoughlin wrote:
> > > Worst case we hardcode those numbers (gasp, faint).
> >
> > Maybe we can just add the open slots to the -help output. That'd be nice
> > and clean.
I was being sarcastic - libvirt currently must parse qemu -help, and
even has some test infrastructure to check that it works with various
versions of qemu. Extending this would not be nice and clean :-)
> I particularly don't like the idea of arcane machine-dependent slot
> allocation knowledge living in libvirt, because it needs to be in Qemu
> anyway for non-libvirt users. No point in having two implementations
> of something tricky and likely to have machine quirks, if one will do.
Indeed.
Cheers,
Mark.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-17 8:33 ` Mark McLoughlin
@ 2009-06-17 9:03 ` Avi Kivity
2009-06-17 9:18 ` Mark McLoughlin
0 siblings, 1 reply; 139+ messages in thread
From: Avi Kivity @ 2009-06-17 9:03 UTC (permalink / raw)
To: Mark McLoughlin
Cc: Jamie Lokier, Carsten Otte, dlaor, kvm, Michael S. Tsirkin,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On 06/17/2009 11:33 AM, Mark McLoughlin wrote:
>> I particularly don't like the idea of arcane machine-dependent slot
>> allocation knowledge living in libvirt, because it needs to be in Qemu
>> anyway for non-libvirt users. No point in having two implementations
>> of something tricky and likely to have machine quirks, if one will do.
>>
>
> Indeed.
>
I don't understand this. Management already has to allocate MAC
addresses, UUIDs, IDE interface and master/slave role, SCSI
LUNs/targets/whatever. It has to understand NUMA (if not do actual
allocation). Even if it doesn't allocate the slots, it has to be able
to query them so it can tell the user which NIC or controller is
connected where, or to do hotunplug. It has to understand that there is
a limitation on the number of slots, and know what that limitation is
(unless it feels that launching an overcommitted guest and showing an
error to the user is preferable to not allowing the user to overcommit
in the first place).
If you'll review my patent application for pci slot allocation, you'll
see the following line:
slot_nr = nb_allocated_slots++; /* Allocate pci slot */
while there is a lot of complicated setup code before that (see the
prior art section as well), I believe licensees could well implement the
algorithm in two short months, including testing.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-17 9:03 ` Avi Kivity
@ 2009-06-17 9:18 ` Mark McLoughlin
2009-06-17 9:26 ` Avi Kivity
0 siblings, 1 reply; 139+ messages in thread
From: Mark McLoughlin @ 2009-06-17 9:18 UTC (permalink / raw)
To: Avi Kivity
Cc: Jamie Lokier, Carsten Otte, dlaor, kvm, Michael S. Tsirkin,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On Wed, 2009-06-17 at 12:03 +0300, Avi Kivity wrote:
> On 06/17/2009 11:33 AM, Mark McLoughlin wrote:
> >> I particularly don't like the idea of arcane machine-dependent slot
> >> allocation knowledge living in libvirt, because it needs to be in Qemu
> >> anyway for non-libvirt users. No point in having two implementations
> >> of something tricky and likely to have machine quirks, if one will do.
> >
> > Indeed.
>
> I don't understand this.
Take note of the "arcane machine-dependent slot allocation knowledge"
bit.
If the algorithm for management apps is as simple as "query qemu for
available slots and sequentially allocate slots", then that's perfectly
fine.
If management apps need to hard-code which slots are available on
different targets and different qemu versions, or restrictions on which
devices can use which slots, or knowledge that some devices can be
multi-function, or ... anything like that is just lame.
Cheers,
Mark.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-17 9:18 ` Mark McLoughlin
@ 2009-06-17 9:26 ` Avi Kivity
2009-06-17 11:58 ` Jamie Lokier
0 siblings, 1 reply; 139+ messages in thread
From: Avi Kivity @ 2009-06-17 9:26 UTC (permalink / raw)
To: Mark McLoughlin
Cc: Jamie Lokier, Carsten Otte, dlaor, kvm, Michael S. Tsirkin,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
On 06/17/2009 12:18 PM, Mark McLoughlin wrote:
> On Wed, 2009-06-17 at 12:03 +0300, Avi Kivity wrote:
>
>> On 06/17/2009 11:33 AM, Mark McLoughlin wrote:
>>
>>>> I particularly don't like the idea of arcane machine-dependent slot
>>>> allocation knowledge living in libvirt, because it needs to be in Qemu
>>>> anyway for non-libvirt users. No point in having two implementations
>>>> of something tricky and likely to have machine quirks, if one will do.
>>>>
>>> Indeed.
>>>
>> I don't understand this.
>>
>
> Take note of the "arcane machine-dependent slot allocation knowledge"
> bit.
>
> If the algorithm for management apps is as simple as "query qemu for
> available slots and sequentially allocate slots", then that's perfectly
> fine.
>
That's the thinking.
> If management apps need to hard-code which slots are available on
> different targets and different qemu versions, or restrictions on which
> devices can use which slots, or knowledge that some devices can be
> multi-function, or ... anything like that is just lame.
>
You can't abstract these things away. If you can't put a NIC in slot 4,
and you have 7 slots, then you cannot have 7 NICs. Having qemu allocate
the slot numbers does not absolve management from knowing this
limitation and preventing the user from creating a machine with 7 slots.
Likewise, management will have to know which devices are multi-function,
since that affects their hotpluggability. Ditto if some slot is faster
than others, if you want to make use of this information you have to let
the upper layers know.
It could be done using an elaborate machine description that qemu
exposes to management coupled with a constraint solver that optimizes
the machine layout according to user specifications and hardware
limitations. Or we could take the view that real life is not perfect
(especially where computers are involved), add some machine specific
knowledge, and spend the rest of the summer at the beach.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-17 6:38 ` Avi Kivity
@ 2009-06-17 11:51 ` Jamie Lokier
0 siblings, 0 replies; 139+ messages in thread
From: Jamie Lokier @ 2009-06-17 11:51 UTC (permalink / raw)
To: Avi Kivity
Cc: dlaor, Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
Rusty Russell, qemu-devel, virtualization, Blue Swirl,
Christian Borntraeger, Paul Brook, Mark McLoughlin
Avi Kivity wrote:
> On 06/16/2009 09:32 PM, Jamie Lokier wrote:
> >Avi Kivity wrote:
> >
> >>Another issue is enumeration. Guests will present their devices in the
> >>order they find them on the pci bus (of course enumeration is guest
> >>specific). So if I have 2 virtio controllers the only way I can
> >>distinguish between them is using their pci slots.
> >
> >virtio controllers really should have a user-suppliable string or UUID
> >to identify them to the guest. Don't they?
>
> virtio controllers don't exist. When they do, they may have a UUID or
> not, but in either case guest infrastructure is in place for reporting
> the PCI slot, not the UUID.
>
> virtio disks do have a UUID. I don't think older versions of Windows
> will use it though, so if you reorder your slots you'll see your drive
> letters change. Same with Linux if you don't use udev by-uuid rules.
I guess I meant virtio disks, so that's ok.
-- Jamie
^ permalink raw reply [flat|nested] 139+ messages in thread
* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
2009-06-17 9:26 ` Avi Kivity
@ 2009-06-17 11:58 ` Jamie Lokier
0 siblings, 0 replies; 139+ messages in thread
From: Jamie Lokier @ 2009-06-17 11:58 UTC (permalink / raw)
To: Avi Kivity
Cc: Mark McLoughlin, Carsten Otte, dlaor, kvm, Michael S. Tsirkin,
Glauber Costa, Rusty Russell, qemu-devel, virtualization,
Blue Swirl, Christian Borntraeger, Paul Brook
Avi Kivity wrote:
> >If management apps need to hard-code which slots are available on
> >different targets and different qemu versions, or restrictions on which
> >devices can use which slots, or knowledge that some devices can be
> >multi-function, or ... anything like that is just lame.
> >
>
> You can't abstract these things away. If you can't put a NIC in slot 4,
> and you have 7 slots, then you cannot have 7 NICs. Having qemu allocate
> the slot numbers does not absolve management from knowing this
> limitation and preventing the user from creating a machine with 7 slots.
>
> Likewise, management will have to know which devices are multi-function,
> since that affects their hotpluggability. Ditto if some slot is faster
> than others, if you want to make use of this information you have to let
> the upper layers know.
>
> It could be done using an elaborate machine description that qemu
> exposes to management coupled with a constraint solver that optimizes
> the machine layout according to user specifications and hardware
> limitations. Or we could take the view that real life is not perfect
> (especially where computers are involved), add some machine specific
> knowledge, and spend the rest of the summer at the beach.
To be honest, an elaborate machine description is probably fine...
A fancy constraint solver is not required. A simple one strikes me as
about as simple as what you'd hard-code anyway, but with fewer special
cases.
Note that the result can fail due to things like insufficient address
space for all the device BARs even when they _are_ in the right slots.
Especially if there are lots of slots, or bridges which can provide
unlimited slots.
That is arcane: device-dependent, CPU-dependent, machine-dependent,
RAM-size dependent (in a non-linear way), device-option-dependent and
probably QEMU-version-dependent too.
It would be nice if libvirt (et al) would prevent the user from
creating a VM with insufficient BAR space for that machine, but I'm
not sure how to do it sanely, without arcane knowledge getting about.
Maybe that idea of a .so shared by qemu and libvirt, to manipulate
device configurations, is a sane one after all.
-- Jamie
^ permalink raw reply [flat|nested] 139+ messages in thread
end of thread, other threads:[~2009-06-17 11:58 UTC | newest]
Thread overview: 139+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <cover.1244192535.git.mst@redhat.com>
2009-06-05 10:22 ` [PATCHv3 01/13] qemu: make default_write_config use mask table Michael S. Tsirkin
2009-06-05 10:23 ` [PATCHv3 02/13] qemu: capability bits in pci save/restore Michael S. Tsirkin
2009-06-05 10:23 ` [PATCHv3 03/13] qemu: add routines to manage PCI capabilities Michael S. Tsirkin
2009-06-09 17:11 ` [Qemu-devel] " Glauber Costa
2009-06-10 9:54 ` Michael S. Tsirkin
2009-06-10 14:55 ` Glauber Costa
2009-06-10 15:01 ` Michael S. Tsirkin
2009-06-10 15:24 ` Paul Brook
2009-06-10 15:50 ` Michael S. Tsirkin
2009-06-10 17:43 ` Jamie Lokier
2009-06-10 18:22 ` Michael S. Tsirkin
2009-06-10 19:27 ` Jamie Lokier
2009-06-12 8:43 ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Mark McLoughlin
2009-06-12 13:59 ` Michael S. Tsirkin
2009-06-12 14:48 ` Mark McLoughlin
2009-06-12 14:51 ` Anthony Liguori
2009-06-12 15:41 ` Mark McLoughlin
2009-06-12 16:11 ` Anthony Liguori
2009-06-12 16:48 ` Mark McLoughlin
2009-06-12 17:00 ` Anthony Liguori
2009-06-12 17:31 ` Mark McLoughlin
2009-06-12 17:44 ` Blue Swirl
2009-06-12 17:55 ` Mark McLoughlin
2009-06-16 18:38 ` Jamie Lokier
2009-06-14 9:50 ` Michael S. Tsirkin
2009-06-15 9:08 ` Mark McLoughlin
2009-06-15 9:27 ` Avi Kivity
2009-06-15 10:32 ` Michael S. Tsirkin
2009-06-15 10:44 ` Gleb Natapov
2009-06-15 10:46 ` Michael S. Tsirkin
2009-06-15 10:52 ` Gleb Natapov
2009-06-15 11:07 ` Michael S. Tsirkin
2009-06-15 11:14 ` Gleb Natapov
2009-06-15 11:34 ` Michael S. Tsirkin
2009-06-15 11:27 ` Avi Kivity
2009-06-15 11:48 ` Michael S. Tsirkin
2009-06-15 11:56 ` Avi Kivity
2009-06-15 12:41 ` Michael S. Tsirkin
2009-06-15 12:50 ` Avi Kivity
2009-06-15 12:52 ` Anthony Liguori
2009-06-15 13:09 ` Avi Kivity
2009-06-15 13:23 ` Anthony Liguori
2009-06-15 13:42 ` Avi Kivity
2009-06-15 13:51 ` Anthony Liguori
2009-06-15 14:06 ` Dor Laor
2009-06-15 14:24 ` Anthony Liguori
2009-06-15 14:37 ` Michael S. Tsirkin
2009-06-15 15:03 ` Anthony Liguori
2009-06-15 15:08 ` Daniel P. Berrange
2009-06-15 15:12 ` Dor Laor
2009-06-15 15:15 ` Avi Kivity
2009-06-16 18:32 ` Jamie Lokier
2009-06-17 6:38 ` Avi Kivity
2009-06-17 11:51 ` Jamie Lokier
2009-06-15 16:27 ` Mark McLoughlin
2009-06-15 17:13 ` Avi Kivity
2009-06-15 15:05 ` Avi Kivity
2009-06-15 15:11 ` Anthony Liguori
2009-06-15 16:27 ` Mark McLoughlin
2009-06-15 17:09 ` Avi Kivity
2009-06-15 18:12 ` Anthony Liguori
2009-06-15 18:21 ` Avi Kivity
2009-06-15 18:24 ` Anthony Liguori
2009-06-15 18:44 ` Blue Swirl
2009-06-16 8:56 ` Avi Kivity
2009-06-16 12:14 ` Mark McLoughlin
2009-06-16 12:28 ` Avi Kivity
2009-06-16 12:39 ` Mark McLoughlin
2009-06-16 12:51 ` Avi Kivity
2009-06-16 18:44 ` Jamie Lokier
2009-06-17 8:33 ` Mark McLoughlin
2009-06-17 9:03 ` Avi Kivity
2009-06-17 9:18 ` Mark McLoughlin
2009-06-17 9:26 ` Avi Kivity
2009-06-17 11:58 ` Jamie Lokier
2009-06-15 11:35 ` Configuration vs. compat hints Markus Armbruster
2009-06-15 11:43 ` Avi Kivity
2009-06-15 11:59 ` Stefano Stabellini
2009-06-15 12:41 ` [Qemu-devel] " Markus Armbruster
2009-06-15 12:50 ` Anthony Liguori
2009-06-15 14:23 ` Javier Guerra
2009-06-15 12:41 ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Anthony Liguori
2009-06-15 12:55 ` Avi Kivity
2009-06-15 13:04 ` Configuration vs. compat hints Markus Armbruster
2009-06-15 9:43 ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Avi Kivity
2009-06-15 10:29 ` Michael S. Tsirkin
2009-06-15 12:45 ` Anthony Liguori
2009-06-15 13:03 ` Avi Kivity
2009-06-15 13:20 ` Anthony Liguori
2009-06-15 13:35 ` Avi Kivity
2009-06-15 13:45 ` Anthony Liguori
2009-06-15 13:54 ` Avi Kivity
2009-06-15 15:07 ` Anthony Liguori
2009-06-15 15:11 ` Avi Kivity
2009-06-15 15:20 ` Anthony Liguori
2009-06-15 15:26 ` Avi Kivity
2009-06-15 13:17 ` Gerd Hoffmann
2009-06-14 7:55 ` Avi Kivity
2009-06-12 14:55 ` Anthony Liguori
2009-06-12 15:53 ` Mark McLoughlin
2009-06-12 16:12 ` Anthony Liguori
2009-06-12 16:48 ` Mark McLoughlin
2009-06-14 7:58 ` Avi Kivity
2009-06-15 5:32 ` Configuration vs. compat hints Markus Armbruster
2009-06-15 9:09 ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Mark McLoughlin
2009-06-15 11:32 ` Avi Kivity
2009-06-15 12:48 ` Anthony Liguori
2009-06-15 13:12 ` Avi Kivity
2009-06-15 13:24 ` Anthony Liguori
2009-06-15 13:43 ` Avi Kivity
2009-06-15 14:00 ` Mark McLoughlin
2009-06-15 14:20 ` Anthony Liguori
2009-06-15 14:34 ` Michael S. Tsirkin
2009-06-15 15:11 ` Anthony Liguori
2009-06-14 9:34 ` Michael S. Tsirkin
2009-06-14 9:37 ` Avi Kivity
2009-06-14 9:47 ` Michael S. Tsirkin
2009-06-15 9:38 ` Avi Kivity
2009-06-15 9:02 ` Mark McLoughlin
2009-06-05 10:23 ` [PATCHv3 04/13] qemu: helper routines for pci access Michael S. Tsirkin
2009-06-05 10:23 ` [PATCHv3 05/13] qemu: MSI-X support functions Michael S. Tsirkin
2009-06-09 17:26 ` [Qemu-devel] " Glauber Costa
2009-06-10 9:58 ` Michael S. Tsirkin
2009-06-05 10:23 ` [PATCHv3 06/13] qemu: add flag to disable MSI-X by default Michael S. Tsirkin
2009-06-05 10:23 ` [PATCHv3 07/13] qemu: minimal MSI/MSI-X implementation for PC Michael S. Tsirkin
2009-06-09 17:33 ` [Qemu-devel] " Glauber Costa
2009-06-10 9:59 ` Michael S. Tsirkin
2009-06-05 10:23 ` [PATCHv3 08/13] qemu: add support for resizing regions Michael S. Tsirkin
2009-06-09 17:36 ` [Qemu-devel] " Glauber Costa
2009-06-10 10:05 ` Michael S. Tsirkin
2009-06-10 10:46 ` Michael S. Tsirkin
2009-06-05 10:24 ` [PATCHv3 09/13] qemu: virtio support for many interrupt vectors Michael S. Tsirkin
2009-06-05 10:24 ` [PATCHv3 10/13] qemu: MSI-X support in virtio PCI Michael S. Tsirkin
2009-06-05 10:24 ` [PATCHv3 11/13] qemu: request 3 vectors in virtio-net Michael S. Tsirkin
2009-06-05 10:24 ` [PATCHv3 12/13] qemu: virtio save/load bindings Michael S. Tsirkin
2009-06-09 17:45 ` [Qemu-devel] " Glauber Costa
2009-06-10 10:11 ` Michael S. Tsirkin
2009-06-10 11:33 ` Michael S. Tsirkin
2009-06-05 10:24 ` [PATCHv3 13/13] qemu: add pci_get/set_byte Michael S. Tsirkin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).