From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from [140.186.70.92] (port=38593 helo=eggs.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1PiEMf-0000L8-7S for qemu-devel@nongnu.org; Wed, 26 Jan 2011 18:01:27 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1PiEMd-0000i9-35 for qemu-devel@nongnu.org; Wed, 26 Jan 2011 18:01:25 -0500 Received: from mail-qw0-f45.google.com ([209.85.216.45]:41799) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1PiEMc-0000i3-Tn for qemu-devel@nongnu.org; Wed, 26 Jan 2011 18:01:23 -0500 Received: by qwk4 with SMTP id 4so1492349qwk.4 for ; Wed, 26 Jan 2011 15:01:22 -0800 (PST) Message-ID: <4D40A7B9.1040404@codemonkey.ws> Date: Wed, 26 Jan 2011 17:01:13 -0600 From: Anthony Liguori MIME-Version: 1.0 Subject: Re: [Qemu-devel] [PATCH V9 07/16] xen: Add the Xen platform pci device References: <1295965760-31508-1-git-send-email-anthony.perard@citrix.com> <1295965760-31508-8-git-send-email-anthony.perard@citrix.com> In-Reply-To: <1295965760-31508-8-git-send-email-anthony.perard@citrix.com> Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: anthony.perard@citrix.com Cc: Xen Devel , QEMU-devel , Stefano Stabellini , Steven Smith On 01/25/2011 08:29 AM, anthony.perard@citrix.com wrote: > From: Steven Smith > > Introduce a new emulated PCI device, specific to fully virtualized Xen > guests. The device is necessary for PV on HVM drivers to work. > > Signed-off-by: Steven Smith > Signed-off-by: Anthony PERARD > Signed-off-by: Stefano Stabellini > --- > Makefile.target | 1 + > hw/hw.h | 3 + > hw/pci_ids.h | 2 + > hw/xen_machine_fv.c | 3 + > hw/xen_platform.c | 427 +++++++++++++++++++++++++++++++++++++++++++++++++++ > hw/xen_platform.h | 8 + > 6 files changed, 444 insertions(+), 0 deletions(-) > create mode 100644 hw/xen_platform.c > create mode 100644 hw/xen_platform.h > > diff --git a/Makefile.target b/Makefile.target > index 8b5fe36..8126da9 100644 > --- a/Makefile.target > +++ b/Makefile.target > @@ -218,6 +218,7 @@ obj-i386-$(CONFIG_XEN) += xen_machine_pv.o xen_domainbuild.o > > # xen full virtualized machine > obj-i386-$(CONFIG_XEN) += xen_machine_fv.o > +obj-i386-$(CONFIG_XEN) += xen_platform.o > > # Inter-VM PCI shared memory > obj-$(CONFIG_KVM) += ivshmem.o > diff --git a/hw/hw.h b/hw/hw.h > index dd993de..298df31 100644 > --- a/hw/hw.h > +++ b/hw/hw.h > @@ -672,6 +672,9 @@ extern const VMStateDescription vmstate_i2c_slave; > #define VMSTATE_INT32_LE(_f, _s) \ > VMSTATE_SINGLE(_f, _s, 0, vmstate_info_int32_le, int32_t) > > +#define VMSTATE_UINT8_TEST(_f, _s, _t) \ > + VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint8, uint8_t) > + > #define VMSTATE_UINT16_TEST(_f, _s, _t) \ > VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint16, uint16_t) > > diff --git a/hw/pci_ids.h b/hw/pci_ids.h > index ea3418c..6e9eabc 100644 > --- a/hw/pci_ids.h > +++ b/hw/pci_ids.h > @@ -108,3 +108,5 @@ > #define PCI_DEVICE_ID_INTEL_82371AB 0x7111 > #define PCI_DEVICE_ID_INTEL_82371AB_2 0x7112 > #define PCI_DEVICE_ID_INTEL_82371AB_3 0x7113 > + > +#define PCI_VENDOR_ID_XENSOURCE 0x5853 > diff --git a/hw/xen_machine_fv.c b/hw/xen_machine_fv.c > index 657c1e8..2f51625 100644 > --- a/hw/xen_machine_fv.c > +++ b/hw/xen_machine_fv.c > @@ -36,6 +36,7 @@ > > #include "xen_common.h" > #include "xen/hvm/hvm_info_table.h" > +#include "xen_platform.h" > > #define MAX_IDE_BUS 2 > > @@ -89,6 +90,8 @@ static void xen_init_fv(ram_addr_t ram_size, > > pc_vga_init(pci_bus); > > + pci_xen_platform_init(pci_bus); > + > /* init basic PC hardware */ > pc_basic_device_init(isa_irq,&floppy_controller,&rtc_state); > > diff --git a/hw/xen_platform.c b/hw/xen_platform.c > new file mode 100644 > index 0000000..abfcdb7 > --- /dev/null > +++ b/hw/xen_platform.c > @@ -0,0 +1,427 @@ > +/* > + * XEN platform pci device, formerly known as the event channel device > + * > + * Copyright (c) 2003-2004 Intel Corp. > + * Copyright (c) 2006 XenSource > + * > + * Permission is hereby granted, free of charge, to any person obtaining a copy > + * of this software and associated documentation files (the "Software"), to deal > + * in the Software without restriction, including without limitation the rights > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > + * copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN > + * THE SOFTWARE. > + */ > + > +#include "hw.h" > +#include "pc.h" > +#include "pci.h" > +#include "irq.h" > +#include "xen_common.h" > +#include "net.h" > +#include "xen_platform.h" > +#include "xen_backend.h" > +#include "qemu-log.h" > +#include "rwhandler.h" > + > +#include > +#include > + > +//#define DEBUG_PLATFORM > + > +#ifdef DEBUG_PLATFORM > +#define DPRINTF(fmt, ...) do { \ > + fprintf(stderr, "xen_platform: " fmt, ## __VA_ARGS__); \ > +} while (0) > +#else > +#define DPRINTF(fmt, ...) do { } while (0) > +#endif > + > +#define PFFLAG_ROM_LOCK 1 /* Sets whether ROM memory area is RW or RO */ > + > +typedef struct PCIXenPlatformState { > + PCIDevice pci_dev; > + uint8_t flags; /* used only for version_id == 2 */ > + int drivers_blacklisted; > + uint16_t driver_product_version; > + > + /* Log from guest drivers */ > + int throttling_disabled; > + char log_buffer[4096]; > + int log_buffer_off; > +} PCIXenPlatformState; > + > +#define XEN_PLATFORM_IOPORT 0x10 > + > +/* We throttle access to dom0 syslog, to avoid DOS attacks. This is > + modelled as a token bucket, with one token for every byte of log. > + The bucket size is 128KB (->1024 lines of 128 bytes each) and > + refills at 256B/s. It starts full. The guest is blocked if no > + tokens are available when it tries to generate a log message. */ > +#define BUCKET_MAX_SIZE (128*1024) > +#define BUCKET_FILL_RATE 256 > + > +static void throttle(PCIXenPlatformState *s, unsigned count) > +{ > + static unsigned available; > + static int64_t last_refill; > + static int started; > + static int warned; > + > + int64_t waiting_for, now; > + int64_t delay; > + > + if (s->throttling_disabled) { > + return; > + } > + > + if (!started) { > + last_refill = qemu_get_clock_ns(rt_clock); > + available = BUCKET_MAX_SIZE; > + started = 1; > + } > + > + if (count> BUCKET_MAX_SIZE) { > + DPRINTF("tried to get %u tokens, but bucket size is %u\n", > + BUCKET_MAX_SIZE, count); > + exit(1); > + } > + > + if (available< count) { > + /* The bucket is empty. Refill it */ > + > + /* When will it be full enough to handle this request? */ > + delay = muldiv64(count - available, 1000000000, BUCKET_FILL_RATE); > + > + waiting_for = last_refill + delay; > + > + /* How long do we have to wait? (might be negative) */ > + waiting_for = waiting_for - qemu_get_clock_ns(rt_clock); > + > + /* Wait for it. */ > + if (waiting_for> 0) { > + struct timespec ts; > + if (!warned) { > + DPRINTF("throttling guest access to syslog"); > + warned = 1; > + } > + ts.tv_sec = waiting_for / 1000000000; > + ts.tv_nsec = waiting_for % 1000000000; > + while (nanosleep(&ts,&ts)< 0&& errno == EINTR) { > + } > + } > + > + /* Refill */ > + now = qemu_get_clock_ns(rt_clock); > + available += muldiv64(now - last_refill, > + BUCKET_FILL_RATE, > + 1000000000); > + if (available> BUCKET_MAX_SIZE) { > + available = BUCKET_MAX_SIZE; > + } > + last_refill = now; > + } > + > + assert(available>= count); > + > + available -= count; > +} > + > +/* Send bytes to syslog */ > +static void log_writeb(PCIXenPlatformState *s, char val) > +{ > + if (val == '\n' || s->log_buffer_off == sizeof(s->log_buffer) - 1) { > + /* Flush buffer */ > + s->log_buffer[s->log_buffer_off] = 0; > + throttle(s, s->log_buffer_off); > + DPRINTF("%s\n", s->log_buffer); > + s->log_buffer_off = 0; > + } else { > + s->log_buffer[s->log_buffer_off++] = val; > + } > +} > This doesn't belong in qemu. Just pipe it out to a character device and you can direct that to whatever you want with whatever type of throttling you see fit. Hard coding a policy like this in qemu is wrong. > + > +/* Xen Platform, Fixed IOPort */ > + > +static void platform_fixed_ioport_writew(void *opaque, uint32_t addr, uint32_t val) > +{ > + PCIXenPlatformState *s = opaque; > + > + switch (addr - XEN_PLATFORM_IOPORT) { > + case 0: > + /* TODO: */ > + /* Unplug devices. Value is a bitmask of which devices to > + unplug, with bit 0 the IDE devices, bit 1 the network > + devices, and bit 2 the non-primary-master IDE devices. */ > + break; > + case 2: > + switch (val) { > + case 1: > + DPRINTF("Citrix Windows PV drivers loaded in guest\n"); > + break; > + case 0: > + DPRINTF("Guest claimed to be running PV product 0?\n"); > + break; > + default: > + DPRINTF("Unknown PV product %d loaded in guest\n", val); > + break; > + } > + s->driver_product_version = val; > + break; > + } > +} > + > +static void platform_fixed_ioport_writel(void *opaque, uint32_t addr, > + uint32_t val) > +{ > + switch (addr - XEN_PLATFORM_IOPORT) { > + case 0: > + /* PV driver version */ > + break; > + } > +} > + > +static void platform_fixed_ioport_writeb(void *opaque, uint32_t addr, uint32_t val) > +{ > + PCIXenPlatformState *s = opaque; > + > + switch (addr - XEN_PLATFORM_IOPORT) { > + case 0: /* Platform flags */ { > + hvmmem_type_t mem_type = (val& PFFLAG_ROM_LOCK) ? > + HVMMEM_ram_ro : HVMMEM_ram_rw; > + if (xc_hvm_set_mem_type(xen_xc, xen_domid, mem_type, 0xc0, 0x40)) { > + DPRINTF("unable to change ro/rw state of ROM memory area!\n"); > + } else { > + s->flags = val& PFFLAG_ROM_LOCK; > + DPRINTF("changed ro/rw state of ROM memory area. now is %s state.\n", > + (mem_type == HVMMEM_ram_ro ? "ro":"rw")); > + } > + break; > + } > + case 2: > + log_writeb(s, val); > + break; > + } > +} > + > +static uint32_t platform_fixed_ioport_readw(void *opaque, uint32_t addr) > +{ > + PCIXenPlatformState *s = opaque; > + > + switch (addr - XEN_PLATFORM_IOPORT) { > + case 0: > + if (s->drivers_blacklisted) { > + /* The drivers will recognise this magic number and refuse > + * to do anything. */ > + return 0xd249; > + } else { > + /* Magic value so that you can identify the interface. */ > + return 0x49d2; > + } > + default: > + return 0xffff; > + } > +} > + > +static uint32_t platform_fixed_ioport_readb(void *opaque, uint32_t addr) > +{ > + PCIXenPlatformState *s = opaque; > + > + switch (addr - XEN_PLATFORM_IOPORT) { > + case 0: > + /* Platform flags */ > + return s->flags; > + case 2: > + /* Version number */ > + return 1; > + default: > + return 0xff; > + } > +} > + > +static void platform_fixed_ioport_reset(void *opaque) > +{ > + PCIXenPlatformState *s = opaque; > + > + platform_fixed_ioport_writeb(s, XEN_PLATFORM_IOPORT, 0); > +} > + > +static void platform_fixed_ioport_init(PCIXenPlatformState* s) > +{ > + register_ioport_write(XEN_PLATFORM_IOPORT, 16, 4, platform_fixed_ioport_writel, s); > + register_ioport_write(XEN_PLATFORM_IOPORT, 16, 2, platform_fixed_ioport_writew, s); > + register_ioport_write(XEN_PLATFORM_IOPORT, 16, 1, platform_fixed_ioport_writeb, s); > + register_ioport_read(XEN_PLATFORM_IOPORT, 16, 2, platform_fixed_ioport_readw, s); > + register_ioport_read(XEN_PLATFORM_IOPORT, 16, 1, platform_fixed_ioport_readb, s); > +} > + > +/* Xen Platform PCI Device */ > + > +static uint32_t xen_platform_ioport_readb(void *opaque, uint32_t addr) > +{ > + addr&= 0xff; > + > + if (addr == 0) { > + return platform_fixed_ioport_readb(opaque, XEN_PLATFORM_IOPORT); > + } else { > + return ~0u; > + } > +} > + > +static void xen_platform_ioport_writeb(void *opaque, uint32_t addr, uint32_t val) > +{ > + PCIXenPlatformState *s = opaque; > + > + addr&= 0xff; > + val&= 0xff; > + > + switch (addr) { > + case 0: /* Platform flags */ > + platform_fixed_ioport_writeb(opaque, XEN_PLATFORM_IOPORT, val); > + break; > + case 8: > + log_writeb(s, val); > + break; > + default: > + break; > + } > +} > + > +static void platform_ioport_map(PCIDevice *pci_dev, int region_num, pcibus_t addr, pcibus_t size, int type) > +{ > + PCIXenPlatformState *d = DO_UPCAST(PCIXenPlatformState, pci_dev, pci_dev); > + > + register_ioport_write(addr, size, 1, xen_platform_ioport_writeb, d); > + register_ioport_read(addr, size, 1, xen_platform_ioport_readb, d); > +} > + > +static uint32_t platform_mmio_read(ReadWriteHandler *handler, pcibus_t addr, int len) > +{ > + DPRINTF("Warning: attempted read from physical address " > + "0x" TARGET_FMT_plx " in xen platform mmio space\n", addr); > + > + return 0; > +} > + > +static void platform_mmio_write(ReadWriteHandler *handler, pcibus_t addr, > + uint32_t val, int len) > +{ > + DPRINTF("Warning: attempted write of 0x%x to physical " > + "address 0x" TARGET_FMT_plx " in xen platform mmio space\n", > + val, addr); > +} > + > +static ReadWriteHandler platform_mmio_handler = { > + .read =&platform_mmio_read, > + .write =&platform_mmio_write, > +}; > + > +static void platform_mmio_map(PCIDevice *d, int region_num, > + pcibus_t addr, pcibus_t size, int type) > +{ > + int mmio_io_addr; > + > + mmio_io_addr = cpu_register_io_memory_simple(&platform_mmio_handler, > + DEVICE_NATIVE_ENDIAN); > + > + cpu_register_physical_memory(addr, size, mmio_io_addr); > +} > + > +static int xen_platform_post_load(void *opaque, int version_id) > +{ > + PCIXenPlatformState *s = opaque; > + > + platform_fixed_ioport_writeb(s, XEN_PLATFORM_IOPORT, s->flags); > + > + return 0; > +} > + > +static const VMStateDescription vmstate_xen_platform = { > + .name = "platform", > + .version_id = 4, > + .minimum_version_id = 4, > + .minimum_version_id_old = 4, > + .post_load = xen_platform_post_load, > + .fields = (VMStateField []) { > + VMSTATE_PCI_DEVICE(pci_dev, PCIXenPlatformState), > + VMSTATE_UINT8(flags, PCIXenPlatformState), > + VMSTATE_END_OF_LIST() > + } > +}; > + > +static int xen_platform_initfn(PCIDevice *dev) > +{ > + PCIXenPlatformState *d = DO_UPCAST(PCIXenPlatformState, pci_dev, dev); > + uint8_t *pci_conf; > + > + pci_conf = d->pci_dev.config; > + > + pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_XENSOURCE); > + pci_config_set_device_id(pci_conf, 0x0001); > + pci_set_word(pci_conf + PCI_COMMAND, PCI_COMMAND_IO | PCI_COMMAND_MEMORY); > + > + pci_config_set_revision(pci_conf, 1); > + pci_config_set_prog_interface(pci_conf, 0); > + > + pci_config_set_class(pci_conf, PCI_CLASS_OTHERS<< 8 | 0x80); > + > + pci_conf[PCI_INTERRUPT_PIN] = 1; > + > + /* Microsoft WHQL requires non-zero subsystem IDs. */ > + /* http://www.pcisig.com/reflector/msg02205.html. */ > + pci_set_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID, pci_conf[PCI_VENDOR_ID]); > + pci_set_word(pci_conf + PCI_SUBSYSTEM_ID, 0x0001); > The generic PCI layer handles this FWIW. Regards, Anthony Liguori > + > + pci_register_bar(&d->pci_dev, 0, 0x100, > + PCI_BASE_ADDRESS_SPACE_IO, platform_ioport_map); > + > + /* reserve 16MB mmio address for share memory*/ > + pci_register_bar(&d->pci_dev, 1, 0x1000000, > + PCI_BASE_ADDRESS_MEM_PREFETCH, platform_mmio_map); > + > + platform_fixed_ioport_init(d); > + > + return 0; > +} > + > +static void platform_reset(DeviceState *dev) > +{ > + PCIXenPlatformState *s = DO_UPCAST(PCIXenPlatformState, pci_dev.qdev, dev); > + > + platform_fixed_ioport_reset(s); > +} > + > +void pci_xen_platform_init(PCIBus *bus) > +{ > + PCIDevice *dev; > + > + dev = pci_create(bus, -1, "xen-platform"); > + > + qdev_init_nofail(&dev->qdev); > +} > + > +static PCIDeviceInfo xen_platform_info = { > + .init = xen_platform_initfn, > + .qdev.name = "xen-platform", > + .qdev.desc = "XEN platform pci device", > + .qdev.size = sizeof(PCIXenPlatformState), > + .qdev.vmsd =&vmstate_xen_platform, > + .qdev.reset = platform_reset, > +}; > + > +static void xen_platform_register(void) > +{ > + pci_qdev_register(&xen_platform_info); > +} > + > +device_init(xen_platform_register); > diff --git a/hw/xen_platform.h b/hw/xen_platform.h > new file mode 100644 > index 0000000..574eecd > --- /dev/null > +++ b/hw/xen_platform.h > @@ -0,0 +1,8 @@ > +#ifndef XEN_PLATFORM_H > +#define XEN_PLATFORM_H > + > +#include "hw/pci.h" > + > +void pci_xen_platform_init(PCIBus *bus); > + > +#endif >