From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1Mv6Qr-00076N-Oh for qemu-devel@nongnu.org; Tue, 06 Oct 2009 05:34:09 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1Mv6Qo-00074w-E9 for qemu-devel@nongnu.org; Tue, 06 Oct 2009 05:34:09 -0400 Received: from [199.232.76.173] (port=43224 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1Mv6Qn-00074m-KP for qemu-devel@nongnu.org; Tue, 06 Oct 2009 05:34:05 -0400 Received: from mx1.redhat.com ([209.132.183.28]:46427) by monty-python.gnu.org with esmtp (Exim 4.60) (envelope-from ) id 1Mv6Qm-0007Zr-R7 for qemu-devel@nongnu.org; Tue, 06 Oct 2009 05:34:05 -0400 Date: Tue, 6 Oct 2009 11:32:01 +0200 From: "Michael S. Tsirkin" Subject: Re: [Qemu-devel] [PATCH 9/9] [RFC] pci: pcie host and mmcfg support. Message-ID: <20091006093201.GA8854@redhat.com> References: <1247656509-32227-1-git-send-email-yamahata@valinux.co.jp> <1247656509-32227-10-git-send-email-yamahata@valinux.co.jp> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <1247656509-32227-10-git-send-email-yamahata@valinux.co.jp> List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Isaku Yamahata Cc: qemu-devel@nongnu.org On Wed, Jul 15, 2009 at 08:15:09PM +0900, Isaku Yamahata wrote: > This patch adds common routines for pcie host bridge and pcie mmcfg. > This will be used by q35 based chipset emulation. > > Signed-off-by: Isaku Yamahata > --- > hw/pci.c | 226 ++++++++++++++++++++++++++++++++++++++++++++++++++------- > hw/pci.h | 29 ++++++- > hw/pci_host.h | 16 ++++ > 3 files changed, 239 insertions(+), 32 deletions(-) > > diff --git a/hw/pci.c b/hw/pci.c > index 3b19c3d..eb761ce 100644 > --- a/hw/pci.c > +++ b/hw/pci.c > @@ -23,6 +23,7 @@ > */ > #include "hw.h" > #include "pci.h" > +#include "pci_host.h" > #include "monitor.h" > #include "net.h" > #include "sysemu.h" > @@ -166,27 +167,32 @@ int pci_bus_num(PCIBus *s) > void pci_device_save(PCIDevice *s, QEMUFile *f) > { > int i; > + uint32_t config_size = pcie_config_size(s); > > qemu_put_be32(f, 2); /* PCI device version */ > - qemu_put_buffer(f, s->config, 256); > + qemu_put_buffer(f, s->config, config_size); > for (i = 0; i < 4; i++) > qemu_put_be32(f, s->irq_state[i]); > } > > int pci_device_load(PCIDevice *s, QEMUFile *f) > { > - uint8_t config[PCI_CONFIG_SPACE_SIZE]; > + uint32_t config_size = pcie_config_size(s); > + uint8_t *config; > uint32_t version_id; > int i; > > version_id = qemu_get_be32(f); > if (version_id > 2) > return -EINVAL; > - qemu_get_buffer(f, config, sizeof config); > - for (i = 0; i < sizeof config; ++i) > + > + config = qemu_malloc(config_size); > + qemu_get_buffer(f, config, config_size); > + for (i = 0; i < config_size; ++i) > if ((config[i] ^ s->config[i]) & s->cmask[i] & ~s->wmask[i]) > return -EINVAL; > - memcpy(s->config, config, sizeof config); > + memcpy(s->config, config, config_size); > + qemu_free(config); > > pci_update_mappings(s); > > @@ -302,14 +308,31 @@ static void pci_init_cmask(PCIDevice *dev) > static void pci_init_wmask(PCIDevice *dev) > { > int i; > + uint32_t config_size = pcie_config_size(dev); > + > dev->wmask[PCI_CACHE_LINE_SIZE] = 0xff; > dev->wmask[PCI_INTERRUPT_LINE] = 0xff; > dev->wmask[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY > | PCI_COMMAND_MASTER; > - for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i) > + for (i = PCI_CONFIG_HEADER_SIZE; i < config_size; ++i) > dev->wmask[i] = 0xff; > } > > +static void pci_config_init(PCIDevice *pci_dev) > +{ > + int config_size = pcie_config_size(pci_dev); > +#define PCI_CONFIG_ALLOC(d, member, size) \ > + do { \ > + (d)->member = \ > + (typeof((d)->member))qemu_mallocz(sizeof((d)->member[0]) * \ > + size); \ > + } while (0) > + PCI_CONFIG_ALLOC(pci_dev, config, config_size); > + PCI_CONFIG_ALLOC(pci_dev, cmask, config_size); > + PCI_CONFIG_ALLOC(pci_dev, wmask, config_size); > + PCI_CONFIG_ALLOC(pci_dev, used, config_size); > +} > + > /* -1 for devfn means auto assign */ > static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus, > const char *name, int devfn, > @@ -330,6 +353,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus, > pci_dev->devfn = devfn; > pstrcpy(pci_dev->name, sizeof(pci_dev->name), name); > memset(pci_dev->irq_state, 0, sizeof(pci_dev->irq_state)); > + pci_config_init(pci_dev); > pci_set_default_subsystem_id(pci_dev); > pci_init_cmask(pci_dev); > pci_init_wmask(pci_dev); > @@ -531,40 +555,48 @@ static void pci_update_mappings(PCIDevice *d) > } > } > > +static uint8_t pcie_config_get_byte(PCIDevice *d, uint32_t addr) We don't need these wrappers. From software perspective pci express looks just like pci: little endian, so use pci_get_byte. > +{ > + uint8_t *conf = &d->config[addr]; > + if (conf != NULL) Please just write if (conf) But in this case: conf can not ever be NULL, can it? And if it's NULL, device was not initialized yet, so returning 0 does not make sense either? > + return *conf; > + return 0; > +} > + > +static uint32_t pcie_config_get(PCIDevice *d, uint32_t addr, int len) > +{ > + int i; > + union { > + uint8_t val8[4]; > + uint32_t val32; > + } v = { .val32 = 0 }; > + > + for (i = 0; i < len; i++) { > + v.val8[i] = pcie_config_get_byte(d, addr + i); > + } > + > + return le32_to_cpu(v.val32); > +} > + > uint32_t pci_default_read_config(PCIDevice *d, > uint32_t address, int len) > { > - uint32_t val; > + uint32_t config_size = pcie_config_size(d); > > - switch(len) { > - default: > - case 4: > - if (address <= 0xfc) { > - val = le32_to_cpu(*(uint32_t *)(d->config + address)); > - break; > - } > - /* fall through */ > - case 2: > - if (address <= 0xfe) { > - val = le16_to_cpu(*(uint16_t *)(d->config + address)); > - break; > - } > - /* fall through */ > - case 1: > - val = d->config[address]; > - break; > - } > - return val; > + assert(len == 1 || len == 2 || len == 4); > + len = MIN(len, config_size - address); > + return pcie_config_get(d, address, len); > } If you unwrap all the extra layers that were added, this will become even simpler: uint32_t pci_default_read_config(PCIDevice *d, uint32_t address, int len) { uint32_t val = 0; assert(len == 1 || len == 2 || len == 4); len = MIN(len, pcie_config_size(d) - address); memcpy(&val, d->config + addr, len); return le32_to_cpu(val); } which would be a good cleanup. > > void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l) > { > uint8_t orig[PCI_CONFIG_SPACE_SIZE]; > int i; > + uint32_t config_size = pcie_config_size(d); > > /* not efficient, but simple */ > memcpy(orig, d->config, PCI_CONFIG_SPACE_SIZE); > - for(i = 0; i < l && addr < PCI_CONFIG_SPACE_SIZE; val >>= 8, ++i, ++addr) { > + for(i = 0; i < l && addr < config_size; val >>= 8, ++i, ++addr) { > uint8_t wmask = d->wmask[addr]; > d->config[addr] = (d->config[addr] & ~wmask) | (val & wmask); > } > @@ -660,6 +692,143 @@ uint32_t pci_data_read(void *opaque, uint32_t addr, int len) > pci_addr_to_dev(s, addr, &pci_dev, &config_addr); > return pci_dev_data_read(pci_dev, config_addr, len); > } > + > +static void pcie_mmcfg_addr_to_dev(PCIBus *s, uint32_t mmcfg_addr, > + PCIDevice **pci_dev, uint32_t *conf_addr) Can you document what this does please? And this probably should go into pcie-host.c - it's only used there. > +{ > + int bus_num; > + unsigned int dev; > + uint8_t func; > + > +#define PCIE_MASK(val, hi_bit, low_bit) \ > + (((val) & (((1ULL << (hi_bit)) - 1))) >> (low_bit)) > +#define PCIE_VAL(VAL, val) \ > + PCIE_MASK((val), PCIE_MMCFG_ ## VAL ## _HI, PCIE_MMCFG_ ## VAL ## _LOW) This is just too tricky, and HI is ambigous: is it the last bit, or the next on top of last? For the 4 users below, it's easier to just define mask and bit offset and open-code it. > +#define PCIE_MMCFG_BUS_HI 27 > +#define PCIE_MMCFG_BUS_LOW 20 > +#define PCIE_MMCFG_DEV_HI 19 > +#define PCIE_MMCFG_DEV_LOW 15 > +#define PCIE_MMCFG_FUNC_HI 14 > +#define PCIE_MMCFG_FUNC_LOW 12 Device+function come together, same as conventional pci: you can just define a macro to get both and avoid decoding them and then encoding back with PCI_DEVFN. > +#define PCIE_MMCFG_CONFADDR_HI 11 > +#define PCIE_MMCFG_CONFADDR_LOW 0 > +#define PCIE_MMCFG_BUS(addr) PCIE_VAL(BUS, (addr)) > +#define PCIE_MMCFG_DEV(addr) PCIE_VAL(DEV, (addr)) > +#define PCIE_MMCFG_FUNC(addr) PCIE_VAL(FUNC, (addr)) > +#define PCIE_MMCFG_CONFADDR(addr) PCIE_VAL(CONFADDR, (addr)) So you would have (if I understood the code correctly). #define PCIE_MMCFG_FUNC_BIT 12 #define PCIE_MMCFG_FUNC_MASK 0x7 #define PCIE_MMCFG_FUNC(addr) (((addr) >> PCIE_MMCFG_FUNC_BIT) & PCIE_MMCFG_FUNC_MASK) > + > + bus_num = PCIE_MMCFG_BUS(mmcfg_addr); > + dev = PCIE_MMCFG_DEV(mmcfg_addr); > + func = PCIE_MMCFG_FUNC(mmcfg_addr); > + *conf_addr = PCIE_MMCFG_CONFADDR(mmcfg_addr); > + > + *pci_dev = pci_bdf_to_dev(s, bus_num, PCI_DEVFN(dev, func)); > +} > + > +void pcie_data_write(void *opaque, uint32_t addr, uint32_t val, int len) > +{ > + PCIBus *s = opaque; > + PCIDevice *pci_dev; > + uint32_t config_addr; > + > +#if 0 > + PCI_DPRINTF("%s: addr=%08"PRIx32" val=%08"PRIx32" len=%d\n", > + __func__, addr, val, len); > +#endif > + pcie_mmcfg_addr_to_dev(s, addr, &pci_dev, &config_addr); > + pci_dev_data_write(pci_dev, config_addr, val, len); > +} > + > +uint32_t pcie_data_read(void *opaque, uint32_t addr, int len) > +{ > + PCIBus *s = opaque; > + PCIDevice *pci_dev; > + uint32_t config_addr; > + > + pcie_mmcfg_addr_to_dev(s, addr, &pci_dev, &config_addr); > + return pci_dev_data_read(pci_dev, config_addr, len); > +} > + > +#define DEFINE_PCIE_HOST_DATA_READ(len) \ > + static uint32_t pcie_host_data_read_ ## len ( \ > + void *opaque, target_phys_addr_t addr) \ > + { \ > + PCIExpressHost *e = (PCIExpressHost *)opaque; \ > + return pcie_data_read(e->pci.bus, \ > + addr - e->base_addr, (len)); \ > + } > + > +#define DEFINE_PCIE_HOST_DATA_WRITE(len) \ > + static void pcie_host_data_write_ ## len ( \ > + void *opaque, target_phys_addr_t addr, uint32_t value) \ > + { \ > + PCIExpressHost *e = (PCIExpressHost *)opaque; \ > + pcie_data_write(e->pci.bus, \ > + addr - e->base_addr, value, (len)); \ > + } > + > +#define DEFINE_PCIE_HOST_DATA_MMIO(len) \ > + DEFINE_PCIE_HOST_DATA_READ(len) \ > + DEFINE_PCIE_HOST_DATA_WRITE(len) > + > +DEFINE_PCIE_HOST_DATA_MMIO(1) > +DEFINE_PCIE_HOST_DATA_MMIO(2) > +DEFINE_PCIE_HOST_DATA_MMIO(4) > + > +#define DEFINE_PCIE_MEMORY_FUNCS(Type, type) \ > + static CPU ## Type ## MemoryFunc *pcie_host_data_ ## type [] = \ > + { \ > + &pcie_host_data_ ## type ## _1, \ > + &pcie_host_data_ ## type ## _2, \ > + &pcie_host_data_ ## type ## _4, \ > + }; > + > +DEFINE_PCIE_MEMORY_FUNCS(Read, read) > +DEFINE_PCIE_MEMORY_FUNCS(Write, write) > + > +PCIExpressHost *pcie_host_init(CPUReadMemoryFunc **mmcfg_read, > + CPUWriteMemoryFunc **mmcfg_write) > +{ > + PCIExpressHost *e; > + > + e = qemu_mallocz(sizeof(*e)); > + e->base_addr = PCIE_BASE_ADDR_INVALID; > + > + if (mmcfg_read == NULL) > + mmcfg_read = pcie_host_data_read; > + if (mmcfg_write == NULL) > + mmcfg_write = pcie_host_data_write; > + e->mmio_index = cpu_register_io_memory(mmcfg_read, mmcfg_write, e); > + if (e->mmio_index < 0) { > + qemu_free(e); > + return NULL; > + } > + > + return e; > +} > + > +void pcie_host_mmcfg_map(PCIExpressHost *e, > + target_phys_addr_t addr, int bus_num_order) > +{ > + int size_order = 20 + bus_num_order - 1; > + > + /* [(20 + n - 1):20] bit: 2^n bus where 1 <= n <= 8 */ > +#define PCIE_BUS_NUM_ORDER_MIN 1 > +#define PCIE_BUS_NUM_ORDER_MAX (PCIE_MMCFG_BUS_HI - PCIE_MMCFG_BUS_LOW + 1) > + assert(PCIE_BUS_NUM_ORDER_MIN <= bus_num_order); > + assert(bus_num_order <= PCIE_BUS_NUM_ORDER_MAX); > + assert((addr & ((1ULL << size_order) - 1)) == 0); > + > + if (e->base_addr != PCIE_BASE_ADDR_INVALID) { > + cpu_register_physical_memory(e->base_addr, e->size, IO_MEM_UNASSIGNED); > + } > + > + e->base_addr = addr; > + e->size = 1ULL << size_order; > + e->bus_num_order = bus_num_order; > + cpu_register_physical_memory(e->base_addr, e->size, e->mmio_index); > +} > + > /***********************************************************/ > /* generic PCI irq support */ > > @@ -991,9 +1160,10 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name) > > static int pci_find_space(PCIDevice *pdev, uint8_t size) > { > + int config_size = pcie_config_size(pdev); > int offset = PCI_CONFIG_HEADER_SIZE; > int i; > - for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i) > + for (i = PCI_CONFIG_HEADER_SIZE; i < config_size; ++i) > if (pdev->used[i]) > offset = i + 1; > else if (i - offset + 1 == size) > diff --git a/hw/pci.h b/hw/pci.h > index 33e2ef2..ff8dbad 100644 > --- a/hw/pci.h > +++ b/hw/pci.h > @@ -170,20 +170,26 @@ enum { > QEMU_PCI_CAP_MSIX = 0x1, > }; > > +/* Size of the standart PCIe config space: 4KB */ > +#define PCIE_CONFIG_SPACE_SIZE 0x1000 > +#define PCIE_EXT_CONFIG_SPACE_SIZE \ > + (PCIE_CONFIG_SPACE_SIZE - PCI_CONFIG_SPACE_SIZE) > + > struct PCIDevice { > DeviceState qdev; > + > /* PCI config space */ > - uint8_t config[PCI_CONFIG_SPACE_SIZE]; > + uint8_t *config; > > /* Used to enable config checks on load. Note that writeable bits are > * never checked even if set in cmask. */ > - uint8_t cmask[PCI_CONFIG_SPACE_SIZE]; > + uint8_t *cmask; > > /* Used to implement R/W bytes */ > - uint8_t wmask[PCI_CONFIG_SPACE_SIZE]; > + uint8_t *wmask; > > /* Used to allocate config space for capabilities. */ > - uint8_t used[PCI_CONFIG_SPACE_SIZE]; > + uint8_t *used; > > /* the following fields are read only */ > PCIBus *bus; > @@ -257,6 +263,8 @@ PCIDevice *pci_nic_init(NICInfo *nd, const char *default_model, > const char *default_devaddr); > void pci_data_write(void *opaque, uint32_t addr, uint32_t val, int len); > uint32_t pci_data_read(void *opaque, uint32_t addr, int len); > +void pcie_data_write(void *opaque, uint32_t addr, uint32_t val, int len); > +uint32_t pcie_data_read(void *opaque, uint32_t addr, int len); > int pci_bus_num(PCIBus *s); > void pci_for_each_device(int bus_num, void (*fn)(PCIDevice *d)); > PCIBus *pci_find_bus(int bus_num); > @@ -329,6 +337,9 @@ typedef struct { > pci_qdev_initfn init; > PCIConfigReadFunc *config_read; > PCIConfigWriteFunc *config_write; > + > + /* pcie stuff */ > + int pcie; Just add a bit in cap_present field. > } PCIDeviceInfo; > > void pci_qdev_register(PCIDeviceInfo *info); > @@ -337,6 +348,16 @@ void pci_qdev_register_many(PCIDeviceInfo *info); > DeviceState *pci_create(const char *name, const char *devaddr); > PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name); > > +static inline int pci_is_pcie(PCIDevice *d) > +{ > + return container_of(d->qdev.info, PCIDeviceInfo, qdev)->pcie; > +} > + > +static inline uint32_t pcie_config_size(PCIDevice *d) > +{ > + return pci_is_pcie(d)? PCIE_CONFIG_SPACE_SIZE: PCI_CONFIG_SPACE_SIZE; space before ? and before : > +} > + rename to pci_config_size? See also my previous mail how we can keep the same size for PCI and express and get rid of this completely. > /* lsi53c895a.c */ > #define LSI_MAX_DEVS 7 > void lsi_scsi_attach(DeviceState *host, BlockDriverState *bd, int id); > diff --git a/hw/pci_host.h b/hw/pci_host.h > index 9f272a7..84e3b08 100644 > --- a/hw/pci_host.h > +++ b/hw/pci_host.h > @@ -36,4 +36,20 @@ typedef struct { > PCIBus *bus; > } PCIHostState; > > +typedef struct { > + PCIHostState pci; > + > + /* express part */ Document these fields? > + target_phys_addr_t base_addr; > +#define PCIE_BASE_ADDR_INVALID ((target_phys_addr_t)-1ULL) What's this? > + target_phys_addr_t size; > + int bus_num_order; > + int mmio_index; > +} PCIExpressHost; > + > +PCIExpressHost *pcie_host_init(CPUReadMemoryFunc **mmcfg_read, > + CPUWriteMemoryFunc **mmcfg_write); > +void pcie_host_mmcfg_map(PCIExpressHost *e, > + target_phys_addr_t addr, int bus_num_order); > + > #endif /* PCI_HOST_H */ > -- > 1.6.0.2 > >