From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:43745) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1YD5Lw-0007sM-Br for qemu-devel@nongnu.org; Mon, 19 Jan 2015 00:58:24 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1YD5Lr-00029J-1t for qemu-devel@nongnu.org; Mon, 19 Jan 2015 00:58:20 -0500 Date: Mon, 19 Jan 2015 16:58:28 +1100 From: David Gibson Message-ID: <20150119055828.GZ5297@voom.fritz.box> References: <1419337831-16552-1-git-send-email-mdroth@linux.vnet.ibm.com> <1419337831-16552-17-git-send-email-mdroth@linux.vnet.ibm.com> MIME-Version: 1.0 Content-Type: multipart/signed; micalg=pgp-sha1; protocol="application/pgp-signature"; boundary="+HwY7O+GdaLKMYOv" Content-Disposition: inline In-Reply-To: <1419337831-16552-17-git-send-email-mdroth@linux.vnet.ibm.com> Subject: Re: [Qemu-devel] [PATCH v4 16/17] spapr_pci: enable basic hotplug operations List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Michael Roth Cc: aik@ozlabs.ru, qemu-devel@nongnu.org, agraf@suse.de, ncmike@ncultra.org, qemu-ppc@nongnu.org, tyreld@linux.vnet.ibm.com, bharata.rao@gmail.com, nfont@linux.vnet.ibm.com --+HwY7O+GdaLKMYOv Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Content-Transfer-Encoding: quoted-printable On Tue, Dec 23, 2014 at 06:30:30AM -0600, Michael Roth wrote: > This enables hotplug for PHB bridges. Upon hotplug we generate the > OF-nodes required by PAPR specification and IEEE 1275-1994 > "PCI Bus Binding to Open Firmware" for the device. >=20 > We associate the corresponding FDT for these nodes with the DrcEntry > corresponding to the slot, which will be fetched via > ibm,configure-connector RTAS calls by the guest as described by PAPR > specification. The FDT is cleaned up in the case of unplug. 
>=20 > Signed-off-by: Michael Roth > --- > hw/ppc/spapr_pci.c | 268 +++++++++++++++++++++++++++++++++++++++++++++++= ++---- > 1 file changed, 249 insertions(+), 19 deletions(-) >=20 > diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c > index a5d7791..94e33b4 100644 > --- a/hw/ppc/spapr_pci.c > +++ b/hw/ppc/spapr_pci.c > @@ -33,6 +33,7 @@ > #include > #include "trace.h" > #include "qemu/error-report.h" > +#include "qapi/qmp/qerror.h" > =20 > #include "hw/pci/pci_bus.h" > =20 > @@ -51,6 +52,15 @@ > =20 > #include "hw/ppc/spapr_drc.h" > =20 > +#define FDT_MAX_SIZE 0x10000 > +#define _FDT(exp) \ > + do { \ > + int ret =3D (exp); \ > + if (ret < 0) { \ > + return ret; \ > + } \ > + } while (0) > + > static sPAPRPHBState *find_phb(sPAPREnvironment *spapr, uint64_t buid) > { > sPAPRPHBState *sphb; > @@ -483,6 +493,237 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bu= s, void *opaque, int devfn) > return &phb->iommu_as; > } > =20 > +/* Macros to operate with address in OF binding to PCI */ > +#define b_x(x, p, l) (((x) & ((1<<(l))-1)) << (p)) > +#define b_n(x) b_x((x), 31, 1) /* 0 if relocatable */ > +#define b_p(x) b_x((x), 30, 1) /* 1 if prefetchable */ > +#define b_t(x) b_x((x), 29, 1) /* 1 if the address is aliased */ > +#define b_ss(x) b_x((x), 24, 2) /* the space code */ > +#define b_bbbbbbbb(x) b_x((x), 16, 8) /* bus number */ > +#define b_ddddd(x) b_x((x), 11, 5) /* device number */ > +#define b_fff(x) b_x((x), 8, 3) /* function number */ > +#define b_rrrrrrrr(x) b_x((x), 0, 8) /* register number */ > + > +/* for 'reg'/'assigned-addresses' OF properties */ > +#define RESOURCE_CELLS_SIZE 2 > +#define RESOURCE_CELLS_ADDRESS 3 > +#define RESOURCE_CELLS_TOTAL \ > + (RESOURCE_CELLS_SIZE + RESOURCE_CELLS_ADDRESS) > + > +static void fill_resource_props(PCIDevice *d, int bus_num, > + uint32_t *reg, int *reg_size, > + uint32_t *assigned, int *assigned_size) This is another interface which writes to a buffer without any size limit information being passed through, 
which makes me nervous. > +{ > + uint32_t *reg_row, *assigned_row; > + uint32_t dev_id =3D (b_bbbbbbbb(bus_num) | > + b_ddddd(PCI_SLOT(d->devfn)) | > + b_fff(PCI_FUNC(d->devfn))); > + int i, idx =3D 0; > + > + reg[0] =3D cpu_to_be32(dev_id); > + > + for (i =3D 0; i < PCI_NUM_REGIONS; i++) { > + if (!d->io_regions[i].size) { > + continue; > + } > + reg_row =3D &reg[(idx + 1) * RESOURCE_CELLS_TOTAL]; > + assigned_row =3D &assigned[idx * RESOURCE_CELLS_TOTAL]; > + reg_row[0] =3D cpu_to_be32(dev_id | b_rrrrrrrr(pci_bar(d, i))); > + if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) { > + reg_row[0] |=3D cpu_to_be32(b_ss(1)); > + } else { > + reg_row[0] |=3D cpu_to_be32(b_ss(2)); > + } > + assigned_row[0] =3D cpu_to_be32(reg_row[0] | b_n(1)); > + assigned_row[3] =3D reg_row[3] =3D cpu_to_be32(d->io_regions[i].= size >> 32); > + assigned_row[4] =3D reg_row[4] =3D cpu_to_be32(d->io_regions[i].= size); > + assigned_row[1] =3D cpu_to_be32(d->io_regions[i].addr >> 32); > + assigned_row[2] =3D cpu_to_be32(d->io_regions[i].addr); You don't appear to ever fill in reg_row[1] and reg_row[2]. > + idx++; > + } > + > + *reg_size =3D (idx + 1) * RESOURCE_CELLS_TOTAL * sizeof(uint32_t); > + *assigned_size =3D idx * RESOURCE_CELLS_TOTAL * sizeof(uint32_t); > +} > + > +static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int of= fset, > + int phb_index, int drc_index) > +{ > + int slot =3D PCI_SLOT(dev->devfn); > + char slotname[16]; > + bool is_bridge =3D 1; Should use the true and false macros for a bool type, not 0 and 1. 
> + uint32_t reg[RESOURCE_CELLS_TOTAL * 8] =3D { 0 }; > + uint32_t assigned[RESOURCE_CELLS_TOTAL * 8] =3D { 0 }; > + int pci_status, reg_size, assigned_size; > + > + if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) =3D=3D > + PCI_HEADER_TYPE_NORMAL) { > + is_bridge =3D 0; > + } > + > + _FDT(fdt_setprop_cell(fdt, offset, "vendor-id", > + pci_default_read_config(dev, PCI_VENDOR_ID, 2)= )); > + _FDT(fdt_setprop_cell(fdt, offset, "device-id", > + pci_default_read_config(dev, PCI_DEVICE_ID, 2)= )); > + _FDT(fdt_setprop_cell(fdt, offset, "revision-id", > + pci_default_read_config(dev, PCI_REVISION_ID, = 1))); > + _FDT(fdt_setprop_cell(fdt, offset, "class-code", > + pci_default_read_config(dev, PCI_CLASS_DEVICE,= 2) << 8)); > + > + _FDT(fdt_setprop_cell(fdt, offset, "interrupts", > + pci_default_read_config(dev, PCI_INTERRUPT_PIN= , 1))); > + > + /* if this device is NOT a bridge */ > + if (!is_bridge) { > + _FDT(fdt_setprop_cell(fdt, offset, "min-grant", > + pci_default_read_config(dev, PCI_MIN_GNT, 1))); > + _FDT(fdt_setprop_cell(fdt, offset, "max-latency", > + pci_default_read_config(dev, PCI_MAX_LAT, 1))); > + _FDT(fdt_setprop_cell(fdt, offset, "subsystem-id", > + pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2))); > + _FDT(fdt_setprop_cell(fdt, offset, "subsystem-vendor-id", > + pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2))); > + } > + > + _FDT(fdt_setprop_cell(fdt, offset, "cache-line-size", > + pci_default_read_config(dev, PCI_CACHE_LINE_SIZE, 1))); > + > + /* the following fdt cells are masked off the pci status register */ > + pci_status =3D pci_default_read_config(dev, PCI_STATUS, 2); > + _FDT(fdt_setprop_cell(fdt, offset, "devsel-speed", > + PCI_STATUS_DEVSEL_MASK & pci_status)); > + _FDT(fdt_setprop_cell(fdt, offset, "fast-back-to-back", > + PCI_STATUS_FAST_BACK & pci_status)); > + _FDT(fdt_setprop_cell(fdt, offset, "66mhz-capable", > + PCI_STATUS_66MHZ & pci_status)); > + _FDT(fdt_setprop_cell(fdt, offset, "udf-supported", > + PCI_STATUS_UDF 
& pci_status)); These aren't quite right. According to the OF PCI binding these are boolean properties encoded in the usual way, which is to say absent for false and present-but-empty for true. They shouldn't contain an actual value. > + > + _FDT(fdt_setprop_string(fdt, offset, "name", "pci")); > + sprintf(slotname, "Slot %d", slot + phb_index * PCI_SLOT_MAX); > + _FDT(fdt_setprop(fdt, offset, "ibm,loc-code", slotname, strlen(slotn= ame))); > + _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)); > + > + _FDT(fdt_setprop_cell(fdt, offset, "#address-cells", > + RESOURCE_CELLS_ADDRESS)); > + _FDT(fdt_setprop_cell(fdt, offset, "#size-cells", > + RESOURCE_CELLS_SIZE)); > + _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi-x", > + RESOURCE_CELLS_SIZE)); > + fill_resource_props(dev, phb_index, reg, &reg_size, > + assigned, &assigned_size); > + _FDT(fdt_setprop(fdt, offset, "reg", reg, reg_size)); > + _FDT(fdt_setprop(fdt, offset, "assigned-addresses", > + assigned, assigned_size)); > + > + return 0; > +} > + > +/* create OF node for pci device and required OF DT properties */ > +static void *spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *de= v, > + int drc_index, int *dt_offset) > +{ > + void *fdt_orig, *fdt; > + int offset, ret; > + int slot =3D PCI_SLOT(dev->devfn); > + char nodename[512]; > + > + fdt_orig =3D g_malloc0(FDT_MAX_SIZE); > + offset =3D fdt_create(fdt_orig, FDT_MAX_SIZE); > + fdt_begin_node(fdt_orig, ""); > + fdt_end_node(fdt_orig); > + fdt_finish(fdt_orig); Recent versions of libfdt have an fdt_create_empty_tree() function to simplify that standard idiom. > + fdt =3D g_malloc0(FDT_MAX_SIZE); > + fdt_open_into(fdt_orig, fdt, FDT_MAX_SIZE); There's no need for a second malloc here - fdt_open_into() may be used in place. 
> + sprintf(nodename, "pci@%d", slot); > + offset =3D fdt_add_subnode(fdt, 0, nodename); > + ret =3D spapr_populate_pci_child_dt(dev, fdt, offset, phb->index, dr= c_index); > + g_assert(!ret); > + g_free(fdt_orig); > + > + *dt_offset =3D offset; > + return fdt; > +} > + > +static void spapr_device_hotplug_add(sPAPRDRConnector *drc, > + sPAPRPHBState *phb, > + PCIDevice *pdev) > +{ > + sPAPRDRConnectorClass *drck =3D SPAPR_DR_CONNECTOR_GET_CLASS(drc); > + DeviceState *dev =3D DEVICE(pdev); > + int drc_index =3D drck->get_index(drc); > + void *fdt =3D NULL; > + int fdt_start_offset =3D 0; > + > + /* boot-time devices get their device tree node created by SLOF, but= for > + * hotplugged devices we need QEMU to generate it so the guest can f= etch > + * it via RTAS Now that we have to have this code in qemu for the hotplug case we may want to consider using it for boot-time devices as well, and removing the corresponding code from SLOF, but that's a problem for another day. > + */ > + if (dev->hotplugged) { > + fdt =3D spapr_create_pci_child_dt(phb, pdev, drc_index, > + &fdt_start_offset); > + } > + drck->attach(drc, DEVICE(pdev), fdt, fdt_start_offset, !dev->hotplug= ged); > +} > + > +static void spapr_device_hotplug_remove_cb(DeviceState *dev, void *opaqu= e) > +{ > + object_unparent(OBJECT(dev)); > +} > + > +static void spapr_device_hotplug_remove(sPAPRDRConnector *drc, > + sPAPRPHBState *phb, > + PCIDevice *pdev) > +{ > + sPAPRDRConnectorClass *drck =3D SPAPR_DR_CONNECTOR_GET_CLASS(drc); > + > + drck->detach(drc, DEVICE(pdev), spapr_device_hotplug_remove_cb, phb); > +} > + > +static void spapr_phb_hot_plug(HotplugHandler *plug_handler, > + DeviceState *plugged_dev, Error **errp) So, this function is hotplugging a PCI device into an existing PHB, rather than hotplugging a PHB itself. Since the DR protocol does support both operations, I could see this name becoming confusing. 
> +{ > + sPAPRPHBState *phb =3D SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler)); > + PCIDevice *pdev =3D PCI_DEVICE(plugged_dev); > + sPAPRDRConnector *drc =3D > + spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_PCI, pdev->devf= n); Is it safe to call this before checking phb->dr_enabled? > + /* if DR is disabled we don't need to do anything in the case of > + * hotplug or coldplug callbacks > + */ > + if (!phb->dr_enabled) { > + /* if this is a hotplug operation initiated by the user > + * we need to let them know it's not enabled > + */ > + if (plugged_dev->hotplugged) { > + error_set(errp, QERR_BUS_NO_HOTPLUG, > + object_get_typename(OBJECT(phb))); > + } > + return; > + } > + > + g_assert(drc); > + spapr_device_hotplug_add(drc, phb, pdev); > +} > + > +static void spapr_phb_hot_unplug(HotplugHandler *plug_handler, > + DeviceState *plugged_dev, Error **errp) > +{ > + sPAPRPHBState *phb =3D SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler)); > + PCIDevice *pdev =3D PCI_DEVICE(plugged_dev); > + sPAPRDRConnector *drc =3D > + spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_PCI, pdev->devf= n); > + > + if (!phb->dr_enabled) { > + error_set(errp, QERR_BUS_NO_HOTPLUG, > + object_get_typename(OBJECT(phb))); > + return; > + } > + > + spapr_device_hotplug_remove(drc, phb, pdev); > +} > + > static void spapr_phb_realize(DeviceState *dev, Error **errp) > { > SysBusDevice *s =3D SYS_BUS_DEVICE(dev); > @@ -570,6 +811,7 @@ static void spapr_phb_realize(DeviceState *dev, Error= **errp) > &sphb->memspace, &sphb->iospace, > PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS); > phb->bus =3D bus; > + qbus_set_hotplug_handler(BUS(phb->bus), DEVICE(sphb), NULL); > =20 > /* > * Initialize PHB address space. 
> @@ -806,6 +1048,7 @@ static void spapr_phb_class_init(ObjectClass *klass,= void *data) > PCIHostBridgeClass *hc =3D PCI_HOST_BRIDGE_CLASS(klass); > DeviceClass *dc =3D DEVICE_CLASS(klass); > sPAPRPHBClass *spc =3D SPAPR_PCI_HOST_BRIDGE_CLASS(klass); > + HotplugHandlerClass *hp =3D HOTPLUG_HANDLER_CLASS(klass); > =20 > hc->root_bus_path =3D spapr_phb_root_bus_path; > dc->realize =3D spapr_phb_realize; > @@ -815,6 +1058,8 @@ static void spapr_phb_class_init(ObjectClass *klass,= void *data) > set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); > dc->cannot_instantiate_with_device_add_yet =3D false; > spc->finish_realize =3D spapr_phb_finish_realize; > + hp->plug =3D spapr_phb_hot_plug; > + hp->unplug =3D spapr_phb_hot_unplug; > } > =20 > static const TypeInfo spapr_phb_info =3D { > @@ -823,6 +1068,10 @@ static const TypeInfo spapr_phb_info =3D { > .instance_size =3D sizeof(sPAPRPHBState), > .class_init =3D spapr_phb_class_init, > .class_size =3D sizeof(sPAPRPHBClass), > + .interfaces =3D (InterfaceInfo[]) { > + { TYPE_HOTPLUG_HANDLER }, > + { } > + } > }; > =20 > PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index) > @@ -836,17 +1085,6 @@ PCIHostState *spapr_create_phb(sPAPREnvironment *sp= apr, int index) > return PCI_HOST_BRIDGE(dev); > } > =20 > -/* Macros to operate with address in OF binding to PCI */ > -#define b_x(x, p, l) (((x) & ((1<<(l))-1)) << (p)) > -#define b_n(x) b_x((x), 31, 1) /* 0 if relocatable */ > -#define b_p(x) b_x((x), 30, 1) /* 1 if prefetchable */ > -#define b_t(x) b_x((x), 29, 1) /* 1 if the address is aliased */ > -#define b_ss(x) b_x((x), 24, 2) /* the space code */ > -#define b_bbbbbbbb(x) b_x((x), 16, 8) /* bus number */ > -#define b_ddddd(x) b_x((x), 11, 5) /* device number */ > -#define b_fff(x) b_x((x), 8, 3) /* function number */ > -#define b_rrrrrrrr(x) b_x((x), 0, 8) /* register number */ > - > typedef struct sPAPRTCEDT { > void *fdt; > int node_off; > @@ -906,14 +1144,6 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, 
> return bus_off; > } > =20 > -#define _FDT(exp) \ > - do { \ > - int ret =3D (exp); \ > - if (ret < 0) { \ > - return ret; \ > - } \ > - } while (0) > - > /* Write PHB properties */ > _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci")); > _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB= ")); --=20 David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson --+HwY7O+GdaLKMYOv Content-Type: application/pgp-signature -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQIcBAEBAgAGBQJUvJ0DAAoJEGw4ysog2bOSewcP/ieFCJB01m6VaC9fMsU0Q89d IaJMK2WgzXDfJRAnc+agYMRmbZsUE4t2CPbbL2FzEdUor0M+TD0/1yF92/ChNT4W KNB4S0vNZpCVz/VDdBOWaMzya4yxqOzXh9R4oDtO6+EKp7BSHR3z40jzhHArn8i/ ShklbesP4kP4GvNkUG/6K/xwbzE+8tYCA8SBnTzjN7iVL+1UDdNzYYi/5i9FqAVJ 8SesMpBPvrWF0aSS9E0OUmmuhe62bhoklXPCPMziNpby4wslkAMw6+v85j5mfF6m zxuzSP1v3Hbre+HMS1xl69grG8rizRf6f5vp9hSD95AE2YtUwg9gvMvZX/DyifLI Ga6oc5xo62mA4X2qbSXtqFnVY+BcLnMPIH8ZtkwzGaq0UvcPnxSzdaeOKKKfSk8H qhOFhs0pKvaQMSFkm3yIIHDgR6L736XMiTNJGS3PEHsBN84fldBTB0MlegeSCvaU N1VoBpxoWRw+wcQGtJb2CLWP1yqLFOL87Jozfk+6HY/NILpqPCKP7gP3XrKOhYBY CYC/OSdO4IlCPsYePMEKOBj2cBJgNKIC40IWD9aZF4lbQzARYFzJjbVq63YsG0OQ JCqNjfu4ja+d+/y5j5rCfVkpmyT6cNiI2XtNF/KDRQvjR/WgnHdtTGFNIdsblZ1l oMjch8YsL1PQXdqOJvpl =rUaw -----END PGP SIGNATURE----- --+HwY7O+GdaLKMYOv--