From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:43045) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1a9fDX-0004lF-DE for qemu-devel@nongnu.org; Thu, 17 Dec 2015 15:32:04 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1a9fDU-0007Tw-2i for qemu-devel@nongnu.org; Thu, 17 Dec 2015 15:32:03 -0500 Received: from mx1.redhat.com ([209.132.183.28]:41364) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1a9fDT-0007Tq-SB for qemu-devel@nongnu.org; Thu, 17 Dec 2015 15:32:00 -0500 Message-ID: <1450384318.2674.110.camel@redhat.com> From: Alex Williamson Date: Thu, 17 Dec 2015 13:31:58 -0700 In-Reply-To: <35d75210aef922bd049d93e0c8dbe13a2b5b1dd3.1447748073.git.chen.fan.fnst@cn.fujitsu.com> References: <35d75210aef922bd049d93e0c8dbe13a2b5b1dd3.1447748073.git.chen.fan.fnst@cn.fujitsu.com> Content-Type: text/plain; charset="UTF-8" Mime-Version: 1.0 Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [PATCH v14 Resend 10/13] pci: add pci device pre-post reset callbacks for host bus reset List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Cao jin , qemu-devel@nongnu.org Cc: chen.fan.fnst@cn.fujitsu.com, mst@redhat.com On Thu, 2015-12-17 at 09:41 +0800, Cao jin wrote: > From: Chen Fan >=20 > Particularly, For vfio devices, Once need to recovery devices > by bus reset such as AER, we always need to reset the host bus > to recovery the devices under the bus, so we need to add pci device > callbacks to specify to do host bus reset. >=20 > Signed-off-by: Chen Fan > Reviewed-by: Michael S. Tsirkin > --- > =C2=A0hw/pci/pci.c=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0= | 18 ++++++++++++++++++ > =C2=A0hw/pci/pci_bridge.c=C2=A0=C2=A0|=C2=A0=C2=A09 +++++++++ > =C2=A0hw/vfio/pci.c=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0| 26= ++++++++++++++++++++++++++ > =C2=A0hw/vfio/pci.h=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0|=C2= =A0=C2=A02 ++ > =C2=A0include/hw/pci/pci.h |=C2=A0=C2=A07 +++++++ > =C2=A05 files changed, 62 insertions(+) >=20 > diff --git a/hw/pci/pci.c b/hw/pci/pci.c > index f6ca6ef..64fa2cc 100644 > --- a/hw/pci/pci.c > +++ b/hw/pci/pci.c > @@ -247,6 +247,24 @@ static void pci_do_device_reset(PCIDevice *dev) > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0msix_reset(dev); > =C2=A0} > =C2=A0 > +void pci_device_pre_reset(PCIBus *bus, PCIDevice *dev, void *unused) > +{ > +=C2=A0=C2=A0=C2=A0=C2=A0PCIDeviceClass *dc =3D PCI_DEVICE_GET_CLASS(de= v); > + > +=C2=A0=C2=A0=C2=A0=C2=A0if (dc->pre_reset) { > +=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0dc->pre_reset(dev); > +=C2=A0=C2=A0=C2=A0=C2=A0} > +} > + > +void pci_device_post_reset(PCIBus *bus, PCIDevice *dev, void > *unused) > +{ > +=C2=A0=C2=A0=C2=A0=C2=A0PCIDeviceClass *dc =3D PCI_DEVICE_GET_CLASS(de= v); > + > +=C2=A0=C2=A0=C2=A0=C2=A0if (dc->post_reset) { > +=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0dc->post_reset(dev); > +=C2=A0=C2=A0=C2=A0=C2=A0} > +} > + > =C2=A0/* > =C2=A0 * This function is called on #RST and FLR. > =C2=A0 * FLR if PCI_EXP_DEVCTL_BCR_FLR is set > diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c > index 40c97b1..ddb76ab 100644 > --- a/hw/pci/pci_bridge.c > +++ b/hw/pci/pci_bridge.c > @@ -267,8 +267,17 @@ void pci_bridge_write_config(PCIDevice *d, > =C2=A0 > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0newctl =3D pci_get_word(d->config + PCI_B= RIDGE_CONTROL); > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS= _RESET) { > +=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0/* > +=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0* Notify all vfi= o-pci devices under the bus > +=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0* should do phys= ical bus reset. > +=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0*/ > +=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0PCIBus *sec_bus =3D pc= i_bridge_get_sec_bus(s); > +=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0pci_for_each_device(se= c_bus, pci_bus_num(sec_bus), > +=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0= =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2= =A0=C2=A0=C2=A0=C2=A0pci_device_pre_reset, NULL); > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0/* Trigger hot re= set on 0->1 transition. */ > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0qbus_reset_all(&s= ->sec_bus.qbus); > +=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0pci_for_each_device(se= c_bus, pci_bus_num(sec_bus), > +=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0= =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2= =A0=C2=A0=C2=A0=C2=A0pci_device_post_reset, NULL); > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0} > =C2=A0} > =C2=A0 > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index e17dc89..df32618 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -39,6 +39,7 @@ > =C2=A0 > =C2=A0static void vfio_disable_interrupts(VFIOPCIDevice *vdev); > =C2=A0static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool > enabled); > +static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single); > =C2=A0 > =C2=A0/* > =C2=A0 * Disabling BAR mmaping can be slow, but toggling it around INTx > can > @@ -1888,6 +1889,8 @@ static int > vfio_check_host_bus_reset(VFIOPCIDevice *vdev) > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0/* List all affected devices by bus reset= */ > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0devices =3D &info->devices[0]; > =C2=A0 > +=C2=A0=C2=A0=C2=A0=C2=A0vdev->single_depend_dev =3D (info->count =3D=3D= 1); > + > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0/* Verify that we have all the groups req= uired */ > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0for (i =3D 0; i < info->count; i++) { > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0PCIHostDeviceAddr= ess host; > @@ -2029,10 +2032,26 @@ static int > vfio_check_bus_reset(NotifierWithReturn *n, void *opaque) > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0return vfio_check_host_bus_reset(vdev); > =C2=A0} > =C2=A0 > +static void vfio_aer_pre_reset(PCIDevice *pdev) > +{ > +=C2=A0=C2=A0=C2=A0=C2=A0VFIOPCIDevice *vdev =3D DO_UPCAST(VFIOPCIDevic= e, pdev, pdev); > + > +=C2=A0=C2=A0=C2=A0=C2=A0vdev->aer_reset =3D true; > +=C2=A0=C2=A0=C2=A0=C2=A0vfio_pci_hot_reset(vdev, vdev->single_depend_d= ev); > +} Doesn't this lead to multiple host bus resets per guest bus reset in many cases? =C2=A0It looks like we'll do it once per vfio-pci device, eve= n if those devices are on the same host bus. =C2=A0That's a 1 second operat= ion per device. =C2=A0Can we avoid that? =C2=A0Maybe some sort of sequence ID= could help a device figure out whether it's already been reset as part of a dependent device for this particular guest bus reset. =C2=A0Thanks, Alex > + > +static void vfio_aer_post_reset(PCIDevice *pdev) > +{ > +=C2=A0=C2=A0=C2=A0=C2=A0VFIOPCIDevice *vdev =3D DO_UPCAST(VFIOPCIDevic= e, pdev, pdev); > + > +=C2=A0=C2=A0=C2=A0=C2=A0vdev->aer_reset =3D false; > +} > + > =C2=A0static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver, > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0= =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2= =A0=C2=A0=C2=A0int pos, uint16_t size) > =C2=A0{ > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0PCIDevice *pdev =3D &vdev->pdev; > +=C2=A0=C2=A0=C2=A0=C2=A0PCIDeviceClass *dc =3D PCI_DEVICE_GET_CLASS(pd= ev); > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0PCIDevice *dev_iter; > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0uint8_t type; > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0uint32_t errcap; > @@ -2079,6 +2098,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, > uint8_t cap_ver, > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0vdev->hotplug_notifier.notify =3D vfio_ch= eck_bus_reset; > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0pci_bus_add_hotplug_notifier(pdev->bus, &= vdev- > >hotplug_notifier); > =C2=A0 > +=C2=A0=C2=A0=C2=A0=C2=A0dc->pre_reset =3D vfio_aer_pre_reset; > +=C2=A0=C2=A0=C2=A0=C2=A0dc->post_reset =3D vfio_aer_post_reset; > + > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0pcie_cap_deverr_init(pdev); > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0return pcie_aer_init(pdev, pos, size); > =C2=A0 > @@ -2978,6 +3000,10 @@ static void vfio_pci_reset(DeviceState *dev) > =C2=A0 > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0trace_vfio_pci_reset(vdev->vbasedev.name)= ; > =C2=A0 > +=C2=A0=C2=A0=C2=A0=C2=A0if (vdev->aer_reset) { > +=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0return; > +=C2=A0=C2=A0=C2=A0=C2=A0} > + > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0vfio_pci_pre_reset(vdev); > =C2=A0 > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0if (vdev->resetfn && !vdev->resetfn(vdev)= ) { > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h > index b385f07..5470b97 100644 > --- a/hw/vfio/pci.h > +++ b/hw/vfio/pci.h > @@ -142,6 +142,8 @@ typedef struct VFIOPCIDevice { > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0bool no_kvm_intx; > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0bool no_kvm_msi; > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0bool no_kvm_msix; > +=C2=A0=C2=A0=C2=A0=C2=A0bool aer_reset; > +=C2=A0=C2=A0=C2=A0=C2=A0bool single_depend_dev; > =C2=A0 > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0NotifierWithReturn hotplug_notifier; > =C2=A0} VFIOPCIDevice; > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h > index 379b6e1..6b1f2d4 100644 > --- a/include/hw/pci/pci.h > +++ b/include/hw/pci/pci.h > @@ -105,6 +105,9 @@ typedef void PCIMapIORegionFunc(PCIDevice > *pci_dev, int region_num, > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0= =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2= =A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0pcibus_t addr, pcibus_= t size, int > type); > =C2=A0typedef void PCIUnregisterFunc(PCIDevice *pci_dev); > =C2=A0 > +typedef void PCIPreResetFunc(PCIDevice *pci_dev); > +typedef void PCIPostResetFunc(PCIDevice *pci_dev); > + > =C2=A0typedef struct PCIIORegion { > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0pcibus_t addr; /* current PCI mapping add= ress. -1 means not > mapped */ > =C2=A0#define PCI_BAR_UNMAPPED (~(pcibus_t)0) > @@ -193,6 +196,8 @@ typedef struct PCIDeviceClass { > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0PCIUnregisterFunc *exit; > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0PCIConfigReadFunc *config_read; > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0PCIConfigWriteFunc *config_write; > +=C2=A0=C2=A0=C2=A0=C2=A0PCIPreResetFunc *pre_reset; > +=C2=A0=C2=A0=C2=A0=C2=A0PCIPostResetFunc *post_reset; > =C2=A0 > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0uint16_t vendor_id; > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0uint16_t device_id; > @@ -380,6 +385,8 @@ bool pci_intx_route_changed(PCIINTxRoute *old, > PCIINTxRoute *new); > =C2=A0void pci_bus_fire_intx_routing_notifier(PCIBus *bus); > =C2=A0void pci_device_set_intx_routing_notifier(PCIDevice *dev, > =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0= =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2= =A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0= =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0PCIINTxRoutingNotifier > notifier); > +void pci_device_pre_reset(PCIBus *bus, PCIDevice *d, void *opaque); > +void pci_device_post_reset(PCIBus *bus, PCIDevice *d, void *opaque); > =C2=A0void pci_device_reset(PCIDevice *dev); > =C2=A0 > =C2=A0PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus,