From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pa0-f54.google.com ([209.85.220.54]:34548 "EHLO mail-pa0-f54.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750703AbbKDFB5 (ORCPT ); Wed, 4 Nov 2015 00:01:57 -0500 Received: by padhx2 with SMTP id hx2so32896833pad.1 for ; Tue, 03 Nov 2015 21:01:57 -0800 (PST) Subject: Re: [PATCH V12 9/9] powerpc/eeh: Support error recovery for VF PE To: Wei Yang , gwshan@linux.vnet.ibm.com, bhelgaas@google.com, mpe@ellerman.id.au References: <1446607732-4166-1-git-send-email-weiyang@linux.vnet.ibm.com> <1446607732-4166-10-git-send-email-weiyang@linux.vnet.ibm.com> Cc: linuxppc-dev@lists.ozlabs.org, linux-pci@vger.kernel.org From: Alexey Kardashevskiy Message-ID: <5639913E.4040000@ozlabs.ru> Date: Wed, 4 Nov 2015 16:01:50 +1100 MIME-Version: 1.0 In-Reply-To: <1446607732-4166-10-git-send-email-weiyang@linux.vnet.ibm.com> Content-Type: text/plain; charset=koi8-r; format=flowed Sender: linux-pci-owner@vger.kernel.org List-ID: On 11/04/2015 02:28 PM, Wei Yang wrote: > PFs are enumerated on PCI bus, while VFs are created by PF's driver. > > In EEH recovery, it has two cases: > 1. Device and driver is EEH aware, error handlers are called. > 2. Device and driver is not EEH aware, un-plug the device and plug it again > by enumerating it. > > The special thing happens on the second case. For a PF, we could use the > original pci core to enumerate the bus, while for VF we need to record the > VFs which aer un-plugged then plug it again. > > Also The patch caches the VF index in pci_dn, which can be used to > calculate VF's bus, device and function number. Those information helps to > locate the VF's PCI device instance when doing hotplug during EEH recovery > if necessary. > > Signed-off-by: Wei Yang > --- > arch/powerpc/include/asm/eeh.h | 7 ++ > arch/powerpc/include/asm/pci-bridge.h | 1 + > arch/powerpc/kernel/eeh.c | 8 +++ > arch/powerpc/kernel/eeh_dev.c | 1 + > arch/powerpc/kernel/eeh_driver.c | 127 +++++++++++++++++++++++++++------- > arch/powerpc/kernel/eeh_pe.c | 3 +- > arch/powerpc/kernel/pci_dn.c | 4 +- > 7 files changed, 123 insertions(+), 28 deletions(-) > > diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h > index 331c856..1f68190 100644 > --- a/arch/powerpc/include/asm/eeh.h > +++ b/arch/powerpc/include/asm/eeh.h > @@ -127,6 +127,11 @@ static inline bool eeh_pe_passed(struct eeh_pe *pe) > #define EEH_DEV_SYSFS (1 << 9) /* Sysfs created */ > #define EEH_DEV_REMOVED (1 << 10) /* Removed permanently */ > > +struct eeh_rmv_data { > + struct list_head edev_list; > + int removed; > +}; This struct is only used in arch/powerpc/kernel/eeh_driver.c so move it there. > + > struct eeh_dev { > int mode; /* EEH mode */ > int class_code; /* Class code of the device */ > @@ -139,9 +144,11 @@ struct eeh_dev { > int af_cap; /* Saved AF capability */ > struct eeh_pe *pe; /* Associated PE */ > struct list_head list; /* Form link list in the PE */ > + struct list_head rmv_list; /* Record the removed edev */ > struct pci_controller *phb; /* Associated PHB */ > struct pci_dn *pdn; /* Associated PCI device node */ > struct pci_dev *pdev; /* Associated PCI device */ > + bool in_error; /* Error flag for eeh_dev */ > struct pci_dev *physfn; /* Associated PF PORT */ > struct pci_bus *bus; /* PCI bus for partial hotplug */ > }; > diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h > index 9b365d6..533e6e9 100644 > --- a/arch/powerpc/include/asm/pci-bridge.h > +++ b/arch/powerpc/include/asm/pci-bridge.h > @@ -211,6 +211,7 @@ struct pci_dn { > #define IODA_INVALID_PE (-1) > #ifdef CONFIG_PPC_POWERNV > int pe_number; > + int vf_index; /* VF index in the PF */ > #ifdef CONFIG_PCI_IOV > u16 vfs_expanded; /* number of VFs IOV BAR expanded */ > u16 num_vfs; /* number of VFs enabled*/ > diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c > index 41a4b30..0f36750 100644 > --- a/arch/powerpc/kernel/eeh.c > +++ b/arch/powerpc/kernel/eeh.c > @@ -1245,6 +1245,14 @@ void eeh_remove_device(struct pci_dev *dev) > * from the parent PE during the BAR resotre. > */ > edev->pdev = NULL; > + > + /* > + * The flag "in_error" is used to trace EEH devices for VFs > + * in error state or not. It's set in eeh_report_error(). If > + * it's not set, eeh_report_{reset,resume}() won't be called > + * for the VF EEH device. > + */ > + edev->in_error = 0; It is a bool, so "= false". > dev->dev.archdata.edev = NULL; > if (!(edev->pe->state & EEH_PE_KEEP)) > eeh_rmv_from_parent_pe(edev); > diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c > index aabba94..7815095 100644 > --- a/arch/powerpc/kernel/eeh_dev.c > +++ b/arch/powerpc/kernel/eeh_dev.c > @@ -67,6 +67,7 @@ void *eeh_dev_init(struct pci_dn *pdn, void *data) > edev->pdn = pdn; > edev->phb = phb; > INIT_LIST_HEAD(&edev->list); > + INIT_LIST_HEAD(&edev->rmv_list); > > return NULL; > } > diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c > index 89eb4bc..06d20d6 100644 > --- a/arch/powerpc/kernel/eeh_driver.c > +++ b/arch/powerpc/kernel/eeh_driver.c > @@ -211,6 +211,7 @@ static void *eeh_report_error(void *data, void *userdata) > if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; > if (*res == PCI_ERS_RESULT_NONE) *res = rc; > > + edev->in_error = true; > eeh_pcid_put(dev); > return NULL; > } > @@ -282,7 +283,8 @@ static void *eeh_report_reset(void *data, void *userdata) > > if (!driver->err_handler || > !driver->err_handler->slot_reset || > - (edev->mode & EEH_DEV_NO_HANDLER)) { > + (edev->mode & EEH_DEV_NO_HANDLER) || > + (!edev->in_error)) { > eeh_pcid_put(dev); > return NULL; > } > @@ -326,6 +328,7 @@ static void *eeh_report_resume(void *data, void *userdata) > { > struct eeh_dev *edev = (struct eeh_dev *)data; > struct pci_dev *dev = eeh_dev_to_pci_dev(edev); > + bool was_in_error; > struct pci_driver *driver; > > if (!dev || eeh_dev_removed(edev)) > @@ -335,11 +338,13 @@ static void *eeh_report_resume(void *data, void *userdata) > driver = eeh_pcid_get(dev); > if (!driver) return NULL; > > + was_in_error = edev->in_error; > + edev->in_error = false; > eeh_enable_irq(dev); > > if (!driver->err_handler || > !driver->err_handler->resume || > - (edev->mode & EEH_DEV_NO_HANDLER)) { > + (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) { > edev->mode &= ~EEH_DEV_NO_HANDLER; > eeh_pcid_put(dev); > return NULL; > @@ -386,12 +391,39 @@ static void *eeh_report_failure(void *data, void *userdata) > return NULL; > } > > +static void *eeh_add_virt_device(void *data, void *userdata) > +{ > + struct pci_driver *driver; > + struct eeh_dev *edev = (struct eeh_dev *)data; > + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); > + struct pci_dn *pdn = eeh_dev_to_pdn(edev); > + > + if (!(edev->physfn)) { > + pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n", > + __func__, edev->phb->global_number, pdn->busno, > + PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); > + return NULL; > + } > + > + driver = eeh_pcid_get(dev); > + if (driver) { > + eeh_pcid_put(dev); > + if (driver->err_handler) > + return NULL; > + } > + > + pci_iov_virtfn_add(edev->physfn, pdn->vf_index, 0); > + return NULL; > +} > + > static void *eeh_rmv_device(void *data, void *userdata) > { > struct pci_driver *driver; > struct eeh_dev *edev = (struct eeh_dev *)data; > struct pci_dev *dev = eeh_dev_to_pci_dev(edev); > - int *removed = (int *)userdata; > + struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata; > + int *removed = rmv_data ? &rmv_data->removed : NULL; > + struct pci_dn *pdn = eeh_dev_to_pdn(edev); > > /* > * Actually, we should remove the PCI bridges as well. > @@ -416,7 +448,7 @@ static void *eeh_rmv_device(void *data, void *userdata) > driver = eeh_pcid_get(dev); > if (driver) { > eeh_pcid_put(dev); > - if (driver->err_handler) > + if (removed && driver->err_handler) > return NULL; > } > > @@ -425,11 +457,26 @@ static void *eeh_rmv_device(void *data, void *userdata) > pci_name(dev)); > edev->bus = dev->bus; > edev->mode |= EEH_DEV_DISCONNECTED; > - (*removed)++; > + if (removed) > + (*removed)++; > > - pci_lock_rescan_remove(); > - pci_stop_and_remove_bus_device(dev); > - pci_unlock_rescan_remove(); > + if (edev->physfn) { > + pci_iov_virtfn_remove(edev->physfn, pdn->vf_index, 0); > + edev->pdev = NULL; > + > + /* > + * We have to set the VF PE number to invalid one, which is > + * required to plug the VF successfully. > + */ > + pdn->pe_number = IODA_INVALID_PE; > + > + if (rmv_data) > + list_add(&edev->rmv_list, &rmv_data->edev_list); > + } else { > + pci_lock_rescan_remove(); > + pci_stop_and_remove_bus_device(dev); > + pci_unlock_rescan_remove(); > + } > > return NULL; > } > @@ -543,11 +590,13 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe) > * During the reset, udev might be invoked because those affected > * PCI devices will be removed and then added. > */ > -static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) > +static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, > + struct eeh_rmv_data *rmv_data) > { > struct pci_bus *frozen_bus = eeh_pe_bus_get(pe); > struct timeval tstamp; > - int cnt, rc, removed = 0; > + int cnt, rc; > + struct eeh_dev *edev; > > /* pcibios will clear the counter; save the value */ > cnt = pe->freeze_count; > @@ -561,12 +610,15 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) > */ > eeh_pe_state_mark(pe, EEH_PE_KEEP); > if (bus) { > - pci_lock_rescan_remove(); > - pcibios_remove_pci_devices(bus); > - pci_unlock_rescan_remove(); > - } else if (frozen_bus) { > - eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed); > - } > + if (pe->type & EEH_PE_VF) { > + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); > + } else { > + pci_lock_rescan_remove(); > + pcibios_remove_pci_devices(bus); > + pci_unlock_rescan_remove(); > + } > + } else if (frozen_bus) > + eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data); > > /* > * Reset the pci controller. (Asserts RST#; resets config space). > @@ -607,14 +659,22 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) > * PE. We should disconnect it so the binding can be > * rebuilt when adding PCI devices. > */ > + edev = list_first_entry(&pe->edevs, struct eeh_dev, list); > eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); > - pcibios_add_pci_devices(bus); > - } else if (frozen_bus && removed) { > + if (pe->type & EEH_PE_VF) > + eeh_add_virt_device(edev, NULL); > + else > + pcibios_add_pci_devices(bus); > + } else if (frozen_bus && rmv_data->removed) { > pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); > ssleep(5); > > + edev = list_first_entry(&pe->edevs, struct eeh_dev, list); > eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); > - pcibios_add_pci_devices(frozen_bus); > + if (pe->type & EEH_PE_VF) > + eeh_add_virt_device(edev, NULL); > + else > + pcibios_add_pci_devices(frozen_bus); > } > eeh_pe_state_clear(pe, EEH_PE_KEEP); > > @@ -633,8 +693,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) > static void eeh_handle_normal_event(struct eeh_pe *pe) > { > struct pci_bus *frozen_bus; > + struct eeh_dev *edev, *tmp; > int rc = 0; > enum pci_ers_result result = PCI_ERS_RESULT_NONE; > + struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0}; > > frozen_bus = eeh_pe_bus_get(pe); > if (!frozen_bus) { > @@ -681,7 +743,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) > */ > if (result == PCI_ERS_RESULT_NONE) { > pr_info("EEH: Reset with hotplug activity\n"); > - rc = eeh_reset_device(pe, frozen_bus); > + rc = eeh_reset_device(pe, frozen_bus, NULL); > if (rc) { > pr_warn("%s: Unable to reset, err=%d\n", > __func__, rc); > @@ -733,7 +795,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) > /* If any device called out for a reset, then reset the slot */ > if (result == PCI_ERS_RESULT_NEED_RESET) { > pr_info("EEH: Reset without hotplug activity\n"); > - rc = eeh_reset_device(pe, NULL); > + rc = eeh_reset_device(pe, NULL, &rmv_data); > if (rc) { > pr_warn("%s: Cannot reset, err=%d\n", > __func__, rc); > @@ -753,6 +815,15 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) > goto hard_fail; > } > > + /* > + * For those hot removed VFs, we should add back them after PF get > + * recovered properly. > + */ > + list_for_each_entry_safe(edev, tmp, &rmv_data.edev_list, rmv_list) { > + eeh_add_virt_device(edev, NULL); > + list_del(&edev->rmv_list); > + } > + > /* Tell all device drivers that they can resume operations */ > pr_info("EEH: Notify device driver to resume\n"); > eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); > @@ -792,11 +863,15 @@ perm_error: > * the their PCI config any more. > */ > if (frozen_bus) { > - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); > - > - pci_lock_rescan_remove(); > - pcibios_remove_pci_devices(frozen_bus); > - pci_unlock_rescan_remove(); > + if (pe->type & EEH_PE_VF) { > + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); > + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); > + } else { > + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); > + pci_lock_rescan_remove(); > + pcibios_remove_pci_devices(frozen_bus); > + pci_unlock_rescan_remove(); > + } > } > } > > diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c > index 29240ad..b7facb9 100644 > --- a/arch/powerpc/kernel/eeh_pe.c > +++ b/arch/powerpc/kernel/eeh_pe.c > @@ -936,7 +936,8 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) > if (pe->type & EEH_PE_PHB) { > bus = pe->phb->bus; > } else if (pe->type & EEH_PE_BUS || > - pe->type & EEH_PE_DEVICE) { > + pe->type & EEH_PE_DEVICE || > + pe->type & EEH_PE_VF) { > if (pe->bus) { > bus = pe->bus; > goto out; > diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c > index 5091b05..9e9cb16 100644 > --- a/arch/powerpc/kernel/pci_dn.c > +++ b/arch/powerpc/kernel/pci_dn.c > @@ -139,6 +139,7 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev) > #ifdef CONFIG_PCI_IOV > static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent, > struct pci_dev *pdev, > + int vf_index, > int busno, int devfn) > { > struct pci_dn *pdn; > @@ -158,6 +159,7 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent, > pdn->busno = busno; > pdn->devfn = devfn; > #ifdef CONFIG_PPC_POWERNV > + pdn->vf_index = vf_index; > pdn->pe_number = IODA_INVALID_PE; > #endif > INIT_LIST_HEAD(&pdn->child_list); > @@ -198,7 +200,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) > return NULL; > > for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) { > - pdn = add_one_dev_pci_data(parent, NULL, > + pdn = add_one_dev_pci_data(parent, NULL, i, > pci_iov_virtfn_bus(pdev, i), > pci_iov_virtfn_devfn(pdev, i)); > if (!pdn) { > -- Alexey