From: Alexey Kardashevskiy <aik@ozlabs.ru>
To: Wei Yang <weiyang@linux.vnet.ibm.com>,
gwshan@linux.vnet.ibm.com, bhelgaas@google.com,
mpe@ellerman.id.au
Cc: linuxppc-dev@lists.ozlabs.org, linux-pci@vger.kernel.org
Subject: Re: [PATCH V10 10/12] powerpc/eeh: Support error recovery for VF PE
Date: Fri, 30 Oct 2015 16:20:48 +1100 [thread overview]
Message-ID: <5632FE30.8040003@ozlabs.ru> (raw)
In-Reply-To: <1445829362-2738-11-git-send-email-weiyang@linux.vnet.ibm.com>
On 10/26/2015 02:16 PM, Wei Yang wrote:
> Different from PCI bus dependent PE, PE for VFs doesn't have the
s/Different from/Unlike/
> primary bus, on which the PCI hotplug is implemented. The patch
> supports error recovery, especially the PCI hotplug for VF's PE.
The patch adds support for error recovery of what exactly?
What is "especially" about?
> The hotplug on VF's PE is implemented based on VFs, instead of
> PCI bus any more.
Needs rephrase.
Is this patch about EEH error recovery, i.e. unplug VF, re-plug VF? Why
does the commit log talk about PE hotplug? I thought we do VF (i.e. PCI
device) hotplug, not PE.
>
> [gwshan: changelog and code refactoring]
> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
> Acked-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> ---
> arch/powerpc/include/asm/eeh.h | 1 +
> arch/powerpc/kernel/eeh.c | 8 ++++
> arch/powerpc/kernel/eeh_driver.c | 100 +++++++++++++++++++++++++++++++--------
> arch/powerpc/kernel/eeh_pe.c | 3 +-
> 4 files changed, 90 insertions(+), 22 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
> index 331c856..ea1f13c4 100644
> --- a/arch/powerpc/include/asm/eeh.h
> +++ b/arch/powerpc/include/asm/eeh.h
> @@ -142,6 +142,7 @@ struct eeh_dev {
> struct pci_controller *phb; /* Associated PHB */
> struct pci_dn *pdn; /* Associated PCI device node */
> struct pci_dev *pdev; /* Associated PCI device */
> + int in_error; /* Error flag for eeh_dev */
Make it "bool".
> struct pci_dev *physfn; /* Associated PF PORT */
> struct pci_bus *bus; /* PCI bus for partial hotplug */
> };
> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
> index af9b597..28e4d73 100644
> --- a/arch/powerpc/kernel/eeh.c
> +++ b/arch/powerpc/kernel/eeh.c
> @@ -1227,6 +1227,14 @@ void eeh_remove_device(struct pci_dev *dev)
> * from the parent PE during the BAR resotre.
> */
> edev->pdev = NULL;
> +
> + /*
> + * The flag "in_error" is used to trace EEH devices for VFs
> + * in error state or not. It's set in eeh_report_error(). If
> + * it's not set, eeh_report_{reset,resume}() won't be called
> + * for the VF EEH device.
> + */
> + edev->in_error = 0;
> dev->dev.archdata.edev = NULL;
> if (!(edev->pe->state & EEH_PE_KEEP))
> eeh_rmv_from_parent_pe(edev);
> diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
> index 89eb4bc..99868e2 100644
> --- a/arch/powerpc/kernel/eeh_driver.c
> +++ b/arch/powerpc/kernel/eeh_driver.c
> @@ -211,6 +211,7 @@ static void *eeh_report_error(void *data, void *userdata)
> if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
> if (*res == PCI_ERS_RESULT_NONE) *res = rc;
>
> + edev->in_error = 1;
> eeh_pcid_put(dev);
> return NULL;
> }
> @@ -282,7 +283,8 @@ static void *eeh_report_reset(void *data, void *userdata)
>
> if (!driver->err_handler ||
> !driver->err_handler->slot_reset ||
> - (edev->mode & EEH_DEV_NO_HANDLER)) {
> + (edev->mode & EEH_DEV_NO_HANDLER) ||
> + (!edev->in_error)) {
> eeh_pcid_put(dev);
> return NULL;
> }
> @@ -339,14 +341,16 @@ static void *eeh_report_resume(void *data, void *userdata)
>
bood was_in_error = edev->in_error;
edev->in_error = false;
then use was_in_error below and there is no need to replace return with
goto, etc -> slightly simpler code.
> if (!driver->err_handler ||
> !driver->err_handler->resume ||
> - (edev->mode & EEH_DEV_NO_HANDLER)) {
> + (edev->mode & EEH_DEV_NO_HANDLER) ||
> + (!edev->in_error)) {
> edev->mode &= ~EEH_DEV_NO_HANDLER;
> - eeh_pcid_put(dev);
> - return NULL;
> + goto out;
> }
>
> driver->err_handler->resume(dev);
>
> +out:
> + edev->in_error = 0;
> eeh_pcid_put(dev);
> return NULL;
> }
> @@ -386,12 +390,38 @@ static void *eeh_report_failure(void *data, void *userdata)
> return NULL;
> }
>
> +static void *eeh_add_virt_device(void *data, void *userdata)
> +{
> + struct pci_driver *driver;
> + struct eeh_dev *edev = (struct eeh_dev *)data;
> + struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
> + struct pci_dn *pdn = eeh_dev_to_pdn(edev);
> +
> + if (!(edev->physfn)) {
> + pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
> + __func__, edev->phb->global_number, pdn->busno,
> + PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
> + return NULL;
> + }
> +
> + driver = eeh_pcid_get(dev);
> + if (driver) {
> + eeh_pcid_put(dev);
> + if (driver->err_handler)
> + return NULL;
> + }
> +
> + pci_iov_virtfn_add(edev->physfn, pdn->vf_index, 0);
> + return NULL;
> +}
> +
> static void *eeh_rmv_device(void *data, void *userdata)
> {
> struct pci_driver *driver;
> struct eeh_dev *edev = (struct eeh_dev *)data;
> struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
> int *removed = (int *)userdata;
> + struct pci_dn *pdn = eeh_dev_to_pdn(edev);
>
> /*
> * Actually, we should remove the PCI bridges as well.
> @@ -416,7 +446,7 @@ static void *eeh_rmv_device(void *data, void *userdata)
> driver = eeh_pcid_get(dev);
> if (driver) {
> eeh_pcid_put(dev);
> - if (driver->err_handler)
> + if (removed && driver->err_handler)
> return NULL;
> }
>
> @@ -425,11 +455,23 @@ static void *eeh_rmv_device(void *data, void *userdata)
> pci_name(dev));
> edev->bus = dev->bus;
> edev->mode |= EEH_DEV_DISCONNECTED;
> - (*removed)++;
> + if (removed)
> + (*removed)++;
>
> - pci_lock_rescan_remove();
> - pci_stop_and_remove_bus_device(dev);
> - pci_unlock_rescan_remove();
> + if (edev->physfn) {
> + pci_iov_virtfn_remove(edev->physfn, pdn->vf_index, 0);
> + edev->pdev = NULL;
> +
> + /*
> + * We have to set the VF PE number to invalid one, which is
> + * required to plug the VF successfully.
> + */
> + pdn->pe_number = IODA_INVALID_PE;
> + } else {
> + pci_lock_rescan_remove();
> + pci_stop_and_remove_bus_device(dev);
> + pci_unlock_rescan_remove();
> + }
>
> return NULL;
> }
> @@ -548,6 +590,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
> struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
> struct timeval tstamp;
> int cnt, rc, removed = 0;
> + struct eeh_dev *edev;
>
> /* pcibios will clear the counter; save the value */
> cnt = pe->freeze_count;
> @@ -561,12 +604,15 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
> */
> eeh_pe_state_mark(pe, EEH_PE_KEEP);
> if (bus) {
> - pci_lock_rescan_remove();
> - pcibios_remove_pci_devices(bus);
> - pci_unlock_rescan_remove();
> - } else if (frozen_bus) {
> + if (pe->type & EEH_PE_VF)
> + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
I believe the rule is that if one branch of "if" uses curly braces, then
the other should have them too.
> + else {
> + pci_lock_rescan_remove();
> + pcibios_remove_pci_devices(bus);
> + pci_unlock_rescan_remove();
> + }
> + } else if (frozen_bus)
> eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
> - }
>
> /*
> * Reset the pci controller. (Asserts RST#; resets config space).
> @@ -607,14 +653,22 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
> * PE. We should disconnect it so the binding can be
> * rebuilt when adding PCI devices.
> */
> + edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
> eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
> - pcibios_add_pci_devices(bus);
> + if (pe->type & EEH_PE_VF)
Move "edev = list_first_entry(&pe->edevs, struct eeh_dev, list)" here. You
could actually do:
eeh_add_virt_device(list_first_entry(&pe->edevs, struct eeh_dev, list), NULL);
and drop local variable @edev. Or move it to this scope. Dunno.
> + eeh_add_virt_device(edev, NULL);
> + else
> + pcibios_add_pci_devices(bus);
> } else if (frozen_bus && removed) {
> pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
> ssleep(5);
>
> + edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
> eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
> - pcibios_add_pci_devices(frozen_bus);
> + if (pe->type & EEH_PE_VF)
The same comment as above.
> + eeh_add_virt_device(edev, NULL);
> + else
> + pcibios_add_pci_devices(frozen_bus);
> }
> eeh_pe_state_clear(pe, EEH_PE_KEEP);
>
> @@ -792,11 +846,15 @@ perm_error:
> * the their PCI config any more.
> */
> if (frozen_bus) {
> - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
> -
> - pci_lock_rescan_remove();
> - pcibios_remove_pci_devices(frozen_bus);
> - pci_unlock_rescan_remove();
> + if (pe->type & EEH_PE_VF) {
> + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
> + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
> + } else {
> + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
> + pci_lock_rescan_remove();
> + pcibios_remove_pci_devices(frozen_bus);
> + pci_unlock_rescan_remove();
> + }
> }
> }
>
> diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
> index 260a701..5cde950 100644
> --- a/arch/powerpc/kernel/eeh_pe.c
> +++ b/arch/powerpc/kernel/eeh_pe.c
> @@ -914,7 +914,8 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
> if (pe->type & EEH_PE_PHB) {
> bus = pe->phb->bus;
> } else if (pe->type & EEH_PE_BUS ||
> - pe->type & EEH_PE_DEVICE) {
> + pe->type & EEH_PE_DEVICE ||
> + pe->type & EEH_PE_VF) {
> if (pe->bus) {
> bus = pe->bus;
> goto out;
>
--
Alexey
next prev parent reply other threads:[~2015-10-30 5:20 UTC|newest]
Thread overview: 50+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-10-26 3:15 [PATCH V10 00/12] VF EEH on Power8 Wei Yang
2015-10-26 3:15 ` [PATCH V10 01/12] PCI/IOV: Rename and export virtfn_add/virtfn_remove Wei Yang
2015-10-27 1:31 ` Andrew Donnellan
2015-10-27 23:06 ` Bjorn Helgaas
2015-10-28 1:21 ` Wei Yang
2015-10-26 3:15 ` [PATCH V10 02/12] PCI: Add pcibios_bus_add_device() weak function Wei Yang
2015-10-27 5:07 ` Andrew Donnellan
2015-10-26 3:15 ` [PATCH V10 03/12] powerpc/pci: Cache VF index in pci_dn Wei Yang
2015-10-27 5:01 ` Andrew Donnellan
2015-10-27 22:04 ` Daniel Axtens
2015-10-28 1:45 ` Wei Yang
2015-10-30 2:05 ` Alexey Kardashevskiy
2015-10-30 2:48 ` Wei Yang
2015-10-26 3:15 ` [PATCH V10 04/12] powerpc/pci: Remove VFs prior to PF Wei Yang
2015-10-30 3:04 ` Alexey Kardashevskiy
2015-10-30 6:31 ` Wei Yang
2015-10-26 3:15 ` [PATCH V10 05/12] powerpc/eeh: Cache only BARs, not windows or IOV BARs Wei Yang
2015-10-29 3:29 ` Daniel Axtens
2015-10-29 8:57 ` Wei Yang
2015-10-30 3:22 ` Alexey Kardashevskiy
2015-10-30 6:37 ` Wei Yang
2015-10-26 3:15 ` [PATCH V10 06/12] powerpc/powernv: EEH device for VF Wei Yang
2015-10-30 3:33 ` Alexey Kardashevskiy
2015-10-30 6:52 ` Wei Yang
2015-10-30 7:36 ` Alexey Kardashevskiy
2015-10-30 7:58 ` Wei Yang
2015-10-26 3:15 ` [PATCH V10 07/12] powerpc/eeh: Create PE for VFs Wei Yang
2015-10-30 3:46 ` Alexey Kardashevskiy
2015-10-30 6:59 ` Wei Yang
2015-10-26 3:15 ` [PATCH V10 08/12] powerpc/powernv: Support EEH reset for VF PE Wei Yang
2015-10-30 4:11 ` Alexey Kardashevskiy
2015-10-30 7:18 ` Wei Yang
2015-10-30 8:05 ` Alexey Kardashevskiy
2015-11-02 22:45 ` Wei Yang
2015-10-26 3:15 ` [PATCH V10 09/12] powerpc/powernv: Support PCI config restore for VFs Wei Yang
2015-10-30 4:56 ` Alexey Kardashevskiy
2015-10-30 8:17 ` Wei Yang
2015-10-26 3:16 ` [PATCH V10 10/12] powerpc/eeh: Support error recovery for VF PE Wei Yang
2015-10-30 5:20 ` Alexey Kardashevskiy [this message]
2015-11-01 1:53 ` Wei Yang
2015-11-01 23:40 ` Alexey Kardashevskiy
2015-11-02 9:39 ` Wei Yang
2015-10-26 3:16 ` [PATCH V10 11/12] powerpc/eeh: Don't block PCI config on resetting " Wei Yang
2015-10-30 5:42 ` Alexey Kardashevskiy
2015-10-30 7:19 ` Wei Yang
2015-10-26 3:16 ` [PATCH V10 12/12] powerpc/eeh: Handle hot removed VF when PF is EEH aware Wei Yang
2015-10-30 5:35 ` Alexey Kardashevskiy
2015-10-30 7:29 ` Wei Yang
2015-10-27 23:11 ` [PATCH V10 00/12] VF EEH on Power8 Bjorn Helgaas
2015-10-28 1:50 ` Wei Yang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5632FE30.8040003@ozlabs.ru \
--to=aik@ozlabs.ru \
--cc=bhelgaas@google.com \
--cc=gwshan@linux.vnet.ibm.com \
--cc=linux-pci@vger.kernel.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=mpe@ellerman.id.au \
--cc=weiyang@linux.vnet.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).