linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
From: Alexey Kardashevskiy <aik@ozlabs.ru>
To: Gavin Shan <gwshan@linux.vnet.ibm.com>, linuxppc-dev@lists.ozlabs.org
Cc: bhelgaas@google.com, linux-pci@vger.kernel.org
Subject: Re: [PATCH v4 09/21] powerpc/powernv: Use PCI slot reset infrastructure
Date: Sat, 09 May 2015 23:41:05 +1000	[thread overview]
Message-ID: <554E0E71.2080200@ozlabs.ru> (raw)
In-Reply-To: <1430460188-31343-10-git-send-email-gwshan@linux.vnet.ibm.com>

On 05/01/2015 04:02 PM, Gavin Shan wrote:
> For PowerNV platform, running on top of skiboot, all PE level reset
> should be routed to firmware if the bridge of the PE primary bus has
> device-node property "ibm,reset-by-firmware". Otherwise, the kernel
> has to issue hot reset on PE's primary bus despite the requested reset
> types, which is the behaviour before the firmware supports PCI slot
> reset. So the changes don't depend on the PCI slot reset capability
> exposed from the firmware.
>
> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> ---
>   arch/powerpc/include/asm/eeh.h               |   1 +
>   arch/powerpc/include/asm/opal.h              |   4 +-
>   arch/powerpc/platforms/powernv/eeh-powernv.c | 206 +++++++++++++--------------
>   3 files changed, 102 insertions(+), 109 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
> index c5eb86f..2793d24 100644
> --- a/arch/powerpc/include/asm/eeh.h
> +++ b/arch/powerpc/include/asm/eeh.h
> @@ -190,6 +190,7 @@ enum {
>   #define EEH_RESET_DEACTIVATE	0	/* Deactivate the PE reset	*/
>   #define EEH_RESET_HOT		1	/* Hot reset			*/
>   #define EEH_RESET_FUNDAMENTAL	3	/* Fundamental reset		*/
> +#define EEH_RESET_COMPLETE	4	/* PHB complete reset           */
>   #define EEH_LOG_TEMP		1	/* EEH temporary error log	*/
>   #define EEH_LOG_PERM		2	/* EEH permanent error log	*/
>
> diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
> index 042af1a..6d467df 100644
> --- a/arch/powerpc/include/asm/opal.h
> +++ b/arch/powerpc/include/asm/opal.h
> @@ -129,7 +129,7 @@ int64_t opal_pci_map_pe_dma_window(uint64_t phb_id, uint16_t pe_number, uint16_t
>   int64_t opal_pci_map_pe_dma_window_real(uint64_t phb_id, uint16_t pe_number,
>   					uint16_t dma_window_number, uint64_t pci_start_addr,
>   					uint64_t pci_mem_size);
> -int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope, uint8_t assert_state);
> +int64_t opal_pci_reset(uint64_t id, uint8_t reset_scope, uint8_t assert_state);
>
>   int64_t opal_pci_get_hub_diag_data(uint64_t hub_id, void *diag_buffer,
>   				   uint64_t diag_buffer_len);
> @@ -145,7 +145,7 @@ int64_t opal_get_epow_status(__be64 *status);
>   int64_t opal_set_system_attention_led(uint8_t led_action);
>   int64_t opal_pci_next_error(uint64_t phb_id, __be64 *first_frozen_pe,
>   			    __be16 *pci_error_type, __be16 *severity);
> -int64_t opal_pci_poll(uint64_t phb_id);
> +int64_t opal_pci_poll(uint64_t id, uint8_t *val);
>   int64_t opal_return_cpu(void);
>   int64_t opal_check_token(uint64_t token);
>   int64_t opal_reinit_cpus(uint64_t flags);
> diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
> index ce738ab..3c01095 100644
> --- a/arch/powerpc/platforms/powernv/eeh-powernv.c
> +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
> @@ -742,12 +742,12 @@ static int pnv_eeh_get_state(struct eeh_pe *pe, int *delay)
>   	return ret;
>   }
>
> -static s64 pnv_eeh_phb_poll(struct pnv_phb *phb)
> +static s64 pnv_eeh_poll(uint64_t id)
>   {
>   	s64 rc = OPAL_HARDWARE;
>
>   	while (1) {
> -		rc = opal_pci_poll(phb->opal_id);
> +		rc = opal_pci_poll(id, NULL);
>   		if (rc <= 0)
>   			break;
>
> @@ -763,84 +763,38 @@ static s64 pnv_eeh_phb_poll(struct pnv_phb *phb)
>   int pnv_eeh_phb_reset(struct pci_controller *hose, int option)
>   {
>   	struct pnv_phb *phb = hose->private_data;
> +	uint8_t scope;
>   	s64 rc = OPAL_HARDWARE;
>
>   	pr_debug("%s: Reset PHB#%x, option=%d\n",
>   		 __func__, hose->global_number, option);
> -
> -	/* Issue PHB complete reset request */
> -	if (option == EEH_RESET_FUNDAMENTAL ||
> -	    option == EEH_RESET_HOT)
> -		rc = opal_pci_reset(phb->opal_id,
> -				    OPAL_RESET_PHB_COMPLETE,
> -				    OPAL_ASSERT_RESET);
> -	else if (option == EEH_RESET_DEACTIVATE)
> -		rc = opal_pci_reset(phb->opal_id,
> -				    OPAL_RESET_PHB_COMPLETE,
> -				    OPAL_DEASSERT_RESET);
> -	if (rc < 0)
> -		goto out;
> -
> -	/*
> -	 * Poll state of the PHB until the request is done
> -	 * successfully. The PHB reset is usually PHB complete
> -	 * reset followed by hot reset on root bus. So we also
> -	 * need the PCI bus settlement delay.
> -	 */
> -	rc = pnv_eeh_phb_poll(phb);
> -	if (option == EEH_RESET_DEACTIVATE) {
> -		if (system_state < SYSTEM_RUNNING)
> -			udelay(1000 * EEH_PE_RST_SETTLE_TIME);
> -		else
> -			msleep(EEH_PE_RST_SETTLE_TIME);


These udelay() and msleep() are gone. How come they are not needed anymore?
It is worth explaining this in the commit log, or removing those in a separate patch.

I just remember you mentioning some missing delays somewhere which caused
an NVIDIA device to issue EEH, and I do not want those delays to disappear :)


> +	switch (option) {
> +	case EEH_RESET_HOT:
> +		scope = OPAL_RESET_PCI_HOT;
> +		break;
> +	case EEH_RESET_FUNDAMENTAL:
> +		scope = OPAL_RESET_PCI_FUNDAMENTAL;
> +		break;
> +	case EEH_RESET_COMPLETE:
> +		scope = OPAL_RESET_PHB_COMPLETE;
> +		break;
> +	case EEH_RESET_DEACTIVATE:
> +		return 0;
> +	default:
> +		pr_warn("%s: Unsupported option %d\n",
> +			__func__, option);
> +		return -EINVAL;
>   	}
> -out:
> -	if (rc != OPAL_SUCCESS)
> -		return -EIO;
>
> -	return 0;
> -}
> -
> -static int pnv_eeh_root_reset(struct pci_controller *hose, int option)
> -{
> -	struct pnv_phb *phb = hose->private_data;
> -	s64 rc = OPAL_HARDWARE;
> +	/* Issue reset and poll until it's completed */
> +	rc = opal_pci_reset(phb->opal_id, scope, OPAL_ASSERT_RESET);
> +	if (rc > 0)
> +		rc = pnv_eeh_poll(phb->opal_id);
>
> -	pr_debug("%s: Reset PHB#%x, option=%d\n",
> -		 __func__, hose->global_number, option);
> -
> -	/*
> -	 * During the reset deassert time, we needn't care
> -	 * the reset scope because the firmware does nothing
> -	 * for fundamental or hot reset during deassert phase.
> -	 */
> -	if (option == EEH_RESET_FUNDAMENTAL)
> -		rc = opal_pci_reset(phb->opal_id,
> -				    OPAL_RESET_PCI_FUNDAMENTAL,
> -				    OPAL_ASSERT_RESET);
> -	else if (option == EEH_RESET_HOT)
> -		rc = opal_pci_reset(phb->opal_id,
> -				    OPAL_RESET_PCI_HOT,
> -				    OPAL_ASSERT_RESET);
> -	else if (option == EEH_RESET_DEACTIVATE)
> -		rc = opal_pci_reset(phb->opal_id,
> -				    OPAL_RESET_PCI_HOT,
> -				    OPAL_DEASSERT_RESET);
> -	if (rc < 0)
> -		goto out;
> -
> -	/* Poll state of the PHB until the request is done */
> -	rc = pnv_eeh_phb_poll(phb);
> -	if (option == EEH_RESET_DEACTIVATE)
> -		msleep(EEH_PE_RST_SETTLE_TIME);
> -out:
> -	if (rc != OPAL_SUCCESS)
> -		return -EIO;
> -
> -	return 0;
> +	return (rc == OPAL_SUCCESS) ? 0 : -EIO;
>   }
>
> -static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
> +static int __pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
>   {
>   	struct pci_dn *pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
>   	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
> @@ -891,14 +845,57 @@ static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
>   	return 0;
>   }
>
> +static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
> +{
> +	struct pci_controller *hose;
> +	struct pnv_phb *phb;
> +	struct device_node *dn = dev ? pci_device_to_OF_node(dev) : NULL;
> +	uint64_t id = (0x1ul << 60);
> +	uint8_t scope;
> +	s64 rc;


int64_t for @rc?


> +
> +	/*
> +	 * If the firmware can't handle it, we will issue hot reset
> +	 * on the secondary bus despite the requested reset type
> +	 */
> +	if (!dn || !of_get_property(dn, "ibm,reset-by-firmware", NULL))
> +		return __pnv_eeh_bridge_reset(dev, option);
> +
> +	/* The firmware can handle the request */
> +	switch (option) {
> +	case EEH_RESET_HOT:
> +		scope = OPAL_RESET_PCI_HOT;
> +		break;
> +	case EEH_RESET_FUNDAMENTAL:
> +		scope = OPAL_RESET_PCI_FUNDAMENTAL;
> +		break;
> +	case EEH_RESET_DEACTIVATE:
> +		return 0;
> +	case EEH_RESET_COMPLETE:
> +	default:
> +		pr_warn("%s: Unsupported option %d on device %s\n",
> +			__func__, option, pci_name(dev));
> +		return -EINVAL;
> +	}


This is the same switch as earlier in this patch (slightly different 
order). Move it and opal_pci_reset() into a helper and call it 
pnv_opal_pci_reset()?


> +
> +	hose = pci_bus_to_host(dev->bus);
> +	phb = hose->private_data;

Previously you would initialize @hose and @phb where you declared those, but
not here. If you had done the same thing as before, the patch could have been
smaller and easier to read.



> +	id |= (dev->bus->number << 24) | (dev->devfn << 16) | phb->opal_id;
> +	rc = opal_pci_reset(id, scope, OPAL_ASSERT_RESET);
> +	if (rc > 0)
> +		rc = pnv_eeh_poll(id);
> +
> +	return (rc == OPAL_SUCCESS) ? 0 : -EIO;
> +}
> +
>   void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
>   {
>   	struct pci_controller *hose;
>
>   	if (pci_is_root_bus(dev->bus)) {
>   		hose = pci_bus_to_host(dev->bus);
> -		pnv_eeh_root_reset(hose, EEH_RESET_HOT);
> -		pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE);
> +		pnv_eeh_phb_reset(hose, EEH_RESET_HOT);
> +		pnv_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE);
>   	} else {
>   		pnv_eeh_bridge_reset(dev, EEH_RESET_HOT);
>   		pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
> @@ -920,8 +917,9 @@ void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
>   static int pnv_eeh_reset(struct eeh_pe *pe, int option)
>   {
>   	struct pci_controller *hose = pe->phb;
> +	struct pnv_phb *phb;
>   	struct pci_bus *bus;
> -	int ret;
> +	s64 rc;
>
>   	/*
>   	 * For PHB reset, we always have complete reset. For those PEs whose
> @@ -937,43 +935,37 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option)
>   	 * reset. The side effect is that EEH core has to clear the frozen
>   	 * state explicitly after BAR restore.
>   	 */
> -	if (pe->type & EEH_PE_PHB) {
> -		ret = pnv_eeh_phb_reset(hose, option);
> -	} else {
> -		struct pnv_phb *phb;
> -		s64 rc;
> +	if (pe->type & EEH_PE_PHB)

I would keep "{" in the line above ....

> +		return pnv_eeh_phb_reset(hose, EEH_RESET_COMPLETE);

...put "} else {" here...

and the chunk below would become 1) very small 2) very trivial... And then 
you could make a trivial patch which would do scope removal but without 
functional changes. Or vice versa.

>
> -		/*
> -		 * The frozen PE might be caused by PAPR error injection
> -		 * registers, which are expected to be cleared after hitting
> -		 * frozen PE as stated in the hardware spec. Unfortunately,
> -		 * that's not true on P7IOC. So we have to clear it manually
> -		 * to avoid recursive EEH errors during recovery.
> -		 */
> -		phb = hose->private_data;
> -		if (phb->model == PNV_PHB_MODEL_P7IOC &&
> -		    (option == EEH_RESET_HOT ||
> -		    option == EEH_RESET_FUNDAMENTAL)) {
> -			rc = opal_pci_reset(phb->opal_id,
> -					    OPAL_RESET_PHB_ERROR,
> -					    OPAL_ASSERT_RESET);
> -			if (rc != OPAL_SUCCESS) {
> -				pr_warn("%s: Failure %lld clearing "
> -					"error injection registers\n",
> -					__func__, rc);
> -				return -EIO;
> -			}
> +	/*
> +	 * The frozen PE might be caused by PAPR error injection
> +	 * registers, which are expected to be cleared after hitting
> +	 * frozen PE as stated in the hardware spec. Unfortunately,
> +	 * that's not true on P7IOC. So we have to clear it manually
> +	 * to avoid recursive EEH errors during recovery.
> +	 */
> +	phb = hose->private_data;
> +	if (phb->model == PNV_PHB_MODEL_P7IOC &&
> +	    (option == EEH_RESET_HOT ||
> +	    option == EEH_RESET_FUNDAMENTAL)) {
> +		rc = opal_pci_reset(phb->opal_id,
> +				    OPAL_RESET_PHB_ERROR,
> +				    OPAL_ASSERT_RESET);
> +		if (rc != OPAL_SUCCESS) {
> +			pr_warn("%s: Failure %lld clearing error "
> +				"injection registers on PHB#%d\n",
> +				__func__, rc, hose->global_number);
> +			return -EIO;
>   		}
> -
> -		bus = eeh_pe_bus_get(pe);
> -		if (pci_is_root_bus(bus) ||
> -			pci_is_root_bus(bus->parent))
> -			ret = pnv_eeh_root_reset(hose, option);
> -		else
> -			ret = pnv_eeh_bridge_reset(bus->self, option);
>   	}
>
> -	return ret;
> +	/* Route the reset request to PHB or upstream bridge */
> +	bus = eeh_pe_bus_get(pe);
> +	if (pci_is_root_bus(bus))
> +		return pnv_eeh_phb_reset(hose, option);
> +
> +	return pnv_eeh_bridge_reset(bus->self, option);
>   }
>
>   /**
>


-- 
Alexey

  reply	other threads:[~2015-05-09 13:41 UTC|newest]

Thread overview: 88+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-05-01  6:02 [PATCH v4 00/21] PowerPC/PowerNV: PCI Slot Management Gavin Shan
2015-05-01  6:02 ` [PATCH v4 01/21] pci: Add pcibios_setup_bridge() Gavin Shan
2015-05-07 22:12   ` Bjorn Helgaas
2015-05-11  1:59     ` Gavin Shan
2015-05-01  6:02 ` [PATCH v4 02/21] powerpc/powernv: Enable M64 on P7IOC Gavin Shan
2015-05-09  0:18   ` Alexey Kardashevskiy
2015-05-11  4:37     ` Gavin Shan
2015-05-01  6:02 ` [PATCH v4 03/21] powerpc/powernv: M64 support improvement Gavin Shan
2015-05-09 10:24   ` Alexey Kardashevskiy
2015-05-11  4:47     ` Gavin Shan
2015-05-01  6:02 ` [PATCH v4 04/21] powerpc/powernv: Improve IO and M32 mapping Gavin Shan
2015-05-09 10:53   ` Alexey Kardashevskiy
2015-05-11  4:52     ` Gavin Shan
2015-05-01  6:02 ` [PATCH v4 05/21] powerpc/powernv: Improve DMA32 segment assignment Gavin Shan
2015-05-01  6:02 ` [PATCH v4 06/21] powerpc/powernv: Create PEs dynamically Gavin Shan
2015-05-09 11:43   ` Alexey Kardashevskiy
2015-05-11  4:55     ` Gavin Shan
2015-05-01  6:02 ` [PATCH v4 07/21] powerpc/powernv: Release " Gavin Shan
2015-05-09 12:43   ` Alexey Kardashevskiy
2015-05-11  6:25     ` Gavin Shan
2015-05-11  7:02       ` Alexey Kardashevskiy
2015-05-12  0:03         ` Gavin Shan
2015-05-12  0:53           ` Alexey Kardashevskiy
2015-05-12  1:25             ` Gavin Shan
2015-05-01  6:02 ` [PATCH v4 08/21] powerpc/powernv: Drop pnv_ioda_setup_dev_PE() Gavin Shan
2015-05-09 12:45   ` Alexey Kardashevskiy
2015-05-01  6:02 ` [PATCH v4 09/21] powerpc/powernv: Use PCI slot reset infrastructure Gavin Shan
2015-05-09 13:41   ` Alexey Kardashevskiy [this message]
2015-05-11  6:45     ` Gavin Shan
2015-05-11  7:16       ` Alexey Kardashevskiy
2015-05-01  6:02 ` [PATCH v4 10/21] powerpc/powernv: Fundamental reset for PCI bus reset Gavin Shan
2015-05-09 14:12   ` Alexey Kardashevskiy
2015-05-11  6:47     ` Gavin Shan
2015-05-11  7:17       ` Alexey Kardashevskiy
2015-05-12  0:04         ` Gavin Shan
2015-05-01  6:02 ` [PATCH v4 11/21] powerpc/pci: Don't scan empty slot Gavin Shan
2015-05-01  6:02 ` [PATCH v4 12/21] powerpc/pci: Move pcibios_find_pci_bus() around Gavin Shan
2015-05-01  6:03 ` [PATCH v4 13/21] powerpc/powernv: Introduce pnv_pci_poll() Gavin Shan
2015-05-09 14:30   ` Alexey Kardashevskiy
2015-05-11  7:19     ` Gavin Shan
2015-05-01  6:03 ` [PATCH v4 14/21] powerpc/powernv: Functions to get/reset PCI slot status Gavin Shan
2015-05-09 14:44   ` Alexey Kardashevskiy
2015-05-01  6:03 ` [PATCH v4 15/21] powerpc/pci: Delay creating pci_dn Gavin Shan
2015-05-09 14:55   ` Alexey Kardashevskiy
2015-05-11  7:21     ` Gavin Shan
2015-05-01  6:03 ` [PATCH v4 16/21] powerpc/pci: Create eeh_dev while " Gavin Shan
2015-05-09 15:08   ` Alexey Kardashevskiy
2015-05-11  7:24     ` Gavin Shan
2015-05-01  6:03 ` [PATCH v4 17/21] powerpc/pci: Export traverse_pci_device_nodes() Gavin Shan
2015-05-01  6:03 ` [PATCH v4 18/21] powerpc/pci: Update bridge windows on PCI plugging Gavin Shan
2015-05-01  6:03 ` [PATCH v4 19/21] drivers/of: Support adding sub-tree Gavin Shan
2015-05-01 12:54   ` Rob Herring
2015-05-01 15:22     ` Benjamin Herrenschmidt
2015-05-01 18:46       ` Rob Herring
2015-05-01 22:57         ` Benjamin Herrenschmidt
2015-05-01 23:29           ` Benjamin Herrenschmidt
2015-05-02  2:48             ` Benjamin Herrenschmidt
2015-05-04  1:30               ` Gavin Shan
2015-05-04  4:51                 ` Benjamin Herrenschmidt
2015-05-04  0:23             ` Gavin Shan
2015-05-04 16:41           ` Pantelis Antoniou
2015-05-04 21:14             ` Benjamin Herrenschmidt
2015-05-13 23:35               ` Benjamin Herrenschmidt
2015-05-14  0:18                 ` Rob Herring
2015-05-14  0:54                   ` Benjamin Herrenschmidt
2015-05-14  6:23                     ` Pantelis Antoniou
2015-05-14  6:46                       ` Benjamin Herrenschmidt
2015-05-14  7:04                         ` Pantelis Antoniou
2015-05-14  7:14                           ` Benjamin Herrenschmidt
2015-05-14  7:19                             ` Pantelis Antoniou
2015-05-14  7:25                               ` Benjamin Herrenschmidt
2015-05-14  7:29                                 ` Benjamin Herrenschmidt
2015-05-14  7:34                                 ` Pantelis Antoniou
2015-05-14  7:47                                   ` Benjamin Herrenschmidt
2015-05-14 11:02                                     ` Pantelis Antoniou
2015-05-14 23:25                                       ` Benjamin Herrenschmidt
2015-06-07  7:54                     ` Grant Likely
2015-06-08 20:57                       ` Benjamin Herrenschmidt
2015-06-08 21:34                         ` Grant Likely
2015-06-10  6:55                           ` Gavin Shan
2015-05-03 23:28     ` Gavin Shan
2015-05-15  1:27   ` Gavin Shan
2015-05-01  6:03 ` [PATCH v4 20/21] powerpc/powernv: Select OF_DYNAMIC Gavin Shan
2015-05-01  6:03 ` [PATCH v4 21/21] pci/hotplug: PowerPC PowerNV PCI hotplug driver Gavin Shan
2015-05-09 15:54   ` Alexey Kardashevskiy
2015-05-11  7:38     ` Gavin Shan
2015-05-08 23:59 ` [PATCH v4 00/21] PowerPC/PowerNV: PCI Slot Management Alexey Kardashevskiy
2015-05-11  7:40   ` Gavin Shan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=554E0E71.2080200@ozlabs.ru \
    --to=aik@ozlabs.ru \
    --cc=bhelgaas@google.com \
    --cc=gwshan@linux.vnet.ibm.com \
    --cc=linux-pci@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).