From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
To: Michael Neuling <mikey@neuling.org>, mpe@ellerman.id.au
Cc: linuxppc-dev@lists.ozlabs.org, ruscur@russell.cc,
sam.bobroff@au1.ibm.com
Subject: Re: [PATCH] powerpc/eeh: Fix race with driver un/bind
Date: Fri, 23 Mar 2018 17:33:11 +1100 [thread overview]
Message-ID: <1521786791.16434.339.camel@kernel.crashing.org> (raw)
In-Reply-To: <20180323054417.3268-1-mikey@neuling.org>
On Fri, 2018-03-23 at 16:44 +1100, Michael Neuling wrote:
.../...
> This fixes the problem in the same way the generic PCIe AER code (in
> drivers/pci/pcie/aer/aerdrv_core.c) does. It makes the EEH code hold
> the device_lock() before performing the driver EEH callbacks. This
> ensures either the callbacks are no longer register, or if they are
> registered the driver will not be removed from underneath us.
>
> Signed-off-by: Michael Neuling <mikey@neuling.org>
Generally ok, minor nits though and do we want a CC stable ?
> ---
> arch/powerpc/kernel/eeh_driver.c | 67 ++++++++++++++++++++++++----------------
> 1 file changed, 41 insertions(+), 26 deletions(-)
>
> diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
> index 0c0b66fc5b..7cf946ae9a 100644
> --- a/arch/powerpc/kernel/eeh_driver.c
> +++ b/arch/powerpc/kernel/eeh_driver.c
> @@ -207,18 +207,18 @@ static void *eeh_report_error(void *data, void *userdata)
>
> if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
> return NULL;
> +
> + device_lock(&dev->dev);
> dev->error_state = pci_channel_io_frozen;
>
> driver = eeh_pcid_get(dev);
> - if (!driver) return NULL;
> + if (!driver) goto out2;
I don't like out1/out2, why not call them out_nodev and out_no_handler
? (same comment for the other ones).
>
> eeh_disable_irq(dev);
>
> if (!driver->err_handler ||
> - !driver->err_handler->error_detected) {
> - eeh_pcid_put(dev);
> - return NULL;
> - }
> + !driver->err_handler->error_detected)
> + goto out1;
>
> rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
>
> @@ -227,8 +227,11 @@ static void *eeh_report_error(void *data, void *userdata)
> if (*res == PCI_ERS_RESULT_NONE) *res = rc;
>
> edev->in_error = true;
> - eeh_pcid_put(dev);
> pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
> +out1:
> + eeh_pcid_put(dev);
> +out2:
This also changes doing the uevent while holding a reference and the
the device lock, is that ok ? (I guess a reference is a good thing, the
device lock, not sure... I hope so but you should at least document it
as a chance in the cset comment).
> + device_unlock(&dev->dev);
> return NULL;
> }
>
> @@ -251,15 +254,14 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
> if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
> return NULL;
>
> + device_lock(&dev->dev);
> driver = eeh_pcid_get(dev);
> - if (!driver) return NULL;
> + if (!driver) goto out2;
>
> if (!driver->err_handler ||
> !driver->err_handler->mmio_enabled ||
> - (edev->mode & EEH_DEV_NO_HANDLER)) {
> - eeh_pcid_put(dev);
> - return NULL;
> - }
> + (edev->mode & EEH_DEV_NO_HANDLER))
> + goto out1;
>
> rc = driver->err_handler->mmio_enabled(dev);
>
> @@ -267,7 +269,10 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
> if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
> if (*res == PCI_ERS_RESULT_NONE) *res = rc;
>
> +out1:
> eeh_pcid_put(dev);
> +out2:
> + device_unlock(&dev->dev);
> return NULL;
> }
>
> @@ -290,20 +295,20 @@ static void *eeh_report_reset(void *data, void *userdata)
>
> if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
> return NULL;
> +
> + device_lock(&dev->dev);
> dev->error_state = pci_channel_io_normal;
>
> driver = eeh_pcid_get(dev);
> - if (!driver) return NULL;
> + if (!driver) goto out2;
>
> eeh_enable_irq(dev);
>
> if (!driver->err_handler ||
> !driver->err_handler->slot_reset ||
> (edev->mode & EEH_DEV_NO_HANDLER) ||
> - (!edev->in_error)) {
> - eeh_pcid_put(dev);
> - return NULL;
> - }
> + (!edev->in_error))
> + goto out1;
>
> rc = driver->err_handler->slot_reset(dev);
> if ((*res == PCI_ERS_RESULT_NONE) ||
> @@ -311,7 +316,10 @@ static void *eeh_report_reset(void *data, void *userdata)
> if (*res == PCI_ERS_RESULT_DISCONNECT &&
> rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
>
> +out1:
> eeh_pcid_put(dev);
> +out2:
> + device_unlock(&dev->dev);
> return NULL;
> }
>
> @@ -362,10 +370,12 @@ static void *eeh_report_resume(void *data, void *userdata)
>
> if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
> return NULL;
> +
> + device_lock(&dev->dev);
> dev->error_state = pci_channel_io_normal;
>
> driver = eeh_pcid_get(dev);
> - if (!driver) return NULL;
> + if (!driver) goto out2;
>
> was_in_error = edev->in_error;
> edev->in_error = false;
> @@ -375,18 +385,20 @@ static void *eeh_report_resume(void *data, void *userdata)
> !driver->err_handler->resume ||
> (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) {
> edev->mode &= ~EEH_DEV_NO_HANDLER;
> - eeh_pcid_put(dev);
> - return NULL;
> + goto out1;
> }
>
> driver->err_handler->resume(dev);
>
> - eeh_pcid_put(dev);
> pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
> +out1:
> + eeh_pcid_put(dev);
> #ifdef CONFIG_PCI_IOV
> if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev))
> eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
> #endif
> +out2:
> + device_unlock(&dev->dev);
> return NULL;
> }
>
> @@ -406,23 +418,26 @@ static void *eeh_report_failure(void *data, void *userdata)
>
> if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
> return NULL;
> +
> + device_lock(&dev->dev);
> dev->error_state = pci_channel_io_perm_failure;
>
> driver = eeh_pcid_get(dev);
> - if (!driver) return NULL;
> + if (!driver) goto out2;
>
> eeh_disable_irq(dev);
>
> if (!driver->err_handler ||
> - !driver->err_handler->error_detected) {
> - eeh_pcid_put(dev);
> - return NULL;
> - }
> + !driver->err_handler->error_detected)
> + goto out1;
>
> driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
>
> - eeh_pcid_put(dev);
> pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
> +out1:
> + eeh_pcid_put(dev);
> +out2:
> + device_unlock(&dev->dev);
> return NULL;
> }
>
next prev parent reply other threads:[~2018-03-23 6:33 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-03-23 5:44 [PATCH] powerpc/eeh: Fix race with driver un/bind Michael Neuling
2018-03-23 6:33 ` Benjamin Herrenschmidt [this message]
2018-03-25 23:30 ` Russell Currey
2018-03-26 2:47 ` Michael Neuling
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1521786791.16434.339.camel@kernel.crashing.org \
--to=benh@kernel.crashing.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=mikey@neuling.org \
--cc=mpe@ellerman.id.au \
--cc=ruscur@russell.cc \
--cc=sam.bobroff@au1.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).