linux-pci.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Bjorn Helgaas <bhelgaas@google.com>
To: Yinghai Lu <yinghai@kernel.org>
Cc: Gu Zheng <guz.fnst@cn.fujitsu.com>,
	"linux-pci@vger.kernel.org" <linux-pci@vger.kernel.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	Sarah Sharp <sarah.a.sharp@linux.intel.com>
Subject: Re: [PATCH] PCI: Fix racing for pci device removing via sysfs
Date: Wed, 8 May 2013 17:43:32 -0600	[thread overview]
Message-ID: <20130508234332.GA7037@google.com> (raw)
In-Reply-To: <CAE9FiQX-HKOm5qDsJycG_Yo5JPngk6dxSFehg9n6mmj2sbcmWQ@mail.gmail.com>

On Tue, Apr 30, 2013 at 02:29:35PM -0700, Yinghai Lu wrote:
> On Mon, Apr 29, 2013 at 3:17 PM, Yinghai Lu <yinghai@kernel.org> wrote:
> > On Mon, Apr 29, 2013 at 11:15 AM, Bjorn Helgaas <bhelgaas@google.com> wrote:
> >>
> >> I think the 1a:01.0 pci_dev should retain its reference to the pci_bus
> >> for as long as the pci_dev exists, so the pci_bus_put() should go in
> >> pci_release_dev() instead.
> >
> > Good point.
> >
> > will rework pci remove sequence.
> 
> Please check attached version that will not need to touch pci sysfs bits.

I use patchwork to keep track of things I need to look at, and I don't
think patchwork looks at attachments.  Just FYI in case I seem to be
ignoring things; it might be that they just didn't appear on my patchwork
"to-do" list.

I completely agree that gmail makes it impossible to send patches in-line.
On the other hand, sending them as attachments is easy for you but makes it
difficult for others to review and reply to them.  I'm using mutt to
comment on your patch, but eventually I'll get tired of doing the extra
work on my end :)

I tried to apply this on top of 96a3e8af5a (Linus' merge of the v3.10 PCI
changes), but it didn't apply cleanly.  I assume you'll rebase it to
v3.10-rc1 when it comes out.

> Subject: [PATCH -v5] PCI: Fix racing for pci device removing via sysfs
> From: Yinghai Lu <yinghai@kernel.org>
> ... 
> Index: linux-2.6/drivers/pci/probe.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/probe.c
> +++ linux-2.6/drivers/pci/probe.c
> @@ -1119,6 +1119,20 @@ static void pci_release_capabilities(str
>  	pci_free_cap_save_buffers(dev);
>  }
>  
> +static void pci_free_resources(struct pci_dev *dev)
> +{
> +	int i;
> +
> +	msi_remove_pci_irq_vectors(dev);
> +
> +	pci_cleanup_rom(dev);
> +	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
> +		struct resource *res = dev->resource + i;
> +		if (res->parent)
> +			release_resource(res);
> +	}
> +}
> +
>  /**
>   * pci_release_dev - free a pci device structure when all users of it are finished.
>   * @dev: device that's been disconnected
> @@ -1131,6 +1145,13 @@ static void pci_release_dev(struct devic
>  	struct pci_dev *pci_dev;
>  
>  	pci_dev = to_pci_dev(dev);
> +
> +	down_write(&pci_bus_sem);
> +	list_del(&pci_dev->bus_list);
> +	up_write(&pci_bus_sem);
> +	pci_free_resources(pci_dev);
> +	put_device(&pci_dev->bus->dev);

Is there any reason to drop the pci_bus reference here, as opposed to
doing it after the "kfree(pci_dev)"?  We call a couple more things below,
and it's possible that they will still reference pci_dev->bus.

> +
>  	pci_release_capabilities(pci_dev);
>  	pci_release_of_node(pci_dev);
>  	kfree(pci_dev);
> @@ -1340,6 +1361,7 @@ void pci_device_add(struct pci_dev *dev,
>  	down_write(&pci_bus_sem);
>  	list_add_tail(&dev->bus_list, &bus->devices);
>  	up_write(&pci_bus_sem);
> +	get_device(&bus->dev);
>  
>  	ret = pcibios_add_device(dev);
>  	WARN_ON(ret < 0);
> Index: linux-2.6/drivers/pci/remove.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/remove.c
> +++ linux-2.6/drivers/pci/remove.c
> @@ -3,20 +3,6 @@
>  #include <linux/pci-aspm.h>
>  #include "pci.h"
>  
> -static void pci_free_resources(struct pci_dev *dev)
> -{
> -	int i;
> -
> - 	msi_remove_pci_irq_vectors(dev);
> -
> -	pci_cleanup_rom(dev);
> -	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
> -		struct resource *res = dev->resource + i;
> -		if (res->parent)
> -			release_resource(res);
> -	}
> -}
> -
>  static void pci_stop_dev(struct pci_dev *dev)
>  {
>  	pci_pme_active(dev, false);
> @@ -24,8 +10,7 @@ static void pci_stop_dev(struct pci_dev
>  	if (dev->is_added) {
>  		pci_proc_detach_device(dev);
>  		pci_remove_sysfs_dev_files(dev);
> -		device_del(&dev->dev);
> -		dev->is_added = 0;
> +		device_release_driver(&dev->dev);
>  	}
>  
>  	if (dev->bus->self)
> @@ -34,12 +19,11 @@ static void pci_stop_dev(struct pci_dev
>  
>  static void pci_destroy_dev(struct pci_dev *dev)
>  {
> -	down_write(&pci_bus_sem);
> -	list_del(&dev->bus_list);
> -	up_write(&pci_bus_sem);
> -
> -	pci_free_resources(dev);
> -	put_device(&dev->dev);
> +	if (dev->is_added) {

If it's possible that "dev->is_added == 0" here, doesn't that mean
we leaked a struct pci_dev?

For example, if we're hot-adding a device, dev->is_added is zero
between points A and B here:

    pciehp_configure_device
      pci_scan_slot
        pci_scan_single_device
          pci_scan_device
            dev = alloc_pci_dev         # A) dev->is_added == 0 here
          pci_device_add
            device_initialize
            device_add
      pci_bus_add_devices
        pci_bus_add_device
          device_attach
          dev->is_added = 1             # B) dev->is_added == 1 here

If we can get to pci_destroy_dev() for that device during the
interval between A and B, dev->is_added will be zero, and I don't
know where we will ever clean up the device.

If we *can't* get here during that interval, there shouldn't be any
need to test dev->is_added.

> +		device_del(&dev->dev);
> +		put_device(&dev->dev);
> +		dev->is_added = 0;
> +	}
>  }
>  
>  void pci_remove_bus(struct pci_bus *bus)
> @@ -126,7 +110,7 @@ void pci_stop_root_bus(struct pci_bus *b
>  		pci_stop_bus_device(child);
>  
>  	/* stop the host bridge */
> -	device_del(&host_bridge->dev);
> +	device_release_driver(&host_bridge->dev);
>  }
>  
>  void pci_remove_root_bus(struct pci_bus *bus)
> @@ -145,5 +129,5 @@ void pci_remove_root_bus(struct pci_bus
>  	host_bridge->bus = NULL;
>  
>  	/* remove the host bridge */
> -	put_device(&host_bridge->dev);
> +	device_unregister(&host_bridge->dev);
>  }

  reply	other threads:[~2013-05-08 23:43 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-04-26  1:47 [PATCH] PCI: Fix racing for pci device removing via sysfs Yinghai Lu
2013-04-26 16:28 ` Bjorn Helgaas
2013-04-26 20:20   ` Yinghai Lu
2013-04-26 20:53     ` Bjorn Helgaas
2013-04-26 21:01       ` Yinghai Lu
2013-04-29 10:04         ` Gu Zheng
2013-04-29 15:19           ` Yinghai Lu
2013-04-29 18:15             ` Bjorn Helgaas
2013-04-29 18:21               ` Greg Kroah-Hartman
2013-04-29 21:23                 ` Sarah Sharp
2013-04-29 21:32                   ` Greg Kroah-Hartman
2013-04-29 22:17               ` Yinghai Lu
2013-04-30 21:29                 ` Yinghai Lu
2013-05-08 23:43                   ` Bjorn Helgaas [this message]
2013-04-30  9:17               ` Gu Zheng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130508234332.GA7037@google.com \
    --to=bhelgaas@google.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=guz.fnst@cn.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=sarah.a.sharp@linux.intel.com \
    --cc=yinghai@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).