All of lore.kernel.org
 help / color / mirror / Atom feed
From: Bjorn Helgaas <bhelgaas@google.com>
To: Wei Yang <weiyang@linux.vnet.ibm.com>
Cc: linuxppc-dev@lists.ozlabs.org, linux-pci@vger.kernel.org,
	benh@au1.ibm.com, gwshan@linux.vnet.ibm.com,
	yan@linux.vnet.ibm.com, qiudayu@linux.vnet.ibm.com
Subject: Re: [PATCH V7 04/17] PCI: Take additional IOV BAR alignment in sizing and assigning
Date: Tue, 19 Aug 2014 21:08:41 -0600	[thread overview]
Message-ID: <20140820030841.GD6295@google.com> (raw)
In-Reply-To: <1406182947-11302-5-git-send-email-weiyang@linux.vnet.ibm.com>

On Thu, Jul 24, 2014 at 02:22:14PM +0800, Wei Yang wrote:
> At resource sizing/assigning stage, resources are divided into two lists,
> requested list and additional list, while the alignement of the additional
> IOV BAR is not taken into the sizeing and assigning procedure.
> 
> This is reasonable in the original implementation, since IOV BAR's alignment is
> mostly the size of a PF BAR alignemt. This means the alignment is already taken
> into consideration. While this rule may be violated on some platform.
> 
> This patch take the additional IOV BAR alignment in sizing and assigning stage
> explicitly.
> 
> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
> ---
>  drivers/pci/setup-bus.c |   68 +++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 60 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
> index a5a63ec..d83681f 100644
> --- a/drivers/pci/setup-bus.c
> +++ b/drivers/pci/setup-bus.c
> @@ -120,6 +120,28 @@ static resource_size_t get_res_add_size(struct list_head *head,
>  	return 0;
>  }
>  
> +static resource_size_t get_res_add_align(struct list_head *head,
> +		struct resource *res)
> +{
> +	struct pci_dev_resource *dev_res;
> +
> +	list_for_each_entry(dev_res, head, list) {
> +		if (dev_res->res == res) {
> +			int idx = res - &dev_res->dev->resource[0];
> +
> +			dev_printk(KERN_DEBUG, &dev_res->dev->dev,
> +				   "res[%d]=%pR get_res_add_align min_align %llx\n",
> +				   idx, dev_res->res,
> +				   (unsigned long long)dev_res->min_align);
> +
> +			return dev_res->min_align;
> +		}
> +	}
> +
> +	return 0;
> +}

I see that you copied the structure of the existing get_res_add_size()
here.  But I don't understand *that* function.  It looks basically like
this:

  resource_size_t get_res_add_size(head, res)
  {
    list_for_each_entry(dev_res, head, list) {
      if (dev_res->res == res)
        return dev_res->add_size;
    }
    return 0;
  }

and we call it like this:

  dev_res->res->end += get_res_add_size(realloc_head, dev_res->res);

So we start out with dev_res, pass in dev_res->res, search the
realloc_head list to find dev_res again, and return dev_res->add_size.
That looks equivalent to just:

  dev_res->res->end += dev_res->add_size;

It looks like get_res_add_size() merely adds a printk and some complexity.
Am I missing something?

I do see that there are other callers where we don't actually start with
dev_res, which makes it a little more complicated.  But I think you should
either add something like this:

  struct pci_dev_resource *res_to_dev_res(head, res)
  {
    list_for_each_entry(dev_res, head, list) {
      if (dev_res->res == res)
        return dev_res;
    }
    return NULL;
  }

which can be used to replace get_res_add_size() and get_res_add_align(), OR
figure out whether the dev_res of interest is always one we've just added.
If it is, maybe you can just make add_to_list() return the dev_res pointer
instead of an errno, and hang onto the pointer.  I'd like that much better
if that's possible.

> +
> +
>  /* Sort resources by alignment */
>  static void pdev_sort_resources(struct pci_dev *dev, struct list_head *head)
>  {
> @@ -368,8 +390,9 @@ static void __assign_resources_sorted(struct list_head *head,
>  	LIST_HEAD(save_head);
>  	LIST_HEAD(local_fail_head);
>  	struct pci_dev_resource *save_res;
> -	struct pci_dev_resource *dev_res, *tmp_res;
> +	struct pci_dev_resource *dev_res, *tmp_res, *dev_res2;
>  	unsigned long fail_type;
> +	resource_size_t add_align, align;
>  
>  	/* Check if optional add_size is there */
>  	if (!realloc_head || list_empty(realloc_head))
> @@ -384,10 +407,31 @@ static void __assign_resources_sorted(struct list_head *head,
>  	}
>  
>  	/* Update res in head list with add_size in realloc_head list */
> -	list_for_each_entry(dev_res, head, list)
> +	list_for_each_entry_safe(dev_res, tmp_res, head, list) {
>  		dev_res->res->end += get_res_add_size(realloc_head,
>  							dev_res->res);
>  
> +		if (!(dev_res->res->flags & IORESOURCE_STARTALIGN))
> +			continue;
> +
> +		add_align = get_res_add_align(realloc_head, dev_res->res);
> +
> +		if (add_align > dev_res->res->start) {
> +			dev_res->res->start = add_align;
> +			dev_res->res->end = add_align +
> +				            resource_size(dev_res->res);
> +
> +			list_for_each_entry(dev_res2, head, list) {
> +				align = pci_resource_alignment(dev_res2->dev,
> +							       dev_res2->res);
> +				if (add_align > align)
> +					list_move_tail(&dev_res->list,
> +						       &dev_res2->list);
> +			}
> +               }
> +
> +	}
> +
>  	/* Try updated head list with add_size added */
>  	assign_requested_resources_sorted(head, &local_fail_head);
>  
> @@ -930,6 +974,8 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
>  	struct resource *b_res = find_free_bus_resource(bus,
>  					mask | IORESOURCE_PREFETCH, type);
>  	resource_size_t children_add_size = 0;
> +	resource_size_t children_add_align = 0;
> +	resource_size_t add_align = 0;
>  
>  	if (!b_res)
>  		return -ENOSPC;
> @@ -954,6 +1000,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
>  			/* put SRIOV requested res to the optional list */
>  			if (realloc_head && i >= PCI_IOV_RESOURCES &&
>  					i <= PCI_IOV_RESOURCE_END) {
> +				add_align = max(pci_resource_alignment(dev, r), add_align);
>  				r->end = r->start - 1;
>  				add_to_list(realloc_head, dev, r, r_size, 0/* don't care */);
>  				children_add_size += r_size;
> @@ -984,8 +1031,11 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
>  			if (order > max_order)
>  				max_order = order;
>  
> -			if (realloc_head)
> +			if (realloc_head) {
>  				children_add_size += get_res_add_size(realloc_head, r);
> +				children_add_align = get_res_add_align(realloc_head, r);
> +				add_align = max(add_align, children_add_align);
> +			}
>  		}
>  	}
>  
> @@ -996,7 +1046,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
>  		add_size = children_add_size;
>  	size1 = (!realloc_head || (realloc_head && !add_size)) ? size0 :
>  		calculate_memsize(size, min_size, add_size,
> -				resource_size(b_res), min_align);
> +				resource_size(b_res), max(min_align, add_align));
>  	if (!size0 && !size1) {
>  		if (b_res->start || b_res->end)
>  			dev_info(&bus->self->dev, "disabling bridge window %pR to %pR (unused)\n",
> @@ -1008,10 +1058,12 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
>  	b_res->end = size0 + min_align - 1;
>  	b_res->flags |= IORESOURCE_STARTALIGN;
>  	if (size1 > size0 && realloc_head) {
> -		add_to_list(realloc_head, bus->self, b_res, size1-size0, min_align);
> -		dev_printk(KERN_DEBUG, &bus->self->dev, "bridge window %pR to %pR add_size %llx\n",
> -			   b_res, &bus->busn_res,
> -			   (unsigned long long)size1-size0);
> +		add_to_list(realloc_head, bus->self, b_res, size1-size0,
> +				max(min_align, add_align));
> +		dev_printk(KERN_DEBUG, &bus->self->dev, "bridge window "
> +				 "%pR to %pR add_size %llx add_align %llx\n", b_res,
> +				 &bus->busn_res, (unsigned long long)size1-size0,
> +				 max(min_align, add_align));

Factor out this "max(min_align, add_align)" thing so we don't have to
change these lines.  Bonus points if you can also factor it out of the
calculate_memsize() call above.  That one is a pretty complicated ternary
expression that should probably be turned into an "if" instead anyway.

>  	}
>  	return 0;
>  }
> -- 
> 1.7.9.5
> 

WARNING: multiple messages have this Message-ID (diff)
From: Bjorn Helgaas <bhelgaas@google.com>
To: Wei Yang <weiyang@linux.vnet.ibm.com>
Cc: benh@au1.ibm.com, linux-pci@vger.kernel.org,
	gwshan@linux.vnet.ibm.com, qiudayu@linux.vnet.ibm.com,
	yan@linux.vnet.ibm.com, linuxppc-dev@lists.ozlabs.org
Subject: Re: [PATCH V7 04/17] PCI: Take additional IOV BAR alignment in sizing and assigning
Date: Tue, 19 Aug 2014 21:08:41 -0600	[thread overview]
Message-ID: <20140820030841.GD6295@google.com> (raw)
In-Reply-To: <1406182947-11302-5-git-send-email-weiyang@linux.vnet.ibm.com>

On Thu, Jul 24, 2014 at 02:22:14PM +0800, Wei Yang wrote:
> At resource sizing/assigning stage, resources are divided into two lists,
> requested list and additional list, while the alignement of the additional
> IOV BAR is not taken into the sizeing and assigning procedure.
> 
> This is reasonable in the original implementation, since IOV BAR's alignment is
> mostly the size of a PF BAR alignemt. This means the alignment is already taken
> into consideration. While this rule may be violated on some platform.
> 
> This patch take the additional IOV BAR alignment in sizing and assigning stage
> explicitly.
> 
> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
> ---
>  drivers/pci/setup-bus.c |   68 +++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 60 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
> index a5a63ec..d83681f 100644
> --- a/drivers/pci/setup-bus.c
> +++ b/drivers/pci/setup-bus.c
> @@ -120,6 +120,28 @@ static resource_size_t get_res_add_size(struct list_head *head,
>  	return 0;
>  }
>  
> +static resource_size_t get_res_add_align(struct list_head *head,
> +		struct resource *res)
> +{
> +	struct pci_dev_resource *dev_res;
> +
> +	list_for_each_entry(dev_res, head, list) {
> +		if (dev_res->res == res) {
> +			int idx = res - &dev_res->dev->resource[0];
> +
> +			dev_printk(KERN_DEBUG, &dev_res->dev->dev,
> +				   "res[%d]=%pR get_res_add_align min_align %llx\n",
> +				   idx, dev_res->res,
> +				   (unsigned long long)dev_res->min_align);
> +
> +			return dev_res->min_align;
> +		}
> +	}
> +
> +	return 0;
> +}

I see that you copied the structure of the existing get_res_add_size()
here.  But I don't understand *that* function.  It looks basically like
this:

  resource_size_t get_res_add_size(head, res)
  {
    list_for_each_entry(dev_res, head, list) {
      if (dev_res->res == res)
        return dev_res->add_size;
    }
    return 0;
  }

and we call it like this:

  dev_res->res->end += get_res_add_size(realloc_head, dev_res->res);

So we start out with dev_res, pass in dev_res->res, search the
realloc_head list to find dev_res again, and return dev_res->add_size.
That looks equivalent to just:

  dev_res->res->end += dev_res->add_size;

It looks like get_res_add_size() merely adds a printk and some complexity.
Am I missing something?

I do see that there are other callers where we don't actually start with
dev_res, which makes it a little more complicated.  But I think you should
either add something like this:

  struct pci_dev_resource *res_to_dev_res(head, res)
  {
    list_for_each_entry(dev_res, head, list) {
      if (dev_res->res == res)
        return dev_res;
    }
    return NULL;
  }

which can be used to replace get_res_add_size() and get_res_add_align(), OR
figure out whether the dev_res of interest is always one we've just added.
If it is, maybe you can just make add_to_list() return the dev_res pointer
instead of an errno, and hang onto the pointer.  I'd like that much better
if that's possible.

> +
> +
>  /* Sort resources by alignment */
>  static void pdev_sort_resources(struct pci_dev *dev, struct list_head *head)
>  {
> @@ -368,8 +390,9 @@ static void __assign_resources_sorted(struct list_head *head,
>  	LIST_HEAD(save_head);
>  	LIST_HEAD(local_fail_head);
>  	struct pci_dev_resource *save_res;
> -	struct pci_dev_resource *dev_res, *tmp_res;
> +	struct pci_dev_resource *dev_res, *tmp_res, *dev_res2;
>  	unsigned long fail_type;
> +	resource_size_t add_align, align;
>  
>  	/* Check if optional add_size is there */
>  	if (!realloc_head || list_empty(realloc_head))
> @@ -384,10 +407,31 @@ static void __assign_resources_sorted(struct list_head *head,
>  	}
>  
>  	/* Update res in head list with add_size in realloc_head list */
> -	list_for_each_entry(dev_res, head, list)
> +	list_for_each_entry_safe(dev_res, tmp_res, head, list) {
>  		dev_res->res->end += get_res_add_size(realloc_head,
>  							dev_res->res);
>  
> +		if (!(dev_res->res->flags & IORESOURCE_STARTALIGN))
> +			continue;
> +
> +		add_align = get_res_add_align(realloc_head, dev_res->res);
> +
> +		if (add_align > dev_res->res->start) {
> +			dev_res->res->start = add_align;
> +			dev_res->res->end = add_align +
> +				            resource_size(dev_res->res);
> +
> +			list_for_each_entry(dev_res2, head, list) {
> +				align = pci_resource_alignment(dev_res2->dev,
> +							       dev_res2->res);
> +				if (add_align > align)
> +					list_move_tail(&dev_res->list,
> +						       &dev_res2->list);
> +			}
> +               }
> +
> +	}
> +
>  	/* Try updated head list with add_size added */
>  	assign_requested_resources_sorted(head, &local_fail_head);
>  
> @@ -930,6 +974,8 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
>  	struct resource *b_res = find_free_bus_resource(bus,
>  					mask | IORESOURCE_PREFETCH, type);
>  	resource_size_t children_add_size = 0;
> +	resource_size_t children_add_align = 0;
> +	resource_size_t add_align = 0;
>  
>  	if (!b_res)
>  		return -ENOSPC;
> @@ -954,6 +1000,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
>  			/* put SRIOV requested res to the optional list */
>  			if (realloc_head && i >= PCI_IOV_RESOURCES &&
>  					i <= PCI_IOV_RESOURCE_END) {
> +				add_align = max(pci_resource_alignment(dev, r), add_align);
>  				r->end = r->start - 1;
>  				add_to_list(realloc_head, dev, r, r_size, 0/* don't care */);
>  				children_add_size += r_size;
> @@ -984,8 +1031,11 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
>  			if (order > max_order)
>  				max_order = order;
>  
> -			if (realloc_head)
> +			if (realloc_head) {
>  				children_add_size += get_res_add_size(realloc_head, r);
> +				children_add_align = get_res_add_align(realloc_head, r);
> +				add_align = max(add_align, children_add_align);
> +			}
>  		}
>  	}
>  
> @@ -996,7 +1046,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
>  		add_size = children_add_size;
>  	size1 = (!realloc_head || (realloc_head && !add_size)) ? size0 :
>  		calculate_memsize(size, min_size, add_size,
> -				resource_size(b_res), min_align);
> +				resource_size(b_res), max(min_align, add_align));
>  	if (!size0 && !size1) {
>  		if (b_res->start || b_res->end)
>  			dev_info(&bus->self->dev, "disabling bridge window %pR to %pR (unused)\n",
> @@ -1008,10 +1058,12 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
>  	b_res->end = size0 + min_align - 1;
>  	b_res->flags |= IORESOURCE_STARTALIGN;
>  	if (size1 > size0 && realloc_head) {
> -		add_to_list(realloc_head, bus->self, b_res, size1-size0, min_align);
> -		dev_printk(KERN_DEBUG, &bus->self->dev, "bridge window %pR to %pR add_size %llx\n",
> -			   b_res, &bus->busn_res,
> -			   (unsigned long long)size1-size0);
> +		add_to_list(realloc_head, bus->self, b_res, size1-size0,
> +				max(min_align, add_align));
> +		dev_printk(KERN_DEBUG, &bus->self->dev, "bridge window "
> +				 "%pR to %pR add_size %llx add_align %llx\n", b_res,
> +				 &bus->busn_res, (unsigned long long)size1-size0,
> +				 max(min_align, add_align));

Factor out this "max(min_align, add_align)" thing so we don't have to
change these lines.  Bonus points if you can also factor it out of the
calculate_memsize() call above.  That one is a pretty complicated ternary
expression that should probably be turned into an "if" instead anyway.

>  	}
>  	return 0;
>  }
> -- 
> 1.7.9.5
> 

  reply	other threads:[~2014-08-20  3:08 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-07-24  6:22 [PATCH V7 00/17] Enable SRIOV on POWER8 Wei Yang
2014-07-24  6:22 ` [PATCH V7 01/17] PCI/IOV: Export interface for retrieve VF's BDF Wei Yang
2014-08-19 21:37   ` Bjorn Helgaas
2014-08-19 21:37     ` Bjorn Helgaas
2014-08-20  2:25     ` Wei Yang
2014-08-20  2:25       ` Wei Yang
2014-07-24  6:22 ` [PATCH V7 02/17] PCI/IOV: Get VF BAR size from hardware directly when platform needs Wei Yang
2014-08-19 21:44   ` Bjorn Helgaas
2014-08-19 21:44     ` Bjorn Helgaas
2014-08-20  2:31     ` Wei Yang
2014-08-20  2:31       ` Wei Yang
2014-07-24  6:22 ` [PATCH V7 03/17] PCI: Add weak pcibios_sriov_resource_alignment() interface Wei Yang
2014-07-24  6:22 ` [PATCH V7 04/17] PCI: Take additional IOV BAR alignment in sizing and assigning Wei Yang
2014-08-20  3:08   ` Bjorn Helgaas [this message]
2014-08-20  3:08     ` Bjorn Helgaas
2014-08-20  6:14     ` Wei Yang
2014-08-20  6:14       ` Wei Yang
2014-08-28  2:34       ` Wei Yang
2014-08-28  2:34         ` Wei Yang
2014-09-09 20:09       ` Bjorn Helgaas
2014-09-09 20:09         ` Bjorn Helgaas
2014-09-10  3:27         ` Wei Yang
2014-09-10  3:27           ` Wei Yang
2014-07-24  6:22 ` [PATCH V7 05/17] powerpc/pci: Don't unset pci resources for VFs Wei Yang
2014-07-24  6:22 ` [PATCH V7 06/17] powerpc/pci: Define pcibios_disable_device() on powerpc Wei Yang
2014-07-24  6:22 ` [PATCH V7 07/17] powrepc/pci: Refactor pci_dn Wei Yang
2014-07-24  6:22 ` [PATCH V7 08/17] powerpc/powernv: Use pci_dn in PCI config accessor Wei Yang
2014-07-24  6:22 ` [PATCH V7 09/17] powerpc/powernv: mark IOV BAR with IORESOURCE_ARCH Wei Yang
2014-07-24  6:22 ` [PATCH V7 10/17] powerpc/powernv: Allocate pe->iommu_table dynamically Wei Yang
2014-07-24  6:22 ` [PATCH V7 11/17] powerpc/powernv: Add function to deconfig a PE Wei Yang
2014-07-24  6:22 ` [PATCH V7 12/17] powerpc/powernv: Expand VF resources according to the number of total_pe Wei Yang
2014-07-24  6:22 ` [PATCH V7 13/17] powerpc/powernv: Implement pcibios_sriov_resource_alignment on powernv Wei Yang
2014-07-24  6:22 ` [PATCH V7 14/17] powerpc/powernv: Shift VF resource with an offset Wei Yang
2014-07-24  6:22 ` [PATCH V7 15/17] powerpc/powernv: Allocate VF PE Wei Yang
2014-07-24  6:22 ` [PATCH V7 16/17] powerpc/powernv: Expanding IOV BAR, with m64_per_iov supported Wei Yang
2014-07-24  6:22 ` [PATCH V7 17/17] powerpc/powernv: Group VF PE when IOV BAR is big on PHB3 Wei Yang
2014-07-31  6:35 ` [PATCH V7 00/17] Enable SRIOV on POWER8 Benjamin Herrenschmidt
2014-07-31  6:35   ` Benjamin Herrenschmidt
2014-08-19 21:19 ` Bjorn Helgaas
2014-08-19 21:19   ` Bjorn Helgaas
2014-08-20  2:34   ` Wei Yang
2014-08-20  2:34     ` Wei Yang
2014-08-20  3:12     ` Bjorn Helgaas
2014-08-20  3:12       ` Bjorn Helgaas
2014-08-20  3:35       ` Wei Yang
2014-08-20  3:35         ` Wei Yang
2014-10-02 15:59         ` Bjorn Helgaas
2014-10-02 15:59           ` Bjorn Helgaas
2014-10-02 23:38           ` Gavin Shan
2014-10-02 23:38             ` Gavin Shan
2014-10-15  9:00           ` Wei Yang
2014-10-15  9:00             ` Wei Yang
2014-10-15 13:52             ` Bjorn Helgaas
2014-10-15 13:52               ` Bjorn Helgaas
2014-10-16  8:41               ` Wei Yang
2014-10-16  8:41                 ` Wei Yang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140820030841.GD6295@google.com \
    --to=bhelgaas@google.com \
    --cc=benh@au1.ibm.com \
    --cc=gwshan@linux.vnet.ibm.com \
    --cc=linux-pci@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=qiudayu@linux.vnet.ibm.com \
    --cc=weiyang@linux.vnet.ibm.com \
    --cc=yan@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.