All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Zhao, Yu" <yu.zhao@intel.com>
To: "Yuji Shimada <shimada-yxb@necst.nec.co.jp>; Keir Fraser"
	<keir.fraser@citrix.com>
Cc: "xen-devel@lists.xensource.com" <xen-devel@lists.xensource.com>
Subject: Re: [PATCH] dom0 linux: Reassign memory resources to device	for pci passthrough.
Date: Fri, 10 Oct 2008 15:05:29 +0800	[thread overview]
Message-ID: <48EEFEB9.70407@intel.com> (raw)
In-Reply-To: <20081009083706.BF13.SHIMADA-YXB@necst.nec.co.jp>

Yuji & Keir,

I guess the comments I gave yesterday is obscure, so I wrote up more 
details this time. Please take a look.

Thanks,
Yu

 > To reassign resources, please add boot parameters of dom0 linux as 
follows.
 >
 >     reassign_resources reassigndev=00:1d.7,01:00.0
 >
 >         reassign_resources
 >                         Enables reassigning resources.
 >
 >         reassigndev=    Specifies devices include I/O device and PCI-PCI
 >                         bridge to reassign resources.  PCI-PCI bridge
 >                         can be specified, if resource windows need to
 >                         be expanded.

Generally, it doesn't work as claimed from my observation.

>  /* This quirk function enables us to force all memory resources which are
>   * assigned to PCI devices, to be page-aligned.
>   */
> @@ -41,6 +54,42 @@
>         int i;
>         struct resource *r;
>         resource_size_t old_start;
> +
> +       if (reassign_resources) {
> +               if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
> +                   (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) {
> +                       /* PCI Host Bridge isn't a target device */

The comments is wrong -- we get invalid device type here, not the host 
bridge. Host bridge doesn't have a 'dev' at all.

> +                       return;
> +               }
> +               if (is_reassigndev(dev)) {
> +                       printk(KERN_INFO
> +                               "PCI: Disable device and release resources"
> +                               " [%s].\n", pci_name(dev));
> +                       pci_disable_device(dev);
> +
> +                       for (i=0; i < PCI_NUM_RESOURCES; i++) {
> +                               r = &dev->resource[i];
> +                               if ((r == NULL) ||

How can 'r' be NULL?

> +                                  !(r->flags & IORESOURCE_MEM))
> +                                       continue;
> +
> +                               r->end = r->end - r->start;
> +                               r->start = 0;
> +
> +                               if (i < PCI_BRIDGE_RESOURCES) {
> +                                       pci_update_resource(dev, r, i);
> +                               } else if (i == 8 || i == 9) {

The bridge even hasn't been scanned yet so this will *never* execute as 
expected.

> +                                       /* need to update(clear) the Base/Limit
> +                                        * register also, because PCI bridge is
> +                                        * disabled and the resource is
> +                                        * released.
> +                                        */
> +                                       pci_update_bridge(dev, i);
> +                               }
> +                       }
> +               }
> +               return;
> +       }
> 
>         if (!pci_mem_align)
>                 return;

...

> diff -r b54652ee29ef drivers/pci/setup-bus.c
> --- a/drivers/pci/setup-bus.c   Thu Oct 02 11:29:02 2008 +0100
> +++ b/drivers/pci/setup-bus.c   Wed Oct 08 12:12:27 2008 +0900
> @@ -26,6 +26,7 @@
>  #include <linux/cache.h>
>  #include <linux/slab.h>
> 
> +#include "pci.h"
> 
>  #define DEBUG_CONFIG 1
>  #if DEBUG_CONFIG
> @@ -344,7 +345,8 @@
> 
>         list_for_each_entry(dev, &bus->devices, bus_list) {
>                 int i;

It most likely couldn't get here because the x86 PCI low level code has 
allocated resources for the bridge according to the BIOS and the 
function returns early.

> -
> +               int reassign = reassign_resources ? is_reassigndev(dev) : 0;
> +
>                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
>                         struct resource *r = &dev->resource[i];
>                         unsigned long r_size;
> @@ -352,6 +354,11 @@
>                         if (r->parent || (r->flags & mask) != type)
>                                 continue;
>                         r_size = r->end - r->start + 1;
> +
> +                       if (reassign) {
> +                               r_size = ROUND_UP_TO_PAGESIZE(r_size);
> +                       }
> +
>                         /* For bridges size != alignment */
>                         align = (i < PCI_BRIDGE_RESOURCES) ? r_size : r->start;
>                         order = __ffs(align) - 20;
> diff -r b54652ee29ef drivers/pci/setup-res.c
> --- a/drivers/pci/setup-res.c   Thu Oct 02 11:29:02 2008 +0100
> +++ b/drivers/pci/setup-res.c   Wed Oct 08 12:12:27 2008 +0900
> @@ -117,19 +117,96 @@
>  }
>  EXPORT_SYMBOL_GPL(pci_claim_resource);
> 
> +void
> +pci_update_bridge(struct pci_dev *dev, int resno)
> +{
> +       struct resource *res = &dev->resource[resno];
> +       struct pci_bus_region region;
> +       u32 l, dw, base_up32, limit_up32;
> +
> +       if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE ||
> +           (dev->class >> 8) != PCI_CLASS_BRIDGE_PCI) {
> +               return;
> +       }
> +
> +       if (!res->flags)
> +               return;
> +
> +       switch (resno) {
> +       case 8 :        /* MMIO Base/Limit */
> +               pcibios_resource_to_bus(dev, &region, res);
> +               if (res->flags & IORESOURCE_MEM &&
> +                   !(res->flags & IORESOURCE_PREFETCH)) {
> +                       l = (region.start >> 16) & 0xfff0;
> +                       l |= region.end & 0xfff00000;
> +               } else {
> +                       l = 0x0000fff0;
> +               }
> +               pci_write_config_dword(dev, PCI_MEMORY_BASE, l);
> +
> +               break;
> +
> +       case 9 :        /* Prefetchable MMIO Base/Limit */
> +               /* Clear out the upper 32 bits of PREF limit.
> +                * If PCI_PREF_BASE_UPPER32 was non-zero, this temporarily
> +                * disables PREF range, which is ok.
> +                */
> +               pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0);
> +
> +               /* Get PREF 32/64 bits Addressing mode */
> +               pci_read_config_dword(dev, PCI_PREF_MEMORY_BASE, &dw);
> +
> +               pcibios_resource_to_bus(dev, &region, res);
> +               if (res->flags & IORESOURCE_MEM &&
> +                   res->flags & IORESOURCE_PREFETCH) {
> +                       l = (region.start >> 16) & 0xfff0;
> +                       l |= region.end & 0xfff00000;
> +
> +                       if (dw & PCI_PREF_RANGE_TYPE_64) {
> +                               base_up32 = (region.start >> 32) & 0xffffffff;
> +                               limit_up32 = (region.end >> 32) & 0xffffffff;
> +                       } else {
> +                               base_up32 = 0;
> +                               limit_up32 = 0;
> +                       }
> +               } else {
> +                       l = 0x0000fff0;
> +                       base_up32 = 0xffffffff;
> +                       limit_up32 = 0;
> +               }
> +               pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, l);
> +               /* Set up the upper 32 bits of PREF base/limit. */
> +               pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, base_up32);
> +               pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, limit_up32);
> +               break;
> +       default :
> +               BUG();
> +               break;
> +       }
> +}
> +

I don't see how this function 'expend' the resource window (even it was 
invoked). Actually it has many problems: 1, the dev->resource is not 
updated hence software doesn't know the new resource window size that 
can be granted to subordinate devices. 2, the values might be 
overwritten later by pci_setup_bridge. 3, the registers are changed 
without checking if resource range are available, which means it may 
conflict with other bridge.

>  int pci_assign_resource(struct pci_dev *dev, int resno)
>  {
>         struct pci_bus *bus = dev->bus;
>         struct resource *res = dev->resource + resno;
>         resource_size_t size, min, align;
>         int ret;
> +       int reassigndev = reassign_resources ? is_reassigndev(dev) : 0;
> 
>         size = res->end - res->start + 1;
>         min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
>         /* The bridge resources are special, as their
>            size != alignment. Sizing routines return
>            required alignment in the "start" field. */
> -       align = (resno < PCI_BRIDGE_RESOURCES) ? size : res->start;
> +       if (resno < PCI_BRIDGE_RESOURCES) {
> +               align = size;
> +               if ((reassigndev) &&
> +                   (res->flags & IORESOURCE_MEM)) {
> +                       align = ROUND_UP_TO_PAGESIZE(align);

Different MMIO resrouces of a device may require different alignments. 
Using page size for all may cause problem. The alignment should be a 
bigger value between page size and resource size. Or the best way is 
keep aligned resources of a device and just reassign the unaligned 
resources in the quirk_align_mem_resources.

> +               }
> +       } else {
> +               align = res->start;
> +       }
> 
>         /* First, try exact prefetching match.. */
>         ret = pci_bus_alloc_resource(bus, res, size, align, min,
> @@ -154,6 +231,9 @@
>                         resno, (unsigned long long)size,
>                         (unsigned long long)res->start, pci_name(dev));
>         } else if (resno < PCI_BRIDGE_RESOURCES) {
> +               printk(KERN_DEBUG "PCI: Assign resource(%d) on %s "
> +                       "%016llx - %016llx\n", resno, pci_name(dev),
> +                       (u64)res->start, (u64)res->end);
>                 pci_update_resource(dev, res, resno);
>         }
> 
> 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

  parent reply	other threads:[~2008-10-10  7:05 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-10-08 23:43 [PATCH] dom0 linux: Reassign memory resources to device for pci passthrough Yuji Shimada
2008-10-09  1:04 ` Neo Jia
2008-10-09  8:04   ` Yuji Shimada
2008-10-09  3:21 ` Zhao, Yu
2008-10-09  8:54   ` Yuji Shimada
2008-10-09 11:34     ` Zhao, Yu
2008-10-09  7:20 ` Yuji Shimada
2008-10-10  7:05 ` Zhao, Yu [this message]
2008-10-10 10:13   ` Yuji Shimada

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=48EEFEB9.70407@intel.com \
    --to=yu.zhao@intel.com \
    --cc=keir.fraser@citrix.com \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.