From: Andrew Cooper <andrew.cooper3@citrix.com>
To: xen-devel@lists.xensource.com
Subject: Re: [PATCH] x86/PCI-MSI: properly determine VF BAR values
Date: Mon, 8 Aug 2011 11:27:31 +0100 [thread overview]
Message-ID: <4E3FBA13.3060804@citrix.com> (raw)
In-Reply-To: <4E3FC6E102000078000501CA@nat28.tlf.novell.com>
On 08/08/11 10:22, Jan Beulich wrote:
> As was discussed a couple of times on this list, SR-IOV virtual
> functions have their BARs read as zero - the physical function's SR-IOV
> capability structure must be consulted instead. The bogus warnings
> people complained about are being eliminated with this change.
>
> Signed-off-by: Jan Beulich <jbeulich@novell.com>
Tested-by: Andrew Cooper <andrew.cooper3@citrix.com>
> --- a/xen/arch/x86/msi.c
> +++ b/xen/arch/x86/msi.c
> @@ -522,12 +522,48 @@ static int msi_capability_init(struct pc
> return 0;
> }
>
> -static u64 read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir)
> +static u64 read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir, int vf)
> {
> u8 limit;
> - u32 addr;
> + u32 addr, base = PCI_BASE_ADDRESS_0, disp = 0;
>
> - switch ( pci_conf_read8(bus, slot, func, PCI_HEADER_TYPE) & 0x7f )
> + if ( vf >= 0 )
> + {
> + struct pci_dev *pdev = pci_get_pdev(bus, PCI_DEVFN(slot, func));
> + unsigned int pos = pci_find_ext_capability(0, bus,
> + PCI_DEVFN(slot, func),
> + PCI_EXT_CAP_ID_SRIOV);
> + u16 ctrl = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_CTRL);
> + u16 num_vf = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_NUM_VF);
> + u16 offset = pci_conf_read16(bus, slot, func,
> + pos + PCI_SRIOV_VF_OFFSET);
> + u16 stride = pci_conf_read16(bus, slot, func,
> + pos + PCI_SRIOV_VF_STRIDE);
> +
> + if ( !pdev || !pos ||
> + !(ctrl & PCI_SRIOV_CTRL_VFE) ||
> + !(ctrl & PCI_SRIOV_CTRL_MSE) ||
> + !num_vf || !offset || (num_vf > 1 && !stride) ||
> + bir >= PCI_SRIOV_NUM_BARS ||
> + !pdev->vf_rlen[bir] )
> + return 0;
> + base = pos + PCI_SRIOV_BAR;
> + vf -= PCI_BDF(bus, slot, func) + offset;
> + if ( vf < 0 || (vf && vf % stride) )
> + return 0;
> + if ( stride )
> + {
> + if ( vf % stride )
> + return 0;
> + vf /= stride;
> + }
> + if ( vf >= num_vf )
> + return 0;
> + BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS);
> + disp = vf * pdev->vf_rlen[bir];
> + limit = PCI_SRIOV_NUM_BARS;
> + }
> + else switch ( pci_conf_read8(bus, slot, func, PCI_HEADER_TYPE) & 0x7f )
> {
> case PCI_HEADER_TYPE_NORMAL:
> limit = 6;
> @@ -544,7 +580,7 @@ static u64 read_pci_mem_bar(u8 bus, u8 s
>
> if ( bir >= limit )
> return 0;
> - addr = pci_conf_read32(bus, slot, func, PCI_BASE_ADDRESS_0 + bir * 4);
> + addr = pci_conf_read32(bus, slot, func, base + bir * 4);
> if ( (addr & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO )
> return 0;
> if ( (addr & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64 )
> @@ -552,11 +588,10 @@ static u64 read_pci_mem_bar(u8 bus, u8 s
> addr &= PCI_BASE_ADDRESS_MEM_MASK;
> if ( ++bir >= limit )
> return 0;
> - return addr |
> - ((u64)pci_conf_read32(bus, slot, func,
> - PCI_BASE_ADDRESS_0 + bir * 4) << 32);
> + return addr + disp +
> + ((u64)pci_conf_read32(bus, slot, func, base + bir * 4) << 32);
> }
> - return addr & PCI_BASE_ADDRESS_MEM_MASK;
> + return (addr & PCI_BASE_ADDRESS_MEM_MASK) + disp;
> }
>
> /**
> @@ -629,11 +664,29 @@ static int msix_capability_init(struct p
>
> if ( !dev->msix_nr_entries )
> {
> + u8 pbus, pslot, pfunc;
> + int vf;
> u64 pba_paddr;
> u32 pba_offset;
>
> + if ( !dev->info.is_virtfn )
> + {
> + pbus = bus;
> + pslot = slot;
> + pfunc = func;
> + vf = -1;
> + }
> + else
> + {
> + pbus = dev->info.physfn.bus;
> + pslot = PCI_SLOT(dev->info.physfn.devfn);
> + pfunc = PCI_FUNC(dev->info.physfn.devfn);
> + vf = PCI_BDF2(dev->bus, dev->devfn);
> + }
> +
> ASSERT(!dev->msix_used_entries);
> - WARN_ON(msi->table_base != read_pci_mem_bar(bus, slot, func, bir));
> + WARN_ON(msi->table_base !=
> + read_pci_mem_bar(pbus, pslot, pfunc, bir, vf));
>
> dev->msix_nr_entries = nr_entries;
> dev->msix_table.first = PFN_DOWN(table_paddr);
> @@ -645,7 +698,7 @@ static int msix_capability_init(struct p
> pba_offset = pci_conf_read32(bus, slot, func,
> msix_pba_offset_reg(pos));
> bir = (u8)(pba_offset & PCI_MSIX_BIRMASK);
> - pba_paddr = read_pci_mem_bar(bus, slot, func, bir);
> + pba_paddr = read_pci_mem_bar(pbus, pslot, pfunc, bir, vf);
> WARN_ON(!pba_paddr);
> pba_paddr += pba_offset & ~PCI_MSIX_BIRMASK;
>
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -146,6 +146,7 @@ void pci_enable_acs(struct pci_dev *pdev
> int pci_add_device(u8 bus, u8 devfn, const struct pci_dev_info *info)
> {
> struct pci_dev *pdev;
> + unsigned int slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn);
> const char *pdev_type;
> int ret = -ENOMEM;
>
> @@ -154,7 +155,14 @@ int pci_add_device(u8 bus, u8 devfn, con
> else if (info->is_extfn)
> pdev_type = "extended function";
> else if (info->is_virtfn)
> + {
> + spin_lock(&pcidevs_lock);
> + pdev = pci_get_pdev(info->physfn.bus, info->physfn.devfn);
> + spin_unlock(&pcidevs_lock);
> + if ( !pdev )
> + pci_add_device(info->physfn.bus, info->physfn.devfn, NULL);
> pdev_type = "virtual function";
> + }
> else
> return -EINVAL;
>
> @@ -165,6 +173,70 @@ int pci_add_device(u8 bus, u8 devfn, con
>
> if ( info )
> pdev->info = *info;
> + else if ( !pdev->vf_rlen[0] )
> + {
> + unsigned int pos = pci_find_ext_capability(0, bus, devfn,
> + PCI_EXT_CAP_ID_SRIOV);
> + u16 ctrl = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_CTRL);
> +
> + if ( !pos )
> + /* Nothing */;
> + else if ( !(ctrl & (PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE)) )
> + {
> + unsigned int i;
> +
> + BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS);
> + for ( i = 0; i < PCI_SRIOV_NUM_BARS; ++i )
> + {
> + unsigned int idx = pos + PCI_SRIOV_BAR + i * 4;
> + u32 bar = pci_conf_read32(bus, slot, func, idx);
> + u32 hi = 0;
> +
> + if ( (bar & PCI_BASE_ADDRESS_SPACE) ==
> + PCI_BASE_ADDRESS_SPACE_IO )
> + {
> + printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x with vf"
> + " BAR%u in IO space\n",
> + bus, slot, func, i);
> + continue;
> + }
> + pci_conf_write32(bus, slot, func, idx, ~0);
> + if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
> + PCI_BASE_ADDRESS_MEM_TYPE_64 )
> + {
> + if ( i >= PCI_SRIOV_NUM_BARS )
> + {
> + printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x with"
> + " 64-bit vf BAR in last slot\n",
> + bus, slot, func);
> + break;
> + }
> + hi = pci_conf_read32(bus, slot, func, idx + 4);
> + pci_conf_write32(bus, slot, func, idx + 4, ~0);
> + }
> + pdev->vf_rlen[i] = pci_conf_read32(bus, slot, func, idx) &
> + PCI_BASE_ADDRESS_MEM_MASK;
> + if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
> + PCI_BASE_ADDRESS_MEM_TYPE_64 )
> + {
> + pdev->vf_rlen[i] |= (u64)pci_conf_read32(bus, slot, func,
> + idx + 4) << 32;
> + pci_conf_write32(bus, slot, func, idx + 4, hi);
> + }
> + else if ( pdev->vf_rlen[i] )
> + pdev->vf_rlen[i] |= (u64)~0 << 32;
> + pci_conf_write32(bus, slot, func, idx, bar);
> + pdev->vf_rlen[i] = -pdev->vf_rlen[i];
> + if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
> + PCI_BASE_ADDRESS_MEM_TYPE_64 )
> + ++i;
> + }
> + }
> + else
> + printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x has its virtual"
> + " functions already enabled (%04x)\n",
> + bus, slot, func, ctrl);
> + }
>
> ret = 0;
> if ( !pdev->domain )
> @@ -184,7 +256,7 @@ int pci_add_device(u8 bus, u8 devfn, con
> out:
> spin_unlock(&pcidevs_lock);
> printk(XENLOG_DEBUG "PCI add %s %02x:%02x.%x\n", pdev_type,
> - bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
> + bus, slot, func);
> return ret;
> }
>
> --- a/xen/include/xen/pci.h
> +++ b/xen/include/xen/pci.h
> @@ -59,6 +59,7 @@ struct pci_dev {
> const u8 bus;
> const u8 devfn;
> struct pci_dev_info info;
> + u64 vf_rlen[6];
> };
>
> #define for_each_pdev(domain, pdev) \
> --- a/xen/include/xen/pci_regs.h
> +++ b/xen/include/xen/pci_regs.h
> @@ -425,7 +425,7 @@
> #define PCI_EXT_CAP_ID_ACS 13
> #define PCI_EXT_CAP_ID_ARI 14
> #define PCI_EXT_CAP_ID_ATS 15
> -#define PCI_EXT_CAP_ID_IOV 16
> +#define PCI_EXT_CAP_ID_SRIOV 16
>
> /* Advanced Error Reporting */
> #define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */
> @@ -545,4 +545,35 @@
> #define PCI_ACS_CTRL 0x06 /* ACS Control Register */
> #define PCI_ACS_EGRESS_CTL_V 0x08 /* ACS Egress Control Vector */
>
> +/* Single Root I/O Virtualization */
> +#define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */
> +#define PCI_SRIOV_CAP_VFM 0x01 /* VF Migration Capable */
> +#define PCI_SRIOV_CAP_INTR(x) ((x) >> 21) /* Interrupt Message Number */
> +#define PCI_SRIOV_CTRL 0x08 /* SR-IOV Control */
> +#define PCI_SRIOV_CTRL_VFE 0x01 /* VF Enable */
> +#define PCI_SRIOV_CTRL_VFM 0x02 /* VF Migration Enable */
> +#define PCI_SRIOV_CTRL_INTR 0x04 /* VF Migration Interrupt Enable */
> +#define PCI_SRIOV_CTRL_MSE 0x08 /* VF Memory Space Enable */
> +#define PCI_SRIOV_CTRL_ARI 0x10 /* ARI Capable Hierarchy */
> +#define PCI_SRIOV_STATUS 0x0a /* SR-IOV Status */
> +#define PCI_SRIOV_STATUS_VFM 0x01 /* VF Migration Status */
> +#define PCI_SRIOV_INITIAL_VF 0x0c /* Initial VFs */
> +#define PCI_SRIOV_TOTAL_VF 0x0e /* Total VFs */
> +#define PCI_SRIOV_NUM_VF 0x10 /* Number of VFs */
> +#define PCI_SRIOV_FUNC_LINK 0x12 /* Function Dependency Link */
> +#define PCI_SRIOV_VF_OFFSET 0x14 /* First VF Offset */
> +#define PCI_SRIOV_VF_STRIDE 0x16 /* Following VF Stride */
> +#define PCI_SRIOV_VF_DID 0x1a /* VF Device ID */
> +#define PCI_SRIOV_SUP_PGSIZE 0x1c /* Supported Page Sizes */
> +#define PCI_SRIOV_SYS_PGSIZE 0x20 /* System Page Size */
> +#define PCI_SRIOV_BAR 0x24 /* VF BAR0 */
> +#define PCI_SRIOV_NUM_BARS 6 /* Number of VF BARs */
> +#define PCI_SRIOV_VFM 0x3c /* VF Migration State Array Offset*/
> +#define PCI_SRIOV_VFM_BIR(x) ((x) & 7) /* State BIR */
> +#define PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7) /* State Offset */
> +#define PCI_SRIOV_VFM_UA 0x0 /* Inactive.Unavailable */
> +#define PCI_SRIOV_VFM_MI 0x1 /* Dormant.MigrateIn */
> +#define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */
> +#define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */
> +
> #endif /* LINUX_PCI_REGS_H */
>
>
--
Andrew Cooper - Dom0 Kernel Engineer, Citrix XenServer
T: +44 (0)1223 225 900, http://www.citrix.com
prev parent reply other threads:[~2011-08-08 10:27 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-08-08 9:22 [PATCH] x86/PCI-MSI: properly determine VF BAR values Jan Beulich
2011-08-08 10:27 ` Andrew Cooper [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4E3FBA13.3060804@citrix.com \
--to=andrew.cooper3@citrix.com \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.