All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Zhao, Yu" <yu.zhao@intel.com>
To: Keir Fraser <keir.fraser@eu.citrix.com>
Cc: "xen-devel@lists.xensource.com" <xen-devel@lists.xensource.com>
Subject: Re: Xen 3.4 code freeze
Date: Wed, 18 Mar 2009 17:22:35 +0800	[thread overview]
Message-ID: <49C0BD5B.5000500@intel.com> (raw)
In-Reply-To: <C5E65404.56B6%keir.fraser@eu.citrix.com>

[-- Attachment #1: Type: text/plain, Size: 707 bytes --]

Hi Keir,

There are some native kernel SR-IOV patches that will be accepted by 
the maintainer soon, and I planned to backport them to Xen/Dom0 then. 
However, it looks like they won't be in the native kernel tree before 
the code freeze. I have attached the backported patches; if possible, 
could you please take them for 3.4?

Thanks,
Yu

Keir Fraser wrote:
> Folks,
> 
> My plan is to take no further large feature patchsets into xen-unstable until 3.4 is branched, the only exception possibly being Dan’s tmem patches. Furthermore, after Friday I’m away for a week and when I get back I intend to shake the trees into better shape and accept bug-fix patches only.
> 
>  -- Keir
> 
> 


[-- Attachment #2: xen.patch --]
[-- Type: text/plain, Size: 6153 bytes --]

# HG changeset patch
# User Yu Zhao <yu.zhao@intel.com>
# Date 1237366099 14400
# Node ID cdf29bb6c74d5e075099855e9ddffb27e633079a
# Parent  9fc957e63f8dc0fdb2400eb424da4c1122b7ac65
Xen: use proper device ID to search VT-d unit for ARI and SR-IOV device

PCIe Alternative Routing-ID Interpretation (ARI) ECN defines the Extended
Function -- a function whose function number is greater than 7 within an
ARI Device. Intel VT-d spec 1.2 section 8.3.2 specifies that the Extended
Function is under the scope of the same remapping unit as the traditional
function. The hypervisor needs to know if a function is an Extended
Function so it can find the proper DMAR for it.

And section 8.3.3 specifies that the SR-IOV Virtual Function is under the
scope of the same remapping unit as the Physical Function. The hypervisor
also needs to know if a function is a Virtual Function and which Physical
Function it's associated with, for the same reason.

diff -r 9fc957e63f8d -r cdf29bb6c74d xen/arch/ia64/xen/hypercall.c
--- a/xen/arch/ia64/xen/hypercall.c	Tue Mar 17 15:40:25 2009 +0000
+++ b/xen/arch/ia64/xen/hypercall.c	Wed Mar 18 04:48:19 2009 -0400
@@ -650,6 +650,7 @@
 
     case PHYSDEVOP_manage_pci_add: {
         struct physdev_manage_pci manage_pci;
+        struct pci_dev dev;
         ret = -EPERM;
         if ( !IS_PRIV(current->domain) )
             break;
@@ -657,7 +658,13 @@
         if ( copy_from_guest(&manage_pci, arg, 1) != 0 )
             break;
 
-        ret = pci_add_device(manage_pci.bus, manage_pci.devfn);
+        dev.bus = manage_pci.bus;
+        dev.devfn = manage_pci.devfn;
+        dev.is_extfn = manage_pci.is_extfn;
+        dev.is_virtfn = manage_pci.is_virtfn;
+        dev.physfn.bus = manage_pci.physfn.bus;
+        dev.physfn.devfn = manage_pci.physfn.devfn;
+        ret = pci_add_device(&dev);
             break;
     }
 
diff -r 9fc957e63f8d -r cdf29bb6c74d xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c	Tue Mar 17 15:40:25 2009 +0000
+++ b/xen/arch/x86/physdev.c	Wed Mar 18 04:48:19 2009 -0400
@@ -397,6 +397,7 @@
 
     case PHYSDEVOP_manage_pci_add: {
         struct physdev_manage_pci manage_pci;
+        struct pci_dev dev;
         ret = -EPERM;
         if ( !IS_PRIV(v->domain) )
             break;
@@ -404,7 +405,13 @@
         if ( copy_from_guest(&manage_pci, arg, 1) != 0 )
             break;
 
-        ret = pci_add_device(manage_pci.bus, manage_pci.devfn);
+        dev.bus = manage_pci.bus;
+        dev.devfn = manage_pci.devfn;
+        dev.is_extfn = manage_pci.is_extfn;
+        dev.is_virtfn = manage_pci.is_virtfn;
+        dev.physfn.bus = manage_pci.physfn.bus;
+        dev.physfn.devfn = manage_pci.physfn.devfn;
+        ret = pci_add_device(&dev);
         break;
     }
 
diff -r 9fc957e63f8d -r cdf29bb6c74d xen/drivers/passthrough/pci.c
--- a/xen/drivers/passthrough/pci.c	Tue Mar 17 15:40:25 2009 +0000
+++ b/xen/drivers/passthrough/pci.c	Wed Mar 18 04:48:19 2009 -0400
@@ -43,8 +43,8 @@
         return NULL;
     memset(pdev, 0, sizeof(struct pci_dev));
 
-    *((u8*) &pdev->bus) = bus;
-    *((u8*) &pdev->devfn) = devfn;
+    pdev->bus = bus;
+    pdev->devfn = devfn;
     pdev->domain = NULL;
     INIT_LIST_HEAD(&pdev->msi_list);
     list_add(&pdev->alldevs_list, &alldevs_list);
@@ -92,15 +92,20 @@
     return NULL;
 }
 
-int pci_add_device(u8 bus, u8 devfn)
+int pci_add_device(struct pci_dev *dev)
 {
     struct pci_dev *pdev;
     int ret = -ENOMEM;
 
     spin_lock(&pcidevs_lock);
-    pdev = alloc_pdev(bus, devfn);
+    pdev = alloc_pdev(dev->bus, dev->devfn);
     if ( !pdev )
         goto out;
+
+    pdev->is_extfn = dev->is_extfn;
+    pdev->is_virtfn = dev->is_virtfn;
+    pdev->physfn.bus = dev->physfn.bus;
+    pdev->physfn.devfn = dev->physfn.devfn;
 
     ret = 0;
     if ( !pdev->domain )
@@ -115,8 +120,8 @@
 
 out:
     spin_unlock(&pcidevs_lock);
-    printk(XENLOG_DEBUG "PCI add device %02x:%02x.%x\n", bus,
-           PCI_SLOT(devfn), PCI_FUNC(devfn));
+    printk(XENLOG_DEBUG "PCI add device %02x:%02x.%x\n", dev->bus,
+           PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
     return ret;
 }
 
diff -r 9fc957e63f8d -r cdf29bb6c74d xen/drivers/passthrough/vtd/dmar.c
--- a/xen/drivers/passthrough/vtd/dmar.c	Tue Mar 17 15:40:25 2009 +0000
+++ b/xen/drivers/passthrough/vtd/dmar.c	Wed Mar 18 04:48:19 2009 -0400
@@ -157,6 +157,17 @@
     struct acpi_drhd_unit *drhd;
     struct acpi_drhd_unit *found = NULL, *include_all = NULL;
     int i;
+    struct pci_dev *dev;
+
+    dev = pci_get_pdev(bus, devfn);
+    BUG_ON(!dev);
+
+    if (dev->is_extfn) {
+        devfn = 0;
+    } else if (dev->is_virtfn) {
+        bus = dev->physfn.bus;
+        devfn = PCI_SLOT(dev->physfn.devfn) ? 0 : dev->physfn.devfn;
+    }
 
     list_for_each_entry ( drhd, &acpi_drhd_units, list )
     {
diff -r 9fc957e63f8d -r cdf29bb6c74d xen/include/public/physdev.h
--- a/xen/include/public/physdev.h	Tue Mar 17 15:40:25 2009 +0000
+++ b/xen/include/public/physdev.h	Wed Mar 18 04:48:19 2009 -0400
@@ -178,6 +178,12 @@
     /* IN */
     uint8_t bus;
     uint8_t devfn;
+    unsigned is_extfn:1;
+    unsigned is_virtfn:1;
+    struct {
+        uint8_t bus;
+        uint8_t devfn;
+    } physfn;
 }; 
 
 typedef struct physdev_manage_pci physdev_manage_pci_t;
diff -r 9fc957e63f8d -r cdf29bb6c74d xen/include/xen/pci.h
--- a/xen/include/xen/pci.h	Tue Mar 17 15:40:25 2009 +0000
+++ b/xen/include/xen/pci.h	Wed Mar 18 04:48:19 2009 -0400
@@ -41,8 +41,14 @@
     spinlock_t msix_table_lock;
 
     struct domain *domain;
-    const u8 bus;
-    const u8 devfn;
+    u8 bus;
+    u8 devfn;
+    unsigned is_extfn:1;
+    unsigned is_virtfn:1;
+    struct {
+        u8 bus;
+        u8 devfn;
+    } physfn;
 };
 
 #define for_each_pdev(domain, pdev) \
@@ -62,7 +68,7 @@
 struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn);
 
 void pci_release_devices(struct domain *d);
-int pci_add_device(u8 bus, u8 devfn);
+int pci_add_device(struct pci_dev *dev);
 int pci_remove_device(u8 bus, u8 devfn);
 struct pci_dev *pci_get_pdev(int bus, int devfn);
 struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn);

[-- Attachment #3: dom0-1.patch --]
[-- Type: text/plain, Size: 12451 bytes --]

# HG changeset patch
# User Yu Zhao <yu.zhao@intel.com>
# Date 1237268585 14400
# Node ID 92730fa710446b2502809faa72bb29fda95ba878
# Parent  e8a9f8910a3f113759906e493eaa211e2c43cd85
PCI: initialize and release SR-IOV capability

If a device has the SR-IOV capability, initialize it (set the ARI
Capable Hierarchy in the lowest numbered PF if necessary; calculate
the System Page Size for the VF MMIO, probe the VF Offset, Stride
and BARs). A lock for the VF bus allocation is also initialized if
a PF is the lowest numbered PF.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>

diff -r e8a9f8910a3f -r 92730fa71044 drivers/pci/Kconfig
--- a/drivers/pci/Kconfig	Fri Mar 13 10:08:22 2009 +0000
+++ b/drivers/pci/Kconfig	Tue Mar 17 01:43:05 2009 -0400
@@ -37,3 +37,12 @@
 	help
 	  Say Y here if you want to reserve PCI device for passthrough.
 
+config PCI_IOV
+	bool "PCI IOV support"
+	depends on PCI
+	help
+	  PCI-SIG I/O Virtualization (IOV) Specifications support.
+	  Single Root IOV: allows the creation of virtual PCI devices
+	  that share the physical resources from a real device.
+
+	  When in doubt, say N.
diff -r e8a9f8910a3f -r 92730fa71044 drivers/pci/Makefile
--- a/drivers/pci/Makefile	Fri Mar 13 10:08:22 2009 +0000
+++ b/drivers/pci/Makefile	Tue Mar 17 01:43:05 2009 -0400
@@ -15,6 +15,8 @@
 
 # Build the PCI Hotplug drivers if we were asked to
 obj-$(CONFIG_HOTPLUG_PCI) += hotplug/
+
+obj-$(CONFIG_PCI_IOV) += iov.o
 
 #
 # Some architectures use the generic PCI setup functions
diff -r e8a9f8910a3f -r 92730fa71044 drivers/pci/iov.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/pci/iov.c	Tue Mar 17 01:43:05 2009 -0400
@@ -0,0 +1,175 @@
+/*
+ * drivers/pci/iov.c
+ *
+ * Copyright (C) 2009 Intel Corporation, Yu Zhao <yu.zhao@intel.com>
+ *
+ * PCI Express I/O Virtualization (IOV) support.
+ *   Single Root IOV 1.0
+ */
+
+#include <linux/pci.h>
+#include <linux/mutex.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include "pci.h"
+
+
+static int sriov_init(struct pci_dev *dev, int pos)
+{
+	int i;
+	int rc;
+	int nres;
+	u32 pgsz;
+	u16 ctrl, total, offset, stride;
+	struct pci_sriov *iov;
+	struct resource *res;
+	struct pci_dev *pdev;
+
+	pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl);
+	if (ctrl & PCI_SRIOV_CTRL_VFE) {
+		pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0);
+		ssleep(1);
+	}
+
+	pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total);
+	if (!total)
+		return 0;
+
+	list_for_each_entry(pdev, &dev->bus->devices, bus_list)
+		if (pdev->is_physfn)
+			break;
+	if (list_empty(&dev->bus->devices) || !pdev->is_physfn)
+		pdev = NULL;
+
+	ctrl = 0;
+	if (!pdev && pci_ari_enabled(dev->bus))
+		ctrl |= PCI_SRIOV_CTRL_ARI;
+
+	pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl);
+	pci_write_config_word(dev, pos + PCI_SRIOV_NUM_VF, total);
+	pci_read_config_word(dev, pos + PCI_SRIOV_VF_OFFSET, &offset);
+	pci_read_config_word(dev, pos + PCI_SRIOV_VF_STRIDE, &stride);
+	if (!offset || (total > 1 && !stride))
+		return -EIO;
+
+	pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz);
+	i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0;
+	pgsz &= ~((1 << i) - 1);
+	if (!pgsz)
+		return -EIO;
+
+	pgsz &= ~(pgsz - 1);
+	pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz);
+
+	nres = 0;
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = dev->resource + PCI_IOV_RESOURCES + i;
+		i += __pci_read_base(dev, pci_bar_unknown, res,
+				     pos + PCI_SRIOV_BAR + i * 4);
+		if (!res->flags)
+			continue;
+		if ((res->end - res->start + 1) & (PAGE_SIZE - 1)) {
+			rc = -EIO;
+			goto failed;
+		}
+		res->end = res->start + (res->end - res->start + 1) * total - 1;
+		nres++;
+	}
+
+	iov = kzalloc(sizeof(*iov), GFP_KERNEL);
+	if (!iov) {
+		rc = -ENOMEM;
+		goto failed;
+	}
+
+	iov->pos = pos;
+	iov->nres = nres;
+	iov->ctrl = ctrl;
+	iov->total = total;
+	iov->offset = offset;
+	iov->stride = stride;
+	iov->pgsz = pgsz;
+	iov->self = dev;
+	pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
+	pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
+
+	if (pdev)
+		iov->dev = pci_dev_get(pdev);
+	else {
+		iov->dev = dev;
+		mutex_init(&iov->lock);
+	}
+
+	dev->sriov = iov;
+	dev->is_physfn = 1;
+
+	return 0;
+
+failed:
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = dev->resource + PCI_IOV_RESOURCES + i;
+		res->flags = 0;
+	}
+
+	return rc;
+}
+
+static void sriov_release(struct pci_dev *dev)
+{
+	if (dev == dev->sriov->dev)
+		mutex_destroy(&dev->sriov->lock);
+	else
+		pci_dev_put(dev->sriov->dev);
+
+	kfree(dev->sriov);
+	dev->sriov = NULL;
+}
+
+/**
+ * pci_iov_init - initialize the IOV capability
+ * @dev: the PCI device
+ *
+ * Returns 0 on success, or negative on failure.
+ */
+int pci_iov_init(struct pci_dev *dev)
+{
+	int pos;
+
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
+	if (pos)
+		return sriov_init(dev, pos);
+
+	return -ENODEV;
+}
+
+/**
+ * pci_iov_release - release resources used by the IOV capability
+ * @dev: the PCI device
+ */
+void pci_iov_release(struct pci_dev *dev)
+{
+	if (dev->is_physfn)
+		sriov_release(dev);
+}
+
+/**
+ * pci_iov_resource_bar - get position of the SR-IOV BAR
+ * @dev: the PCI device
+ * @resno: the resource number
+ * @type: the BAR type to be filled in
+ *
+ * Returns position of the BAR encapsulated in the SR-IOV capability.
+ */
+int pci_iov_resource_bar(struct pci_dev *dev, int resno,
+			 enum pci_bar_type *type)
+{
+	if (resno < PCI_IOV_RESOURCES || resno > PCI_IOV_RESOURCE_END)
+		return 0;
+
+	BUG_ON(!dev->is_physfn);
+
+	*type = pci_bar_unknown;
+
+	return dev->sriov->pos + PCI_SRIOV_BAR +
+		4 * (resno - PCI_IOV_RESOURCES);
+}
diff -r e8a9f8910a3f -r 92730fa71044 drivers/pci/pci.c
--- a/drivers/pci/pci.c	Fri Mar 13 10:08:22 2009 +0000
+++ b/drivers/pci/pci.c	Tue Mar 17 01:43:05 2009 -0400
@@ -1048,12 +1048,19 @@
  */
 int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type)
 {
+	int reg;
+
 	if (resno < PCI_ROM_RESOURCE) {
 		*type = pci_bar_unknown;
 		return PCI_BASE_ADDRESS_0 + 4 * resno;
 	} else if (resno == PCI_ROM_RESOURCE) {
 		*type = pci_bar_mem32;
 		return dev->rom_base_reg;
+	} else if (resno < PCI_BRIDGE_RESOURCES) {
+		/* device specific resource */
+		reg = pci_iov_resource_bar(dev, resno, type);
+		if (reg)
+			return reg;
 	}
 
 	dev_err(&dev->dev, "BAR: invalid resource #%d\n", resno);
diff -r e8a9f8910a3f -r 92730fa71044 drivers/pci/pci.h
--- a/drivers/pci/pci.h	Fri Mar 13 10:08:22 2009 +0000
+++ b/drivers/pci/pci.h	Tue Mar 17 01:43:05 2009 -0400
@@ -132,3 +132,40 @@
 {
 	return bus->self && bus->self->ari_enabled;
 }
+
+/* Single Root I/O Virtualization */
+struct pci_sriov {
+	int pos;		/* capability position */
+	int nres;		/* number of resources */
+	u32 cap;		/* SR-IOV Capabilities */
+	u16 ctrl;		/* SR-IOV Control */
+	u16 total;		/* total VFs associated with the PF */
+	u16 offset;		/* first VF Routing ID offset */
+	u16 stride;		/* following VF stride */
+	u32 pgsz;		/* page size for BAR alignment */
+	u8 link;		/* Function Dependency Link */
+	struct pci_dev *dev;	/* lowest numbered PF */
+	struct pci_dev *self;	/* this PF */
+	struct mutex lock;	/* lock for VF bus */
+};
+
+#ifdef CONFIG_PCI_IOV
+extern int pci_iov_init(struct pci_dev *dev);
+extern void pci_iov_release(struct pci_dev *dev);
+extern int pci_iov_resource_bar(struct pci_dev *dev, int resno,
+				enum pci_bar_type *type);
+#else
+static inline int pci_iov_init(struct pci_dev *dev)
+{
+	return -ENODEV;
+}
+static inline void pci_iov_release(struct pci_dev *dev)
+
+{
+}
+static inline int pci_iov_resource_bar(struct pci_dev *dev, int resno,
+				       enum pci_bar_type *type)
+{
+	return 0;
+}
+#endif /* CONFIG_PCI_IOV */
diff -r e8a9f8910a3f -r 92730fa71044 drivers/pci/probe.c
--- a/drivers/pci/probe.c	Fri Mar 13 10:08:22 2009 +0000
+++ b/drivers/pci/probe.c	Tue Mar 17 01:43:05 2009 -0400
@@ -765,6 +765,9 @@
 	struct pci_dev *pci_dev;
 
 	pci_dev = to_pci_dev(dev);
+
+	pci_iov_release(pci_dev);
+
 	kfree(pci_dev);
 }
 
@@ -891,6 +894,9 @@
 
 	/* Alternative Routing-ID Forwarding */
 	pci_enable_ari(dev);
+
+	/* Single Root I/O Virtualization */
+	pci_iov_init(dev);
 
 	/*
 	 * Add the device to our list of discovered devices
diff -r e8a9f8910a3f -r 92730fa71044 include/linux/pci.h
--- a/include/linux/pci.h	Fri Mar 13 10:08:22 2009 +0000
+++ b/include/linux/pci.h	Tue Mar 17 01:43:05 2009 -0400
@@ -77,6 +77,12 @@
 	/* #6: expansion ROM resource */
 	PCI_ROM_RESOURCE,
 
+	/* device specific resources */
+#ifdef CONFIG_PCI_IOV
+	PCI_IOV_RESOURCES,
+	PCI_IOV_RESOURCE_END = PCI_IOV_RESOURCES + PCI_SRIOV_NUM_BARS - 1,
+#endif
+
 	/* resources assigned to buses behind the bridge */
 #define PCI_BRIDGE_RESOURCE_NUM 4
 
@@ -127,6 +133,8 @@
 	char cap_nr;
 	u32 data[0];
 };
+
+struct pci_sriov;
 
 /*
  * The pci_dev structure is used to describe PCI devices.
@@ -189,13 +197,17 @@
 	unsigned int	broken_parity_status:1;	/* Device generates false positive parity */
 	unsigned int 	msi_enabled:1;
 	unsigned int	msix_enabled:1;
+	unsigned int	ari_enabled:1;	/* ARI forwarding */
+	unsigned int	is_physfn:1;
 
 	u32		saved_config_space[16]; /* config space saved at suspend time */
 	struct hlist_head saved_cap_space;
 	struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */
 	int rom_attr_enabled;		/* has display of the rom attribute been enabled? */
 	struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */
-	unsigned int	ari_enabled:1;	/* ARI forwarding */
+#ifdef CONFIG_PCI_IOV
+	struct pci_sriov *sriov;	/* SR-IOV capability related */
+#endif
 };
 
 #define pci_dev_g(n) list_entry(n, struct pci_dev, global_list)
diff -r e8a9f8910a3f -r 92730fa71044 include/linux/pci_regs.h
--- a/include/linux/pci_regs.h	Fri Mar 13 10:08:22 2009 +0000
+++ b/include/linux/pci_regs.h	Tue Mar 17 01:43:05 2009 -0400
@@ -332,6 +332,7 @@
 #define  PCI_EXP_TYPE_UPSTREAM	0x5	/* Upstream Port */
 #define  PCI_EXP_TYPE_DOWNSTREAM 0x6	/* Downstream Port */
 #define  PCI_EXP_TYPE_PCI_BRIDGE 0x7	/* PCI/PCI-X Bridge */
+#define  PCI_EXP_TYPE_RC_END	0x9	/* Root Complex Integrated Endpoint */
 #define PCI_EXP_FLAGS_SLOT	0x0100	/* Slot implemented */
 #define PCI_EXP_FLAGS_IRQ	0x3e00	/* Interrupt message number */
 #define PCI_EXP_DEVCAP		4	/* Device capabilities */
@@ -393,6 +394,7 @@
 #define PCI_EXT_CAP_ID_DSN	3
 #define PCI_EXT_CAP_ID_PWR	4
 #define PCI_EXT_CAP_ID_ARI	14
+#define PCI_EXT_CAP_ID_SRIOV	16
 
 /* Advanced Error Reporting */
 #define PCI_ERR_UNCOR_STATUS	4	/* Uncorrectable Error Status */
@@ -478,4 +480,35 @@
 #define  PCI_ARI_CTRL_ACS	0x0002	/* ACS Function Groups Enable */
 #define  PCI_ARI_CTRL_FG(x)	(((x) >> 4) & 7) /* Function Group */
 
+/* Single Root I/O Virtualization */
+#define PCI_SRIOV_CAP		0x04	/* SR-IOV Capabilities */
+#define  PCI_SRIOV_CAP_VFM	0x01	/* VF Migration Capable */
+#define  PCI_SRIOV_CAP_INTR(x)	((x) >> 21) /* Interrupt Message Number */
+#define PCI_SRIOV_CTRL		0x08	/* SR-IOV Control */
+#define  PCI_SRIOV_CTRL_VFE	0x01	/* VF Enable */
+#define  PCI_SRIOV_CTRL_VFM	0x02	/* VF Migration Enable */
+#define  PCI_SRIOV_CTRL_INTR	0x04	/* VF Migration Interrupt Enable */
+#define  PCI_SRIOV_CTRL_MSE	0x08	/* VF Memory Space Enable */
+#define  PCI_SRIOV_CTRL_ARI	0x10	/* ARI Capable Hierarchy */
+#define PCI_SRIOV_STATUS	0x0a	/* SR-IOV Status */
+#define  PCI_SRIOV_STATUS_VFM	0x01	/* VF Migration Status */
+#define PCI_SRIOV_INITIAL_VF	0x0c	/* Initial VFs */
+#define PCI_SRIOV_TOTAL_VF	0x0e	/* Total VFs */
+#define PCI_SRIOV_NUM_VF	0x10	/* Number of VFs */
+#define PCI_SRIOV_FUNC_LINK	0x12	/* Function Dependency Link */
+#define PCI_SRIOV_VF_OFFSET	0x14	/* First VF Offset */
+#define PCI_SRIOV_VF_STRIDE	0x16	/* Following VF Stride */
+#define PCI_SRIOV_VF_DID	0x1a	/* VF Device ID */
+#define PCI_SRIOV_SUP_PGSIZE	0x1c	/* Supported Page Sizes */
+#define PCI_SRIOV_SYS_PGSIZE	0x20	/* System Page Size */
+#define PCI_SRIOV_BAR		0x24	/* VF BAR0 */
+#define  PCI_SRIOV_NUM_BARS	6	/* Number of VF BARs */
+#define PCI_SRIOV_VFM		0x3c	/* VF Migration State Array Offset*/
+#define  PCI_SRIOV_VFM_BIR(x)	((x) & 7)	/* State BIR */
+#define  PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7)	/* State Offset */
+#define  PCI_SRIOV_VFM_UA	0x0	/* Inactive.Unavailable */
+#define  PCI_SRIOV_VFM_MI	0x1	/* Dormant.MigrateIn */
+#define  PCI_SRIOV_VFM_MO	0x2	/* Active.MigrateOut */
+#define  PCI_SRIOV_VFM_AV	0x3	/* Active.Available */
+
 #endif /* LINUX_PCI_REGS_H */

[-- Attachment #4: dom0-2.patch --]
[-- Type: text/plain, Size: 2389 bytes --]

# HG changeset patch
# User Yu Zhao <yu.zhao@intel.com>
# Date 1237268742 14400
# Node ID 2629935bf356bb7118f8691a46e90daed77c3b48
# Parent  92730fa710446b2502809faa72bb29fda95ba878
PCI: restore saved SR-IOV state

Restore the volatile registers in the SR-IOV capability after the
D3->D0 transition.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>

diff -r 92730fa71044 -r 2629935bf356 drivers/pci/iov.c
--- a/drivers/pci/iov.c	Tue Mar 17 01:43:05 2009 -0400
+++ b/drivers/pci/iov.c	Tue Mar 17 01:45:42 2009 -0400
@@ -125,6 +125,25 @@
 	dev->sriov = NULL;
 }
 
+static void sriov_restore_state(struct pci_dev *dev)
+{
+	int i;
+	u16 ctrl;
+	struct pci_sriov *iov = dev->sriov;
+
+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &ctrl);
+	if (ctrl & PCI_SRIOV_CTRL_VFE)
+		return;
+
+	for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++)
+		pci_update_resource(dev, i);
+
+	pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+	if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
+		msleep(100);
+}
+
 /**
  * pci_iov_init - initialize the IOV capability
  * @dev: the PCI device
@@ -173,3 +192,13 @@
 	return dev->sriov->pos + PCI_SRIOV_BAR +
 		4 * (resno - PCI_IOV_RESOURCES);
 }
+
+/**
+ * pci_restore_iov_state - restore the state of the IOV capability
+ * @dev: the PCI device
+ */
+void pci_restore_iov_state(struct pci_dev *dev)
+{
+	if (dev->is_physfn)
+		sriov_restore_state(dev);
+}
diff -r 92730fa71044 -r 2629935bf356 drivers/pci/pci.c
--- a/drivers/pci/pci.c	Tue Mar 17 01:43:05 2009 -0400
+++ b/drivers/pci/pci.c	Tue Mar 17 01:45:42 2009 -0400
@@ -562,6 +562,8 @@
 	pci_restore_pcix_state(dev);
 	pci_restore_msi_state(dev);
 	pci_restore_msix_state(dev);
+	pci_restore_iov_state(dev);
+
 	return 0;
 }
 
diff -r 92730fa71044 -r 2629935bf356 drivers/pci/pci.h
--- a/drivers/pci/pci.h	Tue Mar 17 01:43:05 2009 -0400
+++ b/drivers/pci/pci.h	Tue Mar 17 01:45:42 2009 -0400
@@ -154,6 +154,7 @@
 extern void pci_iov_release(struct pci_dev *dev);
 extern int pci_iov_resource_bar(struct pci_dev *dev, int resno,
 				enum pci_bar_type *type);
+extern void pci_restore_iov_state(struct pci_dev *dev);
 #else
 static inline int pci_iov_init(struct pci_dev *dev)
 {
@@ -168,4 +169,7 @@
 {
 	return 0;
 }
+static inline void pci_restore_iov_state(struct pci_dev *dev)
+{
+}
 #endif /* CONFIG_PCI_IOV */

[-- Attachment #5: dom0-3.patch --]
[-- Type: text/plain, Size: 2677 bytes --]

# HG changeset patch
# User Yu Zhao <yu.zhao@intel.com>
# Date 1237268873 14400
# Node ID 6b776c705e444562dda66dc0b33fd80eaceb1bfb
# Parent  2629935bf356bb7118f8691a46e90daed77c3b48
PCI: reserve bus range for SR-IOV device

Reserve the bus number range used by the Virtual Function when
pcibios_assign_all_busses() returns true.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>

diff -r 2629935bf356 -r 6b776c705e44 drivers/pci/iov.c
--- a/drivers/pci/iov.c	Tue Mar 17 01:45:42 2009 -0400
+++ b/drivers/pci/iov.c	Tue Mar 17 01:47:53 2009 -0400
@@ -13,6 +13,18 @@
 #include <linux/delay.h>
 #include "pci.h"
 
+
+static inline u8 virtfn_bus(struct pci_dev *dev, int id)
+{
+	return dev->bus->number + ((dev->devfn + dev->sriov->offset +
+				    dev->sriov->stride * id) >> 8);
+}
+
+static inline u8 virtfn_devfn(struct pci_dev *dev, int id)
+{
+	return (dev->devfn + dev->sriov->offset +
+		dev->sriov->stride * id) & 0xff;
+}
 
 static int sriov_init(struct pci_dev *dev, int pos)
 {
@@ -202,3 +214,27 @@
 	if (dev->is_physfn)
 		sriov_restore_state(dev);
 }
+
+/**
+ * pci_iov_bus_range - find bus range used by Virtual Function
+ * @bus: the PCI bus
+ *
+ * Returns max number of buses (exclude current one) used by Virtual
+ * Functions.
+ */
+int pci_iov_bus_range(struct pci_bus *bus)
+{
+	int max = 0;
+	u8 busnr;
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		if (!dev->is_physfn)
+			continue;
+		busnr = virtfn_bus(dev, dev->sriov->total - 1);
+		if (busnr > max)
+			max = busnr;
+	}
+
+	return max ? max - bus->number : 0;
+}
diff -r 2629935bf356 -r 6b776c705e44 drivers/pci/pci.h
--- a/drivers/pci/pci.h	Tue Mar 17 01:45:42 2009 -0400
+++ b/drivers/pci/pci.h	Tue Mar 17 01:47:53 2009 -0400
@@ -155,6 +155,7 @@
 extern int pci_iov_resource_bar(struct pci_dev *dev, int resno,
 				enum pci_bar_type *type);
 extern void pci_restore_iov_state(struct pci_dev *dev);
+extern int pci_iov_bus_range(struct pci_bus *bus);
 #else
 static inline int pci_iov_init(struct pci_dev *dev)
 {
@@ -172,4 +173,8 @@
 static inline void pci_restore_iov_state(struct pci_dev *dev)
 {
 }
+static inline int pci_iov_bus_range(struct pci_bus *bus)
+{
+	return 0;
+}
 #endif /* CONFIG_PCI_IOV */
diff -r 2629935bf356 -r 6b776c705e44 drivers/pci/probe.c
--- a/drivers/pci/probe.c	Tue Mar 17 01:45:42 2009 -0400
+++ b/drivers/pci/probe.c	Tue Mar 17 01:47:53 2009 -0400
@@ -976,6 +976,9 @@
 	for (devfn = 0; devfn < 0x100; devfn += 8)
 		pci_scan_slot(bus, devfn);
 
+	/* Reserve buses for SR-IOV capability. */
+	max += pci_iov_bus_range(bus);
+
 	/*
 	 * After performing arch-dependent fixup of the bus, look behind
 	 * all PCI-to-PCI bridges on this bus.

[-- Attachment #6: dom0-4.patch --]
[-- Type: text/plain, Size: 3758 bytes --]

# HG changeset patch
# User Yu Zhao <yu.zhao@intel.com>
# Date 1237270918 14400
# Node ID 3a2d0f486f533f0ef21267b9a1682997e0caf463
# Parent  6b776c705e444562dda66dc0b33fd80eaceb1bfb
PCI: centralize device setup code

Move the device setup code into pci_setup_device(), which will be used
to set up the Virtual Function later.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>

diff -r 6b776c705e44 -r 3a2d0f486f53 drivers/pci/pci.h
--- a/drivers/pci/pci.h	Tue Mar 17 01:47:53 2009 -0400
+++ b/drivers/pci/pci.h	Tue Mar 17 02:21:58 2009 -0400
@@ -117,6 +117,7 @@
 	pci_bar_mem64,		/* A 64-bit memory BAR */
 };
 
+extern int pci_setup_device(struct pci_dev *dev);
 extern int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
 				struct resource *res, unsigned int reg);
 extern int pci_resource_bar(struct pci_dev *dev, int resno,
diff -r 6b776c705e44 -r 3a2d0f486f53 drivers/pci/probe.c
--- a/drivers/pci/probe.c	Tue Mar 17 01:47:53 2009 -0400
+++ b/drivers/pci/probe.c	Tue Mar 17 02:21:58 2009 -0400
@@ -683,13 +683,28 @@
  * Initialize the device structure with information about the device's 
  * vendor,class,memory and IO-space addresses,IRQ lines etc.
  * Called at initialisation of the PCI subsystem and by CardBus services.
- * Returns 0 on success and -1 if unknown type of device (not normal, bridge
- * or CardBus).
+ * Returns 0 on success and negative if unknown type of device (not normal,
+ * bridge or CardBus).
  */
-static int pci_setup_device(struct pci_dev * dev)
+int pci_setup_device(struct pci_dev *dev)
 {
 	u32 class;
+	u8 hdr_type;
 
+	if (pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type))
+		return -EIO;
+
+	dev->sysdata = dev->bus->sysdata;
+	dev->dev.parent = dev->bus->bridge;
+	dev->dev.bus = &pci_bus_type;
+	dev->hdr_type = hdr_type & 0x7f;
+	dev->multifunction = !!(hdr_type & 0x80);
+	dev->cfg_size = pci_cfg_space_size(dev);
+	dev->error_state = pci_channel_io_normal;
+
+	/* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
+	   set this higher, assuming the system even supports it.  */
+	dev->dma_mask = 0xffffffff;
 	sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus),
 		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
 
@@ -706,7 +721,6 @@
 
 	/* Early fixups, before probing the BARs */
 	pci_fixup_device(pci_fixup_early, dev);
-	class = dev->class >> 8;
 
 	switch (dev->hdr_type) {		    /* header type */
 	case PCI_HEADER_TYPE_NORMAL:		    /* standard header */
@@ -741,7 +755,7 @@
 	default:				    /* unknown header */
 		printk(KERN_ERR "PCI: device %s has unknown header type %02x, ignoring.\n",
 			pci_name(dev), dev->hdr_type);
-		return -1;
+		return -EIO;
 
 	bad:
 		printk(KERN_ERR "PCI: %s: class %x doesn't match header type %02x. Ignoring class.\n",
@@ -823,7 +837,6 @@
 {
 	struct pci_dev *dev;
 	u32 l;
-	u8 hdr_type;
 	int delay = 1;
 
 	if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, &l))
@@ -850,29 +863,16 @@
 		}
 	}
 
-	if (pci_bus_read_config_byte(bus, devfn, PCI_HEADER_TYPE, &hdr_type))
-		return NULL;
-
 	dev = kzalloc(sizeof(struct pci_dev), GFP_KERNEL);
 	if (!dev)
 		return NULL;
 
 	dev->bus = bus;
-	dev->sysdata = bus->sysdata;
-	dev->dev.parent = bus->bridge;
-	dev->dev.bus = &pci_bus_type;
 	dev->devfn = devfn;
-	dev->hdr_type = hdr_type & 0x7f;
-	dev->multifunction = !!(hdr_type & 0x80);
 	dev->vendor = l & 0xffff;
 	dev->device = (l >> 16) & 0xffff;
-	dev->cfg_size = pci_cfg_space_size(dev);
-	dev->error_state = pci_channel_io_normal;
 
-	/* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
-	   set this higher, assuming the system even supports it.  */
-	dev->dma_mask = 0xffffffff;
-	if (pci_setup_device(dev) < 0) {
+	if (pci_setup_device(dev)) {
 		kfree(dev);
 		return NULL;
 	}

[-- Attachment #7: dom0-5.patch --]
[-- Type: text/plain, Size: 10256 bytes --]

# HG changeset patch
# User Yu Zhao <yu.zhao@intel.com>
# Date 1237270982 14400
# Node ID 577169901110eb89ff36f1460e152a5c96297bde
# Parent  3a2d0f486f533f0ef21267b9a1682997e0caf463
PCI: add SR-IOV API for Physical Function driver

Add or remove the Virtual Function when the SR-IOV is enabled or
disabled by the device driver. This can happen anytime rather than
only at the device probe stage.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>

diff -r 3a2d0f486f53 -r 577169901110 drivers/pci/iov.c
--- a/drivers/pci/iov.c	Tue Mar 17 02:21:58 2009 -0400
+++ b/drivers/pci/iov.c	Tue Mar 17 02:23:02 2009 -0400
@@ -13,6 +13,7 @@
 #include <linux/delay.h>
 #include "pci.h"
 
+#define VIRTFN_ID_LEN	16
 
 static inline u8 virtfn_bus(struct pci_dev *dev, int id)
 {
@@ -24,6 +25,267 @@
 {
 	return (dev->devfn + dev->sriov->offset +
 		dev->sriov->stride * id) & 0xff;
+}
+
+static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
+{
+	struct pci_bus *child;
+
+	if (bus->number == busnr)
+		return bus;
+
+	child = pci_find_bus(pci_domain_nr(bus), busnr);
+	if (child)
+		return child;
+
+	child = pci_add_new_bus(bus, NULL, busnr);
+	if (!child)
+		return NULL;
+
+	child->subordinate = busnr;
+
+	return child;
+}
+
+static void virtfn_remove_bus(struct pci_bus *bus, int busnr)
+{
+	struct pci_bus *child;
+
+	if (bus->number == busnr)
+		return;
+
+	child = pci_find_bus(pci_domain_nr(bus), busnr);
+	BUG_ON(!child);
+
+	if (list_empty(&child->devices))
+		pci_remove_bus(child);
+}
+
+static int virtfn_add(struct pci_dev *dev, int id)
+{
+	int i;
+	int rc;
+	u64 size;
+	char buf[VIRTFN_ID_LEN];
+	struct pci_dev *virtfn;
+	struct resource *res;
+	struct pci_sriov *iov = dev->sriov;
+
+	virtfn = kzalloc(sizeof(struct pci_dev), GFP_KERNEL);
+	if (!virtfn)
+		return -ENOMEM;
+
+	mutex_lock(&iov->dev->sriov->lock);
+	virtfn->bus = virtfn_add_bus(dev->bus, virtfn_bus(dev, id));
+	if (!virtfn->bus) {
+		kfree(virtfn);
+		mutex_unlock(&iov->dev->sriov->lock);
+		return -ENOMEM;
+	}
+	virtfn->devfn = virtfn_devfn(dev, id);
+	virtfn->vendor = dev->vendor;
+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_DID, &virtfn->device);
+	pci_setup_device(virtfn);
+	virtfn->dev.parent = dev->dev.parent;
+
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = dev->resource + PCI_IOV_RESOURCES + i;
+		if (!res->parent)
+			continue;
+		virtfn->resource[i].name = pci_name(virtfn);
+		virtfn->resource[i].flags = res->flags;
+		size = res->end - res->start + 1;
+		do_div(size, iov->total);
+		virtfn->resource[i].start = res->start + size * id;
+		virtfn->resource[i].end = virtfn->resource[i].start + size - 1;
+		rc = request_resource(res, &virtfn->resource[i]);
+		BUG_ON(rc);
+	}
+
+	pci_device_add(virtfn, virtfn->bus);
+	mutex_unlock(&iov->dev->sriov->lock);
+
+	virtfn->physfn = pci_dev_get(dev);
+	virtfn->is_virtfn = 1;
+
+	pci_bus_add_device(virtfn);
+	sprintf(buf, "virtfn%u", id);
+	rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf);
+	if (rc)
+		goto failed1;
+	rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn");
+	if (rc)
+		goto failed2;
+
+	kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE);
+
+	return 0;
+
+failed2:
+	sysfs_remove_link(&dev->dev.kobj, buf);
+failed1:
+	pci_dev_put(dev);
+	mutex_lock(&iov->dev->sriov->lock);
+	pci_remove_bus_device(virtfn);
+	virtfn_remove_bus(dev->bus, virtfn_bus(dev, id));
+	mutex_unlock(&iov->dev->sriov->lock);
+
+	return rc;
+}
+
+static void virtfn_remove(struct pci_dev *dev, int id)
+{
+	char buf[VIRTFN_ID_LEN];
+	struct pci_bus *bus;
+	struct pci_dev *virtfn;
+	struct pci_sriov *iov = dev->sriov;
+
+	bus = pci_find_bus(pci_domain_nr(dev->bus), virtfn_bus(dev, id));
+	if (!bus)
+		return;
+
+	virtfn = pci_get_slot(bus, virtfn_devfn(dev, id));
+	if (!virtfn)
+		return;
+
+	pci_dev_put(virtfn);
+
+	sprintf(buf, "virtfn%u", id);
+	sysfs_remove_link(&dev->dev.kobj, buf);
+	sysfs_remove_link(&virtfn->dev.kobj, "physfn");
+
+	mutex_lock(&iov->dev->sriov->lock);
+	pci_remove_bus_device(virtfn);
+	virtfn_remove_bus(dev->bus, virtfn_bus(dev, id));
+	mutex_unlock(&iov->dev->sriov->lock);
+
+	pci_dev_put(dev);
+}
+
+static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
+{
+	int rc;
+	int i, j;
+	int nres;
+	u16 offset, stride, initial;
+	struct resource *res;
+	struct pci_dev *pdev;
+	struct pci_sriov *iov = dev->sriov;
+
+	if (!nr_virtfn)
+		return 0;
+
+	if (iov->nr_virtfn)
+		return -EINVAL;
+
+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial);
+	if (initial > iov->total ||
+	    (!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total)))
+		return -EIO;
+
+	if (nr_virtfn < 0 || nr_virtfn > iov->total ||
+	    (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial)))
+		return -EINVAL;
+
+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &offset);
+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &stride);
+	if (!offset || (nr_virtfn > 1 && !stride))
+		return -EIO;
+
+	nres = 0;
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = dev->resource + PCI_IOV_RESOURCES + i;
+		if (res->parent)
+			nres++;
+	}
+	if (nres != iov->nres) {
+		dev_err(&dev->dev, "not enough MMIO resources for SR-IOV\n");
+		return -ENOMEM;
+	}
+
+	iov->offset = offset;
+	iov->stride = stride;
+
+	if (virtfn_bus(dev, nr_virtfn - 1) > dev->bus->subordinate) {
+		dev_err(&dev->dev, "SR-IOV: bus number out of range\n");
+		return -ENOMEM;
+	}
+
+	if (iov->link != dev->devfn) {
+		pdev = pci_get_slot(dev->bus, iov->link);
+		if (!pdev)
+			return -ENODEV;
+
+		pci_dev_put(pdev);
+
+		if (!pdev->is_physfn)
+			return -ENODEV;
+
+		rc = sysfs_create_link(&dev->dev.kobj,
+					&pdev->dev.kobj, "dep_link");
+		if (rc)
+			return rc;
+	}
+
+	iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
+	pci_block_user_cfg_access(dev);
+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+	msleep(100);
+	pci_unblock_user_cfg_access(dev);
+
+	iov->initial = initial;
+	if (nr_virtfn < initial)
+		initial = nr_virtfn;
+
+	for (i = 0; i < initial; i++) {
+		rc = virtfn_add(dev, i);
+		if (rc)
+			goto failed;
+	}
+
+	kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE);
+	iov->nr_virtfn = nr_virtfn;
+
+	return 0;
+
+failed:
+	for (j = 0; j < i; j++)
+		virtfn_remove(dev, j);
+
+	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
+	pci_block_user_cfg_access(dev);
+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+	ssleep(1);
+	pci_unblock_user_cfg_access(dev);
+
+	if (iov->link != dev->devfn)
+		sysfs_remove_link(&dev->dev.kobj, "dep_link");
+
+	return rc;
+}
+
+static void sriov_disable(struct pci_dev *dev)
+{
+	int i;
+	struct pci_sriov *iov = dev->sriov;
+
+	if (!iov->nr_virtfn)
+		return;
+
+	for (i = 0; i < iov->nr_virtfn; i++)
+		virtfn_remove(dev, i);
+
+	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
+	pci_block_user_cfg_access(dev);
+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+	ssleep(1);
+	pci_unblock_user_cfg_access(dev);
+
+	if (iov->link != dev->devfn)
+		sysfs_remove_link(&dev->dev.kobj, "dep_link");
+
+	iov->nr_virtfn = 0;
 }
 
 static int sriov_init(struct pci_dev *dev, int pos)
@@ -128,6 +390,8 @@
 
 static void sriov_release(struct pci_dev *dev)
 {
+	BUG_ON(dev->sriov->nr_virtfn);
+
 	if (dev == dev->sriov->dev)
 		mutex_destroy(&dev->sriov->lock);
 	else
@@ -151,6 +415,7 @@
 		pci_update_resource(dev, i);
 
 	pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->nr_virtfn);
 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
 	if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
 		msleep(100);
@@ -238,3 +503,35 @@
 
 	return max ? max - bus->number : 0;
 }
+
+/**
+ * pci_enable_sriov - enable the SR-IOV capability
+ * @dev: the PCI device
+ *
+ * Returns 0 on success, or negative on failure.
+ */
+int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
+{
+	might_sleep();
+
+	if (!dev->is_physfn)
+		return -ENODEV;
+
+	return sriov_enable(dev, nr_virtfn);
+}
+EXPORT_SYMBOL_GPL(pci_enable_sriov);
+
+/**
+ * pci_disable_sriov - disable the SR-IOV capability
+ * @dev: the PCI device
+ */
+void pci_disable_sriov(struct pci_dev *dev)
+{
+	might_sleep();
+
+	if (!dev->is_physfn)
+		return;
+
+	sriov_disable(dev);
+}
+EXPORT_SYMBOL_GPL(pci_disable_sriov);
diff -r 3a2d0f486f53 -r 577169901110 drivers/pci/pci.h
--- a/drivers/pci/pci.h	Tue Mar 17 02:21:58 2009 -0400
+++ b/drivers/pci/pci.h	Tue Mar 17 02:23:02 2009 -0400
@@ -141,6 +141,8 @@
 	u32 cap;		/* SR-IOV Capabilities */
 	u16 ctrl;		/* SR-IOV Control */
 	u16 total;		/* total VFs associated with the PF */
+	u16 initial;		/* initial VFs associated with the PF */
+	u16 nr_virtfn;		/* number of VFs available */
 	u16 offset;		/* first VF Routing ID offset */
 	u16 stride;		/* following VF stride */
 	u32 pgsz;		/* page size for BAR alignment */
diff -r 3a2d0f486f53 -r 577169901110 include/linux/pci.h
--- a/include/linux/pci.h	Tue Mar 17 02:21:58 2009 -0400
+++ b/include/linux/pci.h	Tue Mar 17 02:23:02 2009 -0400
@@ -199,6 +199,7 @@
 	unsigned int	msix_enabled:1;
 	unsigned int	ari_enabled:1;	/* ARI forwarding */
 	unsigned int	is_physfn:1;
+	unsigned int	is_virtfn:1;
 
 	u32		saved_config_space[16]; /* config space saved at suspend time */
 	struct hlist_head saved_cap_space;
@@ -206,7 +207,10 @@
 	int rom_attr_enabled;		/* has display of the rom attribute been enabled? */
 	struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */
 #ifdef CONFIG_PCI_IOV
-	struct pci_sriov *sriov;	/* SR-IOV capability related */
+	union {
+		struct pci_sriov *sriov;	/* SR-IOV capability related */
+		struct pci_dev *physfn;	/* the PF this VF is associated with */
+	};
 #endif
 };
 
@@ -829,5 +833,18 @@
 int pci_is_guestdev(struct pci_dev *dev);
 #endif /* CONFIG_PCI_GUESTDEV */
 
+#ifdef CONFIG_PCI_IOV
+extern int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
+extern void pci_disable_sriov(struct pci_dev *dev);
+#else
+static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
+{
+	return -ENODEV;
+}
+static inline void pci_disable_sriov(struct pci_dev *dev)
+{
+}
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* LINUX_PCI_H */

[-- Attachment #8: dom0-6.patch --]
[-- Type: text/plain, Size: 2407 bytes --]

# HG changeset patch
# User Yu Zhao <yu.zhao@intel.com>
# Date 1237353055 14400
# Node ID 582ec8e86ffff64834e8c77ef6790774352ddc7a
# Parent  577169901110eb89ff36f1460e152a5c96297bde
PCI: pass ARI and SR-IOV device information to the hypervisor

PCIe Alternative Routing-ID Interpretation (ARI) ECN defines the Extended
Function -- a function whose function number is greater than 7 within an
ARI Device. Intel VT-d spec 1.2 section 8.3.2 specifies that the Extended
Function is under the scope of the same remapping unit as the traditional
function. The hypervisor needs to know whether a function is an Extended
Function so it can find the proper DMAR for it.

And section 8.3.3 specifies that the SR-IOV Virtual Function is under the
scope of the same remapping unit as the Physical Function. The hypervisor
also needs to know whether a function is a Virtual Function and which
Physical Function it is associated with, for the same reason.

diff -r 577169901110 -r 582ec8e86fff drivers/xen/core/pci.c
--- a/drivers/xen/core/pci.c	Tue Mar 17 02:23:02 2009 -0400
+++ b/drivers/xen/core/pci.c	Wed Mar 18 01:10:55 2009 -0400
@@ -6,6 +6,7 @@
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <xen/interface/physdev.h>
+#include "../../pci/pci.h"
 
 static int (*pci_bus_probe)(struct device *dev);
 static int (*pci_bus_remove)(struct device *dev);
@@ -15,8 +16,16 @@
 	int r;
 	struct pci_dev *pci_dev = to_pci_dev(dev);
 	struct physdev_manage_pci manage_pci;
+
+	memset(&manage_pci, 0, sizeof(manage_pci));
 	manage_pci.bus = pci_dev->bus->number;
 	manage_pci.devfn = pci_dev->devfn;
+	if (pci_dev->is_virtfn) {
+		manage_pci.is_virtfn = 1;
+		manage_pci.physfn.bus = pci_dev->physfn->bus->number;
+		manage_pci.physfn.devfn = pci_dev->physfn->devfn;
+	} else if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn))
+		manage_pci.is_extfn = 1;
 
 	r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add, &manage_pci);
 	if (r && r != -ENOSYS)
diff -r 577169901110 -r 582ec8e86fff include/xen/interface/physdev.h
--- a/include/xen/interface/physdev.h	Tue Mar 17 02:23:02 2009 -0400
+++ b/include/xen/interface/physdev.h	Wed Mar 18 01:10:55 2009 -0400
@@ -178,6 +178,12 @@
     /* IN */
     uint8_t bus;
     uint8_t devfn;
+    unsigned is_extfn:1;
+    unsigned is_virtfn:1;
+    struct {
+        uint8_t bus;
+        uint8_t devfn;
+    } physfn;
 }; 
 
 typedef struct physdev_manage_pci physdev_manage_pci_t;

[-- Attachment #9: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

  reply	other threads:[~2009-03-18  9:22 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-03-18  7:32 Xen 3.4 code freeze Keir Fraser
2009-03-18  9:22 ` Zhao, Yu [this message]
2009-03-18 15:35 ` Jan Beulich
2009-03-18 15:50   ` Keir Fraser

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=49C0BD5B.5000500@intel.com \
    --to=yu.zhao@intel.com \
    --cc=keir.fraser@eu.citrix.com \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.