All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] dom0 linux: Reassign memory resources to device for pci passthrough.
@ 2008-10-08 23:43 Yuji Shimada
  2008-10-09  1:04 ` Neo Jia
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Yuji Shimada @ 2008-10-08 23:43 UTC (permalink / raw)
  To: Keir Fraser, xen-devel

This patch adds the function that reassign page-aligned memory
resources, to dom0 linux. The function is useful when we assign I/O
device to HVM domain using pci passthrough.


When we assign a device to HVM domain using pci passthrough,
the device needs to be assigned page-aligned memory resources. If the
memory resource is not page-aligned, following error occurs.

    Error: pci: 0000:00:1d.7: non-page-aligned MMIO BAR found.

On many system, BIOS assigns memory resources to the device and
enables it. So my patch disables the device, and releases resources.
Then it assigns page-aligned memory resource to the device.

Note: we don't need to re-enable device, because guest software will
enable it.


To reassign resources, please add boot parameters of dom0 linux as follows.

    reassign_resources reassigndev=00:1d.7,01:00.0

	reassign_resources
			Enables reassigning resources.

	reassigndev=	Specifies devices include I/O device and PCI-PCI
			bridge to reassign resources.  PCI-PCI bridge
			can be specified, if resource windows need to
			be expanded.

You can easily improve the way of specifying device to reassign,
changing the code of reassigndev.c.


There is a similar function enabled by pci-mem-align boot
parameter. Currently it is kept. But if many people agree with me, I'd
like to remove it from dom0 linux, because there are two problems.

    - pci-mem-align reassigns all devices' memory resources if they are
      not page-aligned. This is not safe, because some devices are
      used by firmware.
    - pci-mem-align can't expand resource window of PCI-PCI bridge.

Thanks,
--
Yuji Shimada


Signed-off-by: Yuji Shimada <shimada-yxb@necst.nec.co.jp>

diff -r b54652ee29ef drivers/pci/Makefile
--- a/drivers/pci/Makefile	Thu Oct 02 11:29:02 2008 +0100
+++ b/drivers/pci/Makefile	Wed Oct 08 12:12:27 2008 +0900
@@ -3,7 +3,8 @@
 #
 
 obj-y		+= access.o bus.o probe.o remove.o pci.o quirks.o \
-			pci-driver.o search.o pci-sysfs.o rom.o setup-res.o
+			pci-driver.o search.o pci-sysfs.o rom.o setup-res.o \
+			reassigndev.o
 obj-$(CONFIG_PROC_FS) += proc.o
 
 # Build PCI Express stuff if needed
diff -r b54652ee29ef drivers/pci/pci.h
--- a/drivers/pci/pci.h	Thu Oct 02 11:29:02 2008 +0100
+++ b/drivers/pci/pci.h	Wed Oct 08 12:12:27 2008 +0900
@@ -99,3 +99,8 @@
 	return NULL;
 }
 
+#define ROUND_UP_TO_PAGESIZE(size) ((size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
+
+extern int reassign_resources;
+extern int is_reassigndev(struct pci_dev *dev);
+extern void pci_update_bridge(struct pci_dev *dev, int resno);
diff -r b54652ee29ef drivers/pci/quirks.c
--- a/drivers/pci/quirks.c	Thu Oct 02 11:29:02 2008 +0100
+++ b/drivers/pci/quirks.c	Wed Oct 08 12:12:27 2008 +0900
@@ -33,6 +33,19 @@
 }
 __setup("pci-mem-align", set_pci_mem_align);
 
+
+int reassign_resources = 0;
+
+static int __init set_reassign_resources(char *str)
+{
+	/* resources reassign on */
+	reassign_resources = 1;
+	printk(KERN_DEBUG "PCI: resource reassign ON.\n");
+
+	return 1;
+}
+__setup("reassign_resources", set_reassign_resources);
+
 /* This quirk function enables us to force all memory resources which are 
  * assigned to PCI devices, to be page-aligned.
  */
@@ -41,6 +54,42 @@
 	int i;
 	struct resource *r;
 	resource_size_t old_start;
+
+	if (reassign_resources) {
+		if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
+		    (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) {
+			/* PCI Host Bridge isn't a target device */
+			return;
+		}
+		if (is_reassigndev(dev)) {
+			printk(KERN_INFO 
+				"PCI: Disable device and release resources"
+				" [%s].\n", pci_name(dev));
+			pci_disable_device(dev);
+
+			for (i=0; i < PCI_NUM_RESOURCES; i++) {
+				r = &dev->resource[i];
+				if ((r == NULL) || 
+				   !(r->flags & IORESOURCE_MEM))
+					continue;
+
+				r->end = r->end - r->start;
+				r->start = 0;
+
+				if (i < PCI_BRIDGE_RESOURCES) {
+					pci_update_resource(dev, r, i);
+				} else if (i == 8 || i == 9) {
+					/* need to update(clear) the Base/Limit
+					 * register also, because PCI bridge is
+					 * disabled and the resource is 
+					 * released.
+					 */
+					pci_update_bridge(dev, i);
+				}
+			}
+		}
+		return;
+	}
 
 	if (!pci_mem_align)
 		return;
diff -r b54652ee29ef drivers/pci/reassigndev.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/pci/reassigndev.c	Wed Oct 08 12:12:27 2008 +0900
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2008, NEC Corporation.
+ *      Taro Nichiden <t-nichiden at ab jp nec com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include "pci.h"
+
+
+#define	REASSIGNDEV_PARAM_MAX	(2048)
+#define	TOKEN_MAX	(12)	/* "SSSS:BB:DD.F" length is 12 */
+
+static char param_reassigndev[REASSIGNDEV_PARAM_MAX] = {0};
+
+static int __init reassigndev_setup(char *str)
+{
+	strncpy(param_reassigndev, str, REASSIGNDEV_PARAM_MAX);
+	param_reassigndev[REASSIGNDEV_PARAM_MAX - 1] = '\0';
+	return 1;
+}
+__setup("reassigndev=", reassigndev_setup);
+
+int is_reassigndev(struct pci_dev *dev)
+{
+	char dev_str[TOKEN_MAX+1];
+	int seg, bus, slot, func;
+	int len;
+	char *p, *next_str;
+
+	p = param_reassigndev;
+	for (; p; p = next_str + 1) {
+		next_str = strpbrk(p, ",");
+		if (next_str) {
+			len = next_str - p;
+		} else {
+			len = strlen(p);
+		}
+		if (len > 0 && len <= TOKEN_MAX) {
+			strncpy(dev_str, p, len);
+			*(dev_str + len) = '\0';
+
+			if (sscanf(dev_str, "%x:%x:%x.%x", 
+				&seg, &bus, &slot, &func) != 4) {
+				if (sscanf(dev_str, "%x:%x.%x", 
+					&bus, &slot, &func) == 3) {
+					seg = 0;
+				} else {
+					/* failed to scan strings */
+					seg = -1;
+					bus = -1;
+				}
+			}
+			if (seg == pci_domain_nr(dev->bus) &&
+			    bus == dev->bus->number &&
+			    slot == PCI_SLOT(dev->devfn) &&
+			    func == PCI_FUNC(dev->devfn)) {
+				/* It's a target device */
+				return 1;
+			}
+		}
+		if (!next_str)
+			break;
+	}
+
+	return 0;
+}
diff -r b54652ee29ef drivers/pci/setup-bus.c
--- a/drivers/pci/setup-bus.c	Thu Oct 02 11:29:02 2008 +0100
+++ b/drivers/pci/setup-bus.c	Wed Oct 08 12:12:27 2008 +0900
@@ -26,6 +26,7 @@
 #include <linux/cache.h>
 #include <linux/slab.h>
 
+#include "pci.h"
 
 #define DEBUG_CONFIG 1
 #if DEBUG_CONFIG
@@ -344,7 +345,8 @@
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		int i;
-		
+		int reassign = reassign_resources ? is_reassigndev(dev) : 0;
+
 		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
 			struct resource *r = &dev->resource[i];
 			unsigned long r_size;
@@ -352,6 +354,11 @@
 			if (r->parent || (r->flags & mask) != type)
 				continue;
 			r_size = r->end - r->start + 1;
+
+			if (reassign) {
+				r_size = ROUND_UP_TO_PAGESIZE(r_size);
+			}
+
 			/* For bridges size != alignment */
 			align = (i < PCI_BRIDGE_RESOURCES) ? r_size : r->start;
 			order = __ffs(align) - 20;
diff -r b54652ee29ef drivers/pci/setup-res.c
--- a/drivers/pci/setup-res.c	Thu Oct 02 11:29:02 2008 +0100
+++ b/drivers/pci/setup-res.c	Wed Oct 08 12:12:27 2008 +0900
@@ -117,19 +117,96 @@
 }
 EXPORT_SYMBOL_GPL(pci_claim_resource);
 
+void 
+pci_update_bridge(struct pci_dev *dev, int resno)
+{
+	struct resource *res = &dev->resource[resno]; 
+	struct pci_bus_region region;
+	u32 l, dw, base_up32, limit_up32;
+
+	if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE ||
+	    (dev->class >> 8) != PCI_CLASS_BRIDGE_PCI) {
+		return;
+	}
+
+	if (!res->flags)
+		return;
+
+	switch (resno) {
+	case 8 :	/* MMIO Base/Limit */
+		pcibios_resource_to_bus(dev, &region, res);
+		if (res->flags & IORESOURCE_MEM &&
+		    !(res->flags & IORESOURCE_PREFETCH)) {
+			l = (region.start >> 16) & 0xfff0;
+			l |= region.end & 0xfff00000;
+		} else {
+			l = 0x0000fff0;
+		}
+		pci_write_config_dword(dev, PCI_MEMORY_BASE, l);
+
+		break;
+
+	case 9 :	/* Prefetchable MMIO Base/Limit */
+		/* Clear out the upper 32 bits of PREF limit.
+		 * If PCI_PREF_BASE_UPPER32 was non-zero, this temporarily
+		 * disables PREF range, which is ok.
+		 */
+		pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0);
+
+		/* Get PREF 32/64 bits Addressing mode */
+		pci_read_config_dword(dev, PCI_PREF_MEMORY_BASE, &dw);
+
+		pcibios_resource_to_bus(dev, &region, res);
+		if (res->flags & IORESOURCE_MEM &&
+		    res->flags & IORESOURCE_PREFETCH) {
+			l = (region.start >> 16) & 0xfff0;
+			l |= region.end & 0xfff00000;
+
+			if (dw & PCI_PREF_RANGE_TYPE_64) {
+				base_up32 = (region.start >> 32) & 0xffffffff;
+				limit_up32 = (region.end >> 32) & 0xffffffff;
+			} else {
+				base_up32 = 0;
+				limit_up32 = 0;
+			}
+		} else {
+			l = 0x0000fff0;
+			base_up32 = 0xffffffff;
+			limit_up32 = 0;
+		}
+		pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, l);
+		/* Set up the upper 32 bits of PREF base/limit. */
+		pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, base_up32);
+		pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, limit_up32);
+		break;
+	default :
+		BUG();
+		break;
+	}
+}
+
 int pci_assign_resource(struct pci_dev *dev, int resno)
 {
 	struct pci_bus *bus = dev->bus;
 	struct resource *res = dev->resource + resno;
 	resource_size_t size, min, align;
 	int ret;
+	int reassigndev = reassign_resources ? is_reassigndev(dev) : 0;
 
 	size = res->end - res->start + 1;
 	min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
 	/* The bridge resources are special, as their
 	   size != alignment. Sizing routines return
 	   required alignment in the "start" field. */
-	align = (resno < PCI_BRIDGE_RESOURCES) ? size : res->start;
+	if (resno < PCI_BRIDGE_RESOURCES) {
+		align = size;
+		if ((reassigndev) &&
+		    (res->flags & IORESOURCE_MEM)) {
+			align = ROUND_UP_TO_PAGESIZE(align);
+		}
+	} else {
+		align = res->start;
+	}
 
 	/* First, try exact prefetching match.. */
 	ret = pci_bus_alloc_resource(bus, res, size, align, min,
@@ -154,6 +231,9 @@
 			resno, (unsigned long long)size,
 			(unsigned long long)res->start, pci_name(dev));
 	} else if (resno < PCI_BRIDGE_RESOURCES) {
+		printk(KERN_DEBUG "PCI: Assign resource(%d) on %s "
+			"%016llx - %016llx\n", resno, pci_name(dev),
+			(u64)res->start, (u64)res->end);
 		pci_update_resource(dev, res, resno);
 	}

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2008-10-10 10:13 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-10-08 23:43 [PATCH] dom0 linux: Reassign memory resources to device for pci passthrough Yuji Shimada
2008-10-09  1:04 ` Neo Jia
2008-10-09  8:04   ` Yuji Shimada
2008-10-09  3:21 ` Zhao, Yu
2008-10-09  8:54   ` Yuji Shimada
2008-10-09 11:34     ` Zhao, Yu
2008-10-09  7:20 ` Yuji Shimada
2008-10-10  7:05 ` Zhao, Yu
2008-10-10 10:13   ` Yuji Shimada

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.