All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nishanth Aravamudan <nacc@us.ibm.com>
To: sonnyrao@us.ibm.com, miltonm@bga.com,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Paul Mackerras <paulus@samba.org>,
	Grant Likely <grant.likely@secretlab.ca>,
	Anton Blanchard <anton@samba.org>,
	linuxppc-dev@lists.ozlabs.org
Subject: [PATCH 7/7 v3] ppc: add dynamic dma window support
Date: Fri, 10 Dec 2010 16:07:44 -0800	[thread overview]
Message-ID: <20101211000744.GA12355@us.ibm.com> (raw)
In-Reply-To: <20101209190920.GB25003@us.ibm.com>

On 09.12.2010 [11:09:20 -0800], Nishanth Aravamudan wrote:
> On 26.10.2010 [20:35:17 -0700], Nishanth Aravamudan wrote:
> > If firmware allows us to map all of a partition's memory for DMA on a
> > particular bridge, create a 1:1 mapping of that memory. Add hooks for
> > dealing with hotplug events. Dyanmic DMA windows can use larger than the
> > default page size, and we use the largest one possible.
> > 
> > Not-yet-signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
> > 
> > ---
> > 
> > I've tested this briefly on a machine with suitable firmware/hardware.
> > Things seem to work well, but I want to do more exhaustive I/O testing
> > before asking for upstream merging. I would really appreciate any
> > feedback on the updated approach.
> > 
> > Specific questions:
> > 
> > Ben, did I hook into the dma_set_mask() platform callback as you
> > expected? Anything I can do better or which perhaps might lead to
> > gotchas later?
> > 
> > I've added a disable_ddw option, but perhaps it would be better to
> > just disable the feature if iommu=force?
> 
> So for the final version, I probably should document this option in
> kernel-parameters.txt w/ the patch, right?

Here's an updated version. Ben, think you can pick this up to your tree?

Thanks,
Nish

ppc: add dynamic dma window support
    
If firmware allows us to map all of a partition's memory for DMA on a
particular bridge, create a 1:1 mapping of that memory. Add hooks for
dealing with hotplug events. Dyanmic DMA windows can use larger than the
default page size, and we use the largest one possible.
    
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>

---

I've tested this fairly heavily on a machine with suitable
firmware/hardware, including dlpar operations.

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index cdd2a6e..e9ac890 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -618,6 +618,10 @@ and is between 256 and 4096 characters. It is defined in the file
 	disable=	[IPV6]
 			See Documentation/networking/ipv6.txt.
 
+	disable_ddw     [PPC]
+			Disable Dynamic DMA Window support. Use this if
+			to workaround buggy firmware.
+
 	disable_ipv6=	[IPV6]
 			See Documentation/networking/ipv6.txt.
 
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 45c6865..4ba2338 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -33,6 +33,7 @@
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
 #include <linux/crash_dump.h>
+#include <linux/memory.h>
 #include <asm/io.h>
 #include <asm/prom.h>
 #include <asm/rtas.h>
@@ -45,6 +46,7 @@
 #include <asm/tce.h>
 #include <asm/ppc-pci.h>
 #include <asm/udbg.h>
+#include <asm/mmzone.h>
 
 #include "plpar_wrappers.h"
 
@@ -270,6 +272,137 @@ static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum)
 	return tce_ret;
 }
 
+/* this is compatable with cells for the device tree property */
+struct dynamic_dma_window_prop {
+	__be32	liobn;		/* tce table number */
+	__be64	dma_base;	/* address hi,lo */
+	__be32	tce_shift;	/* ilog2(tce_page_size) */
+	__be32	window_shift;	/* ilog2(tce_window_size) */
+};
+
+struct direct_window {
+	struct device_node *device;
+	const struct dynamic_dma_window_prop *prop;
+	struct list_head list;
+};
+static LIST_HEAD(direct_window_list);
+/* prevents races between memory on/offline and window creation */
+static DEFINE_SPINLOCK(direct_window_list_lock);
+/* protects initializing window twice for same device */
+static DEFINE_MUTEX(direct_window_init_mutex);
+#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
+
+static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn,
+					unsigned long num_pfn, const void *arg)
+{
+	const struct dynamic_dma_window_prop *maprange = arg;
+	int rc;
+	u64 tce_size, num_tce, dma_offset, next;
+	u32 tce_shift;
+	long limit;
+
+	tce_shift = be32_to_cpu(maprange->tce_shift);
+	tce_size = 1ULL << tce_shift;
+	next = start_pfn << PAGE_SHIFT;
+	num_tce = num_pfn << PAGE_SHIFT;
+
+	/* round back to the beginning of the tce page size */
+	num_tce += next & (tce_size - 1);
+	next &= ~(tce_size - 1);
+
+	/* covert to number of tces */
+	num_tce |= tce_size - 1;
+	num_tce >>= tce_shift;
+
+	do {
+		/*
+		 * Set up the page with TCE data, looping through and setting
+		 * the values.
+		 */
+		limit = min_t(long, num_tce, 512);
+		dma_offset = next + be64_to_cpu(maprange->dma_base);
+
+		rc = plpar_tce_stuff((u64)be32_to_cpu(maprange->liobn),
+					    (u64)dma_offset,
+					     0, limit);
+		num_tce -= limit;
+	} while (num_tce > 0 && !rc);
+
+	return rc;
+}
+
+static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
+					unsigned long num_pfn, const void *arg)
+{
+	const struct dynamic_dma_window_prop *maprange = arg;
+	u64 *tcep, tce_size, num_tce, dma_offset, next, proto_tce, liobn;
+	u32 tce_shift;
+	u64 rc = 0;
+	long l, limit;
+
+	local_irq_disable();	/* to protect tcep and the page behind it */
+	tcep = __get_cpu_var(tce_page);
+
+	if (!tcep) {
+		tcep = (u64 *)__get_free_page(GFP_ATOMIC);
+		if (!tcep) {
+			local_irq_enable();
+			return -ENOMEM;
+		}
+		__get_cpu_var(tce_page) = tcep;
+	}
+
+	proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
+
+	liobn = (u64)be32_to_cpu(maprange->liobn);
+	tce_shift = be32_to_cpu(maprange->tce_shift);
+	tce_size = 1ULL << tce_shift;
+	next = start_pfn << PAGE_SHIFT;
+	num_tce = num_pfn << PAGE_SHIFT;
+
+	/* round back to the beginning of the tce page size */
+	num_tce += next & (tce_size - 1);
+	next &= ~(tce_size - 1);
+
+	/* covert to number of tces */
+	num_tce |= tce_size - 1;
+	num_tce >>= tce_shift;
+
+	/* We can map max one pageful of TCEs at a time */
+	do {
+		/*
+		 * Set up the page with TCE data, looping through and setting
+		 * the values.
+		 */
+		limit = min_t(long, num_tce, 4096/TCE_ENTRY_SIZE);
+		dma_offset = next + be64_to_cpu(maprange->dma_base);
+
+		for (l = 0; l < limit; l++) {
+			tcep[l] = proto_tce | next;
+			next += tce_size;
+		}
+
+		rc = plpar_tce_put_indirect(liobn,
+					    (u64)dma_offset,
+					    (u64)virt_to_abs(tcep),
+					    limit);
+
+		num_tce -= limit;
+	} while (num_tce > 0 && !rc);
+
+	/* error cleanup: caller will clear whole range */
+
+	local_irq_enable();
+	return rc;
+}
+
+static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn,
+		unsigned long num_pfn, void *arg)
+{
+	return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg);
+}
+
+
 #ifdef CONFIG_PCI
 static void iommu_table_setparms(struct pci_controller *phb,
 				 struct device_node *dn,
@@ -449,8 +582,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
 	if (!ppci->iommu_table) {
 		tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
 				   ppci->phb->node);
-		iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window,
-			bus->number);
+		iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window);
 		ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node);
 		pr_debug("  created table: %p\n", ppci->iommu_table);
 	}
@@ -496,6 +628,328 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
 		       pci_name(dev));
 }
 
+static int __read_mostly disable_ddw;
+
+static int __init disable_ddw_setup(char *str)
+{
+	disable_ddw = 1;
+	printk(KERN_INFO "ppc iommu: disabling ddw.\n");
+
+	return 0;
+}
+
+early_param("disable_ddw", disable_ddw_setup);
+
+static void remove_ddw(struct device_node *np)
+{
+	struct dynamic_dma_window_prop *dwp;
+	struct property *win64;
+	const u32 *ddr_avail;
+	u64 liobn;
+	int len, ret;
+
+	ddr_avail = of_get_property(np, "ibm,ddw-applicable", &len);
+	win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
+	if (!win64 || !ddr_avail || len < 3 * sizeof(u32))
+		return;
+
+	dwp = win64->value;
+	liobn = (u64)be32_to_cpu(dwp->liobn);
+
+	/* clear the whole window, note the arg is in kernel pages */
+	ret = tce_clearrange_multi_pSeriesLP(0,
+		1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp);
+	if (ret)
+		pr_warning("%s failed to clear tces in window.\n",
+			 np->full_name);
+	else
+		pr_warning("%s successfully cleared tces in window.\n",
+			 np->full_name);
+
+	ret = rtas_call(ddr_avail[2], 1, 1, NULL, liobn);
+	if (ret)
+		pr_warning("%s: failed to remove direct window: rtas returned "
+			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
+			np->full_name, ret, ddr_avail[2], liobn);
+	else
+		pr_warning("%s: successfully removed direct window: rtas returned "
+			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
+			np->full_name, ret, ddr_avail[2], liobn);
+}
+
+
+static int dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node *pdn)
+{
+	struct device_node *dn;
+	struct pci_dn *pcidn;
+	struct direct_window *window;
+	const struct dynamic_dma_window_prop *direct64;
+	u64 dma_addr;
+
+	dn = pci_device_to_OF_node(dev);
+	pcidn = PCI_DN(dn);
+	spin_lock(&direct_window_list_lock);
+	/* check if we already created a window and dupe that config if so */
+	list_for_each_entry(window, &direct_window_list, list) {
+		if (window->device == pdn) {
+			direct64 = window->prop;
+			dma_addr = direct64->dma_base;
+			break;
+		}
+	}
+	spin_unlock(&direct_window_list_lock);
+
+	return dma_addr;
+}
+
+static u64 dupe_ddw_if_kexec(struct pci_dev *dev, struct device_node *pdn)
+{
+	struct device_node *dn;
+	struct pci_dn *pcidn;
+	int len;
+	struct direct_window *window;
+	const struct dynamic_dma_window_prop *direct64;
+	u64 dma_addr;
+
+	dn = pci_device_to_OF_node(dev);
+	pcidn = PCI_DN(dn);
+	direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len);
+	if (direct64) {
+		window = kzalloc(sizeof(*window), GFP_KERNEL);
+		if (!window) {
+			remove_ddw(pdn);
+		} else {
+			window->device = pdn;
+			window->prop = direct64;
+			spin_lock(&direct_window_list_lock);
+			list_add(&window->list, &direct_window_list);
+			spin_unlock(&direct_window_list_lock);
+			dma_addr = direct64->dma_base;
+		}
+	}
+
+	return dma_addr;
+}
+
+static int query_ddw(struct pci_dev *dev, const u32 *ddr_avail, u32 *query)
+{
+	struct device_node *dn;
+	struct pci_dn *pcidn;
+	u32 cfg_addr;
+	u64 buid;
+	int ret;
+
+	/*
+	 * Get the config address and phb build of the PE window.
+	 * Rely on eeh to retrieve this for us.
+	 * Retrieve them from the pci device, not the node with the
+	 * dma-window property
+	 */
+	dn = pci_device_to_OF_node(dev);
+	pcidn = PCI_DN(dn);
+	cfg_addr = pcidn->eeh_config_addr;
+	if (pcidn->eeh_pe_config_addr)
+		cfg_addr = pcidn->eeh_pe_config_addr;
+	buid = pcidn->phb->buid;
+	ret = rtas_call(ddr_avail[0], 3, 5, query,
+		  cfg_addr, BUID_HI(buid), BUID_LO(buid));
+	dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x"
+		" returned %d\n", ddr_avail[0], cfg_addr, BUID_HI(buid),
+		BUID_LO(buid), ret);
+	return ret;
+}
+
+static int create_ddw(struct pci_dev *dev, const u32 *ddr_avail, u32 *create, int page_shift, int window_shift)
+{
+	struct device_node *dn;
+	struct pci_dn *pcidn;
+	u32 cfg_addr;
+	u64 buid;
+	int ret;
+
+	/*
+	 * Get the config address and phb build of the PE window.
+	 * Rely on eeh to retrieve this for us.
+	 * Retrieve them from the pci device, not the node with the
+	 * dma-window property
+	 */
+	dn = pci_device_to_OF_node(dev);
+	pcidn = PCI_DN(dn);
+	cfg_addr = pcidn->eeh_config_addr;
+	if (pcidn->eeh_pe_config_addr)
+		cfg_addr = pcidn->eeh_pe_config_addr;
+	buid = pcidn->phb->buid;
+
+	do {
+		/* extra outputs are LIOBN and dma-addr (hi, lo) */
+		ret = rtas_call(ddr_avail[1], 5, 4, &create[0], cfg_addr,
+				BUID_HI(buid), BUID_LO(buid), page_shift, window_shift);
+	} while(rtas_busy_delay(ret));
+	dev_info(&dev->dev,
+		"ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
+		"(liobn = 0x%x starting addr = %x %x)\n", ddr_avail[1],
+		 cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift,
+		 window_shift, ret, create[0], create[1], create[2]);
+	
+	return ret;
+}
+
+/*
+ * If the PE supports dynamic dma windows, and there is space for a table
+ * that can map all pages in a linear offset, then setup such a table,
+ * and record the dma-offset in the struct device.
+ *
+ * dev: the pci device we are checking
+ * pdn: the parent pe node with the ibm,dma_window property
+ * Future: also check if we can remap the base window for our base page size
+ *
+ * returns the dma offset for use by dma_set_mask
+ */
+static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
+{
+	int len, ret;
+	u32 query[4], create[3];
+	int page_shift;
+	u64 dma_addr, max_addr;
+	struct device_node *dn;
+	const u32 *uninitialized_var(ddr_avail);
+	struct direct_window *window;
+	struct property *uninitialized_var(win64);
+	struct dynamic_dma_window_prop *ddwprop;
+
+	mutex_lock(&direct_window_init_mutex);
+
+	dma_addr = dupe_ddw_if_already_created(dev, pdn);
+	if (dma_addr != 0)
+		goto out_unlock;
+
+	dma_addr = dupe_ddw_if_kexec(dev, pdn);
+	if (dma_addr != 0)
+		goto out_unlock;
+
+	/*
+	 * the ibm,ddw-applicable property holds the tokens for:
+	 * ibm,query-pe-dma-window
+	 * ibm,create-pe-dma-window
+	 * ibm,remove-pe-dma-window
+	 * for the given node in that order.
+	 * the property is actually in the parent, not the PE
+	 */
+	ddr_avail = of_get_property(pdn, "ibm,ddw-applicable", &len);
+	if (!ddr_avail || len < 3 * sizeof(u32))
+		goto out_unlock;
+
+       /*
+	 * Query if there is a second window of size to map the
+	 * whole partition.  Query returns number of windows, largest
+	 * block assigned to PE (partition endpoint), and two bitmasks
+	 * of page sizes: supported and supported for migrate-dma.
+	 */
+	dn = pci_device_to_OF_node(dev);
+	ret = query_ddw(dev, ddr_avail, &query[0]);
+	if (ret != 0)
+		goto out_unlock;
+
+	if (!query[0]) {
+		/*
+		 * no additional windows are available for this device.
+		 * We might be able to reallocate the existing window,
+		 * trading in for a larger page size.
+		 */
+		dev_dbg(&dev->dev, "no free dynamic windows");
+		goto out_unlock;
+	}
+	if (query[2] & 4) {
+		page_shift = 24; /* 16MB */
+	} else if (query[2] & 2) {
+		page_shift = 16; /* 64kB */
+	} else if (query[2] & 1) {
+		page_shift = 12; /* 4kB */
+	} else {
+		dev_dbg(&dev->dev, "no supported direct page size in mask %x",
+			  query[2]);
+		goto out_unlock;
+	}
+	/* verify the window * number of ptes will map the partition */
+	/* check largest block * page size > max memory hotplug addr */
+	max_addr = memory_hotplug_max();
+	if (query[1] < (max_addr >> page_shift)) {
+		dev_dbg(&dev->dev, "can't map partiton max 0x%llx with %u "
+			  "%llu-sized pages\n", max_addr,  query[1],
+			  1ULL << page_shift);
+		goto out_unlock;
+	}
+	len = order_base_2(max_addr);
+	win64 = kzalloc(sizeof(struct property), GFP_KERNEL);
+	if (!win64) {
+		dev_info(&dev->dev,
+			"couldn't allocate property for 64bit dma window\n");
+		goto out_unlock;
+	}
+	win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL);
+	win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL);
+	if (!win64->name || !win64->value) {
+		dev_info(&dev->dev,
+			"couldn't allocate property name and value\n");
+		goto out_free_prop;
+	}
+
+	ret = create_ddw(dev, ddr_avail, &create[0], page_shift, len);
+	if (ret != 0)
+		goto out_free_prop;
+
+	*ddwprop = (struct dynamic_dma_window_prop) {
+		.liobn = cpu_to_be32(create[0]),
+		.dma_base = cpu_to_be64(((u64)create[1] << 32) + (u64)create[2]),
+		.tce_shift = cpu_to_be32(page_shift),
+		.window_shift = cpu_to_be32(len)
+	};
+
+	dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %s\n",
+		  create[0], dn->full_name);
+
+	window = kzalloc(sizeof(*window), GFP_KERNEL);
+	if (!window)
+		goto out_clear_window;
+
+	ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
+			win64->value, tce_setrange_multi_pSeriesLP_walk);
+	if (ret) {
+		dev_info(&dev->dev, "failed to map direct window for %s: %d\n",
+			 dn->full_name, ret);
+		goto out_clear_window;
+	}
+
+	ret = prom_add_property(pdn, win64);
+	if (ret) {
+		dev_err(&dev->dev, "unable to add dma window property for %s: %d",
+			 pdn->full_name, ret);
+		goto out_clear_window;
+	}
+
+	window->device = pdn;
+	window->prop = ddwprop;
+	spin_lock(&direct_window_list_lock);
+	list_add(&window->list, &direct_window_list);
+	spin_unlock(&direct_window_list_lock);
+
+	dma_addr = of_read_number(&create[1], 2);
+	set_dma_offset(&dev->dev, dma_addr);
+	goto out_unlock;
+
+out_clear_window:
+	remove_ddw(pdn);
+
+out_free_prop:
+	kfree(win64->name);
+	kfree(win64->value);
+	kfree(win64);
+
+out_unlock:
+	mutex_unlock(&direct_window_init_mutex);
+	return dma_addr;
+}
+
 static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
 {
 	struct device_node *pdn, *dn;
@@ -542,23 +996,129 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
 
 	set_iommu_table_base(&dev->dev, pci->iommu_table);
 }
+
+static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
+{
+	bool ddw_enabled = false;
+	struct device_node *pdn, *dn;
+	struct pci_dev *pdev;
+	const void *dma_window = NULL;
+	u64 dma_offset;
+
+	if (!dev->dma_mask || !dma_supported(dev, dma_mask))
+		return -EIO;
+
+	/* only attempt to use a new window if 64-bit DMA is requested */
+	if (!disable_ddw && dma_mask == DMA_BIT_MASK(64)) {
+		pdev = to_pci_dev(dev);
+
+		dn = pci_device_to_OF_node(pdev);
+		dev_dbg(dev, "node is %s\n", dn->full_name);
+
+		/* 
+		 * the device tree might contain the dma-window properties
+		 * per-device and not neccesarily for the bus. So we need to
+		 * search upwards in the tree until we either hit a dma-window
+		 * property, OR find a parent with a table already allocated.
+		 */
+		for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table;
+				pdn = pdn->parent) {
+			dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
+			if (dma_window)
+				break;
+		}
+		if (pdn && PCI_DN(pdn)) {
+			dma_offset = enable_ddw(pdev, pdn);
+			if (dma_offset != 0) {
+				dev_info(dev, "Using 64-bit direct DMA at offset %llx\n", dma_offset);
+				set_dma_offset(dev, dma_offset);
+				set_dma_ops(dev, &dma_direct_ops);
+				ddw_enabled = true;
+			}
+		}
+	}
+
+	/* fall-through to iommu ops */
+	if (!ddw_enabled) {
+		dev_info(dev, "Using 32-bit DMA via iommu\n");
+		set_dma_ops(dev, &dma_iommu_ops);
+	}
+
+	*dev->dma_mask = dma_mask;
+	return 0;
+}
+
 #else  /* CONFIG_PCI */
 #define pci_dma_bus_setup_pSeries	NULL
 #define pci_dma_dev_setup_pSeries	NULL
 #define pci_dma_bus_setup_pSeriesLP	NULL
 #define pci_dma_dev_setup_pSeriesLP	NULL
+#define dma_set_mask_pSeriesLP		NULL
 #endif /* !CONFIG_PCI */
 
+static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
+		void *data)
+{
+	struct direct_window *window;
+	struct memory_notify *arg = data;
+	int ret = 0;
+
+	switch (action) {
+	case MEM_GOING_ONLINE:
+		spin_lock(&direct_window_list_lock);
+		list_for_each_entry(window, &direct_window_list, list) {
+			ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn,
+					arg->nr_pages, window->prop);
+			/* XXX log error */
+		}
+		spin_unlock(&direct_window_list_lock);
+		break;
+	case MEM_CANCEL_ONLINE:
+	case MEM_OFFLINE:
+		spin_lock(&direct_window_list_lock);
+		list_for_each_entry(window, &direct_window_list, list) {
+			ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn,
+					arg->nr_pages, window->prop);
+			/* XXX log error */
+		}
+		spin_unlock(&direct_window_list_lock);
+		break;
+	default:
+		break;
+	}
+	if (ret && action != MEM_CANCEL_ONLINE)
+		return NOTIFY_BAD;
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block iommu_mem_nb = {
+	.notifier_call = iommu_mem_notifier,
+};
+
 static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node)
 {
 	int err = NOTIFY_OK;
 	struct device_node *np = node;
 	struct pci_dn *pci = PCI_DN(np);
+	struct direct_window *window;
 
 	switch (action) {
 	case PSERIES_RECONFIG_REMOVE:
 		if (pci && pci->iommu_table)
 			iommu_free_table(pci->iommu_table, np->full_name);
+
+		spin_lock(&direct_window_list_lock);
+		list_for_each_entry(window, &direct_window_list, list) {
+			if (window->device == np) {
+				list_del(&window->list);
+				kfree(window);
+				break;
+			}
+		}
+		spin_unlock(&direct_window_list_lock);
+
+		remove_ddw(np);
 		break;
 	default:
 		err = NOTIFY_DONE;
@@ -588,6 +1148,7 @@ void iommu_init_early_pSeries(void)
 		ppc_md.tce_get   = tce_get_pSeriesLP;
 		ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_pSeriesLP;
 		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_pSeriesLP;
+		ppc_md.dma_set_mask = dma_set_mask_pSeriesLP;
 	} else {
 		ppc_md.tce_build = tce_build_pSeries;
 		ppc_md.tce_free  = tce_free_pSeries;
@@ -598,6 +1159,7 @@ void iommu_init_early_pSeries(void)
 
 
 	pSeries_reconfig_notifier_register(&iommu_reconfig_nb);
+	register_memory_notifier(&iommu_mem_nb);
 
 	set_pci_dma_ops(&dma_iommu_ops);
 }

-- 
Nishanth Aravamudan <nacc@us.ibm.com>
IBM Linux Technology Center

  reply	other threads:[~2010-12-11  0:07 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-10-27  3:35 [RFC PATCH 0/7 v2] ppc: enable dynamic dma window support Nishanth Aravamudan
2010-10-27  3:35 ` [RFC PATCH 1/7 v2] macio: ensure all dma routines get copied over Nishanth Aravamudan
2010-10-27  3:35 ` [RFC PATCH 2/7 v2] ppc: add memory_hotplug_max Nishanth Aravamudan
2010-10-27  3:35 ` [RFC PATCH 3/7 v2] ppc: do not search for dma-window property on dlpar remove Nishanth Aravamudan
2010-11-29  1:38   ` Benjamin Herrenschmidt
2010-12-01  0:30     ` Nishanth Aravamudan
2010-12-04  0:30     ` Nishanth Aravamudan
2010-10-27  3:35 ` [RFC PATCH 4/7 v2] ppc: checking for pdn->parent is redundant Nishanth Aravamudan
2010-10-27  3:35 ` [RFC PATCH 5/7 v2] ppc/iommu: do not need to check for dma_window == NULL Nishanth Aravamudan
2010-10-27  3:35 ` [RFC PATCH 6/7 v2] ppc/iommu: pass phb only to iommu_table_setparms_lpar Nishanth Aravamudan
2010-12-09  4:24   ` Benjamin Herrenschmidt
2010-12-09 16:16     ` Nishanth Aravamudan
2010-10-27  3:35 ` [RFC PATCH 7/7 v2] ppc: add dynamic dma window support Nishanth Aravamudan
2010-12-09  4:17   ` Benjamin Herrenschmidt
2010-12-09 19:00     ` Nishanth Aravamudan
2010-12-09 19:09   ` Nishanth Aravamudan
2010-12-11  0:07     ` Nishanth Aravamudan [this message]
2011-01-08  2:53       ` [PATCH] ppc: update dynamic dma support Nishanth Aravamudan
2011-01-17 17:32         ` [PATCH v2] " Nishanth Aravamudan
2011-01-18  0:20           ` [PATCH v3] " Nishanth Aravamudan
2010-11-08 19:42 ` [RFC PATCH 0/7 v2] ppc: enable dynamic dma window support Nishanth Aravamudan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20101211000744.GA12355@us.ibm.com \
    --to=nacc@us.ibm.com \
    --cc=anton@samba.org \
    --cc=benh@kernel.crashing.org \
    --cc=grant.likely@secretlab.ca \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=miltonm@bga.com \
    --cc=paulus@samba.org \
    --cc=sonnyrao@us.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.