* [PATCH -mm 0/3] convert IOMMUs to use iova
@ 2007-11-02 17:05 FUJITA Tomonori
  2007-11-02 17:05 ` [PATCH 1/3] move iova from drivers/pci/ to lib/ FUJITA Tomonori
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: FUJITA Tomonori @ 2007-11-02 17:05 UTC
  To: linux-kernel, linux-scsi
  Cc: James.Bottomley, jens.axboe, jeff, anil.s.keshavamurthy, muli,
	paulus, anton, olof, tony.luck, davem, kyle, fujita.tomonori

This patchset converts the PPC64 IOMMU to use the iova code for free
area management.

The IOMMUs ignore low-level drivers' restrictions: the maximum
segment size and the segment boundary.

I fixed the former:

http://thread.gmane.org/gmane.linux.scsi/35602

The latter makes the free area management complicated. Rather than
fixing each IOMMU's free area management separately, I'd like to
convert the IOMMUs to use the iova code (which intel-iommu introduced)
for free area management and enable iova to handle segment boundary
restrictions.
I converted the PPC64 IOMMU to see how things work out. The patchset
was lightly tested on a pSeries box and seems to work.

This is against Linus' latest git tree, plus David Miller's patch
that genericizes the iova code:

http://www.ussg.iu.edu/hypermail/linux/kernel/0710.2/2910.html

This patchset is also available via git:

git://git.kernel.org/pub/scm/linux/kernel/git/tomo/linux-2.6-misc.git iova
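
As a reference point, here is a minimal sketch (not part of the
series) of how an IOMMU's range allocator can delegate free area
management to the iova API; the wrapper name and error value are
hypothetical, and the real conversion is done in patch 3/3:

	/* hypothetical wrapper mirroring patch 3/3's iommu_range_alloc() */
	static unsigned long example_range_alloc(struct iova_domain *iovad,
						 unsigned long npages,
						 unsigned long limit_pfn)
	{
		/* size-aligned allocation, searched downward from limit_pfn */
		struct iova *iova = alloc_iova(iovad, npages, limit_pfn, 1);

		return iova ? iova->pfn_lo : ~0UL; /* ~0UL: sketch-only error code */
	}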


* [PATCH 1/3] move iova from drivers/pci/ to lib/
  2007-11-02 17:05 [PATCH -mm 0/3] convert IOMMUs to use iova FUJITA Tomonori
@ 2007-11-02 17:05 ` FUJITA Tomonori
  2007-11-02 17:05 ` [PATCH 2/3] move iova cache code to iova.c FUJITA Tomonori
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 7+ messages in thread
From: FUJITA Tomonori @ 2007-11-02 17:05 UTC
  To: linux-kernel, linux-scsi
  Cc: James.Bottomley, jens.axboe, jeff, anil.s.keshavamurthy, muli,
	paulus, anton, olof, tony.luck, davem, kyle, fujita.tomonori

The iova code could be used by several IOMMUs. This patch simply
moves iova from drivers/pci/ to lib/ and updates the appropriate
Makefile and Kconfig files.
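
For a new user, the interface after the move looks like the sketch
below (the function and domain names are made up, and patch 2/3 later
adds a kmem_cache argument to init_iova_domain()):

	#include <linux/iova.h>

	static struct iova_domain my_domain;	/* hypothetical user */

	static void my_iommu_setup(unsigned long pfn_32bit)
	{
		/* pfn_32bit seeds the allocator's 32-bit caching logic */
		init_iova_domain(&my_domain, pfn_32bit);
	}

	static void my_iommu_teardown(void)
	{
		/* destroys the domain, freeing every iova left in the rbtree */
		put_iova_domain(&my_domain);
	}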

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
---
 arch/x86/Kconfig.x86_64   |    1 +
 drivers/pci/Makefile      |    2 +-
 drivers/pci/intel-iommu.c |    1 -
 drivers/pci/intel-iommu.h |    2 +-
 drivers/pci/iova.c        |  394 ---------------------------------------------
 drivers/pci/iova.h        |   51 ------
 include/linux/iova.h      |   51 ++++++
 lib/Kconfig               |    3 +
 lib/Makefile              |    2 +
 lib/iova.c                |  394 +++++++++++++++++++++++++++++++++++++++++++++
 10 files changed, 453 insertions(+), 448 deletions(-)
 delete mode 100644 drivers/pci/iova.c
 delete mode 100644 drivers/pci/iova.h
 create mode 100644 include/linux/iova.h
 create mode 100644 lib/iova.c

diff --git a/arch/x86/Kconfig.x86_64 b/arch/x86/Kconfig.x86_64
index cc468ea..c10d3f0 100644
--- a/arch/x86/Kconfig.x86_64
+++ b/arch/x86/Kconfig.x86_64
@@ -749,6 +749,7 @@ config PCI_DOMAINS
 config DMAR
 	bool "Support for DMA Remapping Devices (EXPERIMENTAL)"
 	depends on PCI_MSI && ACPI && EXPERIMENTAL
+	select IOVA
 	help
 	  DMA remapping (DMAR) devices support enables independent address
 	  translations for Direct Memory Access (DMA) from devices.
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 5550556..00c3428 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -21,7 +21,7 @@ obj-$(CONFIG_PCI_MSI) += msi.o
 obj-$(CONFIG_HT_IRQ) += htirq.o
 
 # Build Intel IOMMU support
-obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
+obj-$(CONFIG_DMAR) += dmar.o intel-iommu.o
 
 #
 # Some architectures use the generic PCI setup functions
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index dc1edbd..1a23b4c 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -30,7 +30,6 @@
 #include <linux/dmar.h>
 #include <linux/dma-mapping.h>
 #include <linux/mempool.h>
-#include "iova.h"
 #include "intel-iommu.h"
 #include <asm/proto.h> /* force_iommu in this header in x86-64*/
 #include <asm/cacheflush.h>
diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h
index f54efd1..750e1b5 100644
--- a/drivers/pci/intel-iommu.h
+++ b/drivers/pci/intel-iommu.h
@@ -23,7 +23,7 @@
 
 #include <linux/types.h>
 #include <linux/msi.h>
-#include "iova.h"
+#include <linux/iova.h>
 #include <linux/io.h>
 
 /*
diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c
deleted file mode 100644
index 8de7ab6..0000000
--- a/drivers/pci/iova.c
+++ /dev/null
@@ -1,394 +0,0 @@
-/*
- * Copyright (c) 2006, Intel Corporation.
- *
- * This file is released under the GPLv2.
- *
- * Copyright (C) 2006 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
- */
-
-#include "iova.h"
-
-void
-init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit)
-{
-	spin_lock_init(&iovad->iova_alloc_lock);
-	spin_lock_init(&iovad->iova_rbtree_lock);
-	iovad->rbroot = RB_ROOT;
-	iovad->cached32_node = NULL;
-	iovad->dma_32bit_pfn = pfn_32bit;
-}
-
-static struct rb_node *
-__get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn)
-{
-	if ((*limit_pfn != iovad->dma_32bit_pfn) ||
-		(iovad->cached32_node == NULL))
-		return rb_last(&iovad->rbroot);
-	else {
-		struct rb_node *prev_node = rb_prev(iovad->cached32_node);
-		struct iova *curr_iova =
-			container_of(iovad->cached32_node, struct iova, node);
-		*limit_pfn = curr_iova->pfn_lo - 1;
-		return prev_node;
-	}
-}
-
-static void
-__cached_rbnode_insert_update(struct iova_domain *iovad,
-	unsigned long limit_pfn, struct iova *new)
-{
-	if (limit_pfn != iovad->dma_32bit_pfn)
-		return;
-	iovad->cached32_node = &new->node;
-}
-
-static void
-__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
-{
-	struct iova *cached_iova;
-	struct rb_node *curr;
-
-	if (!iovad->cached32_node)
-		return;
-	curr = iovad->cached32_node;
-	cached_iova = container_of(curr, struct iova, node);
-
-	if (free->pfn_lo >= cached_iova->pfn_lo)
-		iovad->cached32_node = rb_next(&free->node);
-}
-
-/* Computes the padding size required, to make the
- * the start address naturally aligned on its size
- */
-static int
-iova_get_pad_size(int size, unsigned int limit_pfn)
-{
-	unsigned int pad_size = 0;
-	unsigned int order = ilog2(size);
-
-	if (order)
-		pad_size = (limit_pfn + 1) % (1 << order);
-
-	return pad_size;
-}
-
-static int __alloc_iova_range(struct iova_domain *iovad, unsigned long size,
-		unsigned long limit_pfn, struct iova *new, bool size_aligned)
-{
-	struct rb_node *curr = NULL;
-	unsigned long flags;
-	unsigned long saved_pfn;
-	unsigned int pad_size = 0;
-
-	/* Walk the tree backwards */
-	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
-	saved_pfn = limit_pfn;
-	curr = __get_cached_rbnode(iovad, &limit_pfn);
-	while (curr) {
-		struct iova *curr_iova = container_of(curr, struct iova, node);
-		if (limit_pfn < curr_iova->pfn_lo)
-			goto move_left;
-		else if (limit_pfn < curr_iova->pfn_hi)
-			goto adjust_limit_pfn;
-		else {
-			if (size_aligned)
-				pad_size = iova_get_pad_size(size, limit_pfn);
-			if ((curr_iova->pfn_hi + size + pad_size) <= limit_pfn)
-				break;	/* found a free slot */
-		}
-adjust_limit_pfn:
-		limit_pfn = curr_iova->pfn_lo - 1;
-move_left:
-		curr = rb_prev(curr);
-	}
-
-	if (!curr) {
-		if (size_aligned)
-			pad_size = iova_get_pad_size(size, limit_pfn);
-		if ((IOVA_START_PFN + size + pad_size) > limit_pfn) {
-			spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-			return -ENOMEM;
-		}
-	}
-
-	/* pfn_lo will point to size aligned address if size_aligned is set */
-	new->pfn_lo = limit_pfn - (size + pad_size) + 1;
-	new->pfn_hi = new->pfn_lo + size - 1;
-
-	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-	return 0;
-}
-
-static void
-iova_insert_rbtree(struct rb_root *root, struct iova *iova)
-{
-	struct rb_node **new = &(root->rb_node), *parent = NULL;
-	/* Figure out where to put new node */
-	while (*new) {
-		struct iova *this = container_of(*new, struct iova, node);
-		parent = *new;
-
-		if (iova->pfn_lo < this->pfn_lo)
-			new = &((*new)->rb_left);
-		else if (iova->pfn_lo > this->pfn_lo)
-			new = &((*new)->rb_right);
-		else
-			BUG(); /* this should not happen */
-	}
-	/* Add new node and rebalance tree. */
-	rb_link_node(&iova->node, parent, new);
-	rb_insert_color(&iova->node, root);
-}
-
-/**
- * alloc_iova - allocates an iova
- * @iovad - iova domain in question
- * @size - size of page frames to allocate
- * @limit_pfn - max limit address
- * @size_aligned - set if size_aligned address range is required
- * This function allocates an iova in the range limit_pfn to IOVA_START_PFN
- * looking from limit_pfn instead from IOVA_START_PFN. If the size_aligned
- * flag is set then the allocated address iova->pfn_lo will be naturally
- * aligned on roundup_power_of_two(size).
- */
-struct iova *
-alloc_iova(struct iova_domain *iovad, unsigned long size,
-	unsigned long limit_pfn,
-	bool size_aligned)
-{
-	unsigned long flags;
-	struct iova *new_iova;
-	int ret;
-
-	new_iova = alloc_iova_mem();
-	if (!new_iova)
-		return NULL;
-
-	/* If size aligned is set then round the size to
-	 * to next power of two.
-	 */
-	if (size_aligned)
-		size = __roundup_pow_of_two(size);
-
-	spin_lock_irqsave(&iovad->iova_alloc_lock, flags);
-	ret = __alloc_iova_range(iovad, size, limit_pfn, new_iova,
-			size_aligned);
-
-	if (ret) {
-		spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags);
-		free_iova_mem(new_iova);
-		return NULL;
-	}
-
-	/* Insert the new_iova into domain rbtree by holding writer lock */
-	spin_lock(&iovad->iova_rbtree_lock);
-	iova_insert_rbtree(&iovad->rbroot, new_iova);
-	__cached_rbnode_insert_update(iovad, limit_pfn, new_iova);
-	spin_unlock(&iovad->iova_rbtree_lock);
-
-	spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags);
-
-	return new_iova;
-}
-
-/**
- * find_iova - find's an iova for a given pfn
- * @iovad - iova domain in question.
- * pfn - page frame number
- * This function finds and returns an iova belonging to the
- * given doamin which matches the given pfn.
- */
-struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
-{
-	unsigned long flags;
-	struct rb_node *node;
-
-	/* Take the lock so that no other thread is manipulating the rbtree */
-	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
-	node = iovad->rbroot.rb_node;
-	while (node) {
-		struct iova *iova = container_of(node, struct iova, node);
-
-		/* If pfn falls within iova's range, return iova */
-		if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) {
-			spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-			/* We are not holding the lock while this iova
-			 * is referenced by the caller as the same thread
-			 * which called this function also calls __free_iova()
-			 * and it is by desing that only one thread can possibly
-			 * reference a particular iova and hence no conflict.
-			 */
-			return iova;
-		}
-
-		if (pfn < iova->pfn_lo)
-			node = node->rb_left;
-		else if (pfn > iova->pfn_lo)
-			node = node->rb_right;
-	}
-
-	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-	return NULL;
-}
-
-/**
- * __free_iova - frees the given iova
- * @iovad: iova domain in question.
- * @iova: iova in question.
- * Frees the given iova belonging to the giving domain
- */
-void
-__free_iova(struct iova_domain *iovad, struct iova *iova)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
-	__cached_rbnode_delete_update(iovad, iova);
-	rb_erase(&iova->node, &iovad->rbroot);
-	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-	free_iova_mem(iova);
-}
-
-/**
- * free_iova - finds and frees the iova for a given pfn
- * @iovad: - iova domain in question.
- * @pfn: - pfn that is allocated previously
- * This functions finds an iova for a given pfn and then
- * frees the iova from that domain.
- */
-void
-free_iova(struct iova_domain *iovad, unsigned long pfn)
-{
-	struct iova *iova = find_iova(iovad, pfn);
-	if (iova)
-		__free_iova(iovad, iova);
-
-}
-
-/**
- * put_iova_domain - destroys the iova doamin
- * @iovad: - iova domain in question.
- * All the iova's in that domain are destroyed.
- */
-void put_iova_domain(struct iova_domain *iovad)
-{
-	struct rb_node *node;
-	unsigned long flags;
-
-	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
-	node = rb_first(&iovad->rbroot);
-	while (node) {
-		struct iova *iova = container_of(node, struct iova, node);
-		rb_erase(node, &iovad->rbroot);
-		free_iova_mem(iova);
-		node = rb_first(&iovad->rbroot);
-	}
-	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-}
-
-static int
-__is_range_overlap(struct rb_node *node,
-	unsigned long pfn_lo, unsigned long pfn_hi)
-{
-	struct iova *iova = container_of(node, struct iova, node);
-
-	if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
-		return 1;
-	return 0;
-}
-
-static struct iova *
-__insert_new_range(struct iova_domain *iovad,
-	unsigned long pfn_lo, unsigned long pfn_hi)
-{
-	struct iova *iova;
-
-	iova = alloc_iova_mem();
-	if (!iova)
-		return iova;
-
-	iova->pfn_hi = pfn_hi;
-	iova->pfn_lo = pfn_lo;
-	iova_insert_rbtree(&iovad->rbroot, iova);
-	return iova;
-}
-
-static void
-__adjust_overlap_range(struct iova *iova,
-	unsigned long *pfn_lo, unsigned long *pfn_hi)
-{
-	if (*pfn_lo < iova->pfn_lo)
-		iova->pfn_lo = *pfn_lo;
-	if (*pfn_hi > iova->pfn_hi)
-		*pfn_lo = iova->pfn_hi + 1;
-}
-
-/**
- * reserve_iova - reserves an iova in the given range
- * @iovad: - iova domain pointer
- * @pfn_lo: - lower page frame address
- * @pfn_hi:- higher pfn adderss
- * This function allocates reserves the address range from pfn_lo to pfn_hi so
- * that this address is not dished out as part of alloc_iova.
- */
-struct iova *
-reserve_iova(struct iova_domain *iovad,
-	unsigned long pfn_lo, unsigned long pfn_hi)
-{
-	struct rb_node *node;
-	unsigned long flags;
-	struct iova *iova;
-	unsigned int overlap = 0;
-
-	spin_lock_irqsave(&iovad->iova_alloc_lock, flags);
-	spin_lock(&iovad->iova_rbtree_lock);
-	for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
-		if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
-			iova = container_of(node, struct iova, node);
-			__adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
-			if ((pfn_lo >= iova->pfn_lo) &&
-				(pfn_hi <= iova->pfn_hi))
-				goto finish;
-			overlap = 1;
-
-		} else if (overlap)
-				break;
-	}
-
-	/* We are here either becasue this is the first reserver node
-	 * or need to insert remaining non overlap addr range
-	 */
-	iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
-finish:
-
-	spin_unlock(&iovad->iova_rbtree_lock);
-	spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags);
-	return iova;
-}
-
-/**
- * copy_reserved_iova - copies the reserved between domains
- * @from: - source doamin from where to copy
- * @to: - destination domin where to copy
- * This function copies reserved iova's from one doamin to
- * other.
- */
-void
-copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
-{
-	unsigned long flags;
-	struct rb_node *node;
-
-	spin_lock_irqsave(&from->iova_alloc_lock, flags);
-	spin_lock(&from->iova_rbtree_lock);
-	for (node = rb_first(&from->rbroot); node; node = rb_next(node)) {
-		struct iova *iova = container_of(node, struct iova, node);
-		struct iova *new_iova;
-		new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
-		if (!new_iova)
-			printk(KERN_ERR "Reserve iova range %lx@%lx failed\n",
-				iova->pfn_lo, iova->pfn_lo);
-	}
-	spin_unlock(&from->iova_rbtree_lock);
-	spin_unlock_irqrestore(&from->iova_alloc_lock, flags);
-}
diff --git a/drivers/pci/iova.h b/drivers/pci/iova.h
deleted file mode 100644
index d521b5b..0000000
--- a/drivers/pci/iova.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2006, Intel Corporation.
- *
- * This file is released under the GPLv2.
- *
- * Copyright (C) 2006 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
- *
- */
-
-#ifndef _IOVA_H_
-#define _IOVA_H_
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/rbtree.h>
-#include <linux/dma-mapping.h>
-
-/* IO virtual address start page frame number */
-#define IOVA_START_PFN		(1)
-
-/* iova structure */
-struct iova {
-	struct rb_node	node;
-	unsigned long	pfn_hi; /* IOMMU dish out addr hi */
-	unsigned long	pfn_lo; /* IOMMU dish out addr lo */
-};
-
-/* holds all the iova translations for a domain */
-struct iova_domain {
-	spinlock_t	iova_alloc_lock;/* Lock to protect iova  allocation */
-	spinlock_t	iova_rbtree_lock; /* Lock to protect update of rbtree */
-	struct rb_root	rbroot;		/* iova domain rbtree root */
-	struct rb_node	*cached32_node; /* Save last alloced node */
-	unsigned long	dma_32bit_pfn;
-};
-
-struct iova *alloc_iova_mem(void);
-void free_iova_mem(struct iova *iova);
-void free_iova(struct iova_domain *iovad, unsigned long pfn);
-void __free_iova(struct iova_domain *iovad, struct iova *iova);
-struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size,
-	unsigned long limit_pfn,
-	bool size_aligned);
-struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
-	unsigned long pfn_hi);
-void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
-void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit);
-struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
-void put_iova_domain(struct iova_domain *iovad);
-
-#endif
diff --git a/include/linux/iova.h b/include/linux/iova.h
new file mode 100644
index 0000000..d521b5b
--- /dev/null
+++ b/include/linux/iova.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This file is released under the GPLv2.
+ *
+ * Copyright (C) 2006 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
+ *
+ */
+
+#ifndef _IOVA_H_
+#define _IOVA_H_
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/rbtree.h>
+#include <linux/dma-mapping.h>
+
+/* IO virtual address start page frame number */
+#define IOVA_START_PFN		(1)
+
+/* iova structure */
+struct iova {
+	struct rb_node	node;
+	unsigned long	pfn_hi; /* IOMMU dish out addr hi */
+	unsigned long	pfn_lo; /* IOMMU dish out addr lo */
+};
+
+/* holds all the iova translations for a domain */
+struct iova_domain {
+	spinlock_t	iova_alloc_lock;/* Lock to protect iova  allocation */
+	spinlock_t	iova_rbtree_lock; /* Lock to protect update of rbtree */
+	struct rb_root	rbroot;		/* iova domain rbtree root */
+	struct rb_node	*cached32_node; /* Save last alloced node */
+	unsigned long	dma_32bit_pfn;
+};
+
+struct iova *alloc_iova_mem(void);
+void free_iova_mem(struct iova *iova);
+void free_iova(struct iova_domain *iovad, unsigned long pfn);
+void __free_iova(struct iova_domain *iovad, struct iova *iova);
+struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size,
+	unsigned long limit_pfn,
+	bool size_aligned);
+struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
+	unsigned long pfn_hi);
+void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
+void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit);
+struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
+void put_iova_domain(struct iova_domain *iovad);
+
+#endif
diff --git a/lib/Kconfig b/lib/Kconfig
index ba3d104..4107547 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -141,4 +141,7 @@ config HAS_DMA
 config CHECK_SIGNATURE
 	bool
 
+config IOVA
+	boolean
+
 endmenu
diff --git a/lib/Makefile b/lib/Makefile
index 3a0983b..7de33cc 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -66,6 +66,8 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o
 obj-$(CONFIG_SWIOTLB) += swiotlb.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
 
+obj-$(CONFIG_IOVA) += iova.o
+
 lib-$(CONFIG_GENERIC_BUG) += bug.o
 
 hostprogs-y	:= gen_crc32table
diff --git a/lib/iova.c b/lib/iova.c
new file mode 100644
index 0000000..6e14a3f
--- /dev/null
+++ b/lib/iova.c
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This file is released under the GPLv2.
+ *
+ * Copyright (C) 2006 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
+ */
+
+#include <linux/iova.h>
+
+void
+init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit)
+{
+	spin_lock_init(&iovad->iova_alloc_lock);
+	spin_lock_init(&iovad->iova_rbtree_lock);
+	iovad->rbroot = RB_ROOT;
+	iovad->cached32_node = NULL;
+	iovad->dma_32bit_pfn = pfn_32bit;
+}
+
+static struct rb_node *
+__get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn)
+{
+	if ((*limit_pfn != iovad->dma_32bit_pfn) ||
+		(iovad->cached32_node == NULL))
+		return rb_last(&iovad->rbroot);
+	else {
+		struct rb_node *prev_node = rb_prev(iovad->cached32_node);
+		struct iova *curr_iova =
+			container_of(iovad->cached32_node, struct iova, node);
+		*limit_pfn = curr_iova->pfn_lo - 1;
+		return prev_node;
+	}
+}
+
+static void
+__cached_rbnode_insert_update(struct iova_domain *iovad,
+	unsigned long limit_pfn, struct iova *new)
+{
+	if (limit_pfn != iovad->dma_32bit_pfn)
+		return;
+	iovad->cached32_node = &new->node;
+}
+
+static void
+__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
+{
+	struct iova *cached_iova;
+	struct rb_node *curr;
+
+	if (!iovad->cached32_node)
+		return;
+	curr = iovad->cached32_node;
+	cached_iova = container_of(curr, struct iova, node);
+
+	if (free->pfn_lo >= cached_iova->pfn_lo)
+		iovad->cached32_node = rb_next(&free->node);
+}
+
+/* Computes the padding size required, to make the
+ * the start address naturally aligned on its size
+ */
+static int
+iova_get_pad_size(int size, unsigned int limit_pfn)
+{
+	unsigned int pad_size = 0;
+	unsigned int order = ilog2(size);
+
+	if (order)
+		pad_size = (limit_pfn + 1) % (1 << order);
+
+	return pad_size;
+}
+
+static int __alloc_iova_range(struct iova_domain *iovad, unsigned long size,
+		unsigned long limit_pfn, struct iova *new, bool size_aligned)
+{
+	struct rb_node *curr = NULL;
+	unsigned long flags;
+	unsigned long saved_pfn;
+	unsigned int pad_size = 0;
+
+	/* Walk the tree backwards */
+	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
+	saved_pfn = limit_pfn;
+	curr = __get_cached_rbnode(iovad, &limit_pfn);
+	while (curr) {
+		struct iova *curr_iova = container_of(curr, struct iova, node);
+		if (limit_pfn < curr_iova->pfn_lo)
+			goto move_left;
+		else if (limit_pfn < curr_iova->pfn_hi)
+			goto adjust_limit_pfn;
+		else {
+			if (size_aligned)
+				pad_size = iova_get_pad_size(size, limit_pfn);
+			if ((curr_iova->pfn_hi + size + pad_size) <= limit_pfn)
+				break;	/* found a free slot */
+		}
+adjust_limit_pfn:
+		limit_pfn = curr_iova->pfn_lo - 1;
+move_left:
+		curr = rb_prev(curr);
+	}
+
+	if (!curr) {
+		if (size_aligned)
+			pad_size = iova_get_pad_size(size, limit_pfn);
+		if ((IOVA_START_PFN + size + pad_size) > limit_pfn) {
+			spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+			return -ENOMEM;
+		}
+	}
+
+	/* pfn_lo will point to size aligned address if size_aligned is set */
+	new->pfn_lo = limit_pfn - (size + pad_size) + 1;
+	new->pfn_hi = new->pfn_lo + size - 1;
+
+	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+	return 0;
+}
+
+static void
+iova_insert_rbtree(struct rb_root *root, struct iova *iova)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	/* Figure out where to put new node */
+	while (*new) {
+		struct iova *this = container_of(*new, struct iova, node);
+		parent = *new;
+
+		if (iova->pfn_lo < this->pfn_lo)
+			new = &((*new)->rb_left);
+		else if (iova->pfn_lo > this->pfn_lo)
+			new = &((*new)->rb_right);
+		else
+			BUG(); /* this should not happen */
+	}
+	/* Add new node and rebalance tree. */
+	rb_link_node(&iova->node, parent, new);
+	rb_insert_color(&iova->node, root);
+}
+
+/**
+ * alloc_iova - allocates an iova
+ * @iovad - iova domain in question
+ * @size - size of page frames to allocate
+ * @limit_pfn - max limit address
+ * @size_aligned - set if size_aligned address range is required
+ * This function allocates an iova in the range limit_pfn to IOVA_START_PFN
+ * looking from limit_pfn instead from IOVA_START_PFN. If the size_aligned
+ * flag is set then the allocated address iova->pfn_lo will be naturally
+ * aligned on roundup_power_of_two(size).
+ */
+struct iova *
+alloc_iova(struct iova_domain *iovad, unsigned long size,
+	unsigned long limit_pfn,
+	bool size_aligned)
+{
+	unsigned long flags;
+	struct iova *new_iova;
+	int ret;
+
+	new_iova = alloc_iova_mem();
+	if (!new_iova)
+		return NULL;
+
+	/* If size aligned is set then round the size to
+	 * to next power of two.
+	 */
+	if (size_aligned)
+		size = __roundup_pow_of_two(size);
+
+	spin_lock_irqsave(&iovad->iova_alloc_lock, flags);
+	ret = __alloc_iova_range(iovad, size, limit_pfn, new_iova,
+			size_aligned);
+
+	if (ret) {
+		spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags);
+		free_iova_mem(new_iova);
+		return NULL;
+	}
+
+	/* Insert the new_iova into domain rbtree by holding writer lock */
+	spin_lock(&iovad->iova_rbtree_lock);
+	iova_insert_rbtree(&iovad->rbroot, new_iova);
+	__cached_rbnode_insert_update(iovad, limit_pfn, new_iova);
+	spin_unlock(&iovad->iova_rbtree_lock);
+
+	spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags);
+
+	return new_iova;
+}
+
+/**
+ * find_iova - find's an iova for a given pfn
+ * @iovad - iova domain in question.
+ * pfn - page frame number
+ * This function finds and returns an iova belonging to the
+ * given doamin which matches the given pfn.
+ */
+struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
+{
+	unsigned long flags;
+	struct rb_node *node;
+
+	/* Take the lock so that no other thread is manipulating the rbtree */
+	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
+	node = iovad->rbroot.rb_node;
+	while (node) {
+		struct iova *iova = container_of(node, struct iova, node);
+
+		/* If pfn falls within iova's range, return iova */
+		if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) {
+			spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+			/* We are not holding the lock while this iova
+			 * is referenced by the caller as the same thread
+			 * which called this function also calls __free_iova()
+			 * and it is by desing that only one thread can possibly
+			 * reference a particular iova and hence no conflict.
+			 */
+			return iova;
+		}
+
+		if (pfn < iova->pfn_lo)
+			node = node->rb_left;
+		else if (pfn > iova->pfn_lo)
+			node = node->rb_right;
+	}
+
+	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+	return NULL;
+}
+
+/**
+ * __free_iova - frees the given iova
+ * @iovad: iova domain in question.
+ * @iova: iova in question.
+ * Frees the given iova belonging to the giving domain
+ */
+void
+__free_iova(struct iova_domain *iovad, struct iova *iova)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
+	__cached_rbnode_delete_update(iovad, iova);
+	rb_erase(&iova->node, &iovad->rbroot);
+	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+	free_iova_mem(iova);
+}
+
+/**
+ * free_iova - finds and frees the iova for a given pfn
+ * @iovad: - iova domain in question.
+ * @pfn: - pfn that is allocated previously
+ * This functions finds an iova for a given pfn and then
+ * frees the iova from that domain.
+ */
+void
+free_iova(struct iova_domain *iovad, unsigned long pfn)
+{
+	struct iova *iova = find_iova(iovad, pfn);
+	if (iova)
+		__free_iova(iovad, iova);
+
+}
+
+/**
+ * put_iova_domain - destroys the iova doamin
+ * @iovad: - iova domain in question.
+ * All the iova's in that domain are destroyed.
+ */
+void put_iova_domain(struct iova_domain *iovad)
+{
+	struct rb_node *node;
+	unsigned long flags;
+
+	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
+	node = rb_first(&iovad->rbroot);
+	while (node) {
+		struct iova *iova = container_of(node, struct iova, node);
+		rb_erase(node, &iovad->rbroot);
+		free_iova_mem(iova);
+		node = rb_first(&iovad->rbroot);
+	}
+	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+}
+
+static int
+__is_range_overlap(struct rb_node *node,
+	unsigned long pfn_lo, unsigned long pfn_hi)
+{
+	struct iova *iova = container_of(node, struct iova, node);
+
+	if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
+		return 1;
+	return 0;
+}
+
+static struct iova *
+__insert_new_range(struct iova_domain *iovad,
+	unsigned long pfn_lo, unsigned long pfn_hi)
+{
+	struct iova *iova;
+
+	iova = alloc_iova_mem();
+	if (!iova)
+		return iova;
+
+	iova->pfn_hi = pfn_hi;
+	iova->pfn_lo = pfn_lo;
+	iova_insert_rbtree(&iovad->rbroot, iova);
+	return iova;
+}
+
+static void
+__adjust_overlap_range(struct iova *iova,
+	unsigned long *pfn_lo, unsigned long *pfn_hi)
+{
+	if (*pfn_lo < iova->pfn_lo)
+		iova->pfn_lo = *pfn_lo;
+	if (*pfn_hi > iova->pfn_hi)
+		*pfn_lo = iova->pfn_hi + 1;
+}
+
+/**
+ * reserve_iova - reserves an iova in the given range
+ * @iovad: - iova domain pointer
+ * @pfn_lo: - lower page frame address
+ * @pfn_hi:- higher pfn adderss
+ * This function allocates reserves the address range from pfn_lo to pfn_hi so
+ * that this address is not dished out as part of alloc_iova.
+ */
+struct iova *
+reserve_iova(struct iova_domain *iovad,
+	unsigned long pfn_lo, unsigned long pfn_hi)
+{
+	struct rb_node *node;
+	unsigned long flags;
+	struct iova *iova;
+	unsigned int overlap = 0;
+
+	spin_lock_irqsave(&iovad->iova_alloc_lock, flags);
+	spin_lock(&iovad->iova_rbtree_lock);
+	for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
+		if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
+			iova = container_of(node, struct iova, node);
+			__adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
+			if ((pfn_lo >= iova->pfn_lo) &&
+				(pfn_hi <= iova->pfn_hi))
+				goto finish;
+			overlap = 1;
+
+		} else if (overlap)
+				break;
+	}
+
+	/* We are here either becasue this is the first reserver node
+	 * or need to insert remaining non overlap addr range
+	 */
+	iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
+finish:
+
+	spin_unlock(&iovad->iova_rbtree_lock);
+	spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags);
+	return iova;
+}
+
+/**
+ * copy_reserved_iova - copies the reserved between domains
+ * @from: - source doamin from where to copy
+ * @to: - destination domin where to copy
+ * This function copies reserved iova's from one doamin to
+ * other.
+ */
+void
+copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
+{
+	unsigned long flags;
+	struct rb_node *node;
+
+	spin_lock_irqsave(&from->iova_alloc_lock, flags);
+	spin_lock(&from->iova_rbtree_lock);
+	for (node = rb_first(&from->rbroot); node; node = rb_next(node)) {
+		struct iova *iova = container_of(node, struct iova, node);
+		struct iova *new_iova;
+		new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
+		if (!new_iova)
+			printk(KERN_ERR "Reserve iova range %lx@%lx failed\n",
+				iova->pfn_lo, iova->pfn_lo);
+	}
+	spin_unlock(&from->iova_rbtree_lock);
+	spin_unlock_irqrestore(&from->iova_alloc_lock, flags);
+}
-- 
1.5.2.4



* [PATCH 2/3] move iova cache code to iova.c
  2007-11-02 17:05 [PATCH -mm 0/3] convert IOMMUs to use iova FUJITA Tomonori
  2007-11-02 17:05 ` [PATCH 1/3] move iova from drivers/pci/ to lib/ FUJITA Tomonori
@ 2007-11-02 17:05 ` FUJITA Tomonori
  2007-11-02 17:05 ` [PATCH 3/3] POWERPC: convert the IOMMU to use iova FUJITA Tomonori
  2007-11-02 17:12 ` [PATCH -mm 0/3] convert IOMMUs " Muli Ben-Yehuda
  3 siblings, 0 replies; 7+ messages in thread
From: FUJITA Tomonori @ 2007-11-02 17:05 UTC
  To: linux-kernel, linux-scsi
  Cc: James.Bottomley, jens.axboe, jeff, anil.s.keshavamurthy, muli,
	paulus, anton, olof, tony.luck, davem, kyle, fujita.tomonori

This detaches the iova cache code from intel-iommu.c and moves it
into iova.c, so that other IOMMUs can use the iova code.
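
After this change each iova user supplies its own kmem_cache; a
minimal setup sketch (mirroring what patch 3/3 does for PPC64; the
cache and function names are hypothetical):

	static struct kmem_cache *my_iova_cache;	/* hypothetical */

	static int my_iommu_setup(struct iova_domain *iovad,
				  unsigned long pfn_32bit)
	{
		my_iova_cache = KMEM_CACHE(iova, 0);
		if (!my_iova_cache)
			return -ENOMEM;
		init_iova_domain(iovad, pfn_32bit, my_iova_cache);
		return 0;
	}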

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
---
 drivers/pci/intel-iommu.c |   14 ++------------
 include/linux/iova.h      |    6 +++---
 lib/iova.c                |   34 ++++++++++++++++++++++++++++------
 3 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 1a23b4c..0c41d79 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -141,16 +141,6 @@ static inline void free_devinfo_mem(void *vaddr)
 	kmem_cache_free(iommu_devinfo_cache, vaddr);
 }
 
-struct iova *alloc_iova_mem(void)
-{
-	return iommu_kmem_cache_alloc(iommu_iova_cache);
-}
-
-void free_iova_mem(struct iova *iova)
-{
-	kmem_cache_free(iommu_iova_cache, iova);
-}
-
 static inline void __iommu_flush_cache(
 	struct intel_iommu *iommu, void *addr, int size)
 {
@@ -1087,7 +1077,7 @@ static void dmar_init_reserved_ranges(void)
 	int i;
 	u64 addr, size;
 
-	init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
+	init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN, iommu_iova_cache);
 
 	/* IOAPIC ranges shouldn't be accessed by DMA */
 	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
@@ -1141,7 +1131,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
 	int adjust_width, agaw;
 	unsigned long sagaw;
 
-	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
+	init_iova_domain(&domain->iovad, DMA_32BIT_PFN, iommu_iova_cache);
 	spin_lock_init(&domain->mapping_lock);
 
 	domain_reserve_special_ranges(domain);
diff --git a/include/linux/iova.h b/include/linux/iova.h
index d521b5b..41f189b 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -32,10 +32,9 @@ struct iova_domain {
 	struct rb_root	rbroot;		/* iova domain rbtree root */
 	struct rb_node	*cached32_node; /* Save last alloced node */
 	unsigned long	dma_32bit_pfn;
+	struct kmem_cache *iova_cachep;
 };
 
-struct iova *alloc_iova_mem(void);
-void free_iova_mem(struct iova *iova);
 void free_iova(struct iova_domain *iovad, unsigned long pfn);
 void __free_iova(struct iova_domain *iovad, struct iova *iova);
 struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size,
@@ -44,7 +43,8 @@ struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size,
 struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
 	unsigned long pfn_hi);
 void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
-void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit);
+void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit,
+		      struct kmem_cache *iova_cachep);
 struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
 void put_iova_domain(struct iova_domain *iovad);
 
diff --git a/lib/iova.c b/lib/iova.c
index 6e14a3f..d2612e5 100644
--- a/lib/iova.c
+++ b/lib/iova.c
@@ -6,16 +6,38 @@
  * Copyright (C) 2006 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
  */
 
+#include <linux/sched.h>
 #include <linux/iova.h>
 
+static struct iova *alloc_iova_mem(struct iova_domain *iovad)
+{
+	unsigned int flags;
+	void *vaddr;
+
+	/* trying to avoid low memory issues */
+	flags = current->flags & PF_MEMALLOC;
+	current->flags |= PF_MEMALLOC;
+	vaddr = kmem_cache_alloc(iovad->iova_cachep, GFP_ATOMIC);
+	current->flags &= (~PF_MEMALLOC | flags);
+
+	return vaddr;
+}
+
+static void free_iova_mem(struct iova_domain *iovad, struct iova *iova)
+{
+	kmem_cache_free(iovad->iova_cachep, iova);
+}
+
 void
-init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit)
+init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit,
+		 struct kmem_cache *iova_cachep)
 {
 	spin_lock_init(&iovad->iova_alloc_lock);
 	spin_lock_init(&iovad->iova_rbtree_lock);
 	iovad->rbroot = RB_ROOT;
 	iovad->cached32_node = NULL;
 	iovad->dma_32bit_pfn = pfn_32bit;
+	iovad->iova_cachep = iova_cachep;
 }
 
 static struct rb_node *
@@ -160,7 +182,7 @@ alloc_iova(struct iova_domain *iovad, unsigned long size,
 	struct iova *new_iova;
 	int ret;
 
-	new_iova = alloc_iova_mem();
+	new_iova = alloc_iova_mem(iovad);
 	if (!new_iova)
 		return NULL;
 
@@ -176,7 +198,7 @@ alloc_iova(struct iova_domain *iovad, unsigned long size,
 
 	if (ret) {
 		spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags);
-		free_iova_mem(new_iova);
+		free_iova_mem(iovad, new_iova);
 		return NULL;
 	}
 
@@ -246,7 +268,7 @@ __free_iova(struct iova_domain *iovad, struct iova *iova)
 	__cached_rbnode_delete_update(iovad, iova);
 	rb_erase(&iova->node, &iovad->rbroot);
 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-	free_iova_mem(iova);
+	free_iova_mem(iovad, iova);
 }
 
 /**
@@ -280,7 +302,7 @@ void put_iova_domain(struct iova_domain *iovad)
 	while (node) {
 		struct iova *iova = container_of(node, struct iova, node);
 		rb_erase(node, &iovad->rbroot);
-		free_iova_mem(iova);
+		free_iova_mem(iovad, iova);
 		node = rb_first(&iovad->rbroot);
 	}
 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
@@ -303,7 +325,7 @@ __insert_new_range(struct iova_domain *iovad,
 {
 	struct iova *iova;
 
-	iova = alloc_iova_mem();
+	iova = alloc_iova_mem(iovad);
 	if (!iova)
 		return iova;
 
-- 
1.5.2.4



* [PATCH 3/3] POWERPC: convert the IOMMU to use iova
  2007-11-02 17:05 [PATCH -mm 0/3] convert IOMMUs to use iova FUJITA Tomonori
  2007-11-02 17:05 ` [PATCH 1/3] move iova from drivers/pci/ to lib/ FUJITA Tomonori
  2007-11-02 17:05 ` [PATCH 2/3] move iova cache code to iova.c FUJITA Tomonori
@ 2007-11-02 17:05 ` FUJITA Tomonori
  2007-11-02 17:12 ` [PATCH -mm 0/3] convert IOMMUs " Muli Ben-Yehuda
  3 siblings, 0 replies; 7+ messages in thread
From: FUJITA Tomonori @ 2007-11-02 17:05 UTC
  To: linux-kernel, linux-scsi
  Cc: James.Bottomley, jens.axboe, jeff, anil.s.keshavamurthy, muli,
	paulus, anton, olof, tony.luck, davem, kyle, fujita.tomonori

This converts the PPC64 IOMMU to use iova for free area management.

TODO: we might need to modify iova to use tricks such as avoiding
cacheline bouncing. Performance tests are necessary.
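
Reduced to a sketch, the conversion replaces bitmap bookkeeping with
calls into the new tbl->iovad domain (the real hunks follow; pfn
stands for a TCE entry index):

	/* before: single entries were reserved/released by flipping
	 * bits in tbl->it_map
	 */
	__set_bit(pfn, tbl->it_map);
	__clear_bit(pfn, tbl->it_map);

	/* after: reserve exactly one pfn, or free a previous
	 * allocation by a pfn inside it
	 */
	reserve_iova(&tbl->iovad, pfn, pfn);
	free_iova(&tbl->iovad, pfn);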

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
---
 arch/powerpc/kernel/iommu.c            |  169 ++++++--------------------------
 arch/powerpc/platforms/Kconfig.cputype |    1 +
 arch/powerpc/platforms/cell/iommu.c    |    3 +-
 arch/powerpc/sysdev/dart_iommu.c       |    4 +-
 include/asm-powerpc/iommu.h            |    6 +-
 5 files changed, 36 insertions(+), 147 deletions(-)

diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 2d0c9ef..34dcfd3 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -31,6 +31,7 @@
 #include <linux/string.h>
 #include <linux/dma-mapping.h>
 #include <linux/bitops.h>
+#include <linux/iova.h>
 #include <asm/io.h>
 #include <asm/prom.h>
 #include <asm/iommu.h>
@@ -82,106 +83,18 @@ __setup("protect4gb=", setup_protect4gb);
 __setup("iommu=", setup_iommu);
 
 static unsigned long iommu_range_alloc(struct iommu_table *tbl,
-                                       unsigned long npages,
-                                       unsigned long *handle,
-                                       unsigned long mask,
-                                       unsigned int align_order)
-{ 
-	unsigned long n, end, i, start;
-	unsigned long limit;
-	int largealloc = npages > 15;
-	int pass = 0;
-	unsigned long align_mask;
-
-	align_mask = 0xffffffffffffffffl >> (64 - align_order);
-
-	/* This allocator was derived from x86_64's bit string search */
-
-	/* Sanity check */
-	if (unlikely(npages == 0)) {
-		if (printk_ratelimit())
-			WARN_ON(1);
-		return DMA_ERROR_CODE;
-	}
-
-	if (handle && *handle)
-		start = *handle;
-	else
-		start = largealloc ? tbl->it_largehint : tbl->it_hint;
-
-	/* Use only half of the table for small allocs (15 pages or less) */
-	limit = largealloc ? tbl->it_size : tbl->it_halfpoint;
-
-	if (largealloc && start < tbl->it_halfpoint)
-		start = tbl->it_halfpoint;
-
-	/* The case below can happen if we have a small segment appended
-	 * to a large, or when the previous alloc was at the very end of
-	 * the available space. If so, go back to the initial start.
-	 */
-	if (start >= limit)
-		start = largealloc ? tbl->it_largehint : tbl->it_hint;
-
- again:
-
-	if (limit + tbl->it_offset > mask) {
-		limit = mask - tbl->it_offset + 1;
-		/* If we're constrained on address range, first try
-		 * at the masked hint to avoid O(n) search complexity,
-		 * but on second pass, start at 0.
-		 */
-		if ((start & mask) >= limit || pass > 0)
-			start = 0;
-		else
-			start &= mask;
-	}
-
-	n = find_next_zero_bit(tbl->it_map, limit, start);
-
-	/* Align allocation */
-	n = (n + align_mask) & ~align_mask;
-
-	end = n + npages;
-
-	if (unlikely(end >= limit)) {
-		if (likely(pass < 2)) {
-			/* First failure, just rescan the half of the table.
-			 * Second failure, rescan the other half of the table.
-			 */
-			start = (largealloc ^ pass) ? tbl->it_halfpoint : 0;
-			limit = pass ? tbl->it_size : limit;
-			pass++;
-			goto again;
-		} else {
-			/* Third failure, give up */
-			return DMA_ERROR_CODE;
-		}
-	}
-
-	for (i = n; i < end; i++)
-		if (test_bit(i, tbl->it_map)) {
-			start = i+1;
-			goto again;
-		}
-
-	for (i = n; i < end; i++)
-		__set_bit(i, tbl->it_map);
-
-	/* Bump the hint to a new block for small allocs. */
-	if (largealloc) {
-		/* Don't bump to new block to avoid fragmentation */
-		tbl->it_largehint = end;
-	} else {
-		/* Overflow will be taken care of at the next allocation */
-		tbl->it_hint = (end + tbl->it_blocksize - 1) &
-		                ~(tbl->it_blocksize - 1);
-	}
+				      unsigned long npages,
+				      unsigned long mask,
+				      unsigned int align_order)
+{
+	unsigned long end;
+	struct iova *iova;
 
-	/* Update handle for SG allocations */
-	if (handle)
-		*handle = end;
+	end = min_t(unsigned long, DMA_32BIT_MASK >> IOMMU_PAGE_SHIFT,
+		    tbl->it_size - 1);
+	iova = alloc_iova(&tbl->iovad, npages, end, 1);
 
-	return n;
+	return iova ? iova->pfn_lo : DMA_ERROR_CODE;
 }
 
 static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *page,
@@ -193,7 +106,7 @@ static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *page,
 
 	spin_lock_irqsave(&(tbl->it_lock), flags);
 
-	entry = iommu_range_alloc(tbl, npages, NULL, mask, align_order);
+	entry = iommu_range_alloc(tbl, npages, mask, align_order);
 
 	if (unlikely(entry == DMA_ERROR_CODE)) {
 		spin_unlock_irqrestore(&(tbl->it_lock), flags);
@@ -224,7 +137,6 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
 			 unsigned int npages)
 {
 	unsigned long entry, free_entry;
-	unsigned long i;
 
 	entry = dma_addr >> IOMMU_PAGE_SHIFT;
 	free_entry = entry - tbl->it_offset;
@@ -246,9 +158,8 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
 	}
 
 	ppc_md.tce_free(tbl, entry, npages);
-	
-	for (i = 0; i < npages; i++)
-		__clear_bit(free_entry+i, tbl->it_map);
+
+	free_iova(&tbl->iovad, free_entry);
 }
 
 static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
@@ -309,7 +220,7 @@ int iommu_map_sg(struct iommu_table *tbl, struct scatterlist *sglist,
 		/* Allocate iommu entries for that segment */
 		vaddr = (unsigned long) sg_virt(s);
 		npages = iommu_num_pages(vaddr, slen);
-		entry = iommu_range_alloc(tbl, npages, &handle, mask >> IOMMU_PAGE_SHIFT, 0);
+		entry = iommu_range_alloc(tbl, npages, mask >> IOMMU_PAGE_SHIFT, 0);
 
 		DBG("  - vaddr: %lx, size: %lx\n", vaddr, slen);
 
@@ -439,34 +350,28 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
 	spin_unlock_irqrestore(&(tbl->it_lock), flags);
 }
 
+static struct kmem_cache *iova_cachep;
+
 /*
  * Build a iommu_table structure.  This contains a bit map which
  * is used to manage allocation of the tce space.
  */
 struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
 {
-	unsigned long sz;
 	unsigned long start_index, end_index;
 	unsigned long entries_per_4g;
 	unsigned long index;
 	static int welcomed = 0;
-	struct page *page;
-
-	/* Set aside 1/4 of the table for large allocations. */
-	tbl->it_halfpoint = tbl->it_size * 3 / 4;
 
-	/* number of bytes needed for the bitmap */
-	sz = (tbl->it_size + 7) >> 3;
-
-	page = alloc_pages_node(nid, GFP_ATOMIC, get_order(sz));
-	if (!page)
-		panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
-	tbl->it_map = page_address(page);
-	memset(tbl->it_map, 0, sz);
+	if (!iova_cachep) {
+		iova_cachep = KMEM_CACHE(iova, 0);
+		if (!iova_cachep)
+			return NULL;
+	}
 
-	tbl->it_hint = 0;
-	tbl->it_largehint = tbl->it_halfpoint;
 	spin_lock_init(&tbl->it_lock);
+	init_iova_domain(&tbl->iovad, DMA_32BIT_MASK >> IOMMU_PAGE_SHIFT,
+			 iova_cachep);
 
 #ifdef CONFIG_CRASH_DUMP
 	if (ppc_md.tce_get) {
@@ -482,7 +387,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
 			 * Freed TCE entry contains 0x7fffffffffffffff on JS20
 			 */
 			if (tceval && (tceval != 0x7fffffffffffffffUL)) {
-				__set_bit(index, tbl->it_map);
+				free_iova(&tbl->iovad, index);
 				tcecount++;
 			}
 		}
@@ -492,7 +397,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
 				KDUMP_MIN_TCE_ENTRIES);
 			for (index = tbl->it_size - KDUMP_MIN_TCE_ENTRIES;
 				index < tbl->it_size; index++)
-				__clear_bit(index, tbl->it_map);
+				free_iova(&tbl->iovad, index);
 		}
 	}
 #else
@@ -514,7 +419,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
 		end_index = tbl->it_size;
 
 		for (index = start_index; index < end_index - 1; index += entries_per_4g)
-			__set_bit(index, tbl->it_map);
+			free_iova(&tbl->iovad, index);
 	}
 
 	if (!welcomed) {
@@ -530,31 +435,15 @@ void iommu_free_table(struct device_node *dn)
 {
 	struct pci_dn *pdn = dn->data;
 	struct iommu_table *tbl = pdn->iommu_table;
-	unsigned long bitmap_sz, i;
-	unsigned int order;
 
-	if (!tbl || !tbl->it_map) {
+	if (!tbl) {
 		printk(KERN_ERR "%s: expected TCE map for %s\n", __FUNCTION__,
 				dn->full_name);
 		return;
 	}
 
 	/* verify that table contains no entries */
-	/* it_size is in entries, and we're examining 64 at a time */
-	for (i = 0; i < (tbl->it_size/64); i++) {
-		if (tbl->it_map[i] != 0) {
-			printk(KERN_WARNING "%s: Unexpected TCEs for %s\n",
-				__FUNCTION__, dn->full_name);
-			break;
-		}
-	}
-
-	/* calculate bitmap size in bytes */
-	bitmap_sz = (tbl->it_size + 7) / 8;
-
-	/* free bitmap */
-	order = get_order(bitmap_sz);
-	free_pages((unsigned long) tbl->it_map, order);
+	put_iova_domain(&tbl->iovad);
 
 	/* free table */
 	kfree(tbl);
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 99684ea..22483e5 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -1,6 +1,7 @@
 config PPC64
 	bool "64-bit kernel"
 	default n
+	select IOVA
 	help
 	  This option selects whether a 32-bit or a 64-bit kernel
 	  will be built.
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index faabc3f..799e8e5 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -471,10 +471,9 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np,
 	 * This code also assumes that we have a window that starts at 0,
 	 * which is the case on all spider based blades.
 	 */
-	__set_bit(0, window->table.it_map);
+	reserve_iova(&window->table.iovad, 0, 0);
 	tce_build_cell(&window->table, window->table.it_offset, 1,
 		       (unsigned long)iommu->pad_page, DMA_TO_DEVICE);
-	window->table.it_hint = window->table.it_blocksize;
 
 	return window;
 }
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index e0e24b0..6c58ef5 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -37,6 +37,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/vmalloc.h>
 #include <linux/suspend.h>
+#include <linux/iova.h>
 #include <asm/io.h>
 #include <asm/prom.h>
 #include <asm/iommu.h>
@@ -287,7 +288,8 @@ static void iommu_table_dart_setup(void)
 	/* Reserve the last page of the DART to avoid possible prefetch
 	 * past the DART mapped area
 	 */
-	set_bit(iommu_table_dart.it_size - 1, iommu_table_dart.it_map);
+	reserve_iova(&iommu_table_dart.iovad, iommu_table_dart.it_size - 1,
+		     iommu_table_dart.it_size - 1);
 }
 
 static void pci_dma_dev_setup_dart(struct pci_dev *dev)
diff --git a/include/asm-powerpc/iommu.h b/include/asm-powerpc/iommu.h
index 4a82fdc..f497e7e 100644
--- a/include/asm-powerpc/iommu.h
+++ b/include/asm-powerpc/iommu.h
@@ -27,6 +27,7 @@
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
 #include <linux/bitops.h>
+#include <linux/iova.h>
 #include <asm/machdep.h>
 #include <asm/types.h>
 
@@ -61,11 +62,8 @@ struct iommu_table {
 	unsigned long  it_index;     /* which iommu table this is */
 	unsigned long  it_type;      /* type: PCI or Virtual Bus */
 	unsigned long  it_blocksize; /* Entries in each block (cacheline) */
-	unsigned long  it_hint;      /* Hint for next alloc */
-	unsigned long  it_largehint; /* Hint for large allocs */
-	unsigned long  it_halfpoint; /* Breaking point for small/large allocs */
 	spinlock_t     it_lock;      /* Protects it_map */
-	unsigned long *it_map;       /* A simple allocation bitmap for now */
+	struct iova_domain iovad;
 };
 
 struct scatterlist;
-- 
1.5.2.4



* Re: [PATCH -mm 0/3] convert IOMMUs to use iova
  2007-11-02 17:05 [PATCH -mm 0/3] convert IOMMUs to use iova FUJITA Tomonori
                   ` (2 preceding siblings ...)
  2007-11-02 17:05 ` [PATCH 3/3] POWERPC: convert the IOMMU to use iova FUJITA Tomonori
@ 2007-11-02 17:12 ` Muli Ben-Yehuda
  2007-11-02 18:11   ` FUJITA Tomonori
  2007-11-07 13:33   ` FUJITA Tomonori
  3 siblings, 2 replies; 7+ messages in thread
From: Muli Ben-Yehuda @ 2007-11-02 17:12 UTC
  To: FUJITA Tomonori
  Cc: linux-kernel, linux-scsi, James.Bottomley, jens.axboe, jeff,
	anil.s.keshavamurthy, paulus, anton, olof, tony.luck, davem, kyle,
	fujita.tomonori

On Sat, Nov 03, 2007 at 02:05:39AM +0900, FUJITA Tomonori wrote:

> This patchset converts the PPC64 IOMMU to use the iova code for free
> area management.
> 
> The IOMMUs ignore low-level drivers' restrictions: the maximum
> segment size and the segment boundary.
> 
> I fixed the former:
> 
> http://thread.gmane.org/gmane.linux.scsi/35602
> 
> The latter makes the free area management complicated. Rather than
> fixing each IOMMU's free area management separately, I'd like to
> convert the IOMMUs to use the iova code (which intel-iommu
> introduced) for free area management and enable iova to handle
> segment boundary restrictions.

In general it sounds like a great idea, but have you looked at what
impact this has on the performance of the IO path?

Cheers,
Muli


* Re: [PATCH -mm 0/3] convert IOMMUs to use iova
  2007-11-02 17:12 ` [PATCH -mm 0/3] convert IOMMUs " Muli Ben-Yehuda
@ 2007-11-02 18:11   ` FUJITA Tomonori
  2007-11-07 13:33   ` FUJITA Tomonori
  1 sibling, 0 replies; 7+ messages in thread
From: FUJITA Tomonori @ 2007-11-02 18:11 UTC
  To: muli
  Cc: tomof, linux-kernel, linux-scsi, James.Bottomley, jens.axboe,
	jeff, anil.s.keshavamurthy, paulus, anton, olof, tony.luck, davem,
	kyle, fujita.tomonori

On Fri, 2 Nov 2007 19:12:27 +0200
Muli Ben-Yehuda <muli@il.ibm.com> wrote:

> On Sat, Nov 03, 2007 at 02:05:39AM +0900, FUJITA Tomonori wrote:
> 
> > This patchset converts the PPC64 IOMMU to use the iova code for free
> > area management.
> > 
> > The IOMMUs ignore low-level drivers' restrictions: the maximum
> > segment size and the segment boundary.
> > 
> > I fixed the former:
> > 
> > http://thread.gmane.org/gmane.linux.scsi/35602
> > 
> > The latter makes the free area management complicated. Rather than
> > fixing each IOMMU's free area management separately, I'd like to
> > convert the IOMMUs to use the iova code (which intel-iommu
> > introduced) for free area management and enable iova to handle
> > segment boundary restrictions.
> 
> In general it sounds like a great idea, but have you looked at what
> impact this has on the performance of the IO path?

Not yet. I only have access to an entry-class pSeries server, so I
would appreciate it if the POWERPC people could try the patchset on
high-end servers.

The patchset doesn't use the tricks that the current POWERPC IOMMU
uses to improve performance (Anton and Olof told me about them,
thanks!), so I might need to modify iova to support them.

I think it would be difficult to use some of those tricks with the
iova code even if we modify it. If iova degrades performance, I think
it would be better to add new IOMMU library code that simply uses a
bitmap, as most of the IOMMUs do.

Another possible source of degraded performance is that we can't
allocate areas for map_sg in one stroke the way some of the IOMMUs do;
we need to allocate an area per sg segment to handle the segment
boundary (see the sketch below). If that is unacceptable, I guess
there is only one way out: LLDs allocate many sg entries and the IOMMU
then splits the sg entries.
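
To illustrate the per-segment point, here is a simplified sketch of
the map_sg loop under this scheme (condensed from patch 3/3; error
handling and the actual TCE programming are omitted):

	/* One iova allocation per sg segment, so each segment can
	 * respect the device's segment boundary; a single allocation
	 * covering the whole scatterlist could not.
	 */
	for_each_sg(sglist, s, nelems, i) {
		vaddr = (unsigned long) sg_virt(s);
		npages = iommu_num_pages(vaddr, s->length);
		entry = iommu_range_alloc(tbl, npages,
					  mask >> IOMMU_PAGE_SHIFT, 0);
		/* ... program npages TCEs at entry, set s->dma_address ... */
	}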


* Re: [PATCH -mm 0/3] convert IOMMUs to use iova
  2007-11-02 17:12 ` [PATCH -mm 0/3] convert IOMMUs " Muli Ben-Yehuda
  2007-11-02 18:11   ` FUJITA Tomonori
@ 2007-11-07 13:33   ` FUJITA Tomonori
  1 sibling, 0 replies; 7+ messages in thread
From: FUJITA Tomonori @ 2007-11-07 13:33 UTC
  To: muli
  Cc: tomof, linux-kernel, linux-scsi, James.Bottomley, jens.axboe,
	jeff, anil.s.keshavamurthy, paulus, anton, olof, tony.luck, davem,
	kyle, fujita.tomonori

On Fri, 2 Nov 2007 19:12:27 +0200
Muli Ben-Yehuda <muli@il.ibm.com> wrote:

> On Sat, Nov 03, 2007 at 02:05:39AM +0900, FUJITA Tomonori wrote:
> 
> > This patchset converts the PPC64 IOMMU to use the iova code for free
> > area management.
> > 
> > The IOMMUs ignore low-level drivers' restrictions: the maximum
> > segment size and the segment boundary.
> > 
> > I fixed the former:
> > 
> > http://thread.gmane.org/gmane.linux.scsi/35602
> > 
> > The latter makes the free area management complicated. Rather than
> > fixing each IOMMU's free area management separately, I'd like to
> > convert the IOMMUs to use the iova code (which intel-iommu
> > introduced) for free area management and enable iova to handle
> > segment boundary restrictions.
> 
> In general it sounds like a great idea, but have you looked at what
> impact this has on the performance of the IO path?

I converted swiotlb to use iova and compared it with the original
algorithm (which I think is better than the simple bitmap one that
most of the IOMMUs use).

I used the 'swiotlb=force' boot option and ran netperf with "-m 128
-D" (which leads to tons of dma_map_single calls).

The original code produced 281.8 Mb/s; the iova version produced 77.2
Mb/s.

Perhaps it would be better to generalize the swiotlb algorithm (at
least for small I/O areas)? Or my patch might have bugs.

Here's the patch converting swiotlb to use iova, on top of my iova
patchset:

http://marc.info/?l=linux-kernel&m=119402340801254&w=2

diff --git a/arch/x86/Kconfig.x86_64 b/arch/x86/Kconfig.x86_64
index c10d3f0..8735822 100644
--- a/arch/x86/Kconfig.x86_64
+++ b/arch/x86/Kconfig.x86_64
@@ -524,6 +524,7 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
 # need this always selected by IOMMU for the VIA workaround
 config SWIOTLB
 	bool
+	select IOVA
 	help
 	  Support for software bounce buffers used on x86-64 systems
 	  which don't have a hardware IOMMU (e.g. the current generation
diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c
index aa805b1..8507402 100644
--- a/arch/x86/kernel/pci-dma_64.c
+++ b/arch/x86/kernel/pci-dma_64.c
@@ -325,6 +325,9 @@ static int __init pci_iommu_init(void)
 	gart_iommu_init();
 #endif
 
+#ifdef CONFIG_SWIOTLB
+	swiotlb_alloc();
+#endif
 	no_iommu_init();
 	return 0;
 }
diff --git a/include/asm-x86/swiotlb.h b/include/asm-x86/swiotlb.h
index f9c5895..f00d20c 100644
--- a/include/asm-x86/swiotlb.h
+++ b/include/asm-x86/swiotlb.h
@@ -40,6 +40,7 @@ extern void swiotlb_free_coherent (struct device *hwdev, size_t size,
 				   void *vaddr, dma_addr_t dma_handle);
 extern int swiotlb_dma_supported(struct device *hwdev, u64 mask);
 extern void swiotlb_init(void);
+extern void swiotlb_alloc(void);
 
 extern int swiotlb_force;
 
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 1a8050a..54ecb87 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -24,6 +24,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/ctype.h>
+#include <linux/iova.h>
 
 #include <asm/io.h>
 #include <asm/dma.h>
@@ -103,10 +104,7 @@ static unsigned int io_tlb_index;
  */
 static unsigned char **io_tlb_orig_addr;
 
-/*
- * Protect the above data structures in the map and unmap calls
- */
-static DEFINE_SPINLOCK(io_tlb_lock);
+static struct iova_domain swiotlb_iovad;
 
 static int __init
 setup_io_tlb_npages(char *str)
@@ -272,6 +270,19 @@ cleanup1:
 	return -ENOMEM;
 }
 
+static struct kmem_cache *iova_cachep;
+
+void __init
+swiotlb_alloc(void)
+{
+	if (!swiotlb)
+		return;
+
+	iova_cachep = KMEM_CACHE(iova, 0);
+	init_iova_domain(&swiotlb_iovad, DMA_32BIT_MASK >> IO_TLB_SHIFT,
+			 iova_cachep);
+}
+
 static int
 address_needs_mapping(struct device *hwdev, dma_addr_t addr)
 {
@@ -288,70 +299,20 @@ address_needs_mapping(struct device *hwdev, dma_addr_t addr)
 static void *
 map_single(struct device *hwdev, char *buffer, size_t size, int dir)
 {
-	unsigned long flags;
 	char *dma_addr;
-	unsigned int nslots, stride, index, wrap;
+	unsigned int nslots, index;
 	int i;
+	struct iova *iova;
 
-	/*
-	 * For mappings greater than a page, we limit the stride (and
-	 * hence alignment) to a page size.
-	 */
 	nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-	if (size > PAGE_SIZE)
-		stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
-	else
-		stride = 1;
-
 	BUG_ON(!nslots);
 
-	/*
-	 * Find suitable number of IO TLB entries size that will fit this
-	 * request and allocate a buffer from that IO TLB pool.
-	 */
-	spin_lock_irqsave(&io_tlb_lock, flags);
-	{
-		wrap = index = ALIGN(io_tlb_index, stride);
-
-		if (index >= io_tlb_nslabs)
-			wrap = index = 0;
-
-		do {
-			/*
-			 * If we find a slot that indicates we have 'nslots'
-			 * number of contiguous buffers, we allocate the
-			 * buffers from that slot and mark the entries as '0'
-			 * indicating unavailable.
-			 */
-			if (io_tlb_list[index] >= nslots) {
-				int count = 0;
-
-				for (i = index; i < (int) (index + nslots); i++)
-					io_tlb_list[i] = 0;
-				for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
-					io_tlb_list[i] = ++count;
-				dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);
-
-				/*
-				 * Update the indices to avoid searching in
-				 * the next round.
-				 */
-				io_tlb_index = ((index + nslots) < io_tlb_nslabs
-						? (index + nslots) : 0);
-
-				goto found;
-			}
-			index += stride;
-			if (index >= io_tlb_nslabs)
-				index = 0;
-		} while (index != wrap);
-
-		spin_unlock_irqrestore(&io_tlb_lock, flags);
+	iova = alloc_iova(&swiotlb_iovad, nslots, io_tlb_nslabs - 1, 1);
+	if (!iova)
 		return NULL;
-	}
-  found:
-	spin_unlock_irqrestore(&io_tlb_lock, flags);
 
+	index = iova->pfn_lo;
+	dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);
 	/*
 	 * Save away the mapping from the original address to the DMA address.
 	 * This is needed when we sync the memory.  Then we sync the buffer if
@@ -371,8 +332,6 @@ map_single(struct device *hwdev, char *buffer, size_t size, int dir)
 static void
 unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
 {
-	unsigned long flags;
-	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
 	int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
 	char *buffer = io_tlb_orig_addr[index];
 
@@ -386,30 +345,7 @@ unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
 		 */
 		memcpy(buffer, dma_addr, size);
 
-	/*
-	 * Return the buffer to the free list by setting the corresponding
-	 * entries to indicate the number of contigous entries available.
-	 * While returning the entries to the free list, we merge the entries
-	 * with slots below and above the pool being returned.
-	 */
-	spin_lock_irqsave(&io_tlb_lock, flags);
-	{
-		count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
-			 io_tlb_list[index + nslots] : 0);
-		/*
-		 * Step 1: return the slots to the free list, merging the
-		 * slots with superceeding slots
-		 */
-		for (i = index + nslots - 1; i >= index; i--)
-			io_tlb_list[i] = ++count;
-		/*
-		 * Step 2: merge the returned slots with the preceding slots,
-		 * if available (non zero)
-		 */
-		for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
-			io_tlb_list[i] = ++count;
-	}
-	spin_unlock_irqrestore(&io_tlb_lock, flags);
+	free_iova(&swiotlb_iovad, index);
 }
 
 static void
-- 
1.5.3.4


