public inbox for linux-arch@vger.kernel.org
 help / color / mirror / Atom feed
* [RFC] Generic ioremap_page_range
       [not found] ` <1152892123.24925.67.camel@localhost.localdomain>
@ 2006-08-04  7:47   ` Haavard Skinnemoen
  2006-08-04 17:18     ` Dave Hansen
  2006-08-04 23:40     ` Paul Mackerras
  0 siblings, 2 replies; 7+ messages in thread
From: Haavard Skinnemoen @ 2006-08-04  7:47 UTC (permalink / raw)
  To: Dave Hansen; +Cc: linux-arch

On Fri, 14 Jul 2006 08:48:43 -0700
Dave Hansen <haveblue@us.ibm.com> wrote:

> It would also be nice to see a _couple_ of patches that perhaps
> abstract a thing or two into generic code.  I know that new
> architectures usually begin with a 'cp -r', but it would be nice to
> see a wee bit of code refactoring as a small price of admission.
> Some of the ioremap and other pagetable code looked pretty generic to
> me.

Ok, here's a first try.

This patch moves the i386 implementation of ioremap_page_range() into
lib/ioremap.c for use by other architectures.

There's one difference between the generic ioremap_page_range and the
i386 version: it takes a pgprot_t argument instead of unsigned long
flags, meaning that the arch-specific ioremap() implementation must
set all pte flags before calling ioremap_page_range() instead of
in the lowest-level page remapping function.

If you think this looks like a good idea, I'll split out the i386
modifications in a separate patch and submit patches for several other
architectures as well.

To get the review started, here are a couple of questions:
  * Wouldn't it make more sense to call flush_cache_vmap() instead of
    flush_cache_all()?
  * Why do we need to call flush_tlb_all()? I thought you only needed
    to do that when changing/removing existing mappings...

Haavard

---
 arch/i386/mm/ioremap.c |   88 ++++--------------------------------------------
 include/linux/io.h     |    3 ++
 lib/Makefile           |    2 +
 lib/ioremap.c          |   87 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 99 insertions(+), 81 deletions(-)

diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c
index 247fde7..df8427d 100644
--- a/arch/i386/mm/ioremap.c
+++ b/arch/i386/mm/ioremap.c
@@ -12,7 +12,7 @@ #include <linux/vmalloc.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include <asm/io.h>
+#include <linux/io.h>
 #include <asm/fixmap.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
@@ -21,82 +21,6 @@ #include <asm/pgtable.h>
 #define ISA_START_ADDRESS	0xa0000
 #define ISA_END_ADDRESS		0x100000
 
-static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
-		unsigned long end, unsigned long phys_addr, unsigned long flags)
-{
-	pte_t *pte;
-	unsigned long pfn;
-
-	pfn = phys_addr >> PAGE_SHIFT;
-	pte = pte_alloc_kernel(pmd, addr);
-	if (!pte)
-		return -ENOMEM;
-	do {
-		BUG_ON(!pte_none(*pte));
-		set_pte(pte, pfn_pte(pfn, __pgprot(_PAGE_PRESENT | _PAGE_RW | 
-					_PAGE_DIRTY | _PAGE_ACCESSED | flags)));
-		pfn++;
-	} while (pte++, addr += PAGE_SIZE, addr != end);
-	return 0;
-}
-
-static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
-		unsigned long end, unsigned long phys_addr, unsigned long flags)
-{
-	pmd_t *pmd;
-	unsigned long next;
-
-	phys_addr -= addr;
-	pmd = pmd_alloc(&init_mm, pud, addr);
-	if (!pmd)
-		return -ENOMEM;
-	do {
-		next = pmd_addr_end(addr, end);
-		if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, flags))
-			return -ENOMEM;
-	} while (pmd++, addr = next, addr != end);
-	return 0;
-}
-
-static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
-		unsigned long end, unsigned long phys_addr, unsigned long flags)
-{
-	pud_t *pud;
-	unsigned long next;
-
-	phys_addr -= addr;
-	pud = pud_alloc(&init_mm, pgd, addr);
-	if (!pud)
-		return -ENOMEM;
-	do {
-		next = pud_addr_end(addr, end);
-		if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, flags))
-			return -ENOMEM;
-	} while (pud++, addr = next, addr != end);
-	return 0;
-}
-
-static int ioremap_page_range(unsigned long addr,
-		unsigned long end, unsigned long phys_addr, unsigned long flags)
-{
-	pgd_t *pgd;
-	unsigned long next;
-	int err;
-
-	BUG_ON(addr >= end);
-	flush_cache_all();
-	phys_addr -= addr;
-	pgd = pgd_offset_k(addr);
-	do {
-		next = pgd_addr_end(addr, end);
-		err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, flags);
-		if (err)
-			break;
-	} while (pgd++, addr = next, addr != end);
-	flush_tlb_all();
-	return err;
-}
-
 /*
  * Generic mapping function (not visible outside):
  */
@@ -112,9 +36,10 @@ static int ioremap_page_range(unsigned l
  */
 void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
 {
-	void __iomem * addr;
-	struct vm_struct * area;
+	void __iomem *addr;
+	struct vm_struct *area;
 	unsigned long offset, last_addr;
+	pgprot_t prot;
 
 	/* Don't allow wraparound or zero size */
 	last_addr = phys_addr + size - 1;
@@ -142,6 +67,9 @@ void __iomem * __ioremap(unsigned long p
 				return NULL;
 	}
 
+	prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY
+			| _PAGE_ACCESSED | flags);
+
 	/*
 	 * Mappings have to be page-aligned
 	 */
@@ -158,7 +86,7 @@ void __iomem * __ioremap(unsigned long p
 	area->phys_addr = phys_addr;
 	addr = (void __iomem *) area->addr;
 	if (ioremap_page_range((unsigned long) addr,
-			(unsigned long) addr + size, phys_addr, flags)) {
+			(unsigned long) addr + size, phys_addr, prot)) {
 		vunmap((void __force *) addr);
 		return NULL;
 	}
diff --git a/include/linux/io.h b/include/linux/io.h
index 420e2fd..d9d45be 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -23,4 +23,7 @@ #include <asm/io.h>
 void __iowrite32_copy(void __iomem *to, const void *from, size_t count);
 void __iowrite64_copy(void __iomem *to, const void *from, size_t count);
 
+int ioremap_page_range(unsigned long addr, unsigned long end,
+		       unsigned long phys_addr, pgprot_t prot);
+
 #endif /* _LINUX_IO_H */
diff --git a/lib/Makefile b/lib/Makefile
index be9719a..a4dcb07 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -5,7 +5,7 @@ #
 lib-y := errno.o ctype.o string.o vsprintf.o cmdline.o \
 	 bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \
 	 idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \
-	 sha1.o
+	 sha1.o ioremap.o
 
 lib-$(CONFIG_SMP) += cpumask.o
 
diff --git a/lib/ioremap.c b/lib/ioremap.c
new file mode 100644
index 0000000..d3e1bfd
--- /dev/null
+++ b/lib/ioremap.c
@@ -0,0 +1,87 @@
+/*
+ * Re-map IO memory to kernel address space so that we can access it.
+ * This is needed for high PCI addresses that aren't mapped in the
+ * 640k-1MB IO memory area on PC's
+ *
+ * (C) Copyright 1995 1996 Linus Torvalds
+ */
+#include <linux/io.h>
+#include <linux/vmalloc.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/pgtable.h>
+
+static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
+		unsigned long end, unsigned long phys_addr, pgprot_t prot)
+{
+	pte_t *pte;
+	unsigned long pfn;
+
+	pfn = phys_addr >> PAGE_SHIFT;
+	pte = pte_alloc_kernel(pmd, addr);
+	if (!pte)
+		return -ENOMEM;
+	do {
+		BUG_ON(!pte_none(*pte));
+		set_pte(pte, pfn_pte(pfn, prot));
+		pfn++;
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+	return 0;
+}
+
+static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
+		unsigned long end, unsigned long phys_addr, pgprot_t prot)
+{
+	pmd_t *pmd;
+	unsigned long next;
+
+	phys_addr -= addr;
+	pmd = pmd_alloc(&init_mm, pud, addr);
+	if (!pmd)
+		return -ENOMEM;
+	do {
+		next = pmd_addr_end(addr, end);
+		if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot))
+			return -ENOMEM;
+	} while (pmd++, addr = next, addr != end);
+	return 0;
+}
+
+static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
+		unsigned long end, unsigned long phys_addr, pgprot_t prot)
+{
+	pud_t *pud;
+	unsigned long next;
+
+	phys_addr -= addr;
+	pud = pud_alloc(&init_mm, pgd, addr);
+	if (!pud)
+		return -ENOMEM;
+	do {
+		next = pud_addr_end(addr, end);
+		if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot))
+			return -ENOMEM;
+	} while (pud++, addr = next, addr != end);
+	return 0;
+}
+
+int ioremap_page_range(unsigned long addr,
+		       unsigned long end, unsigned long phys_addr, pgprot_t prot)
+{
+	pgd_t *pgd;
+	unsigned long next;
+	int err;
+
+	BUG_ON(addr >= end);
+	flush_cache_all();
+	phys_addr -= addr;
+	pgd = pgd_offset_k(addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, prot);
+		if (err)
+			break;
+	} while (pgd++, addr = next, addr != end);
+	flush_tlb_all();
+	return err;
+}
-- 
1.4.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [RFC] Generic ioremap_page_range
  2006-08-04  7:47   ` [RFC] Generic ioremap_page_range Haavard Skinnemoen
@ 2006-08-04 17:18     ` Dave Hansen
  2006-08-04 18:44       ` Håvard Skinnemoen
  2006-08-04 23:40     ` Paul Mackerras
  1 sibling, 1 reply; 7+ messages in thread
From: Dave Hansen @ 2006-08-04 17:18 UTC (permalink / raw)
  To: Haavard Skinnemoen; +Cc: linux-arch

On Fri, 2006-08-04 at 09:47 +0200, Haavard Skinnemoen wrote:
> On Fri, 14 Jul 2006 08:48:43 -0700
> Dave Hansen <haveblue@us.ibm.com> wrote:
> > It would also be nice to see a _couple_ of patches that perhaps
> > abstract a thing or two into generic code.  I know that new
> > architectures usually begin with a 'cp -r', but it would be nice to
> > see a wee bit of code refactoring as a small price of admission.
> > Some of the ioremap and other pagetable code looked pretty generic to
> > me.
> 
> Ok, here's a first try.
> 
> This patch moves the i386 implementation of ioremap_page_range() into
> lib/ioremap.c for use by other architectures.

Wow.  Very nice.

> There's one difference between the generic ioremap_page_range and the
> i386 version: it takes a pgprot_t argument instead of unsigned long
> flags, meaning that the arch-specific ioremap() implementation must
> set all pte flags before calling ioremap_page_range() instead of
> in the lowest-level page remapping function.

It looks like they were pretty static before, anyway.  I guess, in the
worst case, you could make a weak symbol in lib/ioremap.c that does
arch_ioremap_pgprot().  If an architecture needs to do something
special, they could override it.

But, unless this is causing real problems, I don't see any serious
reason to do it.  It can wail until we actually run into a user that
needs it.

> If you think this looks like a good idea, I'll split out the i386
> modifications in a separate patch and submit patches for several other
> architectures as well.
> 
> To get the review started, here are a couple of questions:
>   * Wouldn't it make more sense to call flush_cache_vmap() instead of
>     flush_cache_all()?

Yup, probably.  The ioremap code probably predates the existence of 
flush_cache_vmap().

>   * Why do we need to call flush_tlb_all()? I thought you only needed
>     to do that when changing/removing existing mappings...

I must not be understanding the flush_cache*() functions correctly
because the vmalloc() code does its flush_cache_vmap() _after_ the
vmalloc area is set up.

In any case, vmalloc() apparently does something very close to what you
need, and it does what you suggest: use flush_cache_vmap(), intends to
only work on pte_none() ptes, and doesn't call a tlb flush function
afterwords.  Unless there is something to differentiate ioremap's
functionality (say, the random pte flags you can set with ioremap) I
can't think of why ioremap is different.

BTW, does this new generic ioremap code work on _your_ architecture? ;)
Have you done a quick survey to see how many other architectures can use
it?

-- Dave


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC] Generic ioremap_page_range
  2006-08-04 17:18     ` Dave Hansen
@ 2006-08-04 18:44       ` Håvard Skinnemoen
  2006-08-07  7:06         ` Paul Mackerras
  0 siblings, 1 reply; 7+ messages in thread
From: Håvard Skinnemoen @ 2006-08-04 18:44 UTC (permalink / raw)
  To: Dave Hansen; +Cc: Haavard Skinnemoen, linux-arch

On 8/4/06, Dave Hansen <haveblue@us.ibm.com> wrote:
> On Fri, 2006-08-04 at 09:47 +0200, Haavard Skinnemoen wrote:
> > There's one difference between the generic ioremap_page_range and the
> > i386 version: it takes a pgprot_t argument instead of unsigned long
> > flags, meaning that the arch-specific ioremap() implementation must
> > set all pte flags before calling ioremap_page_range() instead of
> > in the lowest-level page remapping function.
>
> It looks like they were pretty static before, anyway.  I guess, in the
> worst case, you could make a weak symbol in lib/ioremap.c that does
> arch_ioremap_pgprot().  If an architecture needs to do something
> special, they could override it.

So far, I've only seen static flags. And I really can't think of a
reason to set the flags dynamically...

> But, unless this is causing real problems, I don't see any serious
> reason to do it.  It can wail until we actually run into a user that
> needs it.

If anyone does something clever with the pgprot flags, I'm pretty sure
I'll notice it when converting the architecture. Or if I don't, the
arch maintainer surely will...?

> In any case, vmalloc() apparently does something very close to what you
> need, and it does what you suggest: use flush_cache_vmap(), intends to
> only work on pte_none() ptes, and doesn't call a tlb flush function
> afterwords.  Unless there is something to differentiate ioremap's
> functionality (say, the random pte flags you can set with ioremap) I
> can't think of why ioremap is different.

In fact, ARM does it exactly this way. I think I'll try changing it
and see if any arch maintainers complain.

> BTW, does this new generic ioremap code work on _your_ architecture? ;)
> Have you done a quick survey to see how many other architectures can use
> it?

Yes, it works on AVR32. In fact, the old code was broken, and simply
copying the i386 code fixed it. ioremap() on AVR32 is usually a no-op,
so I had to short-circuit some optimizations in order to test it...

From a quick glance, it looks like the generic version should work on
alpha, arm, avr32, cris, i386, m32r, mips, parisc, s390, sh, sh64 and
x86_64. The rest are either no-ops or something weird (powerpc, m68k,
sparc).

Haavard

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC] Generic ioremap_page_range
  2006-08-04  7:47   ` [RFC] Generic ioremap_page_range Haavard Skinnemoen
  2006-08-04 17:18     ` Dave Hansen
@ 2006-08-04 23:40     ` Paul Mackerras
  2006-08-05  0:13       ` David Miller
  1 sibling, 1 reply; 7+ messages in thread
From: Paul Mackerras @ 2006-08-04 23:40 UTC (permalink / raw)
  To: Haavard Skinnemoen; +Cc: Dave Hansen, linux-arch

Haavard Skinnemoen writes:

> This patch moves the i386 implementation of ioremap_page_range() into
> lib/ioremap.c for use by other architectures.

[snip]

> +static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
> +		unsigned long end, unsigned long phys_addr, pgprot_t prot)
> +{
> +	pte_t *pte;
> +	unsigned long pfn;
> +
> +	pfn = phys_addr >> PAGE_SHIFT;
> +	pte = pte_alloc_kernel(pmd, addr);
> +	if (!pte)
> +		return -ENOMEM;
> +	do {
> +		BUG_ON(!pte_none(*pte));
> +		set_pte(pte, pfn_pte(pfn, prot));

This would need to be set_pte_at(...) for this to be useful on
PowerPC.

Paul.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC] Generic ioremap_page_range
  2006-08-04 23:40     ` Paul Mackerras
@ 2006-08-05  0:13       ` David Miller
  0 siblings, 0 replies; 7+ messages in thread
From: David Miller @ 2006-08-05  0:13 UTC (permalink / raw)
  To: paulus; +Cc: hskinnemoen, haveblue, linux-arch

From: Paul Mackerras <paulus@samba.org>
Date: Sat, 5 Aug 2006 09:40:46 +1000

> This would need to be set_pte_at(...) for this to be useful on
> PowerPC.

I'm surprised set_pte() still exists, we should kill it off
so nobody can add new uses even accidently.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC] Generic ioremap_page_range
  2006-08-04 18:44       ` Håvard Skinnemoen
@ 2006-08-07  7:06         ` Paul Mackerras
  2006-08-07  7:21           ` David Miller
  0 siblings, 1 reply; 7+ messages in thread
From: Paul Mackerras @ 2006-08-07  7:06 UTC (permalink / raw)
  To: Håvard Skinnemoen; +Cc: Dave Hansen, Haavard Skinnemoen, linux-arch

Håvard Skinnemoen writes:

> So far, I've only seen static flags. And I really can't think of a
> reason to set the flags dynamically...

The reason would be to select what sort of caching is allowed for the
mapping, and whether write-combining is allowed, on architectures
(such as PowerPC) where those choices are made by bits in the PTEs.

Paul.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC] Generic ioremap_page_range
  2006-08-07  7:06         ` Paul Mackerras
@ 2006-08-07  7:21           ` David Miller
  0 siblings, 0 replies; 7+ messages in thread
From: David Miller @ 2006-08-07  7:21 UTC (permalink / raw)
  To: paulus; +Cc: hskinnemoen, haveblue, hskinnemoen, linux-arch

From: Paul Mackerras <paulus@samba.org>
Date: Mon, 7 Aug 2006 17:06:03 +1000

> Håvard Skinnemoen writes:
> 
> > So far, I've only seen static flags. And I really can't think of a
> > reason to set the flags dynamically...
> 
> The reason would be to select what sort of caching is allowed for the
> mapping, and whether write-combining is allowed, on architectures
> (such as PowerPC) where those choices are made by bits in the PTEs.

Things work similarly on sparc64.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2006-08-07  7:21 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20060713224800.6cbdbf5d.akpm@osdl.org>
     [not found] ` <1152892123.24925.67.camel@localhost.localdomain>
2006-08-04  7:47   ` [RFC] Generic ioremap_page_range Haavard Skinnemoen
2006-08-04 17:18     ` Dave Hansen
2006-08-04 18:44       ` Håvard Skinnemoen
2006-08-07  7:06         ` Paul Mackerras
2006-08-07  7:21           ` David Miller
2006-08-04 23:40     ` Paul Mackerras
2006-08-05  0:13       ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox