* [RFC] Generic ioremap_page_range [not found] ` <1152892123.24925.67.camel@localhost.localdomain> @ 2006-08-04 7:47 ` Haavard Skinnemoen 2006-08-04 17:18 ` Dave Hansen 2006-08-04 23:40 ` Paul Mackerras 0 siblings, 2 replies; 7+ messages in thread From: Haavard Skinnemoen @ 2006-08-04 7:47 UTC (permalink / raw) To: Dave Hansen; +Cc: linux-arch On Fri, 14 Jul 2006 08:48:43 -0700 Dave Hansen <haveblue@us.ibm.com> wrote: > It would also be nice to see a _couple_ of patches that perhaps > abstract a thing or two into generic code. I know that new > architectures usually begin with a 'cp -r', but it would be nice to > see a wee bit of code refactoring as a small price of admission. > Some of the ioremap and other pagetable code looked pretty generic to > me. Ok, here's a first try. This patch moves the i386 implementation of ioremap_page_range() into lib/ioremap.c for use by other architectures. There's one difference between the generic ioremap_page_range and the i386 version: it takes a pgprot_t argument instead of unsigned long flags, meaning that the arch-specific ioremap() implementation must set all pte flags before calling ioremap_page_range() instead of in the lowest-level page remapping function. If you think this looks like a good idea, I'll split out the i386 modifications in a separate patch and submit patches for several other architectures as well. To get the review started, here are a couple of questions: * Wouldn't it make more sense to call flush_cache_vmap() instead of flush_cache_all()? * Why do we need to call flush_tlb_all()? I thought you only needed to do that when changing/removing existing mappings... Haavard --- arch/i386/mm/ioremap.c | 88 ++++-------------------------------------------- include/linux/io.h | 3 ++ lib/Makefile | 2 + lib/ioremap.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 99 insertions(+), 81 deletions(-) diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c index 247fde7..df8427d 100644 --- a/arch/i386/mm/ioremap.c +++ b/arch/i386/mm/ioremap.c @@ -12,7 +12,7 @@ #include <linux/vmalloc.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/module.h> -#include <asm/io.h> +#include <linux/io.h> #include <asm/fixmap.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> @@ -21,82 +21,6 @@ #include <asm/pgtable.h> #define ISA_START_ADDRESS 0xa0000 #define ISA_END_ADDRESS 0x100000 -static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, - unsigned long end, unsigned long phys_addr, unsigned long flags) -{ - pte_t *pte; - unsigned long pfn; - - pfn = phys_addr >> PAGE_SHIFT; - pte = pte_alloc_kernel(pmd, addr); - if (!pte) - return -ENOMEM; - do { - BUG_ON(!pte_none(*pte)); - set_pte(pte, pfn_pte(pfn, __pgprot(_PAGE_PRESENT | _PAGE_RW | - _PAGE_DIRTY | _PAGE_ACCESSED | flags))); - pfn++; - } while (pte++, addr += PAGE_SIZE, addr != end); - return 0; -} - -static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, - unsigned long end, unsigned long phys_addr, unsigned long flags) -{ - pmd_t *pmd; - unsigned long next; - - phys_addr -= addr; - pmd = pmd_alloc(&init_mm, pud, addr); - if (!pmd) - return -ENOMEM; - do { - next = pmd_addr_end(addr, end); - if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, flags)) - return -ENOMEM; - } while (pmd++, addr = next, addr != end); - return 0; -} - -static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, - unsigned long end, unsigned long phys_addr, unsigned long flags) -{ - pud_t *pud; - unsigned long next; - - phys_addr -= addr; - pud = pud_alloc(&init_mm, pgd, addr); - if (!pud) - return -ENOMEM; - do { - next = pud_addr_end(addr, end); - if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, flags)) - return -ENOMEM; - } while (pud++, addr = next, addr != end); - return 0; -} - -static int ioremap_page_range(unsigned long addr, - unsigned long end, unsigned long phys_addr, unsigned long flags) -{ - pgd_t *pgd; - unsigned long next; - int err; - - BUG_ON(addr >= end); - flush_cache_all(); - phys_addr -= addr; - pgd = pgd_offset_k(addr); - do { - next = pgd_addr_end(addr, end); - err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, flags); - if (err) - break; - } while (pgd++, addr = next, addr != end); - flush_tlb_all(); - return err; -} - /* * Generic mapping function (not visible outside): */ @@ -112,9 +36,10 @@ static int ioremap_page_range(unsigned l */ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags) { - void __iomem * addr; - struct vm_struct * area; + void __iomem *addr; + struct vm_struct *area; unsigned long offset, last_addr; + pgprot_t prot; /* Don't allow wraparound or zero size */ last_addr = phys_addr + size - 1; @@ -142,6 +67,9 @@ void __iomem * __ioremap(unsigned long p return NULL; } + prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY + | _PAGE_ACCESSED | flags); + /* * Mappings have to be page-aligned */ @@ -158,7 +86,7 @@ void __iomem * __ioremap(unsigned long p area->phys_addr = phys_addr; addr = (void __iomem *) area->addr; if (ioremap_page_range((unsigned long) addr, - (unsigned long) addr + size, phys_addr, flags)) { + (unsigned long) addr + size, phys_addr, prot)) { vunmap((void __force *) addr); return NULL; } diff --git a/include/linux/io.h b/include/linux/io.h index 420e2fd..d9d45be 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -23,4 +23,7 @@ #include <asm/io.h> void __iowrite32_copy(void __iomem *to, const void *from, size_t count); void __iowrite64_copy(void __iomem *to, const void *from, size_t count); +int ioremap_page_range(unsigned long addr, unsigned long end, + unsigned long phys_addr, pgprot_t prot); + #endif /* _LINUX_IO_H */ diff --git a/lib/Makefile b/lib/Makefile index be9719a..a4dcb07 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -5,7 +5,7 @@ # lib-y := errno.o ctype.o string.o vsprintf.o cmdline.o \ bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \ idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \ - sha1.o + sha1.o ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/lib/ioremap.c b/lib/ioremap.c new file mode 100644 index 0000000..d3e1bfd --- /dev/null +++ b/lib/ioremap.c @@ -0,0 +1,87 @@ +/* + * Re-map IO memory to kernel address space so that we can access it. + * This is needed for high PCI addresses that aren't mapped in the + * 640k-1MB IO memory area on PC's + * + * (C) Copyright 1995 1996 Linus Torvalds + */ +#include <linux/io.h> +#include <linux/vmalloc.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> +#include <asm/pgtable.h> + +static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, + unsigned long end, unsigned long phys_addr, pgprot_t prot) +{ + pte_t *pte; + unsigned long pfn; + + pfn = phys_addr >> PAGE_SHIFT; + pte = pte_alloc_kernel(pmd, addr); + if (!pte) + return -ENOMEM; + do { + BUG_ON(!pte_none(*pte)); + set_pte(pte, pfn_pte(pfn, prot)); + pfn++; + } while (pte++, addr += PAGE_SIZE, addr != end); + return 0; +} + +static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, + unsigned long end, unsigned long phys_addr, pgprot_t prot) +{ + pmd_t *pmd; + unsigned long next; + + phys_addr -= addr; + pmd = pmd_alloc(&init_mm, pud, addr); + if (!pmd) + return -ENOMEM; + do { + next = pmd_addr_end(addr, end); + if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot)) + return -ENOMEM; + } while (pmd++, addr = next, addr != end); + return 0; +} + +static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, + unsigned long end, unsigned long phys_addr, pgprot_t prot) +{ + pud_t *pud; + unsigned long next; + + phys_addr -= addr; + pud = pud_alloc(&init_mm, pgd, addr); + if (!pud) + return -ENOMEM; + do { + next = pud_addr_end(addr, end); + if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot)) + return -ENOMEM; + } while (pud++, addr = next, addr != end); + return 0; +} + +int ioremap_page_range(unsigned long addr, + unsigned long end, unsigned long phys_addr, pgprot_t prot) +{ + pgd_t *pgd; + unsigned long next; + int err; + + BUG_ON(addr >= end); + flush_cache_all(); + phys_addr -= addr; + pgd = pgd_offset_k(addr); + do { + next = pgd_addr_end(addr, end); + err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, prot); + if (err) + break; + } while (pgd++, addr = next, addr != end); + flush_tlb_all(); + return err; +} -- 1.4.0 ^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [RFC] Generic ioremap_page_range 2006-08-04 7:47 ` [RFC] Generic ioremap_page_range Haavard Skinnemoen @ 2006-08-04 17:18 ` Dave Hansen 2006-08-04 18:44 ` Håvard Skinnemoen 2006-08-04 23:40 ` Paul Mackerras 1 sibling, 1 reply; 7+ messages in thread From: Dave Hansen @ 2006-08-04 17:18 UTC (permalink / raw) To: Haavard Skinnemoen; +Cc: linux-arch On Fri, 2006-08-04 at 09:47 +0200, Haavard Skinnemoen wrote: > On Fri, 14 Jul 2006 08:48:43 -0700 > Dave Hansen <haveblue@us.ibm.com> wrote: > > It would also be nice to see a _couple_ of patches that perhaps > > abstract a thing or two into generic code. I know that new > > architectures usually begin with a 'cp -r', but it would be nice to > > see a wee bit of code refactoring as a small price of admission. > > Some of the ioremap and other pagetable code looked pretty generic to > > me. > > Ok, here's a first try. > > This patch moves the i386 implementation of ioremap_page_range() into > lib/ioremap.c for use by other architectures. Wow. Very nice. > There's one difference between the generic ioremap_page_range and the > i386 version: it takes a pgprot_t argument instead of unsigned long > flags, meaning that the arch-specific ioremap() implementation must > set all pte flags before calling ioremap_page_range() instead of > in the lowest-level page remapping function. It looks like they were pretty static before, anyway. I guess, in the worst case, you could make a weak symbol in lib/ioremap.c that does arch_ioremap_pgprot(). If an architecture needs to do something special, they could override it. But, unless this is causing real problems, I don't see any serious reason to do it. It can wail until we actually run into a user that needs it. > If you think this looks like a good idea, I'll split out the i386 > modifications in a separate patch and submit patches for several other > architectures as well. > > To get the review started, here are a couple of questions: > * Wouldn't it make more sense to call flush_cache_vmap() instead of > flush_cache_all()? Yup, probably. The ioremap code probably predates the existence of flush_cache_vmap(). > * Why do we need to call flush_tlb_all()? I thought you only needed > to do that when changing/removing existing mappings... I must not be understanding the flush_cache*() functions correctly because the vmalloc() code does its flush_cache_vmap() _after_ the vmalloc area is set up. In any case, vmalloc() apparently does something very close to what you need, and it does what you suggest: use flush_cache_vmap(), intends to only work on pte_none() ptes, and doesn't call a tlb flush function afterwords. Unless there is something to differentiate ioremap's functionality (say, the random pte flags you can set with ioremap) I can't think of why ioremap is different. BTW, does this new generic ioremap code work on _your_ architecture? ;) Have you done a quick survey to see how many other architectures can use it? -- Dave ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC] Generic ioremap_page_range 2006-08-04 17:18 ` Dave Hansen @ 2006-08-04 18:44 ` Håvard Skinnemoen 2006-08-07 7:06 ` Paul Mackerras 0 siblings, 1 reply; 7+ messages in thread From: Håvard Skinnemoen @ 2006-08-04 18:44 UTC (permalink / raw) To: Dave Hansen; +Cc: Haavard Skinnemoen, linux-arch On 8/4/06, Dave Hansen <haveblue@us.ibm.com> wrote: > On Fri, 2006-08-04 at 09:47 +0200, Haavard Skinnemoen wrote: > > There's one difference between the generic ioremap_page_range and the > > i386 version: it takes a pgprot_t argument instead of unsigned long > > flags, meaning that the arch-specific ioremap() implementation must > > set all pte flags before calling ioremap_page_range() instead of > > in the lowest-level page remapping function. > > It looks like they were pretty static before, anyway. I guess, in the > worst case, you could make a weak symbol in lib/ioremap.c that does > arch_ioremap_pgprot(). If an architecture needs to do something > special, they could override it. So far, I've only seen static flags. And I really can't think of a reason to set the flags dynamically... > But, unless this is causing real problems, I don't see any serious > reason to do it. It can wail until we actually run into a user that > needs it. If anyone does something clever with the pgprot flags, I'm pretty sure I'll notice it when converting the architecture. Or if I don't, the arch maintainer surely will...? > In any case, vmalloc() apparently does something very close to what you > need, and it does what you suggest: use flush_cache_vmap(), intends to > only work on pte_none() ptes, and doesn't call a tlb flush function > afterwords. Unless there is something to differentiate ioremap's > functionality (say, the random pte flags you can set with ioremap) I > can't think of why ioremap is different. In fact, ARM does it exactly this way. I think I'll try changing it and see if any arch maintainers complain. > BTW, does this new generic ioremap code work on _your_ architecture? ;) > Have you done a quick survey to see how many other architectures can use > it? Yes, it works on AVR32. In fact, the old code was broken, and simply copying the i386 code fixed it. ioremap() on AVR32 is usually a no-op, so I had to short-circuit some optimizations in order to test it... From a quick glance, it looks like the generic version should work on alpha, arm, avr32, cris, i386, m32r, mips, parisc, s390, sh, sh64 and x86_64. The rest are either no-ops or something weird (powerpc, m68k, sparc). Haavard ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC] Generic ioremap_page_range 2006-08-04 18:44 ` Håvard Skinnemoen @ 2006-08-07 7:06 ` Paul Mackerras 2006-08-07 7:21 ` David Miller 0 siblings, 1 reply; 7+ messages in thread From: Paul Mackerras @ 2006-08-07 7:06 UTC (permalink / raw) To: Håvard Skinnemoen; +Cc: Dave Hansen, Haavard Skinnemoen, linux-arch Håvard Skinnemoen writes: > So far, I've only seen static flags. And I really can't think of a > reason to set the flags dynamically... The reason would be to select what sort of caching is allowed for the mapping, and whether write-combining is allowed, on architectures (such as PowerPC) where those choices are made by bits in the PTEs. Paul. ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC] Generic ioremap_page_range 2006-08-07 7:06 ` Paul Mackerras @ 2006-08-07 7:21 ` David Miller 0 siblings, 0 replies; 7+ messages in thread From: David Miller @ 2006-08-07 7:21 UTC (permalink / raw) To: paulus; +Cc: hskinnemoen, haveblue, hskinnemoen, linux-arch From: Paul Mackerras <paulus@samba.org> Date: Mon, 7 Aug 2006 17:06:03 +1000 > Håvard Skinnemoen writes: > > > So far, I've only seen static flags. And I really can't think of a > > reason to set the flags dynamically... > > The reason would be to select what sort of caching is allowed for the > mapping, and whether write-combining is allowed, on architectures > (such as PowerPC) where those choices are made by bits in the PTEs. Things work similarly on sparc64. ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC] Generic ioremap_page_range 2006-08-04 7:47 ` [RFC] Generic ioremap_page_range Haavard Skinnemoen 2006-08-04 17:18 ` Dave Hansen @ 2006-08-04 23:40 ` Paul Mackerras 2006-08-05 0:13 ` David Miller 1 sibling, 1 reply; 7+ messages in thread From: Paul Mackerras @ 2006-08-04 23:40 UTC (permalink / raw) To: Haavard Skinnemoen; +Cc: Dave Hansen, linux-arch Haavard Skinnemoen writes: > This patch moves the i386 implementation of ioremap_page_range() into > lib/ioremap.c for use by other architectures. [snip] > +static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, > + unsigned long end, unsigned long phys_addr, pgprot_t prot) > +{ > + pte_t *pte; > + unsigned long pfn; > + > + pfn = phys_addr >> PAGE_SHIFT; > + pte = pte_alloc_kernel(pmd, addr); > + if (!pte) > + return -ENOMEM; > + do { > + BUG_ON(!pte_none(*pte)); > + set_pte(pte, pfn_pte(pfn, prot)); This would need to be set_pte_at(...) for this to be useful on PowerPC. Paul. ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC] Generic ioremap_page_range 2006-08-04 23:40 ` Paul Mackerras @ 2006-08-05 0:13 ` David Miller 0 siblings, 0 replies; 7+ messages in thread From: David Miller @ 2006-08-05 0:13 UTC (permalink / raw) To: paulus; +Cc: hskinnemoen, haveblue, linux-arch From: Paul Mackerras <paulus@samba.org> Date: Sat, 5 Aug 2006 09:40:46 +1000 > This would need to be set_pte_at(...) for this to be useful on > PowerPC. I'm surprised set_pte() still exists, we should kill it off so nobody can add new uses even accidently. ^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2006-08-07 7:21 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <20060713224800.6cbdbf5d.akpm@osdl.org>
[not found] ` <1152892123.24925.67.camel@localhost.localdomain>
2006-08-04 7:47 ` [RFC] Generic ioremap_page_range Haavard Skinnemoen
2006-08-04 17:18 ` Dave Hansen
2006-08-04 18:44 ` Håvard Skinnemoen
2006-08-07 7:06 ` Paul Mackerras
2006-08-07 7:21 ` David Miller
2006-08-04 23:40 ` Paul Mackerras
2006-08-05 0:13 ` David Miller
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox