* [PATCH v7 07/11] arm64: mm: Implement 4 levels of translation tables [not found] <CAGoO6fLT6B_vx+Wtm6bKHD6YkjQcM8f=deD7SDP3BR_uC+1c5w@mail.gmail.com> @ 2014-07-29 13:47 ` Jungseok Lee 2014-07-29 14:19 ` Joel Schopp 0 siblings, 1 reply; 10+ messages in thread From: Jungseok Lee @ 2014-07-29 13:47 UTC (permalink / raw) To: linux-arm-kernel On Jul 28 08:40:07 PDT 2014, Joel Schopp wrote: Hi Joel, > On 07/16/2014 02:09 PM, Catalin Marinas wrote: [ ... ] >> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig >> index 4daf11f5b403..24cbe72c0da9 100644 >> --- a/arch/arm64/Kconfig >> +++ b/arch/arm64/Kconfig >> @@ -196,12 +196,18 @@ config ARM64_VA_BITS_42 >> bool "42-bit" >> depends on ARM64_64K_PAGES >> >> +config ARM64_VA_BITS_48 >> + bool "48-bit" >> + depends on !KVM >> + depends on ARM64_4K_PAGES >> + >> endchoice > Shouldn't we be able to support 48 bit VA with 3 level 64K pages? If so > why the dependency on ARM64_4K_PAGES? Have you reviewed [PATCH 13/11] thread? It supports 3 levels with 64KB page. In addition, ARM64_VA_BITS_48 depends on only BROKEN. > More generally it seems like a problem to tie the equate the VA_BITS the > page table could address with the VA_BITS the hardware could address. > Even with 4 level 4K page tables that can address 48 bits the hardware > may only support say 42 bit address space. I leave comments below. >> >> config ARM64_VA_BITS >> int >> default 39 if ARM64_VA_BITS_39 >> default 42 if ARM64_VA_BITS_42 >> + default 48 if ARM64_VA_BITS_48 >> >> config ARM64_2_LEVELS >> def_bool y if ARM64_64K_PAGES && ARM64_VA_BITS_42 >> @@ -209,6 +215,9 @@ config ARM64_2_LEVELS >> config ARM64_3_LEVELS >> def_bool y if ARM64_4K_PAGES && ARM64_VA_BITS_39 >> >> +config ARM64_4_LEVELS >> + def_bool y if ARM64_4K_PAGES && ARM64_VA_BITS_48 >> + > It seems like we should also do ARM64_4K_PAGES and ARM64_VA_BITS_42 as a > valid combination for ARM64_4_LEVELS. At least if we are assuming the > VA_BITS correspond to hardware. 
I don't understand why VA_BITS should correspond to hardware. For example, should ARM64_VA_BITS_36 be supported for 36-bit address spaces SoCs? >> config CPU_BIG_ENDIAN >> bool "Build big-endian kernel" >> help >> diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h >> index 6bf139188792..cf9afa0366b6 100644 >> --- a/arch/arm64/include/asm/page.h >> +++ b/arch/arm64/include/asm/page.h >> @@ -33,19 +33,26 @@ >> >> /* >> * The idmap and swapper page tables need some space reserved in the kernel >> - * image. Both require a pgd and a next level table to (section) map the >> - * kernel. The the swapper also maaps the FDT (see __create_page_tables for >> + * image. Both require pgd, pud (4 levels only) and pmd tables to (section) >> + * map the kernel. The swapper also maps the FDT (see __create_page_tables for >> * more information). >> */ >> +#ifdef CONFIG_ARM64_4_LEVELS >> +#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) >> +#define IDMAP_DIR_SIZE (3 * PAGE_SIZE) >> +#else >> #define SWAPPER_DIR_SIZE (2 * PAGE_SIZE) >> #define IDMAP_DIR_SIZE (2 * PAGE_SIZE) >> +#endif >> >> #ifndef __ASSEMBLY__ >> >> #ifdef CONFIG_ARM64_2_LEVELS >> #include <asm/pgtable-2level-types.h> >> -#else >> +#elif defined(CONFIG_ARM64_3_LEVELS) >> #include <asm/pgtable-3level-types.h> >> +#else >> +#include <asm/pgtable-4level-types.h> >> #endif >> >> extern void __cpu_clear_user_page(void *p, unsigned long user); >> diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h >> index 48298376e46a..8d745fae4c2d 100644 >> --- a/arch/arm64/include/asm/pgalloc.h >> +++ b/arch/arm64/include/asm/pgalloc.h >> @@ -26,6 +26,26 @@ >> >> #define check_pgt_cache() do { } while (0) >> >> +#ifdef CONFIG_ARM64_4_LEVELS >> + >> +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) >> +{ >> + return (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); >> +} >> + >> +static inline void pud_free(struct mm_struct *mm, pud_t *pud) >> +{ >> + 
BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); >> + free_page((unsigned long)pud); >> +} >> + >> +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) >> +{ >> + set_pgd(pgd, __pgd(__pa(pud) | PUD_TYPE_TABLE)); >> +} >> + >> +#endif /* CONFIG_ARM64_4_LEVELS */ >> + >> #ifndef CONFIG_ARM64_2_LEVELS >> >> static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) >> diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h >> index c7c603b489b8..fddcc3efa569 100644 >> --- a/arch/arm64/include/asm/pgtable-hwdef.h >> +++ b/arch/arm64/include/asm/pgtable-hwdef.h >> @@ -18,8 +18,10 @@ >> >> #ifdef CONFIG_ARM64_2_LEVELS >> #include <asm/pgtable-2level-hwdef.h> >> -#else >> +#elif defined(CONFIG_ARM64_3_LEVELS) >> #include <asm/pgtable-3level-hwdef.h> >> +#else >> +#include <asm/pgtable-4level-hwdef.h> >> #endif >> >> /* >> @@ -27,7 +29,7 @@ >> * >> * Level 1 descriptor (PUD). >> */ >> - >> +#define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0) >> #define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1) >> #define PUD_TYPE_MASK (_AT(pgdval_t, 3) << 0) >> #define PUD_TYPE_SECT (_AT(pgdval_t, 1) << 0) >> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h >> index 6d5854972a77..d9b23efdaded 100644 >> --- a/arch/arm64/include/asm/pgtable.h >> +++ b/arch/arm64/include/asm/pgtable.h >> @@ -35,7 +35,11 @@ >> * VMALLOC and SPARSEMEM_VMEMMAP ranges. >> */ >> #define VMALLOC_START (UL(0xffffffffffffffff) << VA_BITS) > Here's a good example of where we run into trouble equating page table > addressable bits with hardware addressable bits. If VA_BITS is 48 due > to 4K 4 level page tables but is running on a 42 bit system this will > end up being out of range. Is your concern that CPU issues 48-bit address to MMU on 42-bit hardware? Have you tested this patch series on your hardware? - Jungseok Lee ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v7 07/11] arm64: mm: Implement 4 levels of translation tables 2014-07-29 13:47 ` [PATCH v7 07/11] arm64: mm: Implement 4 levels of translation tables Jungseok Lee @ 2014-07-29 14:19 ` Joel Schopp 2014-07-30 14:57 ` Jungseok Lee 0 siblings, 1 reply; 10+ messages in thread From: Joel Schopp @ 2014-07-29 14:19 UTC (permalink / raw) To: linux-arm-kernel >> Here's a good example of where we run into trouble equating page table >> addressable bits with hardware addressable bits. If VA_BITS is 48 due >> to 4K 4 level page tables but is running on a 42 bit system this will >> end up being out of range. > Is your concern that CPU issues 48-bit address to MMU on 42-bit hardware? > Have you tested this patch series on your hardware? > > - Jungseok Lee That is my concern. I did test the patch on my hardware with the following results: 64k pages, 2 levels 42 bit VA - worked (no regression) 64k pages, 3 levels 48 bit VA - didn't boot 4k pages, 4 levels 42 bit VA - didn't boot 4k pages, 4 levels 48 bit VA - didn't boot ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v7 07/11] arm64: mm: Implement 4 levels of translation tables 2014-07-29 14:19 ` Joel Schopp @ 2014-07-30 14:57 ` Jungseok Lee 2014-08-14 11:42 ` Ganapatrao Kulkarni 0 siblings, 1 reply; 10+ messages in thread From: Jungseok Lee @ 2014-07-30 14:57 UTC (permalink / raw) To: linux-arm-kernel On Jul 29, 2014, at 11:19 PM, Joel Schopp wrote: > >>> Here's a good example of where we run into trouble equating page table >>> addressable bits with hardware addressable bits. If VA_BITS is 48 due >>> to 4K 4 level page tables but is running on a 42 bit system this will >>> end up being out of range. >> Is your concern that CPU issues 48-bit address to MMU on 42-bit hardware? >> Have you tested this patch series on your hardware? >> >> - Jungseok Lee > > That is my concern. I did test the patch on my hardware with the > following results: > 64k pages, 2 levels 42 bit VA - worked (no regression) > 64k pages, 3 levels 48 bit VA- didn't boot > 4k pages, 4 levels 42 bit VA - didn't boot > 4k pages, 4 levels 48 bit VA - didn't boot Let me break the concern down into two small parts. The first one is a relation between VA and PA. Let me visualize the above description in the following way. I assume that Cortex-A57 is used and connected to bus with 42-bit address line. SoC Boundary |--------------------------------------------- | Cortex-A57 Boundary | | --------------- | | | CPU --> MMU | --> BUS --> Memory Controller --> RAM | ------48------- 42 42 | 42 |--------------------------------------------- In this configuration, there is no problem since 48-bit VA is handled in Cortex-A57 boundary. Cortex-A57 can support up to 48-bit VA and 44-bit PA. The second part is the test result. It's bad actually. So, I've done booting test on for-next/core branch of arm64 linux git, [1], quickly using Model. All combinations, 4KB + 3Level (39-bit VA), 4KB + 4Level (48-bit VA), 64KB + 2Level (42-bit VA) and 64KB + 3Level(48bit VA), boot up successfully. 
I hesitate to say anything since I don't have any real hardware. I think people who have a real platform, such as Juno, can help to figure it out. [1]: git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git I add Will in Cc since [1] looks updated by Will now. - Jungseok Lee ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v7 07/11] arm64: mm: Implement 4 levels of translation tables 2014-07-30 14:57 ` Jungseok Lee @ 2014-08-14 11:42 ` Ganapatrao Kulkarni 2014-08-14 12:58 ` Catalin Marinas 0 siblings, 1 reply; 10+ messages in thread From: Ganapatrao Kulkarni @ 2014-08-14 11:42 UTC (permalink / raw) To: linux-arm-kernel Hi Catalin, IMHO, the Macro MAX_PHYSMEM_BITS needs to be set to 48 in file arch/arm64/include/asm/sparsemem.h with 40 bit set, for RAM address beyond 40 bit, seeing below warning message. WARNING: CPU: 0 PID: 0 at mm/sparse.c:164 mminit_validate_memmodel_limits+0xf8/0x118() thanks Ganapat On Wed, Jul 30, 2014 at 8:27 PM, Jungseok Lee <jungseoklee85@gmail.com> wrote: > On Jul 29, 2014, at 11:19 PM, Joel Schopp wrote: >> >>>> Here's a good example of where we run into trouble equating page table >>>> addressable bits with hardware addressable bits. If VA_BITS is 48 due >>>> to 4K 4 level page tables but is running on a 42 bit system this will >>>> end up being out of range. >>> Is your concern that CPU issues 48-bit address to MMU on 42-bit hardware? >>> Have you tested this patch series on your hardware? >>> >>> - Jungseok Lee >> >> That is my concern. I did test the patch on my hardware with the >> following results: >> 64k pages, 2 levels 42 bit VA - worked (no regression) >> 64k pages, 3 levels 48 bit VA- didn't boot >> 4k pages, 4 levels 42 bit VA - didn't boot >> 4k pages, 4 levels 48 bit VA - didn't boot > > > Let me break the concern down into two small parts. > > The first one is a relation between VA and PA. Let me visualize the above > description in the following way. I assume that Cortex-A57 is used and > connected to bus with 42-bit address line. 
> > SoC Boundary > |--------------------------------------------- > | Cortex-A57 Boundary | > | --------------- | > | | CPU --> MMU | --> BUS --> Memory Controller --> RAM > | ------48------- 42 42 | 42 > |--------------------------------------------- > > In this configuration, there is no problem since 48-bit VA is handled in > Cortex-A57 boundary. Cortex-A57 can support up to 48-bit VA and 44-bit PA. > > The second part is the test result. It's bad actually. So, I've done booting > test on for-next/core branch of arm64 linux git, [1], quickly using Model. All > combinations, 4KB + 3Level (39-bit VA), 4KB + 4Level (48-bit VA), 64KB + 2Level > (42-bit VA) and 64KB + 3Level(48bit VA), boot up successfully. > > I hesitate to say anything since I don't have any real hardware. I think people > who have real platform, such as Juno, can help to figure it out. > > [1]: git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git > > I add Will in Cc since [1] looks updated by Will now. > > - Jungseok Lee > _______________________________________________ > linux-arm-kernel mailing list > linux-arm-kernel at lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v7 07/11] arm64: mm: Implement 4 levels of translation tables 2014-08-14 11:42 ` Ganapatrao Kulkarni @ 2014-08-14 12:58 ` Catalin Marinas 2014-08-14 13:47 ` Ganapatrao Kulkarni 0 siblings, 1 reply; 10+ messages in thread From: Catalin Marinas @ 2014-08-14 12:58 UTC (permalink / raw) To: linux-arm-kernel On Thu, Aug 14, 2014 at 12:42:12PM +0100, Ganapatrao Kulkarni wrote: > IMHO, the Macro MAX_PHYSMEM_BITS needs to be set to 48 in file > arch/arm64/include/asm/sparsemem.h > > with 40 bit set, for RAM address beyond 40 bit, seeeing below warning message. > WARNING: CPU: 0 PID: 0 at mm/sparse.c:164 > mminit_validate_memmodel_limits+0xf8/0x118() I agree. Would you mind sending a patch? Thanks. -- Catalin ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v7 07/11] arm64: mm: Implement 4 levels of translation tables 2014-08-14 12:58 ` Catalin Marinas @ 2014-08-14 13:47 ` Ganapatrao Kulkarni 0 siblings, 0 replies; 10+ messages in thread From: Ganapatrao Kulkarni @ 2014-08-14 13:47 UTC (permalink / raw) To: linux-arm-kernel On Thu, Aug 14, 2014 at 6:28 PM, Catalin Marinas <catalin.marinas@arm.com> wrote: > On Thu, Aug 14, 2014 at 12:42:12PM +0100, Ganapatrao Kulkarni wrote: >> IMHO, the Macro MAX_PHYSMEM_BITS needs to be set to 48 in file >> arch/arm64/include/asm/sparsemem.h >> >> with 40 bit set, for RAM address beyond 40 bit, seeeing below warning message. >> WARNING: CPU: 0 PID: 0 at mm/sparse.c:164 >> mminit_validate_memmodel_limits+0xf8/0x118() > > I agree. Would you mind sending a patch? Sure I will send you the patch. > > Thanks. > > -- > Catalin thanks Ganapat ^ permalink raw reply [flat|nested] 10+ messages in thread
[parent not found: <2211E520-32A4-435C-9F63-A7EFEE8B677A@gmail.com>]
* [PATCH v7 07/11] arm64: mm: Implement 4 levels of translation tables [not found] <2211E520-32A4-435C-9F63-A7EFEE8B677A@gmail.com> @ 2014-07-17 15:04 ` Jungseok Lee 2014-07-17 16:47 ` Catalin Marinas 0 siblings, 1 reply; 10+ messages in thread From: Jungseok Lee @ 2014-07-17 15:04 UTC (permalink / raw) To: linux-arm-kernel On Wed, 16 Jul 2014 20:09:48 +0100, Catalin Marinas wrote: [ ... ] > diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c > index 69000efa015e..fa324bd5a5c4 100644 > --- a/arch/arm64/mm/ioremap.c > +++ b/arch/arm64/mm/ioremap.c > @@ -104,9 +104,12 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) > EXPORT_SYMBOL(ioremap_cache); > > static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; > -#ifndef CONFIG_ARM64_64K_PAGES > +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 In this patch frame, it causes a compile error since [08/11] patch introduces CONFIG_ARM64_PGTABLE_LEVELS. Please ignore my comment if it does not matter. > static pte_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; > #endif > +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 Ditto - Jungseok Lee ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v7 07/11] arm64: mm: Implement 4 levels of translation tables 2014-07-17 15:04 ` Jungseok Lee @ 2014-07-17 16:47 ` Catalin Marinas 0 siblings, 0 replies; 10+ messages in thread From: Catalin Marinas @ 2014-07-17 16:47 UTC (permalink / raw) To: linux-arm-kernel On Thu, Jul 17, 2014 at 04:04:07PM +0100, Jungseok Lee wrote: > On Wed, 16 Jul 2014 20:09:48 +0100, Catalin Marinas wrote: > > [ ... ] > > > diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c > > index 69000efa015e..fa324bd5a5c4 100644 > > --- a/arch/arm64/mm/ioremap.c > > +++ b/arch/arm64/mm/ioremap.c > > @@ -104,9 +104,12 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) > > EXPORT_SYMBOL(ioremap_cache); > > > > static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; > > -#ifndef CONFIG_ARM64_64K_PAGES > > +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 > > In this patch frame, it causes a compile error since [08/11] patch introduces > CONFIG_ARM64_PGTABLE_LEVELS. Please ignore my comment if it does not matter. It matters, thanks. -- Catalin ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v7 00/11] arm64: Support 4 levels of translation tables @ 2014-07-16 19:09 Catalin Marinas 2014-07-16 19:09 ` [PATCH v7 07/11] arm64: mm: Implement " Catalin Marinas 0 siblings, 1 reply; 10+ messages in thread From: Catalin Marinas @ 2014-07-16 19:09 UTC (permalink / raw) To: linux-arm-kernel Hi, I've taken over Jungseok's 4-level page table series here: https://lkml.kernel.org/g/000001cf6dc6$03ded0c0$0b9c7240$@samsung.com I kept the same series numbering, so this is normally version 7. Changes since v6: - Additional fixes for duplicate (SWAPPER|IDMAP)_DIR_SIZE definitions - Removed the fixmap pmd from swapper_pg_dir populated in head.S - Several clean-ups in Jungseok's patches (annotated above my signed-off-by line) - Removal of the pgtable-*level-hwdef.h files - Converting levels config options to int ARM64_PGTABLE_LEVELS Important change: I dropped the KVM support for 4 levels temporarily. I plan to revive them but didn't have for v7. You can also access the branch here: git://git.kernel.org/pub/scm/linux/kernel/git/cmarinas/linux-aarch64 pgtable-4levels The branch is on top of the arm64 for-next/core branch. Thanks. 
Catalin Marinas (6): arm64: Remove duplicate (SWAPPER|IDMAP)_DIR_SIZE definitions arm64: Do not initialise the fixmap page tables in head.S arm64: Convert bool ARM64_x_LEVELS to int ARM64_PGTABLE_LEVELS arm64: Remove asm/pgtable-*level-hwdef.h files arm64: Clean up the initial page table creation in head.S arm64: Determine the vmalloc/vmemmap space at build time based on VA_BITS Jungseok Lee (5): arm64: Use pr_* instead of printk arm64: Introduce VA_BITS and translation level options arm64: Add a description on 48-bit address space with 4KB pages arm64: Add 4 levels of page tables definition with 4KB pages arm64: mm: Implement 4 levels of translation tables Documentation/arm64/memory.txt | 67 ++++++++----------------- arch/arm64/Kconfig | 51 ++++++++++++++++++- arch/arm64/include/asm/memory.h | 6 +-- arch/arm64/include/asm/page.h | 17 +++++-- arch/arm64/include/asm/pgalloc.h | 24 ++++++++- arch/arm64/include/asm/pgtable-2level-hwdef.h | 43 ---------------- arch/arm64/include/asm/pgtable-3level-hwdef.h | 50 ------------------- arch/arm64/include/asm/pgtable-4level-types.h | 71 +++++++++++++++++++++++++++ arch/arm64/include/asm/pgtable-hwdef.h | 42 ++++++++++++++-- arch/arm64/include/asm/pgtable.h | 56 ++++++++++++++++++--- arch/arm64/include/asm/tlb.h | 11 ++++- arch/arm64/kernel/head.S | 52 +++++++++++++------- arch/arm64/kernel/traps.c | 13 +++-- arch/arm64/mm/fault.c | 1 + arch/arm64/mm/init.c | 22 ++++++--- arch/arm64/mm/ioremap.c | 30 ++++++++--- arch/arm64/mm/mmu.c | 14 ++++-- 17 files changed, 363 insertions(+), 207 deletions(-) delete mode 100644 arch/arm64/include/asm/pgtable-2level-hwdef.h delete mode 100644 arch/arm64/include/asm/pgtable-3level-hwdef.h create mode 100644 arch/arm64/include/asm/pgtable-4level-types.h ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v7 07/11] arm64: mm: Implement 4 levels of translation tables 2014-07-16 19:09 [PATCH v7 00/11] arm64: Support " Catalin Marinas @ 2014-07-16 19:09 ` Catalin Marinas 2014-07-28 15:40 ` Joel Schopp 0 siblings, 1 reply; 10+ messages in thread From: Catalin Marinas @ 2014-07-16 19:09 UTC (permalink / raw) To: linux-arm-kernel From: Jungseok Lee <jays.lee@samsung.com> This patch implements 4 levels of translation tables since 3 levels of page tables with 4KB pages cannot support 40-bit physical address space described in [1] due to the following issue. It is a restriction that kernel logical memory map with 4KB + 3 levels (0xffffffc000000000-0xffffffffffffffff) cannot cover RAM region from 544GB to 1024GB in [1]. Specifically, ARM64 kernel fails to create mapping for this region in map_mem function since __phys_to_virt for this region reaches to address overflow. If SoC design follows the document, [1], over 32GB RAM would be placed from 544GB. Even 64GB system is supposed to use the region from 544GB to 576GB for only 32GB RAM. Naturally, it would reach to enable 4 levels of page tables to avoid hacking __virt_to_phys and __phys_to_virt. However, it is recommended 4 levels of page table should be only enabled if memory map is too sparse or there is about 512GB RAM. 
References ---------- [1]: Principles of ARM Memory Maps, White Paper, Issue C Signed-off-by: Jungseok Lee <jays.lee@samsung.com> Reviewed-by: Sungjinn Chung <sungjinn.chung@samsung.com> Acked-by: Kukjin Kim <kgene.kim@samsung.com> Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org> Reviewed-by: Steve Capper <steve.capper@linaro.org> [catalin.marinas at arm.com: MEMBLOCK_INITIAL_LIMIT removed, same as PUD_SIZE] [catalin.marinas at arm.com: early_ioremap_init() updated for 4 levels] [catalin.marinas at arm.com: 4 page tables levels only if !KVM] Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> --- arch/arm64/Kconfig | 9 ++++++++ arch/arm64/include/asm/page.h | 13 ++++++++--- arch/arm64/include/asm/pgalloc.h | 20 ++++++++++++++++ arch/arm64/include/asm/pgtable-hwdef.h | 6 +++-- arch/arm64/include/asm/pgtable.h | 40 ++++++++++++++++++++++++++++++++ arch/arm64/include/asm/tlb.h | 9 ++++++++ arch/arm64/kernel/head.S | 42 +++++++++++++++++++++++++++------- arch/arm64/kernel/traps.c | 5 ++++ arch/arm64/mm/fault.c | 1 + arch/arm64/mm/ioremap.c | 6 ++++- arch/arm64/mm/mmu.c | 14 +++++++++--- 11 files changed, 148 insertions(+), 17 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 4daf11f5b403..24cbe72c0da9 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -196,12 +196,18 @@ config ARM64_VA_BITS_42 bool "42-bit" depends on ARM64_64K_PAGES +config ARM64_VA_BITS_48 + bool "48-bit" + depends on !KVM + depends on ARM64_4K_PAGES + endchoice config ARM64_VA_BITS int default 39 if ARM64_VA_BITS_39 default 42 if ARM64_VA_BITS_42 + default 48 if ARM64_VA_BITS_48 config ARM64_2_LEVELS def_bool y if ARM64_64K_PAGES && ARM64_VA_BITS_42 @@ -209,6 +215,9 @@ config ARM64_2_LEVELS config ARM64_3_LEVELS def_bool y if ARM64_4K_PAGES && ARM64_VA_BITS_39 +config ARM64_4_LEVELS + def_bool y if ARM64_4K_PAGES && ARM64_VA_BITS_48 + config CPU_BIG_ENDIAN bool "Build big-endian kernel" help diff --git a/arch/arm64/include/asm/page.h 
b/arch/arm64/include/asm/page.h index 6bf139188792..cf9afa0366b6 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -33,19 +33,26 @@ /* * The idmap and swapper page tables need some space reserved in the kernel - * image. Both require a pgd and a next level table to (section) map the - * kernel. The the swapper also maaps the FDT (see __create_page_tables for + * image. Both require pgd, pud (4 levels only) and pmd tables to (section) + * map the kernel. The swapper also maps the FDT (see __create_page_tables for * more information). */ +#ifdef CONFIG_ARM64_4_LEVELS +#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) +#define IDMAP_DIR_SIZE (3 * PAGE_SIZE) +#else #define SWAPPER_DIR_SIZE (2 * PAGE_SIZE) #define IDMAP_DIR_SIZE (2 * PAGE_SIZE) +#endif #ifndef __ASSEMBLY__ #ifdef CONFIG_ARM64_2_LEVELS #include <asm/pgtable-2level-types.h> -#else +#elif defined(CONFIG_ARM64_3_LEVELS) #include <asm/pgtable-3level-types.h> +#else +#include <asm/pgtable-4level-types.h> #endif extern void __cpu_clear_user_page(void *p, unsigned long user); diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 48298376e46a..8d745fae4c2d 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -26,6 +26,26 @@ #define check_pgt_cache() do { } while (0) +#ifdef CONFIG_ARM64_4_LEVELS + +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + return (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); +} + +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); + free_page((unsigned long)pud); +} + +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +{ + set_pgd(pgd, __pgd(__pa(pud) | PUD_TYPE_TABLE)); +} + +#endif /* CONFIG_ARM64_4_LEVELS */ + #ifndef CONFIG_ARM64_2_LEVELS static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h 
b/arch/arm64/include/asm/pgtable-hwdef.h index c7c603b489b8..fddcc3efa569 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -18,8 +18,10 @@ #ifdef CONFIG_ARM64_2_LEVELS #include <asm/pgtable-2level-hwdef.h> -#else +#elif defined(CONFIG_ARM64_3_LEVELS) #include <asm/pgtable-3level-hwdef.h> +#else +#include <asm/pgtable-4level-hwdef.h> #endif /* @@ -27,7 +29,7 @@ * * Level 1 descriptor (PUD). */ - +#define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0) #define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1) #define PUD_TYPE_MASK (_AT(pgdval_t, 3) << 0) #define PUD_TYPE_SECT (_AT(pgdval_t, 1) << 0) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 6d5854972a77..d9b23efdaded 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -35,7 +35,11 @@ * VMALLOC and SPARSEMEM_VMEMMAP ranges. */ #define VMALLOC_START (UL(0xffffffffffffffff) << VA_BITS) +#ifndef CONFIG_ARM64_4_LEVELS #define VMALLOC_END (PAGE_OFFSET - UL(0x400000000) - SZ_64K) +#else +#define VMALLOC_END (PAGE_OFFSET - UL(0x40000000000) - SZ_64K) +#endif #define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) @@ -44,12 +48,16 @@ #ifndef __ASSEMBLY__ extern void __pte_error(const char *file, int line, unsigned long val); extern void __pmd_error(const char *file, int line, unsigned long val); +extern void __pud_error(const char *file, int line, unsigned long val); extern void __pgd_error(const char *file, int line, unsigned long val); #define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) #ifndef CONFIG_ARM64_2_LEVELS #define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd)) #endif +#ifdef CONFIG_ARM64_4_LEVELS +#define pud_ERROR(pud) __pud_error(__FILE__, __LINE__, pud_val(pud)) +#endif #define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) #ifdef CONFIG_SMP @@ -347,6 +355,30 @@ static inline pmd_t *pud_page_vaddr(pud_t pud) #endif /* CONFIG_ARM64_2_LEVELS */ +#ifdef 
CONFIG_ARM64_4_LEVELS + +#define pgd_none(pgd) (!pgd_val(pgd)) +#define pgd_bad(pgd) (!(pgd_val(pgd) & 2)) +#define pgd_present(pgd) (pgd_val(pgd)) + +static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) +{ + *pgdp = pgd; + dsb(ishst); +} + +static inline void pgd_clear(pgd_t *pgdp) +{ + set_pgd(pgdp, __pgd(0)); +} + +static inline pud_t *pgd_page_vaddr(pgd_t pgd) +{ + return __va(pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK); +} + +#endif /* CONFIG_ARM64_4_LEVELS */ + /* to find an entry in a page-table-directory */ #define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) @@ -355,6 +387,14 @@ static inline pmd_t *pud_page_vaddr(pud_t pud) /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(addr) pgd_offset(&init_mm, addr) +#ifdef CONFIG_ARM64_4_LEVELS +#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) +static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) +{ + return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr); +} +#endif + /* Find an entry in the second-level page table.. 
*/ #ifndef CONFIG_ARM64_2_LEVELS #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index bc19101edaeb..49dc8f03362f 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -100,6 +100,15 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, } #endif +#ifdef CONFIG_ARM64_4_LEVELS +static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, + unsigned long addr) +{ + tlb_add_flush(tlb, addr); + tlb_remove_page(tlb, virt_to_page(pudp)); +} +#endif + static inline void __tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long address) { diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index fa3b7fb8a77a..847b99daad79 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -476,16 +476,42 @@ ENDPROC(__calc_phys_offset) .quad PAGE_OFFSET /* - * Macro to populate the PGD for the corresponding block entry in the next - * level (tbl) for the given virtual address. + * Macro to populate the PUD for the corresponding block entry in the next + * level (tbl) for the given virtual address in case of 4 levels. * - * Preserves: pgd, tbl, virt - * Corrupts: tmp1, tmp2 + * Preserves: pgd, virt + * Corrupts: tbl, tmp1, tmp2 + * Returns: pud */ - .macro create_pgd_entry, pgd, tbl, virt, tmp1, tmp2 + .macro create_pud_entry, pgd, tbl, virt, pud, tmp1, tmp2 +#ifdef CONFIG_ARM64_4_LEVELS + add \tbl, \tbl, #PAGE_SIZE // bump tbl 1 page up. + // to make room for pud + add \pud, \pgd, #PAGE_SIZE // pgd points to pud which + // follows pgd + lsr \tmp1, \virt, #PUD_SHIFT + and \tmp1, \tmp1, #PTRS_PER_PUD - 1 // PUD index + orr \tmp2, \tbl, #3 // PUD entry table type + str \tmp2, [\pud, \tmp1, lsl #3] +#else + mov \pud, \tbl +#endif + .endm + +/* + * Macro to populate the PGD (and possibily PUD) for the corresponding + * block entry in the next level (tbl) for the given virtual address. 
+ * + * Preserves: pgd, virt + * Corrupts: tmp1, tmp2, tmp3 + * Returns: tbl -> page where block mappings can be placed + * (changed to make room for pud with 4 levels, preserved otherwise) + */ + .macro create_pgd_entry, pgd, tbl, virt, tmp1, tmp2, tmp3 + create_pud_entry \pgd, \tbl, \virt, \tmp3, \tmp1, \tmp2 lsr \tmp1, \virt, #PGDIR_SHIFT and \tmp1, \tmp1, #PTRS_PER_PGD - 1 // PGD index - orr \tmp2, \tbl, #3 // PGD entry table type + orr \tmp2, \tmp3, #3 // PGD entry table type str \tmp2, [\pgd, \tmp1, lsl #3] .endm @@ -550,7 +576,7 @@ __create_page_tables: add x0, x25, #PAGE_SIZE // section table address ldr x3, =KERNEL_START add x3, x3, x28 // __pa(KERNEL_START) - create_pgd_entry x25, x0, x3, x5, x6 + create_pgd_entry x25, x0, x3, x1, x5, x6 ldr x6, =KERNEL_END mov x5, x3 // __pa(KERNEL_START) add x6, x6, x28 // __pa(KERNEL_END) @@ -561,7 +587,7 @@ __create_page_tables: */ add x0, x26, #PAGE_SIZE // section table address mov x5, #PAGE_OFFSET - create_pgd_entry x26, x0, x5, x3, x6 + create_pgd_entry x26, x0, x5, x1, x3, x6 ldr x6, =KERNEL_END mov x3, x24 // phys offset create_block_map x0, x7, x3, x5, x6 diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 506f7814e305..02cd3f023e9a 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -339,6 +339,11 @@ void __pmd_error(const char *file, int line, unsigned long val) pr_crit("%s:%d: bad pmd %016lx.\n", file, line, val); } +void __pud_error(const char *file, int line, unsigned long val) +{ + pr_crit("%s:%d: bad pud %016lx.\n", file, line, val); +} + void __pgd_error(const char *file, int line, unsigned long val) { pr_crit("%s:%d: bad pgd %016lx.\n", file, line, val); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index bcc965e2cce1..41cb6d3d6075 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -62,6 +62,7 @@ void show_pte(struct mm_struct *mm, unsigned long addr) break; pud = pud_offset(pgd, addr); + printk(", *pud=%016llx", pud_val(*pud)); if 
(pud_none(*pud) || pud_bad(*pud)) break; diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index 69000efa015e..fa324bd5a5c4 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -104,9 +104,12 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) EXPORT_SYMBOL(ioremap_cache); static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; -#ifndef CONFIG_ARM64_64K_PAGES +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 static pte_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; #endif +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +static pte_t bm_pud[PTRS_PER_PUD] __page_aligned_bss; +#endif static inline pud_t * __init early_ioremap_pud(unsigned long addr) { @@ -144,6 +147,7 @@ void __init early_ioremap_init(void) unsigned long addr = fix_to_virt(FIX_BTMAP_BEGIN); pgd = pgd_offset_k(addr); + pgd_populate(&init_mm, pgd, bm_pud); pud = pud_offset(pgd, addr); pud_populate(&init_mm, pud, bm_pmd); pmd = pmd_offset(pud, addr); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index c43f1dd19489..c55567283cde 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -32,6 +32,7 @@ #include <asm/setup.h> #include <asm/sizes.h> #include <asm/tlb.h> +#include <asm/memblock.h> #include <asm/mmu_context.h> #include "mm.h" @@ -204,9 +205,16 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, unsigned long phys, int map_io) { - pud_t *pud = pud_offset(pgd, addr); + pud_t *pud; unsigned long next; + if (pgd_none(*pgd)) { + pud = early_alloc(PTRS_PER_PUD * sizeof(pud_t)); + pgd_populate(&init_mm, pgd, pud); + } + BUG_ON(pgd_bad(*pgd)); + + pud = pud_offset(pgd, addr); do { next = pud_addr_end(addr, end); @@ -290,10 +298,10 @@ static void __init map_mem(void) * memory addressable from the initial direct kernel mapping. 
* * The initial direct kernel mapping, located at swapper_pg_dir, - * gives us PGDIR_SIZE memory starting from PHYS_OFFSET (which must be + * gives us PUD_SIZE memory starting from PHYS_OFFSET (which must be * aligned to 2MB as per Documentation/arm64/booting.txt). */ - limit = PHYS_OFFSET + PGDIR_SIZE; + limit = PHYS_OFFSET + PUD_SIZE; memblock_set_current_limit(limit); /* map all the memory banks */ ^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v7 07/11] arm64: mm: Implement 4 levels of translation tables 2014-07-16 19:09 ` [PATCH v7 07/11] arm64: mm: Implement " Catalin Marinas @ 2014-07-28 15:40 ` Joel Schopp 0 siblings, 0 replies; 10+ messages in thread From: Joel Schopp @ 2014-07-28 15:40 UTC (permalink / raw) To: linux-arm-kernel On 07/16/2014 02:09 PM, Catalin Marinas wrote: > From: Jungseok Lee <jays.lee@samsung.com> > > This patch implements 4 levels of translation tables since 3 levels > of page tables with 4KB pages cannot support 40-bit physical address > space described in [1] due to the following issue. > > It is a restriction that kernel logical memory map with 4KB + 3 levels > (0xffffffc000000000-0xffffffffffffffff) cannot cover RAM region from > 544GB to 1024GB in [1]. Specifically, ARM64 kernel fails to create > mapping for this region in map_mem function since __phys_to_virt for > this region reaches to address overflow. > > If SoC design follows the document, [1], over 32GB RAM would be placed > from 544GB. Even 64GB system is supposed to use the region from 544GB > to 576GB for only 32GB RAM. Naturally, it would reach to enable 4 levels > of page tables to avoid hacking __virt_to_phys and __phys_to_virt. > > However, it is recommended 4 levels of page table should be only enabled > if memory map is too sparse or there is about 512GB RAM. 
> > References > ---------- > [1]: Principles of ARM Memory Maps, White Paper, Issue C > > Signed-off-by: Jungseok Lee <jays.lee@samsung.com> > Reviewed-by: Sungjinn Chung <sungjinn.chung@samsung.com> > Acked-by: Kukjin Kim <kgene.kim@samsung.com> > Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org> > Reviewed-by: Steve Capper <steve.capper@linaro.org> > [catalin.marinas at arm.com: MEMBLOCK_INITIAL_LIMIT removed, same as PUD_SIZE] > [catalin.marinas at arm.com: early_ioremap_init() updated for 4 levels] > [catalin.marinas at arm.com: 4 page tables levels only if !KVM] > Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> > --- > arch/arm64/Kconfig | 9 ++++++++ > arch/arm64/include/asm/page.h | 13 ++++++++--- > arch/arm64/include/asm/pgalloc.h | 20 ++++++++++++++++ > arch/arm64/include/asm/pgtable-hwdef.h | 6 +++-- > arch/arm64/include/asm/pgtable.h | 40 ++++++++++++++++++++++++++++++++ > arch/arm64/include/asm/tlb.h | 9 ++++++++ > arch/arm64/kernel/head.S | 42 +++++++++++++++++++++++++++------- > arch/arm64/kernel/traps.c | 5 ++++ > arch/arm64/mm/fault.c | 1 + > arch/arm64/mm/ioremap.c | 6 ++++- > arch/arm64/mm/mmu.c | 14 +++++++++--- > 11 files changed, 148 insertions(+), 17 deletions(-) > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > index 4daf11f5b403..24cbe72c0da9 100644 > --- a/arch/arm64/Kconfig > +++ b/arch/arm64/Kconfig > @@ -196,12 +196,18 @@ config ARM64_VA_BITS_42 > bool "42-bit" > depends on ARM64_64K_PAGES > > +config ARM64_VA_BITS_48 > + bool "48-bit" > + depends on !KVM > + depends on ARM64_4K_PAGES > + > endchoice Shouldn't we be able to support 48 bit VA with 3 level 64K pages? If so why the dependency on ARM64_4K_PAGES? More generally it seems like a problem to equate the VA_BITS the page table could address with the VA_BITS the hardware could address. Even with 4 level 4K page tables that can address 48 bits the hardware may only support say 42 bit address space. 
> > config ARM64_VA_BITS > int > default 39 if ARM64_VA_BITS_39 > default 42 if ARM64_VA_BITS_42 > + default 48 if ARM64_VA_BITS_48 > > config ARM64_2_LEVELS > def_bool y if ARM64_64K_PAGES && ARM64_VA_BITS_42 > @@ -209,6 +215,9 @@ config ARM64_2_LEVELS > config ARM64_3_LEVELS > def_bool y if ARM64_4K_PAGES && ARM64_VA_BITS_39 > > +config ARM64_4_LEVELS > + def_bool y if ARM64_4K_PAGES && ARM64_VA_BITS_48 > + It seems like we should also do ARM64_4K_PAGES and ARM64_VA_BITS_42 as a valid combination for ARM64_4_LEVELS. At least if we are assuming the VA_BITS correspond to hardware. > config CPU_BIG_ENDIAN > bool "Build big-endian kernel" > help > diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h > index 6bf139188792..cf9afa0366b6 100644 > --- a/arch/arm64/include/asm/page.h > +++ b/arch/arm64/include/asm/page.h > @@ -33,19 +33,26 @@ > > /* > * The idmap and swapper page tables need some space reserved in the kernel > - * image. Both require a pgd and a next level table to (section) map the > - * kernel. The the swapper also maaps the FDT (see __create_page_tables for > + * image. Both require pgd, pud (4 levels only) and pmd tables to (section) > + * map the kernel. The swapper also maps the FDT (see __create_page_tables for > * more information). 
> */ > +#ifdef CONFIG_ARM64_4_LEVELS > +#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) > +#define IDMAP_DIR_SIZE (3 * PAGE_SIZE) > +#else > #define SWAPPER_DIR_SIZE (2 * PAGE_SIZE) > #define IDMAP_DIR_SIZE (2 * PAGE_SIZE) > +#endif > > #ifndef __ASSEMBLY__ > > #ifdef CONFIG_ARM64_2_LEVELS > #include <asm/pgtable-2level-types.h> > -#else > +#elif defined(CONFIG_ARM64_3_LEVELS) > #include <asm/pgtable-3level-types.h> > +#else > +#include <asm/pgtable-4level-types.h> > #endif > > extern void __cpu_clear_user_page(void *p, unsigned long user); > diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h > index 48298376e46a..8d745fae4c2d 100644 > --- a/arch/arm64/include/asm/pgalloc.h > +++ b/arch/arm64/include/asm/pgalloc.h > @@ -26,6 +26,26 @@ > > #define check_pgt_cache() do { } while (0) > > +#ifdef CONFIG_ARM64_4_LEVELS > + > +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) > +{ > + return (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); > +} > + > +static inline void pud_free(struct mm_struct *mm, pud_t *pud) > +{ > + BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); > + free_page((unsigned long)pud); > +} > + > +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) > +{ > + set_pgd(pgd, __pgd(__pa(pud) | PUD_TYPE_TABLE)); > +} > + > +#endif /* CONFIG_ARM64_4_LEVELS */ > + > #ifndef CONFIG_ARM64_2_LEVELS > > static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) > diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h > index c7c603b489b8..fddcc3efa569 100644 > --- a/arch/arm64/include/asm/pgtable-hwdef.h > +++ b/arch/arm64/include/asm/pgtable-hwdef.h > @@ -18,8 +18,10 @@ > > #ifdef CONFIG_ARM64_2_LEVELS > #include <asm/pgtable-2level-hwdef.h> > -#else > +#elif defined(CONFIG_ARM64_3_LEVELS) > #include <asm/pgtable-3level-hwdef.h> > +#else > +#include <asm/pgtable-4level-hwdef.h> > #endif > > /* > @@ -27,7 +29,7 @@ > * > * Level 1 
descriptor (PUD). > */ > - > +#define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0) > #define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1) > #define PUD_TYPE_MASK (_AT(pgdval_t, 3) << 0) > #define PUD_TYPE_SECT (_AT(pgdval_t, 1) << 0) > diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h > index 6d5854972a77..d9b23efdaded 100644 > --- a/arch/arm64/include/asm/pgtable.h > +++ b/arch/arm64/include/asm/pgtable.h > @@ -35,7 +35,11 @@ > * VMALLOC and SPARSEMEM_VMEMMAP ranges. > */ > #define VMALLOC_START (UL(0xffffffffffffffff) << VA_BITS) Here's a good example of where we run into trouble equating page table addressable bits with hardware addressable bits. If VA_BITS is 48 due to 4K 4 level page tables but is running on a 42 bit system this will end up being out of range. > +#ifndef CONFIG_ARM64_4_LEVELS > #define VMALLOC_END (PAGE_OFFSET - UL(0x400000000) - SZ_64K) > +#else > +#define VMALLOC_END (PAGE_OFFSET - UL(0x40000000000) - SZ_64K) > +#endif > > #define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) > > @@ -44,12 +48,16 @@ > #ifndef __ASSEMBLY__ > extern void __pte_error(const char *file, int line, unsigned long val); > extern void __pmd_error(const char *file, int line, unsigned long val); > +extern void __pud_error(const char *file, int line, unsigned long val); > extern void __pgd_error(const char *file, int line, unsigned long val); > > #define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) > #ifndef CONFIG_ARM64_2_LEVELS > #define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd)) > #endif > +#ifdef CONFIG_ARM64_4_LEVELS > +#define pud_ERROR(pud) __pud_error(__FILE__, __LINE__, pud_val(pud)) > +#endif > #define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) > > #ifdef CONFIG_SMP > @@ -347,6 +355,30 @@ static inline pmd_t *pud_page_vaddr(pud_t pud) > > #endif /* CONFIG_ARM64_2_LEVELS */ > > +#ifdef CONFIG_ARM64_4_LEVELS > + > +#define pgd_none(pgd) (!pgd_val(pgd)) > +#define pgd_bad(pgd) (!(pgd_val(pgd) 
& 2)) > +#define pgd_present(pgd) (pgd_val(pgd)) > + > +static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) > +{ > + *pgdp = pgd; > + dsb(ishst); > +} > + > +static inline void pgd_clear(pgd_t *pgdp) > +{ > + set_pgd(pgdp, __pgd(0)); > +} > + > +static inline pud_t *pgd_page_vaddr(pgd_t pgd) > +{ > + return __va(pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK); > +} > + > +#endif /* CONFIG_ARM64_4_LEVELS */ > + > /* to find an entry in a page-table-directory */ > #define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) > > @@ -355,6 +387,14 @@ static inline pmd_t *pud_page_vaddr(pud_t pud) > /* to find an entry in a kernel page-table-directory */ > #define pgd_offset_k(addr) pgd_offset(&init_mm, addr) > > +#ifdef CONFIG_ARM64_4_LEVELS > +#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) > +static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) > +{ > + return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr); > +} > +#endif > + > /* Find an entry in the second-level page table.. 
*/ > #ifndef CONFIG_ARM64_2_LEVELS > #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) > diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h > index bc19101edaeb..49dc8f03362f 100644 > --- a/arch/arm64/include/asm/tlb.h > +++ b/arch/arm64/include/asm/tlb.h > @@ -100,6 +100,15 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, > } > #endif > > +#ifdef CONFIG_ARM64_4_LEVELS > +static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, > + unsigned long addr) > +{ > + tlb_add_flush(tlb, addr); > + tlb_remove_page(tlb, virt_to_page(pudp)); > +} > +#endif > + > static inline void __tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, > unsigned long address) > { > diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S > index fa3b7fb8a77a..847b99daad79 100644 > --- a/arch/arm64/kernel/head.S > +++ b/arch/arm64/kernel/head.S > @@ -476,16 +476,42 @@ ENDPROC(__calc_phys_offset) > .quad PAGE_OFFSET > > /* > - * Macro to populate the PGD for the corresponding block entry in the next > - * level (tbl) for the given virtual address. > + * Macro to populate the PUD for the corresponding block entry in the next > + * level (tbl) for the given virtual address in case of 4 levels. > * > - * Preserves: pgd, tbl, virt > - * Corrupts: tmp1, tmp2 > + * Preserves: pgd, virt > + * Corrupts: tbl, tmp1, tmp2 > + * Returns: pud > */ > - .macro create_pgd_entry, pgd, tbl, virt, tmp1, tmp2 > + .macro create_pud_entry, pgd, tbl, virt, pud, tmp1, tmp2 > +#ifdef CONFIG_ARM64_4_LEVELS > + add \tbl, \tbl, #PAGE_SIZE // bump tbl 1 page up. 
> + // to make room for pud > + add \pud, \pgd, #PAGE_SIZE // pgd points to pud which > + // follows pgd > + lsr \tmp1, \virt, #PUD_SHIFT > + and \tmp1, \tmp1, #PTRS_PER_PUD - 1 // PUD index > + orr \tmp2, \tbl, #3 // PUD entry table type > + str \tmp2, [\pud, \tmp1, lsl #3] > +#else > + mov \pud, \tbl > +#endif > + .endm > + > +/* > + * Macro to populate the PGD (and possibily PUD) for the corresponding > + * block entry in the next level (tbl) for the given virtual address. > + * > + * Preserves: pgd, virt > + * Corrupts: tmp1, tmp2, tmp3 > + * Returns: tbl -> page where block mappings can be placed > + * (changed to make room for pud with 4 levels, preserved otherwise) > + */ > + .macro create_pgd_entry, pgd, tbl, virt, tmp1, tmp2, tmp3 > + create_pud_entry \pgd, \tbl, \virt, \tmp3, \tmp1, \tmp2 > lsr \tmp1, \virt, #PGDIR_SHIFT > and \tmp1, \tmp1, #PTRS_PER_PGD - 1 // PGD index > - orr \tmp2, \tbl, #3 // PGD entry table type > + orr \tmp2, \tmp3, #3 // PGD entry table type > str \tmp2, [\pgd, \tmp1, lsl #3] > .endm > > @@ -550,7 +576,7 @@ __create_page_tables: > add x0, x25, #PAGE_SIZE // section table address > ldr x3, =KERNEL_START > add x3, x3, x28 // __pa(KERNEL_START) > - create_pgd_entry x25, x0, x3, x5, x6 > + create_pgd_entry x25, x0, x3, x1, x5, x6 > ldr x6, =KERNEL_END > mov x5, x3 // __pa(KERNEL_START) > add x6, x6, x28 // __pa(KERNEL_END) > @@ -561,7 +587,7 @@ __create_page_tables: > */ > add x0, x26, #PAGE_SIZE // section table address > mov x5, #PAGE_OFFSET > - create_pgd_entry x26, x0, x5, x3, x6 > + create_pgd_entry x26, x0, x5, x1, x3, x6 > ldr x6, =KERNEL_END > mov x3, x24 // phys offset > create_block_map x0, x7, x3, x5, x6 > diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c > index 506f7814e305..02cd3f023e9a 100644 > --- a/arch/arm64/kernel/traps.c > +++ b/arch/arm64/kernel/traps.c > @@ -339,6 +339,11 @@ void __pmd_error(const char *file, int line, unsigned long val) > pr_crit("%s:%d: bad pmd %016lx.\n", file, line, val); > } 
> > +void __pud_error(const char *file, int line, unsigned long val) > +{ > + pr_crit("%s:%d: bad pud %016lx.\n", file, line, val); > +} > + > void __pgd_error(const char *file, int line, unsigned long val) > { > pr_crit("%s:%d: bad pgd %016lx.\n", file, line, val); > diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c > index bcc965e2cce1..41cb6d3d6075 100644 > --- a/arch/arm64/mm/fault.c > +++ b/arch/arm64/mm/fault.c > @@ -62,6 +62,7 @@ void show_pte(struct mm_struct *mm, unsigned long addr) > break; > > pud = pud_offset(pgd, addr); > + printk(", *pud=%016llx", pud_val(*pud)); > if (pud_none(*pud) || pud_bad(*pud)) > break; > > diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c > index 69000efa015e..fa324bd5a5c4 100644 > --- a/arch/arm64/mm/ioremap.c > +++ b/arch/arm64/mm/ioremap.c > @@ -104,9 +104,12 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) > EXPORT_SYMBOL(ioremap_cache); > > static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; > -#ifndef CONFIG_ARM64_64K_PAGES > +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 > static pte_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; > #endif > +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 > +static pte_t bm_pud[PTRS_PER_PUD] __page_aligned_bss; > +#endif > > static inline pud_t * __init early_ioremap_pud(unsigned long addr) > { > @@ -144,6 +147,7 @@ void __init early_ioremap_init(void) > unsigned long addr = fix_to_virt(FIX_BTMAP_BEGIN); > > pgd = pgd_offset_k(addr); > + pgd_populate(&init_mm, pgd, bm_pud); > pud = pud_offset(pgd, addr); > pud_populate(&init_mm, pud, bm_pmd); > pmd = pmd_offset(pud, addr); > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c > index c43f1dd19489..c55567283cde 100644 > --- a/arch/arm64/mm/mmu.c > +++ b/arch/arm64/mm/mmu.c > @@ -32,6 +32,7 @@ > #include <asm/setup.h> > #include <asm/sizes.h> > #include <asm/tlb.h> > +#include <asm/memblock.h> > #include <asm/mmu_context.h> > > #include "mm.h" > @@ -204,9 +205,16 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned 
long addr, > unsigned long end, unsigned long phys, > int map_io) > { > - pud_t *pud = pud_offset(pgd, addr); > + pud_t *pud; > unsigned long next; > > + if (pgd_none(*pgd)) { > + pud = early_alloc(PTRS_PER_PUD * sizeof(pud_t)); > + pgd_populate(&init_mm, pgd, pud); > + } > + BUG_ON(pgd_bad(*pgd)); > + > + pud = pud_offset(pgd, addr); > do { > next = pud_addr_end(addr, end); > > @@ -290,10 +298,10 @@ static void __init map_mem(void) > * memory addressable from the initial direct kernel mapping. > * > * The initial direct kernel mapping, located at swapper_pg_dir, > - * gives us PGDIR_SIZE memory starting from PHYS_OFFSET (which must be > + * gives us PUD_SIZE memory starting from PHYS_OFFSET (which must be > * aligned to 2MB as per Documentation/arm64/booting.txt). > */ > - limit = PHYS_OFFSET + PGDIR_SIZE; > + limit = PHYS_OFFSET + PUD_SIZE; > memblock_set_current_limit(limit); > > /* map all the memory banks */ > > _______________________________________________ > linux-arm-kernel mailing list > linux-arm-kernel at lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel ^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2014-08-14 13:47 UTC | newest] Thread overview: 10+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- [not found] <CAGoO6fLT6B_vx+Wtm6bKHD6YkjQcM8f=deD7SDP3BR_uC+1c5w@mail.gmail.com> 2014-07-29 13:47 ` [PATCH v7 07/11] arm64: mm: Implement 4 levels of translation tables Jungseok Lee 2014-07-29 14:19 ` Joel Schopp 2014-07-30 14:57 ` Jungseok Lee 2014-08-14 11:42 ` Ganapatrao Kulkarni 2014-08-14 12:58 ` Catalin Marinas 2014-08-14 13:47 ` Ganapatrao Kulkarni [not found] <2211E520-32A4-435C-9F63-A7EFEE8B677A@gmail.com> 2014-07-17 15:04 ` Jungseok Lee 2014-07-17 16:47 ` Catalin Marinas 2014-07-16 19:09 [PATCH v7 00/11] arm64: Support " Catalin Marinas 2014-07-16 19:09 ` [PATCH v7 07/11] arm64: mm: Implement " Catalin Marinas 2014-07-28 15:40 ` Joel Schopp
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).