* [PATCH 2/3] powerpc/64/mm: implement page mapping percpu first chunk allocator
2019-09-17 14:57 [PATCH 1/3] powerpc/book3s64/hash/4k: 4k supports only 16TB linear mapping Aneesh Kumar K.V
@ 2019-09-17 14:57 ` Aneesh Kumar K.V
2019-09-17 14:57 ` [PATCH 3/3] powerpc/book3s64/hash/4K: Update the kernel mapping with 4K page size config Aneesh Kumar K.V
` (2 subsequent siblings)
3 siblings, 0 replies; 6+ messages in thread
From: Aneesh Kumar K.V @ 2019-09-17 14:57 UTC (permalink / raw)
To: mpe; +Cc: Aneesh Kumar K.V, linuxppc-dev
Implement the page mapping percpu first chunk allocator as a fallback to
the embedding allocator. With 4K hash translation we limit our page
table range to 64TB, and commit 0034d395f89d ("powerpc/mm/hash64: Map all the
kernel regions in the same 0xc range") moved all kernel mappings into
that 64TB range. In order to support a sparse memory layout we need
to increase the linear mapping space and shrink the other mappings.
With such a layout the percpu embedded first chunk allocator will fail
because of the small vmalloc range. Add a fallback to the page mapping
percpu first chunk allocator for such failures.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
arch/powerpc/Kconfig | 5 ++-
arch/powerpc/kernel/setup_64.c | 59 ++++++++++++++++++++++++++++++++--
2 files changed, 61 insertions(+), 3 deletions(-)
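Note for reviewers: the page first chunk allocator only calls back into the
arch for memory allocation and page table preparation; the generic percpu code
does the rest. A rough sketch of that flow is below (illustration only, not
the actual mm/percpu.c code -- pcpu_page_populate_sketch() and its
vm_area_base / unit_size / unit_pages parameters are made-up stand-ins for
values the core percpu code computes internally):

/*
 * Hedged sketch of the pcpu_page_first_chunk() flow that consumes the
 * callbacks registered in setup_per_cpu_areas().  Illustration only.
 */
static int __init pcpu_page_populate_sketch(unsigned long vm_area_base,
                                            size_t unit_size, int unit_pages,
                                            pcpu_fc_alloc_fn_t alloc_fn,
                                            pcpu_fc_populate_pte_fn_t populate_pte_fn)
{
        unsigned int cpu;
        int i;

        for_each_possible_cpu(cpu) {
                unsigned long unit_addr = vm_area_base + cpu * unit_size;

                for (i = 0; i < unit_pages; i++) {
                        /* arch callback: on powerpc this ends up in memblock */
                        void *ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE);

                        if (!ptr)
                                return -ENOMEM;
                        /*
                         * arch callback: make sure the PGD/PUD/PMD levels
                         * exist before the core code inserts the PTE for
                         * this page.
                         */
                        populate_pte_fn(unit_addr + (i << PAGE_SHIFT));
                }
        }
        /*
         * The real code then maps the allocated pages into the vmalloc
         * area and hands the layout to pcpu_setup_first_chunk().
         */
        return 0;
}

The page allocator can also be requested explicitly with percpu_alloc=page on
the kernel command line (generic percpu behaviour, not added by this patch),
which is a convenient way to exercise pcpu_populate_pte().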
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 6a7c797fa9d2..69d29e3486f8 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -63,7 +63,10 @@ config HAVE_SETUP_PER_CPU_AREA
        def_bool PPC64
config NEED_PER_CPU_EMBED_FIRST_CHUNK
-       def_bool PPC64
+       def_bool y if PPC64
+
+config NEED_PER_CPU_PAGE_FIRST_CHUNK
+       def_bool y if PPC64
config NR_IRQS
        int "Number of virtual interrupt numbers"
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 44b4c432a273..4049d450afd6 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -65,6 +65,7 @@
#include <asm/hw_irq.h>
#include <asm/feature-fixups.h>
#include <asm/kup.h>
+#include <asm/pgalloc.h>
#include "setup.h"
@@ -761,13 +762,56 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to)
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
+static void __init pcpu_populate_pte(unsigned long addr)
+{
+        pgd_t *pgd = pgd_offset_k(addr);
+        pud_t *pud;
+        pmd_t *pmd;
+
+        if (pgd_none(*pgd)) {
+                pud_t *new;
+
+                new = memblock_alloc_from(PUD_TABLE_SIZE, PUD_TABLE_SIZE, PUD_TABLE_SIZE);
+                if (!new)
+                        goto err_alloc;
+                pgd_populate(&init_mm, pgd, new);
+        }
+
+        pud = pud_offset(pgd, addr);
+        if (pud_none(*pud)) {
+                pmd_t *new;
+
+                new = memblock_alloc_from(PMD_TABLE_SIZE, PMD_TABLE_SIZE, PMD_TABLE_SIZE);
+                if (!new)
+                        goto err_alloc;
+                pud_populate(&init_mm, pud, new);
+        }
+
+        pmd = pmd_offset(pud, addr);
+        if (!pmd_present(*pmd)) {
+                pte_t *new;
+
+                new = memblock_alloc_from(PTE_TABLE_SIZE, PTE_TABLE_SIZE, PTE_TABLE_SIZE);
+                if (!new)
+                        goto err_alloc;
+                pmd_populate_kernel(&init_mm, pmd, new);
+        }
+
+        return;
+
+err_alloc:
+        panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
+              __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+}
+
+
void __init setup_per_cpu_areas(void)
{
        const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
        size_t atom_size;
        unsigned long delta;
        unsigned int cpu;
-        int rc;
+        int rc = -EINVAL;
        /*
         * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
@@ -779,8 +823,19 @@ void __init setup_per_cpu_areas(void)
        else
                atom_size = 1 << 20;
-        rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
+        if (pcpu_chosen_fc != PCPU_FC_PAGE) {
+                rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
                                            pcpu_fc_alloc, pcpu_fc_free);
+                if (rc)
+                        pr_warning("PERCPU: %s allocator failed (%d), "
+                                   "falling back to page size\n",
+                                   pcpu_fc_names[pcpu_chosen_fc], rc);
+        }
+
+        if (rc < 0)
+                rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
+                                           pcpu_fc_alloc, pcpu_fc_free,
+                                           pcpu_populate_pte);
        if (rc < 0)
                panic("cannot initialize percpu area (err=%d)", rc);
--
2.21.0
* [PATCH 3/3] powerpc/book3s64/hash/4K: Update the kernel mapping with 4K page size config.
2019-09-17 14:57 [PATCH 1/3] powerpc/book3s64/hash/4k: 4k supports only 16TB linear mapping Aneesh Kumar K.V
2019-09-17 14:57 ` [PATCH 2/3] powerpc/64/mm: implement page mapping percpu first chunk allocator Aneesh Kumar K.V
@ 2019-09-17 14:57 ` Aneesh Kumar K.V
2019-10-03 23:05 ` [PATCH 1/3] powerpc/book3s64/hash/4k: 4k supports only 16TB linear mapping Michael Ellerman
2019-10-05 20:15 ` Samuel Holland
3 siblings, 0 replies; 6+ messages in thread
From: Aneesh Kumar K.V @ 2019-09-17 14:57 UTC (permalink / raw)
To: mpe; +Cc: Aneesh Kumar K.V, linuxppc-dev
This patch updates the kernel mapping such that we now support up to
61TB of memory with 4K pages. The kernel mapping now looks like this:
vmalloc start = 0xc0003d0000000000
IO start = 0xc0003e0000000000
vmemmap start = 0xc0003f0000000000
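These follow from the 0xc000000000000000 region base: the linear map takes the
first 61TB and each later region is 1TB (1UL << 40 == 0x10000000000 bytes), so
vmalloc start = 0xc000000000000000 + 61 * 0x10000000000 = 0xc0003d0000000000
(61 == 0x3d), with the IO and vmemmap starts 1TB and 2TB above that.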
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
arch/powerpc/include/asm/book3s/64/hash-4k.h | 13 ++++++-------
arch/powerpc/include/asm/book3s/64/mmu.h | 8 ++------
2 files changed, 8 insertions(+), 13 deletions(-)
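A note on the mmu.h hunk below: MAX_PHYSMEM_BITS values of 44, 46 and 51
correspond to 16TB, 64TB and 2PB of addressable physical memory (2^44, 2^46
and 2^51 bytes). Dropping the 4K-specific 44-bit limit, together with the
larger linear map above, is what lets a 4K hash kernel address memory beyond
16TB again.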
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 8fd8599c9395..d56a2a2ae14a 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -13,20 +13,19 @@
*/
#define MAX_EA_BITS_PER_CONTEXT 46
-#define REGION_SHIFT (MAX_EA_BITS_PER_CONTEXT - 2)
/*
- * Our page table limit us to 64TB. Hence for the kernel mapping,
- * each MAP area is limited to 16 TB.
- * The four map areas are: linear mapping, vmap, IO and vmemmap
+ * Our page tables limit us to 64TB. For 64TB of physical memory we only need
+ * 64GB of vmemmap space. To better support a sparse memory layout, we use a
+ * 61TB linear map range, 1TB of vmalloc, 1TB of I/O and 1TB of vmemmap.
*/
+#define REGION_SHIFT (40)
#define H_KERN_MAP_SIZE (ASM_CONST(1) << REGION_SHIFT)
/*
- * Define the address range of the kernel non-linear virtual area
- * 16TB
+ * Define the address range of the kernel non-linear virtual area (61TB)
*/
-#define H_KERN_VIRT_START ASM_CONST(0xc000100000000000)
+#define H_KERN_VIRT_START ASM_CONST(0xc0003d0000000000)
#ifndef __ASSEMBLY__
#define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 86cce8189240..bb3deb76c951 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -35,16 +35,12 @@ extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
* memory requirements with large number of sections.
* 51 bits is the max physical real address on POWER9
*/
-
-#if defined(CONFIG_PPC_64K_PAGES)
-#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME)
+#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \
+ defined(CONFIG_PPC_64K_PAGES)
#define MAX_PHYSMEM_BITS 51
#else
#define MAX_PHYSMEM_BITS 46
#endif
-#else /* CONFIG_PPC_64K_PAGES */
-#define MAX_PHYSMEM_BITS 44
-#endif
/* 64-bit classic hash table MMU */
#include <asm/book3s/64/mmu-hash.h>
--
2.21.0
* Re: [PATCH 1/3] powerpc/book3s64/hash/4k: 4k supports only 16TB linear mapping
2019-09-17 14:57 [PATCH 1/3] powerpc/book3s64/hash/4k: 4k supports only 16TB linear mapping Aneesh Kumar K.V
` (2 preceding siblings ...)
2019-10-03 23:05 ` [PATCH 1/3] powerpc/book3s64/hash/4k: 4k supports only 16TB linear mapping Michael Ellerman
@ 2019-10-05 20:15 ` Samuel Holland
2019-10-09 5:56 ` Michael Ellerman
3 siblings, 1 reply; 6+ messages in thread
From: Samuel Holland @ 2019-10-05 20:15 UTC (permalink / raw)
To: Aneesh Kumar K.V, mpe; +Cc: Cameron Berkenpas, linuxppc-dev
Hello,
On 9/17/19 9:57 AM, Aneesh Kumar K.V wrote:
> With commit: 0034d395f89d ("powerpc/mm/hash64: Map all the kernel regions in the
> same 0xc range"), we now split the 64TB address range into 4 contexts each of
> 16TB. That implies we can do only 16TB linear mapping. Make sure we don't
> add physical memory above 16TB if that is present in the system.
>
> Fixes: 0034d395f89d ("powerpc/mm/hash64: Map all the kernel regions in the same 0xc range")
> Reported-by: Cameron Berkenpas <cam@neo-zeon.de>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
> arch/powerpc/include/asm/book3s/64/mmu.h | 8 ++++++--
> 1 file changed, 6 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
> index bb3deb76c951..86cce8189240 100644
> --- a/arch/powerpc/include/asm/book3s/64/mmu.h
> +++ b/arch/powerpc/include/asm/book3s/64/mmu.h
> @@ -35,12 +35,16 @@ extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
> * memory requirements with large number of sections.
> * 51 bits is the max physical real address on POWER9
> */
> -#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \
> - defined(CONFIG_PPC_64K_PAGES)
> +
> +#if defined(CONFIG_PPC_64K_PAGES)
> +#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME)
This prevents accessing physical memory over 16TB with 4k pages and radix MMU as
well. Was this intentional?
> #define MAX_PHYSMEM_BITS 51
> #else
> #define MAX_PHYSMEM_BITS 46
> #endif
> +#else /* CONFIG_PPC_64K_PAGES */
> +#define MAX_PHYSMEM_BITS 44
> +#endif
>
> /* 64-bit classic hash table MMU */
> #include <asm/book3s/64/mmu-hash.h>
>
Cheers,
Samuel