* [powerpc:next] BUILD SUCCESS 1f12096aca212af8fad3ef58d5673cde691a1452
From: kbuild test robot @ 2020-05-11 11:40 UTC (permalink / raw)
To: Michael Ellerman; +Cc: linuxppc-dev
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
branch HEAD: 1f12096aca212af8fad3ef58d5673cde691a1452 Merge the lockless page table walk rework into next
elapsed time: 5675m
configs tested: 178
configs skipped: 1
The following configs have been built successfully.
More configs may be tested in the coming days.
arm defconfig
arm allyesconfig
arm allmodconfig
arm allnoconfig
arm64 allyesconfig
arm64 defconfig
arm64 allmodconfig
arm64 allnoconfig
sparc allyesconfig
m68k allyesconfig
m68k allmodconfig
um defconfig
sparc64 defconfig
c6x allyesconfig
i386 allyesconfig
mips allmodconfig
s390 allmodconfig
m68k allnoconfig
m68k defconfig
alpha defconfig
arc allyesconfig
um allnoconfig
s390 allnoconfig
alpha allyesconfig
sh allmodconfig
openrisc defconfig
openrisc allyesconfig
sh allnoconfig
xtensa allyesconfig
riscv defconfig
mips allnoconfig
parisc allyesconfig
i386 allnoconfig
i386 defconfig
i386 debian-10.3
ia64 allmodconfig
ia64 defconfig
ia64 allnoconfig
ia64 allyesconfig
m68k sun3_defconfig
nios2 defconfig
nios2 allyesconfig
c6x allnoconfig
nds32 defconfig
nds32 allnoconfig
csky allyesconfig
csky defconfig
h8300 allyesconfig
h8300 allmodconfig
xtensa defconfig
arc defconfig
microblaze allnoconfig
mips allyesconfig
parisc allnoconfig
parisc defconfig
parisc allmodconfig
powerpc defconfig
powerpc allyesconfig
powerpc rhel-kconfig
powerpc allmodconfig
powerpc allnoconfig
x86_64 randconfig-a004-20200507
x86_64 randconfig-a006-20200507
x86_64 randconfig-a002-20200507
x86_64 randconfig-a005-20200508
x86_64 randconfig-a003-20200508
x86_64 randconfig-a001-20200508
x86_64 randconfig-a006-20200508
x86_64 randconfig-a004-20200508
x86_64 randconfig-a002-20200508
x86_64 randconfig-a005-20200511
x86_64 randconfig-a003-20200511
x86_64 randconfig-a006-20200511
x86_64 randconfig-a004-20200511
x86_64 randconfig-a001-20200511
x86_64 randconfig-a002-20200511
i386 randconfig-a005-20200509
i386 randconfig-a004-20200509
i386 randconfig-a003-20200509
i386 randconfig-a002-20200509
i386 randconfig-a001-20200509
i386 randconfig-a006-20200509
i386 randconfig-a006-20200510
i386 randconfig-a005-20200510
i386 randconfig-a003-20200510
i386 randconfig-a001-20200510
i386 randconfig-a004-20200510
i386 randconfig-a002-20200510
i386 randconfig-a006-20200511
i386 randconfig-a005-20200511
i386 randconfig-a003-20200511
i386 randconfig-a001-20200511
i386 randconfig-a004-20200511
i386 randconfig-a002-20200511
i386 randconfig-a005-20200508
i386 randconfig-a004-20200508
i386 randconfig-a003-20200508
i386 randconfig-a002-20200508
i386 randconfig-a001-20200508
i386 randconfig-a006-20200508
i386 randconfig-a005-20200507
i386 randconfig-a004-20200507
i386 randconfig-a001-20200507
i386 randconfig-a002-20200507
i386 randconfig-a003-20200507
i386 randconfig-a006-20200507
x86_64 randconfig-a015-20200507
x86_64 randconfig-a014-20200507
x86_64 randconfig-a012-20200507
x86_64 randconfig-a013-20200507
x86_64 randconfig-a011-20200507
x86_64 randconfig-a016-20200507
x86_64 randconfig-a014-20200508
x86_64 randconfig-a012-20200508
x86_64 randconfig-a016-20200508
x86_64 randconfig-a016-20200510
x86_64 randconfig-a012-20200510
x86_64 randconfig-a015-20200510
x86_64 randconfig-a013-20200510
x86_64 randconfig-a014-20200510
x86_64 randconfig-a011-20200510
x86_64 randconfig-a015-20200509
x86_64 randconfig-a014-20200509
x86_64 randconfig-a011-20200509
x86_64 randconfig-a013-20200509
x86_64 randconfig-a012-20200509
x86_64 randconfig-a016-20200509
x86_64 randconfig-a016-20200511
x86_64 randconfig-a012-20200511
x86_64 randconfig-a014-20200511
i386 randconfig-a012-20200507
i386 randconfig-a016-20200507
i386 randconfig-a014-20200507
i386 randconfig-a011-20200507
i386 randconfig-a015-20200507
i386 randconfig-a013-20200507
i386 randconfig-a012-20200509
i386 randconfig-a014-20200509
i386 randconfig-a016-20200509
i386 randconfig-a011-20200509
i386 randconfig-a013-20200509
i386 randconfig-a015-20200509
i386 randconfig-a012-20200511
i386 randconfig-a016-20200511
i386 randconfig-a014-20200511
i386 randconfig-a011-20200511
i386 randconfig-a013-20200511
i386 randconfig-a015-20200511
i386 randconfig-a012-20200508
i386 randconfig-a014-20200508
i386 randconfig-a016-20200508
i386 randconfig-a011-20200508
i386 randconfig-a013-20200508
i386 randconfig-a015-20200508
i386 randconfig-a012-20200510
i386 randconfig-a016-20200510
i386 randconfig-a014-20200510
i386 randconfig-a011-20200510
i386 randconfig-a013-20200510
i386 randconfig-a015-20200510
riscv allyesconfig
riscv allnoconfig
riscv allmodconfig
s390 allyesconfig
s390 defconfig
sparc defconfig
sparc64 allnoconfig
sparc64 allyesconfig
sparc64 allmodconfig
um allmodconfig
um allyesconfig
x86_64 rhel
x86_64 rhel-7.6
x86_64 rhel-7.6-kselftests
x86_64 rhel-7.2-clear
x86_64 lkp
x86_64 fedora-25
x86_64 kexec
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply
* [PATCH 1/2] powerpc/pmem: Add new instructions for persistent storage and sync
From: Aneesh Kumar K.V @ 2020-05-11 11:34 UTC (permalink / raw)
To: linuxppc-dev, mpe; +Cc: alistair, oohall, Aneesh Kumar K.V
POWER10 introduces two new variants of dcbf instructions (dcbstps and dcbfps)
that can be used to write modified locations back to persistent storage.
Additionally, POWER10 also introduce phwsync and plwsync which can be used
to establish order of these writes to persistent storage.
This patch exposes these instructions to the rest of the kernel. The existing
dcbf and hwsync instructions in P9 are adequate to enable appropriate
synchronization with OpenCAPI-hosted persistent storage. Hence the new
instructions are added as a variant of the old ones that old hardware
won't differentiate.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
arch/powerpc/include/asm/ppc-opcode.h | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index c1df75edde44..e8ca55e5d31e 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -216,6 +216,8 @@
#define PPC_INST_STWCX 0x7c00012d
#define PPC_INST_LWSYNC 0x7c2004ac
#define PPC_INST_SYNC 0x7c0004ac
+#define PPC_INST_PHWSYNC 0x7c8004ac
+#define PPC_INST_PLWSYNC 0x7ca004ac
#define PPC_INST_SYNC_MASK 0xfc0007fe
#define PPC_INST_ISYNC 0x4c00012c
#define PPC_INST_LXVD2X 0x7c000698
@@ -281,6 +283,8 @@
#define PPC_INST_TABORT 0x7c00071d
#define PPC_INST_TSR 0x7c0005dd
+#define PPC_INST_DCBF 0x7c0000ac
+
#define PPC_INST_NAP 0x4c000364
#define PPC_INST_SLEEP 0x4c0003a4
#define PPC_INST_WINKLE 0x4c0003e4
@@ -529,6 +533,14 @@
#define STBCIX(s,a,b) stringify_in_c(.long PPC_INST_STBCIX | \
__PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b))
+#define PPC_DCBFPS(a, b) stringify_in_c(.long PPC_INST_DCBF | \
+ ___PPC_RA(a) | ___PPC_RB(b) | (4 << 21))
+#define PPC_DCBSTPS(a, b) stringify_in_c(.long PPC_INST_DCBF | \
+ ___PPC_RA(a) | ___PPC_RB(b) | (6 << 21))
+
+#define PPC_PHWSYNC() stringify_in_c(.long PPC_INST_PHWSYNC)
+#define PPC_PLWSYNC() stringify_in_c(.long PPC_INST_PLWSYNC)
+
/*
* Define what the VSX XX1 form instructions will look like, then add
* the 128 bit load store instructions based on that.
--
2.26.2
^ permalink raw reply related
* [PATCH 2/2] powerpc/pmem: Add flush routines using new pmem store and sync instruction
From: Aneesh Kumar K.V @ 2020-05-11 11:34 UTC (permalink / raw)
To: linuxppc-dev, mpe; +Cc: alistair, oohall, Aneesh Kumar K.V
In-Reply-To: <20200511113443.36569-1-aneesh.kumar@linux.ibm.com>
Start using dcbstps; phwsync; sequence for flushing persistent memory range.
The new instructions are implemented as a variant of dcbf and hwsync and on
POWER9 they will be executed as those instructions.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
arch/powerpc/include/asm/cacheflush.h | 31 +++++++++++++++++++++++++++
arch/powerpc/lib/pmem.c | 8 +++----
2 files changed, 35 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index e92191b390f3..edb59edeeacb 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -74,6 +74,37 @@ static inline void flush_dcache_range(unsigned long start, unsigned long stop)
}
+static inline void clean_pmem_range(unsigned long start, unsigned long stop)
+{
+ unsigned long shift = l1_dcache_shift();
+ unsigned long bytes = l1_dcache_bytes();
+ void *addr = (void *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+ unsigned long i;
+
+ for (i = 0; i < size >> shift; i++, addr += bytes)
+ asm volatile(PPC_DCBSTPS(%0, %1): :"i"(0), "r"(addr): "memory");
+
+
+ asm volatile(PPC_PHWSYNC():::"memory");
+}
+
+static inline void flush_pmem_range(unsigned long start, unsigned long stop)
+{
+ unsigned long shift = l1_dcache_shift();
+ unsigned long bytes = l1_dcache_bytes();
+ void *addr = (void *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+ unsigned long i;
+
+ for (i = 0; i < size >> shift; i++, addr += bytes)
+ asm volatile(PPC_DCBFPS(%0, %1): :"i"(0), "r"(addr): "memory");
+
+
+ asm volatile(PPC_PHWSYNC():::"memory");
+}
+
+
/*
* Write any modified data cache blocks out to memory.
* Does not invalidate the corresponding cache lines (especially for
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
index 0666a8d29596..624708bcaaad 100644
--- a/arch/powerpc/lib/pmem.c
+++ b/arch/powerpc/lib/pmem.c
@@ -15,14 +15,14 @@
void arch_wb_cache_pmem(void *addr, size_t size)
{
unsigned long start = (unsigned long) addr;
- flush_dcache_range(start, start + size);
+ clean_pmem_range(start, start + size);
}
EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
void arch_invalidate_pmem(void *addr, size_t size)
{
unsigned long start = (unsigned long) addr;
- flush_dcache_range(start, start + size);
+ flush_pmem_range(start, start + size);
}
EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
@@ -35,7 +35,7 @@ long __copy_from_user_flushcache(void *dest, const void __user *src,
unsigned long copied, start = (unsigned long) dest;
copied = __copy_from_user(dest, src, size);
- flush_dcache_range(start, start + size);
+ clean_pmem_range(start, start + size);
return copied;
}
@@ -45,7 +45,7 @@ void *memcpy_flushcache(void *dest, const void *src, size_t size)
unsigned long start = (unsigned long) dest;
memcpy(dest, src, size);
- flush_dcache_range(start, start + size);
+ clean_pmem_range(start, start + size);
return dest;
}
--
2.26.2
^ permalink raw reply related
* Re: [PATCH] powerpc/kvm: silence kmemleak false positives
From: Catalin Marinas @ 2020-05-11 11:28 UTC (permalink / raw)
To: Michael Ellerman; +Cc: linux-kernel, kvm-ppc, Qian Cai, linuxppc-dev
In-Reply-To: <87y2pybu38.fsf@mpe.ellerman.id.au>
On Mon, May 11, 2020 at 09:15:55PM +1000, Michael Ellerman wrote:
> Qian Cai <cai@lca.pw> writes:
> > kvmppc_pmd_alloc() and kvmppc_pte_alloc() allocate some memory but then
> > pud_populate() and pmd_populate() will use __pa() to reference the newly
> > allocated memory. The same is in xive_native_provision_pages().
> >
> > Since kmemleak is unable to track the physical memory resulting in false
> > positives, silence those by using kmemleak_ignore().
>
> There is kmemleak_alloc_phys(), which according to the docs can be used
> for tracking a phys address.
This won't help. While kmemleak_alloc_phys() allows passing a physical
address, it doesn't track physical address references to this object. It
still expects VA pointing to it, otherwise the object would be reported
as a leak.
We currently only call this from the memblock code with a min_count of
0, meaning it will not be reported as a leak if no references are found.
We don't have this issue with page tables on other architectures since
most of them use whole page allocations which aren't tracked by
kmemleak. These powerpc functions use kmem_cache_alloc() which would be
tracked automatically by kmemleak. While we could add a phys alias to
kmemleak (another search tree), I think the easiest is as per Qian's
patch, just ignore those objects.
--
Catalin
^ permalink raw reply
* Re: [PATCH v3 1/3] powerpc/numa: Set numa_node for all possible cpus
From: Michael Ellerman @ 2020-05-11 11:27 UTC (permalink / raw)
To: Srikar Dronamraju, Christopher Lameter
Cc: Gautham R Shenoy, Michal Hocko, Linus Torvalds, linux-kernel,
linux-mm, Mel Gorman, Kirill A. Shutemov, Andrew Morton,
linuxppc-dev, Vlastimil Babka
In-Reply-To: <20200508132130.GC1961@linux.vnet.ibm.com>
Srikar Dronamraju <srikar@linux.vnet.ibm.com> writes:
> * Christopher Lameter <cl@linux.com> [2020-05-02 22:55:16]:
>
>> On Fri, 1 May 2020, Srikar Dronamraju wrote:
>>
>> > - for_each_present_cpu(cpu)
>> > - numa_setup_cpu(cpu);
>> > + for_each_possible_cpu(cpu) {
>> > + /*
>> > + * Powerpc with CONFIG_NUMA always used to have a node 0,
>> > + * even if it was memoryless or cpuless. For all cpus that
>> > + * are possible but not present, cpu_to_node() would point
>> > + * to node 0. To remove a cpuless, memoryless dummy node,
>> > + * powerpc need to make sure all possible but not present
>> > + * cpu_to_node are set to a proper node.
>> > + */
>> > + if (cpu_present(cpu))
>> > + numa_setup_cpu(cpu);
>> > + else
>> > + set_cpu_numa_node(cpu, first_online_node);
>> > + }
>> > }
>>
>> Can this be folded into numa_setup_cpu?
>>
>> This looks more like numa_setup_cpu needs to change?
>
> We can fold this into numa_setup_cpu().
>
> However till now we were sure that numa_setup_cpu() would be called only for
> a present cpu. That assumption will change.
> + (non-consequential) an additional check everytime cpu is hotplugged in.
>
> If Michael Ellerman is okay with the change, I can fold it in.
Yes I agree it would be better in numa_setup_cpu().
cheers
^ permalink raw reply
* [PATCH v3 45/45] powerpc/32s: Implement dedicated kasan_init_region()
From: Christophe Leroy @ 2020-05-11 11:26 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1589196133.git.christophe.leroy@csgroup.eu>
Implement a kasan_init_region() dedicated to book3s/32 that
allocates KASAN regions using BATs.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/include/asm/kasan.h | 1 +
arch/powerpc/mm/kasan/Makefile | 1 +
arch/powerpc/mm/kasan/book3s_32.c | 57 +++++++++++++++++++++++++++
arch/powerpc/mm/kasan/kasan_init_32.c | 2 +-
4 files changed, 60 insertions(+), 1 deletion(-)
create mode 100644 arch/powerpc/mm/kasan/book3s_32.c
diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h
index 107a24c3f7b3..be85c7005fb1 100644
--- a/arch/powerpc/include/asm/kasan.h
+++ b/arch/powerpc/include/asm/kasan.h
@@ -34,6 +34,7 @@ static inline void kasan_init(void) { }
static inline void kasan_late_init(void) { }
#endif
+void kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte);
int kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end);
int kasan_init_region(void *start, size_t size);
diff --git a/arch/powerpc/mm/kasan/Makefile b/arch/powerpc/mm/kasan/Makefile
index 440038ea79f1..bb1a5408b86b 100644
--- a/arch/powerpc/mm/kasan/Makefile
+++ b/arch/powerpc/mm/kasan/Makefile
@@ -4,3 +4,4 @@ KASAN_SANITIZE := n
obj-$(CONFIG_PPC32) += kasan_init_32.o
obj-$(CONFIG_PPC_8xx) += 8xx.o
+obj-$(CONFIG_PPC_BOOK3S_32) += book3s_32.o
diff --git a/arch/powerpc/mm/kasan/book3s_32.c b/arch/powerpc/mm/kasan/book3s_32.c
new file mode 100644
index 000000000000..4bc491a4a1fd
--- /dev/null
+++ b/arch/powerpc/mm/kasan/book3s_32.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define DISABLE_BRANCH_PROFILING
+
+#include <linux/kasan.h>
+#include <linux/memblock.h>
+#include <asm/pgalloc.h>
+#include <mm/mmu_decl.h>
+
+int __init kasan_init_region(void *start, size_t size)
+{
+ unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start);
+ unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size);
+ unsigned long k_cur = k_start;
+ int k_size = k_end - k_start;
+ int k_size_base = 1 << (ffs(k_size) - 1);
+ int ret;
+ void *block;
+
+ block = memblock_alloc(k_size, k_size_base);
+
+ if (block && k_size_base >= SZ_128K && k_start == ALIGN(k_start, k_size_base)) {
+ int k_size_more = 1 << (ffs(k_size - k_size_base) - 1);
+
+ setbat(-1, k_start, __pa(block), k_size_base, PAGE_KERNEL);
+ if (k_size_more >= SZ_128K)
+ setbat(-1, k_start + k_size_base, __pa(block) + k_size_base,
+ k_size_more, PAGE_KERNEL);
+ if (v_block_mapped(k_start))
+ k_cur = k_start + k_size_base;
+ if (v_block_mapped(k_start + k_size_base))
+ k_cur = k_start + k_size_base + k_size_more;
+
+ update_bats();
+ }
+
+ if (!block)
+ block = memblock_alloc(k_size, PAGE_SIZE);
+ if (!block)
+ return -ENOMEM;
+
+ ret = kasan_init_shadow_page_tables(k_start, k_end);
+ if (ret)
+ return ret;
+
+ kasan_update_early_region(k_start, k_cur, __pte(0));
+
+ for (; k_cur < k_end; k_cur += PAGE_SIZE) {
+ pmd_t *pmd = pmd_ptr_k(k_cur);
+ void *va = block + k_cur - k_start;
+ pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
+
+ __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0);
+ }
+ flush_tlb_kernel_range(k_start, k_end);
+ return 0;
+}
diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
index 76d418af4ce8..c42085801c04 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -79,7 +79,7 @@ int __init __weak kasan_init_region(void *start, size_t size)
return 0;
}
-static void __init
+void __init
kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte)
{
unsigned long k_cur;
--
2.25.0
^ permalink raw reply related
* [PATCH v3 44/45] powerpc/32s: Allow mapping with BATs with DEBUG_PAGEALLOC
From: Christophe Leroy @ 2020-05-11 11:26 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1589196133.git.christophe.leroy@csgroup.eu>
DEBUG_PAGEALLOC only manages RW data.
Text and RO data can still be mapped with BATs.
In order to map with BATs, also enforce data alignment. Set
by default to 256M which is a good compromise for keeping
enough BATs for also KASAN and IMMR.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/Kconfig | 1 +
arch/powerpc/mm/book3s32/mmu.c | 6 ++++++
arch/powerpc/mm/init_32.c | 5 ++---
3 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9d94e8b178d8..5c1fcfe9be74 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -796,6 +796,7 @@ config DATA_SHIFT
range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_BOOK3S_32
range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_8xx
default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
+ default 18 if DEBUG_PAGEALLOC && PPC_BOOK3S_32
default 23 if STRICT_KERNEL_RWX && PPC_8xx
default 23 if DEBUG_PAGEALLOC && PPC_8xx && PIN_TLB_DATA
default 19 if DEBUG_PAGEALLOC && PPC_8xx
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index a9b2cbc74797..a6dcc708eee3 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -170,6 +170,12 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
pr_debug("RAM mapped without BATs\n");
return base;
}
+ if (debug_pagealloc_enabled()) {
+ if (base >= border)
+ return base;
+ if (top >= border)
+ top = border;
+ }
if (!strict_kernel_rwx_enabled() || base >= border || top <= border)
return __mmu_mapin_ram(base, top);
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 8977a7c2543d..36c39bd37256 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -99,10 +99,9 @@ static void __init MMU_setup(void)
if (IS_ENABLED(CONFIG_PPC_8xx))
return;
- if (debug_pagealloc_enabled()) {
- __map_without_bats = 1;
+ if (debug_pagealloc_enabled())
__map_without_ltlbs = 1;
- }
+
if (strict_kernel_rwx_enabled())
__map_without_ltlbs = 1;
}
--
2.25.0
^ permalink raw reply related
* [PATCH v3 43/45] powerpc/8xx: Implement dedicated kasan_init_region()
From: Christophe Leroy @ 2020-05-11 11:26 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1589196133.git.christophe.leroy@csgroup.eu>
Implement a kasan_init_region() dedicated to 8xx that
allocates KASAN regions using huge pages.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/mm/kasan/8xx.c | 74 ++++++++++++++++++++++++++++++++++
arch/powerpc/mm/kasan/Makefile | 1 +
2 files changed, 75 insertions(+)
create mode 100644 arch/powerpc/mm/kasan/8xx.c
diff --git a/arch/powerpc/mm/kasan/8xx.c b/arch/powerpc/mm/kasan/8xx.c
new file mode 100644
index 000000000000..db4ef44af22f
--- /dev/null
+++ b/arch/powerpc/mm/kasan/8xx.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define DISABLE_BRANCH_PROFILING
+
+#include <linux/kasan.h>
+#include <linux/memblock.h>
+#include <linux/hugetlb.h>
+#include <asm/pgalloc.h>
+
+static int __init
+kasan_init_shadow_8M(unsigned long k_start, unsigned long k_end, void *block)
+{
+ pmd_t *pmd = pmd_ptr_k(k_start);
+ unsigned long k_cur, k_next;
+
+ for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd += 2, block += SZ_8M) {
+ pte_basic_t *new;
+
+ k_next = pgd_addr_end(k_cur, k_end);
+ k_next = pgd_addr_end(k_next, k_end);
+ if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte)
+ continue;
+
+ new = memblock_alloc(sizeof(pte_basic_t), SZ_4K);
+ if (!new)
+ return -ENOMEM;
+
+ *new = pte_val(pte_mkhuge(pfn_pte(PHYS_PFN(__pa(block)), PAGE_KERNEL)));
+
+ hugepd_populate_kernel((hugepd_t *)pmd, (pte_t *)new, PAGE_SHIFT_8M);
+ hugepd_populate_kernel((hugepd_t *)pmd + 1, (pte_t *)new, PAGE_SHIFT_8M);
+ }
+ return 0;
+}
+
+int __init kasan_init_region(void *start, size_t size)
+{
+ unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start);
+ unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size);
+ unsigned long k_cur;
+ int ret;
+ void *block;
+
+ block = memblock_alloc(k_end - k_start, SZ_8M);
+ if (!block)
+ return -ENOMEM;
+
+ if (IS_ALIGNED(k_start, SZ_8M)) {
+ kasan_init_shadow_8M(k_start, ALIGN_DOWN(k_end, SZ_8M), block);
+ k_cur = ALIGN_DOWN(k_end, SZ_8M);
+ if (k_cur == k_end)
+ goto finish;
+ } else {
+ k_cur = k_start;
+ }
+
+ ret = kasan_init_shadow_page_tables(k_start, k_end);
+ if (ret)
+ return ret;
+
+ for (; k_cur < k_end; k_cur += PAGE_SIZE) {
+ pmd_t *pmd = pmd_ptr_k(k_cur);
+ void *va = block + k_cur - k_start;
+ pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
+
+ if (k_cur < ALIGN_DOWN(k_end, SZ_512K))
+ pte = pte_mkhuge(pte);
+
+ __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0);
+ }
+finish:
+ flush_tlb_kernel_range(k_start, k_end);
+ return 0;
+}
diff --git a/arch/powerpc/mm/kasan/Makefile b/arch/powerpc/mm/kasan/Makefile
index 6577897673dd..440038ea79f1 100644
--- a/arch/powerpc/mm/kasan/Makefile
+++ b/arch/powerpc/mm/kasan/Makefile
@@ -3,3 +3,4 @@
KASAN_SANITIZE := n
obj-$(CONFIG_PPC32) += kasan_init_32.o
+obj-$(CONFIG_PPC_8xx) += 8xx.o
--
2.25.0
^ permalink raw reply related
* [PATCH v3 42/45] powerpc/8xx: Allow large TLBs with DEBUG_PAGEALLOC
From: Christophe Leroy @ 2020-05-11 11:26 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1589196133.git.christophe.leroy@csgroup.eu>
DEBUG_PAGEALLOC only manages RW data.
Text and RO data can still be mapped with hugepages and pinned TLB.
In order to map with hugepages, also enforce a 512kB data alignment
minimum. That's a trade-off between size of speed, taking into
account that DEBUG_PAGEALLOC is a debug option. Anyway the alignment
is still tunable.
We also allow tuning of alignment for book3s to limit the complexity
of the test in Kconfig that will anyway disappear in the following
patches once DEBUG_PAGEALLOC is handled together with BATs.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/Kconfig | 11 +++++++----
arch/powerpc/mm/init_32.c | 5 ++++-
arch/powerpc/mm/nohash/8xx.c | 11 ++++++++---
arch/powerpc/platforms/8xx/Kconfig | 2 +-
4 files changed, 20 insertions(+), 9 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index edbe39140da0..9d94e8b178d8 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -780,8 +780,9 @@ config THREAD_SHIFT
config DATA_SHIFT_BOOL
bool "Set custom data alignment"
depends on ADVANCED_OPTIONS
- depends on STRICT_KERNEL_RWX
- depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && !PIN_TLB_TEXT)
+ depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC
+ depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && \
+ (!PIN_TLB_TEXT || !STRICT_KERNEL_RWX))
help
This option allows you to set the kernel data alignment. When
RAM is mapped by blocks, the alignment needs to fit the size and
@@ -792,10 +793,12 @@ config DATA_SHIFT_BOOL
config DATA_SHIFT
int "Data shift" if DATA_SHIFT_BOOL
default 24 if STRICT_KERNEL_RWX && PPC64
- range 17 28 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
- range 19 23 if STRICT_KERNEL_RWX && PPC_8xx
+ range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_BOOK3S_32
+ range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_8xx
default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
default 23 if STRICT_KERNEL_RWX && PPC_8xx
+ default 23 if DEBUG_PAGEALLOC && PPC_8xx && PIN_TLB_DATA
+ default 19 if DEBUG_PAGEALLOC && PPC_8xx
default PPC_PAGE_SHIFT
help
On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO.
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index a6991ef8727d..8977a7c2543d 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -96,11 +96,14 @@ static void __init MMU_setup(void)
if (strstr(boot_command_line, "noltlbs")) {
__map_without_ltlbs = 1;
}
+ if (IS_ENABLED(CONFIG_PPC_8xx))
+ return;
+
if (debug_pagealloc_enabled()) {
__map_without_bats = 1;
__map_without_ltlbs = 1;
}
- if (strict_kernel_rwx_enabled() && !IS_ENABLED(CONFIG_PPC_8xx))
+ if (strict_kernel_rwx_enabled())
__map_without_ltlbs = 1;
}
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 35796ce81695..e112eb157e48 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -149,7 +149,8 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
{
unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M);
unsigned long sinittext = __pa(_sinittext);
- unsigned long boundary = strict_kernel_rwx_enabled() ? sinittext : etext8;
+ bool strict_boundary = strict_kernel_rwx_enabled() || debug_pagealloc_enabled();
+ unsigned long boundary = strict_boundary ? sinittext : etext8;
unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M);
WARN_ON(top < einittext8);
@@ -160,8 +161,12 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
return 0;
mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, true);
- mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL_TEXT, true);
- mmu_mapin_ram_chunk(einittext8, top, PAGE_KERNEL, true);
+ if (debug_pagealloc_enabled()) {
+ top = boundary;
+ } else {
+ mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL_TEXT, true);
+ mmu_mapin_ram_chunk(einittext8, top, PAGE_KERNEL, true);
+ }
if (top > SZ_32M)
memblock_set_current_limit(top);
diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig
index 05669f2fadce..abb2b45b2789 100644
--- a/arch/powerpc/platforms/8xx/Kconfig
+++ b/arch/powerpc/platforms/8xx/Kconfig
@@ -167,7 +167,7 @@ menu "8xx advanced setup"
config PIN_TLB
bool "Pinned Kernel TLBs"
- depends on ADVANCED_OPTIONS && !DEBUG_PAGEALLOC
+ depends on ADVANCED_OPTIONS
help
On the 8xx, we have 32 instruction TLBs and 32 data TLBs. In each
table 4 TLBs can be pinned.
--
2.25.0
^ permalink raw reply related
* [PATCH v3 41/45] powerpc/8xx: Allow STRICT_KERNEL_RwX with pinned TLB
From: Christophe Leroy @ 2020-05-11 11:26 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1589196133.git.christophe.leroy@csgroup.eu>
Pinned TLB are 8M. Now that there is no strict boundary anymore
between text and RO data, it is possible to use 8M pinned executable
TLB that covers both text and RO data.
When PIN_TLB_DATA or PIN_TLB_TEXT is selected, enforce 8M RW data
alignment and allow STRICT_KERNEL_RWX.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
v2: Use the new function that sets all pinned TLBs at once.
---
arch/powerpc/Kconfig | 8 +++++---
arch/powerpc/mm/nohash/8xx.c | 9 +++++++--
arch/powerpc/platforms/8xx/Kconfig | 2 +-
3 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 970a5802850f..edbe39140da0 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -778,9 +778,10 @@ config THREAD_SHIFT
want. Only change this if you know what you are doing.
config DATA_SHIFT_BOOL
- bool "Set custom data alignment" if STRICT_KERNEL_RWX && \
- (PPC_BOOK3S_32 || PPC_8xx)
+ bool "Set custom data alignment"
depends on ADVANCED_OPTIONS
+ depends on STRICT_KERNEL_RWX
+ depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && !PIN_TLB_TEXT)
help
This option allows you to set the kernel data alignment. When
RAM is mapped by blocks, the alignment needs to fit the size and
@@ -802,7 +803,8 @@ config DATA_SHIFT
On 8xx, large pages (512kb or 8M) are used to map kernel linear
memory. Aligning to 8M reduces TLB misses as only 8M pages are used
- in that case.
+ in that case. If PIN_TLB is selected, it must be aligned to 8M as
+ 8M pages will be pinned.
config FORCE_MAX_ZONEORDER
int "Maximum zone order"
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index c62cab996d4d..35796ce81695 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -126,8 +126,8 @@ void __init mmu_mapin_immr(void)
PAGE_KERNEL_NCG, MMU_PAGE_512K, true);
}
-static void __init mmu_mapin_ram_chunk(unsigned long offset, unsigned long top,
- pgprot_t prot, bool new)
+static void mmu_mapin_ram_chunk(unsigned long offset, unsigned long top,
+ pgprot_t prot, bool new)
{
unsigned long v = PAGE_OFFSET + offset;
unsigned long p = offset;
@@ -180,6 +180,9 @@ void mmu_mark_initmem_nx(void)
mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, false);
mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false);
+
+ if (IS_ENABLED(CONFIG_PIN_TLB_TEXT))
+ mmu_pin_tlb(block_mapped_ram, false);
}
#ifdef CONFIG_STRICT_KERNEL_RWX
@@ -188,6 +191,8 @@ void mmu_mark_rodata_ro(void)
unsigned long sinittext = __pa(_sinittext);
mmu_mapin_ram_chunk(0, sinittext, PAGE_KERNEL_ROX, false);
+ if (IS_ENABLED(CONFIG_PIN_TLB_DATA))
+ mmu_pin_tlb(block_mapped_ram, true);
}
#endif
diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig
index 04ea1a8a0bdc..05669f2fadce 100644
--- a/arch/powerpc/platforms/8xx/Kconfig
+++ b/arch/powerpc/platforms/8xx/Kconfig
@@ -167,7 +167,7 @@ menu "8xx advanced setup"
config PIN_TLB
bool "Pinned Kernel TLBs"
- depends on ADVANCED_OPTIONS && !DEBUG_PAGEALLOC && !STRICT_KERNEL_RWX
+ depends on ADVANCED_OPTIONS && !DEBUG_PAGEALLOC
help
On the 8xx, we have 32 instruction TLBs and 32 data TLBs. In each
table 4 TLBs can be pinned.
--
2.25.0
^ permalink raw reply related
* [PATCH v3 40/45] powerpc/8xx: Map linear memory with huge pages
From: Christophe Leroy @ 2020-05-11 11:26 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1589196133.git.christophe.leroy@csgroup.eu>
Map linear memory space with 512k and 8M pages whenever
possible.
Three mappings are performed:
- One for kernel text
- One for RO data
- One for the rest
Separating the mappings is done to be able to update the
protection later when using STRICT_KERNEL_RWX.
The ITLB miss handler now need to also handle huge TLBs
unless kernel text in pinned.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/kernel/head_8xx.S | 4 +--
arch/powerpc/mm/nohash/8xx.c | 50 +++++++++++++++++++++++++++++++++-
2 files changed, 51 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 9a117b9f0998..abb71fad7d6a 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -224,7 +224,7 @@ InstructionTLBMiss:
3:
mtcr r11
#endif
-#ifdef CONFIG_HUGETLBFS
+#if defined(CONFIG_HUGETLBFS) || !defined(CONFIG_PIN_TLB_TEXT)
lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */
mtspr SPRN_MD_TWC, r11
#else
@@ -234,7 +234,7 @@ InstructionTLBMiss:
#endif
mfspr r10, SPRN_MD_TWC
lwz r10, 0(r10) /* Get the pte */
-#ifdef CONFIG_HUGETLBFS
+#if defined(CONFIG_HUGETLBFS) || !defined(CONFIG_PIN_TLB_TEXT)
rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K
mtspr SPRN_MI_TWC, r11
#endif
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index fb31a0c1c2a4..c62cab996d4d 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -126,20 +126,68 @@ void __init mmu_mapin_immr(void)
PAGE_KERNEL_NCG, MMU_PAGE_512K, true);
}
+static void __init mmu_mapin_ram_chunk(unsigned long offset, unsigned long top,
+ pgprot_t prot, bool new)
+{
+ unsigned long v = PAGE_OFFSET + offset;
+ unsigned long p = offset;
+
+ WARN_ON(!IS_ALIGNED(offset, SZ_512K) || !IS_ALIGNED(top, SZ_512K));
+
+ for (; p < ALIGN(p, SZ_8M) && p < top; p += SZ_512K, v += SZ_512K)
+ __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new);
+ for (; p < ALIGN_DOWN(top, SZ_8M) && p < top; p += SZ_8M, v += SZ_8M)
+ __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_8M, new);
+ for (; p < ALIGN_DOWN(top, SZ_512K) && p < top; p += SZ_512K, v += SZ_512K)
+ __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new);
+
+ if (!new)
+ flush_tlb_kernel_range(PAGE_OFFSET + v, PAGE_OFFSET + top);
+}
+
unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
{
+ unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M);
+ unsigned long sinittext = __pa(_sinittext);
+ unsigned long boundary = strict_kernel_rwx_enabled() ? sinittext : etext8;
+ unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M);
+
+ WARN_ON(top < einittext8);
+
mmu_mapin_immr();
- return 0;
+ if (__map_without_ltlbs)
+ return 0;
+
+ mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, true);
+ mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL_TEXT, true);
+ mmu_mapin_ram_chunk(einittext8, top, PAGE_KERNEL, true);
+
+ if (top > SZ_32M)
+ memblock_set_current_limit(top);
+
+ block_mapped_ram = top;
+
+ return top;
}
void mmu_mark_initmem_nx(void)
{
+ unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M);
+ unsigned long sinittext = __pa(_sinittext);
+ unsigned long boundary = strict_kernel_rwx_enabled() ? sinittext : etext8;
+ unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M);
+
+ mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, false);
+ mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false);
}
#ifdef CONFIG_STRICT_KERNEL_RWX
void mmu_mark_rodata_ro(void)
{
+ unsigned long sinittext = __pa(_sinittext);
+
+ mmu_mapin_ram_chunk(0, sinittext, PAGE_KERNEL_ROX, false);
}
#endif
--
2.25.0
^ permalink raw reply related
* [PATCH v3 39/45] powerpc/8xx: Map IMMR with a huge page
From: Christophe Leroy @ 2020-05-11 11:26 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1589196133.git.christophe.leroy@csgroup.eu>
Map the IMMR area with a single 512k huge page.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/mm/nohash/8xx.c | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 570ab2114a73..fb31a0c1c2a4 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -117,17 +117,13 @@ static bool immr_is_mapped __initdata;
void __init mmu_mapin_immr(void)
{
- unsigned long p = PHYS_IMMR_BASE;
- unsigned long v = VIRT_IMMR_BASE;
- int offset;
-
if (immr_is_mapped)
return;
immr_is_mapped = true;
- for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE)
- map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG);
+ __early_map_kernel_hugepage(VIRT_IMMR_BASE, PHYS_IMMR_BASE,
+ PAGE_KERNEL_NCG, MMU_PAGE_512K, true);
}
unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
--
2.25.0
^ permalink raw reply related
* [PATCH v3 38/45] powerpc/8xx: Add a function to early map kernel via huge pages
From: Christophe Leroy @ 2020-05-11 11:26 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1589196133.git.christophe.leroy@csgroup.eu>
Add a function to early map kernel memory using huge pages.
For 512k pages, just use standard page table and map in using 512k
pages.
For 8M pages, create a hugepd table and populate the two PGD
entries with it.
This function can only be used to create page tables at startup. Once
the regular SLAB allocation functions replace memblock functions,
this function cannot allocate new pages anymore. However it can still
update existing mappings with new protections.
hugepd_none() macro is moved into asm/hugetlb.h to be usable outside
of mm/hugetlbpage.c
early_pte_alloc_kernel() is made visible.
_PAGE_HUGE flag is now displayed by ptdump.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
v2: Select CONFIG_HUGETLBFS instead of CONFIG_HUGETLB_PAGE which leads to linktime failure
---
.../include/asm/nohash/32/hugetlb-8xx.h | 5 ++
arch/powerpc/include/asm/pgtable.h | 2 +
arch/powerpc/mm/nohash/8xx.c | 52 +++++++++++++++++++
arch/powerpc/mm/pgtable_32.c | 2 +-
arch/powerpc/mm/ptdump/8xx.c | 5 ++
arch/powerpc/platforms/Kconfig.cputype | 1 +
6 files changed, 66 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
index 1c7d4693a78e..e752a5807a59 100644
--- a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
@@ -35,6 +35,11 @@ static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshi
*hpdp = __hugepd(__pa(new) | _PMD_USER | _PMD_PRESENT | _PMD_PAGE_8M);
}
+static inline void hugepd_populate_kernel(hugepd_t *hpdp, pte_t *new, unsigned int pshift)
+{
+ *hpdp = __hugepd(__pa(new) | _PMD_PRESENT | _PMD_PAGE_8M);
+}
+
static inline int check_and_get_huge_psize(int shift)
{
return shift_to_mmu_psize(shift);
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index b1f1d5339735..961895be932a 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -107,6 +107,8 @@ unsigned long vmalloc_to_phys(void *vmalloc_addr);
void pgtable_cache_add(unsigned int shift);
+pte_t *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va);
+
#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_PPC32)
void mark_initmem_nx(void);
#else
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index d9f205d9a654..570ab2114a73 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -9,8 +9,10 @@
#include <linux/memblock.h>
#include <linux/mmu_context.h>
+#include <linux/hugetlb.h>
#include <asm/fixmap.h>
#include <asm/code-patching.h>
+#include <asm/pgalloc.h>
#include <mm/mmu_decl.h>
@@ -54,6 +56,56 @@ unsigned long p_block_mapped(phys_addr_t pa)
return 0;
}
+static pte_t __init *early_hugepd_alloc_kernel(hugepd_t *pmdp, unsigned long va)
+{
+ if (hpd_val(*pmdp) == 0) {
+ pte_t *ptep = memblock_alloc(sizeof(pte_basic_t), SZ_4K);
+
+ if (!ptep)
+ return NULL;
+
+ hugepd_populate_kernel((hugepd_t *)pmdp, ptep, PAGE_SHIFT_8M);
+ hugepd_populate_kernel((hugepd_t *)pmdp + 1, ptep, PAGE_SHIFT_8M);
+ }
+ return hugepte_offset(*(hugepd_t *)pmdp, va, PGDIR_SHIFT);
+}
+
+static int __ref __early_map_kernel_hugepage(unsigned long va, phys_addr_t pa,
+ pgprot_t prot, int psize, bool new)
+{
+ pmd_t *pmdp = pmd_ptr_k(va);
+ pte_t *ptep;
+
+ if (WARN_ON(psize != MMU_PAGE_512K && psize != MMU_PAGE_8M))
+ return -EINVAL;
+
+ if (new) {
+ if (WARN_ON(slab_is_available()))
+ return -EINVAL;
+
+ if (psize == MMU_PAGE_512K)
+ ptep = early_pte_alloc_kernel(pmdp, va);
+ else
+ ptep = early_hugepd_alloc_kernel((hugepd_t *)pmdp, va);
+ } else {
+ if (psize == MMU_PAGE_512K)
+ ptep = pte_offset_kernel(pmdp, va);
+ else
+ ptep = hugepte_offset(*(hugepd_t *)pmdp, va, PGDIR_SHIFT);
+ }
+
+ if (WARN_ON(!ptep))
+ return -ENOMEM;
+
+ /* The PTE should never be already present */
+ if (new && WARN_ON(pte_present(*ptep) && pgprot_val(prot)))
+ return -EINVAL;
+
+ set_huge_pte_at(&init_mm, va, ptep, pte_mkhuge(pfn_pte(pa >> PAGE_SHIFT, prot)));
+
+ return 0;
+}
+
/*
* MMU_init_hw does the chip-specific initialization of the MMU hardware.
*/
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index bd0cb6e3573e..05902bbff8d6 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -61,7 +61,7 @@ static void __init *early_alloc_pgtable(unsigned long size)
return ptr;
}
-static pte_t __init *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va)
+pte_t __init *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va)
{
if (pmd_none(*pmdp)) {
pte_t *ptep = early_alloc_pgtable(PTE_FRAG_SIZE);
diff --git a/arch/powerpc/mm/ptdump/8xx.c b/arch/powerpc/mm/ptdump/8xx.c
index a3169677dced..b3185b32793d 100644
--- a/arch/powerpc/mm/ptdump/8xx.c
+++ b/arch/powerpc/mm/ptdump/8xx.c
@@ -11,6 +11,11 @@
static const struct flag_info flag_array[] = {
{
+ .mask = _PAGE_HUGE,
+ .val = _PAGE_HUGE,
+ .set = "h",
+ .clear = " ",
+ }, {
.mask = _PAGE_RO | _PAGE_NA,
.val = 0,
.set = "rw",
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 5774a55a9c58..e3fb0ef5129f 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -56,6 +56,7 @@ config PPC_8xx
select PPC_HAVE_KUEP
select PPC_HAVE_KUAP
select HAVE_ARCH_VMAP_STACK
+ select HUGETLBFS
config 40x
bool "AMCC 40x"
--
2.25.0
^ permalink raw reply related
* [PATCH v3 37/45] powerpc/8xx: Refactor kernel address boundary comparison
From: Christophe Leroy @ 2020-05-11 11:26 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1589196133.git.christophe.leroy@csgroup.eu>
Now that linear and IMMR dedicated TLB handling is gone, kernel
boundary address comparison is similar in ITLB miss handler and
in DTLB miss handler.
Create a macro named compare_to_kernel_boundary.
When TASK_SIZE is strictly below 0x80000000 and PAGE_OFFSET is
above 0x80000000, it is enough to compare to 0x8000000, and this
can be done with a single instruction.
Using not. instruction, we get to use 'blt' conditional branch as
when doing a regular comparison:
0x00000000 <= addr <= 0x7fffffff ==>
0xffffffff >= NOT(addr) >= 0x80000000
The above test corresponds to a 'blt'
Otherwise, do a regular comparison using two instructions.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/kernel/head_8xx.S | 22 ++++++++--------------
1 file changed, 8 insertions(+), 14 deletions(-)
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 9f3f7f3d03a7..9a117b9f0998 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -32,10 +32,15 @@
#include "head_32.h"
+.macro compare_to_kernel_boundary scratch, addr
#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
/* By simply checking Address >= 0x80000000, we know if its a kernel address */
-#define SIMPLE_KERNEL_ADDRESS 1
+ not. \scratch, \addr
+#else
+ rlwinm \scratch, \addr, 16, 0xfff8
+ cmpli cr0, \scratch, PAGE_OFFSET@h
#endif
+.endm
/*
* We need an ITLB miss handler for kernel addresses if:
@@ -209,20 +214,11 @@ InstructionTLBMiss:
mtspr SPRN_MD_EPN, r10
#ifdef ITLB_MISS_KERNEL
mfcr r11
-#if defined(SIMPLE_KERNEL_ADDRESS)
- cmpi cr0, r10, 0 /* Address >= 0x80000000 */
-#else
- rlwinm r10, r10, 16, 0xfff8
- cmpli cr0, r10, PAGE_OFFSET@h
-#endif
+ compare_to_kernel_boundary r10, r10
#endif
mfspr r10, SPRN_M_TWB /* Get level 1 table */
#ifdef ITLB_MISS_KERNEL
-#if defined(SIMPLE_KERNEL_ADDRESS)
- bge+ 3f
-#else
blt+ 3f
-#endif
rlwinm r10, r10, 0, 20, 31
oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha
3:
@@ -288,9 +284,7 @@ DataStoreTLBMiss:
* kernel page tables.
*/
mfspr r10, SPRN_MD_EPN
- rlwinm r10, r10, 16, 0xfff8
- cmpli cr0, r10, PAGE_OFFSET@h
-
+ compare_to_kernel_boundary r10, r10
mfspr r10, SPRN_M_TWB /* Get level 1 table */
blt+ 3f
rlwinm r10, r10, 0, 20, 31
--
2.25.0
^ permalink raw reply related
* [PATCH v3 36/45] powerpc/mm: Don't be too strict with _etext alignment on PPC32
From: Christophe Leroy @ 2020-05-11 11:26 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1589196133.git.christophe.leroy@csgroup.eu>
Similar to PPC64, accept to map RO data as ROX as a trade off between
between security and memory usage.
Having RO data executable is not a high risk as RO data can't be
modified to forge an exploit.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/Kconfig | 26 --------------------------
arch/powerpc/kernel/vmlinux.lds.S | 3 +--
2 files changed, 1 insertion(+), 28 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index f552726c9de2..970a5802850f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -777,32 +777,6 @@ config THREAD_SHIFT
Used to define the stack size. The default is almost always what you
want. Only change this if you know what you are doing.
-config ETEXT_SHIFT_BOOL
- bool "Set custom etext alignment" if STRICT_KERNEL_RWX && \
- (PPC_BOOK3S_32 || PPC_8xx)
- depends on ADVANCED_OPTIONS
- help
- This option allows you to set the kernel end of text alignment. When
- RAM is mapped by blocks, the alignment needs to fit the size and
- number of possible blocks. The default should be OK for most configs.
-
- Say N here unless you know what you are doing.
-
-config ETEXT_SHIFT
- int "_etext shift" if ETEXT_SHIFT_BOOL
- range 17 28 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
- range 19 23 if STRICT_KERNEL_RWX && PPC_8xx
- default 17 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
- default 19 if STRICT_KERNEL_RWX && PPC_8xx
- default PPC_PAGE_SHIFT
- help
- On Book3S 32 (603+), IBATs are used to map kernel text.
- Smaller is the alignment, greater is the number of necessary IBATs.
-
- On 8xx, large pages (512kb or 8M) are used to map kernel linear
- memory. Aligning to 8M reduces TLB misses as only 8M pages are used
- in that case.
-
config DATA_SHIFT_BOOL
bool "Set custom data alignment" if STRICT_KERNEL_RWX && \
(PPC_BOOK3S_32 || PPC_8xx)
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 31a0f201fb6f..54f23205c2b9 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -15,7 +15,6 @@
#include <asm/thread_info.h>
#define STRICT_ALIGN_SIZE (1 << CONFIG_DATA_SHIFT)
-#define ETEXT_ALIGN_SIZE (1 << CONFIG_ETEXT_SHIFT)
ENTRY(_stext)
@@ -116,7 +115,7 @@ SECTIONS
} :text
- . = ALIGN(ETEXT_ALIGN_SIZE);
+ . = ALIGN(PAGE_SIZE);
_etext = .;
PROVIDE32 (etext = .);
--
2.25.0
^ permalink raw reply related
* [PATCH v3 35/45] powerpc/8xx: Move DTLB perf handling closer.
From: Christophe Leroy @ 2020-05-11 11:26 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1589196133.git.christophe.leroy@csgroup.eu>
Now that space have been freed next to the DTLB miss handler,
it's associated DTLB perf handling can be brought back in
the same place.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/kernel/head_8xx.S | 23 +++++++++++------------
1 file changed, 11 insertions(+), 12 deletions(-)
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index fb5d17187772..9f3f7f3d03a7 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -344,6 +344,17 @@ DataStoreTLBMiss:
rfi
patch_site 0b, patch__dtlbmiss_exit_1
+#ifdef CONFIG_PERF_EVENTS
+ patch_site 0f, patch__dtlbmiss_perf
+0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
+ addi r10, r10, 1
+ stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
+ mfspr r10, SPRN_DAR
+ mtspr SPRN_DAR, r11 /* Tag DAR */
+ mfspr r11, SPRN_M_TW
+ rfi
+#endif
+
/* This is an instruction TLB error on the MPC8xx. This could be due
* to many reasons, such as executing guarded memory or illegal instruction
* addresses. There is nothing to do but handle a big time error fault.
@@ -390,18 +401,6 @@ DARFixed:/* Return from dcbx instruction bug workaround */
/* 0x300 is DataAccess exception, needed by bad_page_fault() */
EXC_XFER_LITE(0x300, handle_page_fault)
-/* Called from DataStoreTLBMiss when perf TLB misses events are activated */
-#ifdef CONFIG_PERF_EVENTS
- patch_site 0f, patch__dtlbmiss_perf
-0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
- addi r10, r10, 1
- stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
- mfspr r10, SPRN_DAR
- mtspr SPRN_DAR, r11 /* Tag DAR */
- mfspr r11, SPRN_M_TW
- rfi
-#endif
-
stack_overflow:
vmap_stack_overflow_exception
--
2.25.0
^ permalink raw reply related
* [PATCH v3 34/45] powerpc/8xx: Remove now unused TLB miss functions
From: Christophe Leroy @ 2020-05-11 11:26 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1589196133.git.christophe.leroy@csgroup.eu>
The code to setup linear and IMMR mapping via huge TLB entries is
not called anymore. Remove it.
Also remove the handling of removed code exits in the perf driver.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 8 +-
arch/powerpc/kernel/head_8xx.S | 83 --------------------
arch/powerpc/perf/8xx-pmu.c | 10 ---
3 files changed, 1 insertion(+), 100 deletions(-)
diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
index 4d3ef3841b00..e82368838416 100644
--- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
@@ -240,13 +240,7 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
}
/* patch sites */
-extern s32 patch__itlbmiss_linmem_top, patch__itlbmiss_linmem_top8;
-extern s32 patch__dtlbmiss_linmem_top, patch__dtlbmiss_immr_jmp;
-extern s32 patch__fixupdar_linmem_top;
-extern s32 patch__dtlbmiss_romem_top, patch__dtlbmiss_romem_top8;
-
-extern s32 patch__itlbmiss_exit_1, patch__itlbmiss_exit_2;
-extern s32 patch__dtlbmiss_exit_1, patch__dtlbmiss_exit_2, patch__dtlbmiss_exit_3;
+extern s32 patch__itlbmiss_exit_1, patch__dtlbmiss_exit_1;
extern s32 patch__itlbmiss_perf, patch__dtlbmiss_perf;
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index d1546f379757..fb5d17187772 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -278,33 +278,6 @@ InstructionTLBMiss:
rfi
#endif
-#ifndef CONFIG_PIN_TLB_TEXT
-ITLBMissLinear:
- mtcr r11
-#if defined(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23
- patch_site 0f, patch__itlbmiss_linmem_top8
-
- mfspr r10, SPRN_SRR0
-0: subis r11, r10, (PAGE_OFFSET - 0x80000000)@ha
- rlwinm r11, r11, 4, MI_PS8MEG ^ MI_PS512K
- ori r11, r11, MI_PS512K | MI_SVALID
- rlwinm r10, r10, 0, 0x0ff80000 /* 8xx supports max 256Mb RAM */
-#else
- /* Set 8M byte page and mark it valid */
- li r11, MI_PS8MEG | MI_SVALID
- rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */
-#endif
- mtspr SPRN_MI_TWC, r11
- ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
- _PAGE_PRESENT
- mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
-
-0: mfspr r10, SPRN_SPRG_SCRATCH0
- mfspr r11, SPRN_SPRG_SCRATCH1
- rfi
- patch_site 0b, patch__itlbmiss_exit_2
-#endif
-
. = 0x1200
DataStoreTLBMiss:
mtspr SPRN_DAR, r10
@@ -371,62 +344,6 @@ DataStoreTLBMiss:
rfi
patch_site 0b, patch__dtlbmiss_exit_1
-DTLBMissIMMR:
- mtcr r11
- /* Set 512k byte guarded page and mark it valid */
- li r10, MD_PS512K | MD_GUARDED | MD_SVALID
- mtspr SPRN_MD_TWC, r10
- mfspr r10, SPRN_IMMR /* Get current IMMR */
- rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */
- ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
- _PAGE_PRESENT | _PAGE_NO_CACHE
- mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
-
- li r11, RPN_PATTERN
-
-0: mfspr r10, SPRN_DAR
- mtspr SPRN_DAR, r11 /* Tag DAR */
- mfspr r11, SPRN_M_TW
- rfi
- patch_site 0b, patch__dtlbmiss_exit_2
-
-DTLBMissLinear:
- mtcr r11
- rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */
-#if defined(CONFIG_STRICT_KERNEL_RWX) && CONFIG_DATA_SHIFT < 23
- patch_site 0f, patch__dtlbmiss_romem_top8
-
-0: subis r11, r10, (PAGE_OFFSET - 0x80000000)@ha
- rlwinm r11, r11, 0, 0xff800000
- neg r10, r11
- or r11, r11, r10
- rlwinm r11, r11, 4, MI_PS8MEG ^ MI_PS512K
- ori r11, r11, MI_PS512K | MI_SVALID
- mfspr r10, SPRN_MD_EPN
- rlwinm r10, r10, 0, 0x0ff80000 /* 8xx supports max 256Mb RAM */
-#else
- /* Set 8M byte page and mark it valid */
- li r11, MD_PS8MEG | MD_SVALID
-#endif
- mtspr SPRN_MD_TWC, r11
-#ifdef CONFIG_STRICT_KERNEL_RWX
- patch_site 0f, patch__dtlbmiss_romem_top
-
-0: subis r11, r10, 0
- rlwimi r10, r11, 11, _PAGE_RO
-#endif
- ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
- _PAGE_PRESENT
- mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
-
- li r11, RPN_PATTERN
-
-0: mfspr r10, SPRN_DAR
- mtspr SPRN_DAR, r11 /* Tag DAR */
- mfspr r11, SPRN_M_TW
- rfi
- patch_site 0b, patch__dtlbmiss_exit_3
-
/* This is an instruction TLB error on the MPC8xx. This could be due
* to many reasons, such as executing guarded memory or illegal instruction
* addresses. There is nothing to do but handle a big time error fault.
diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c
index 1ad03c55c88c..0c56b42a4f91 100644
--- a/arch/powerpc/perf/8xx-pmu.c
+++ b/arch/powerpc/perf/8xx-pmu.c
@@ -99,9 +99,6 @@ static int mpc8xx_pmu_add(struct perf_event *event, int flags)
unsigned long target = patch_site_addr(&patch__itlbmiss_perf);
patch_branch_site(&patch__itlbmiss_exit_1, target, 0);
-#ifndef CONFIG_PIN_TLB_TEXT
- patch_branch_site(&patch__itlbmiss_exit_2, target, 0);
-#endif
}
val = itlb_miss_counter;
break;
@@ -110,8 +107,6 @@ static int mpc8xx_pmu_add(struct perf_event *event, int flags)
unsigned long target = patch_site_addr(&patch__dtlbmiss_perf);
patch_branch_site(&patch__dtlbmiss_exit_1, target, 0);
- patch_branch_site(&patch__dtlbmiss_exit_2, target, 0);
- patch_branch_site(&patch__dtlbmiss_exit_3, target, 0);
}
val = dtlb_miss_counter;
break;
@@ -174,9 +169,6 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags)
__PPC_SPR(SPRN_SPRG_SCRATCH0);
patch_instruction_site(&patch__itlbmiss_exit_1, insn);
-#ifndef CONFIG_PIN_TLB_TEXT
- patch_instruction_site(&patch__itlbmiss_exit_2, insn);
-#endif
}
break;
case PERF_8xx_ID_DTLB_LOAD_MISS:
@@ -186,8 +178,6 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags)
__PPC_SPR(SPRN_DAR);
patch_instruction_site(&patch__dtlbmiss_exit_1, insn);
- patch_instruction_site(&patch__dtlbmiss_exit_2, insn);
- patch_instruction_site(&patch__dtlbmiss_exit_3, insn);
}
break;
}
--
2.25.0
^ permalink raw reply related
* [PATCH v3 33/45] powerpc/8xx: Drop special handling of Linear and IMMR mappings in I/D TLB handlers
From: Christophe Leroy @ 2020-05-11 11:25 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1589196133.git.christophe.leroy@csgroup.eu>
Up to now, linear and IMMR mappings are managed via huge TLB entries
through specific code directly in TLB miss handlers. This implies
some patching of the TLB miss handlers at startup, and a lot of
dedicated code.
Remove all this specific dedicated code.
For now we are back to normal handling via standard 4k pages. In the
next patches, linear memory mapping and IMMR mapping will be managed
through huge pages.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/kernel/head_8xx.S | 29 +--------
arch/powerpc/mm/nohash/8xx.c | 106 +--------------------------------
2 files changed, 3 insertions(+), 132 deletions(-)
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index b0cceee6405c..d1546f379757 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -207,31 +207,21 @@ InstructionTLBMiss:
mfspr r10, SPRN_SRR0 /* Get effective address of fault */
INVALIDATE_ADJACENT_PAGES_CPU15(r10)
mtspr SPRN_MD_EPN, r10
- /* Only modules will cause ITLB Misses as we always
- * pin the first 8MB of kernel memory */
#ifdef ITLB_MISS_KERNEL
mfcr r11
-#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT)
+#if defined(SIMPLE_KERNEL_ADDRESS)
cmpi cr0, r10, 0 /* Address >= 0x80000000 */
#else
rlwinm r10, r10, 16, 0xfff8
cmpli cr0, r10, PAGE_OFFSET@h
-#ifndef CONFIG_PIN_TLB_TEXT
- /* It is assumed that kernel code fits into the first 32M */
-0: cmpli cr7, r10, (PAGE_OFFSET + 0x2000000)@h
- patch_site 0b, patch__itlbmiss_linmem_top
-#endif
#endif
#endif
mfspr r10, SPRN_M_TWB /* Get level 1 table */
#ifdef ITLB_MISS_KERNEL
-#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT)
+#if defined(SIMPLE_KERNEL_ADDRESS)
bge+ 3f
#else
blt+ 3f
-#endif
-#ifndef CONFIG_PIN_TLB_TEXT
- blt cr7, ITLBMissLinear
#endif
rlwinm r10, r10, 0, 20, 31
oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha
@@ -327,19 +317,9 @@ DataStoreTLBMiss:
mfspr r10, SPRN_MD_EPN
rlwinm r10, r10, 16, 0xfff8
cmpli cr0, r10, PAGE_OFFSET@h
-#ifndef CONFIG_PIN_TLB_IMMR
- cmpli cr6, r10, VIRT_IMMR_BASE@h
-#endif
-0: cmpli cr7, r10, (PAGE_OFFSET + 0x2000000)@h
- patch_site 0b, patch__dtlbmiss_linmem_top
mfspr r10, SPRN_M_TWB /* Get level 1 table */
blt+ 3f
-#ifndef CONFIG_PIN_TLB_IMMR
-0: beq- cr6, DTLBMissIMMR
- patch_site 0b, patch__dtlbmiss_immr_jmp
-#endif
- blt cr7, DTLBMissLinear
rlwinm r10, r10, 0, 20, 31
oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha
3:
@@ -571,14 +551,9 @@ FixupDAR:/* Entry point for dcbx workaround. */
cmpli cr1, r11, PAGE_OFFSET@h
mfspr r11, SPRN_M_TWB /* Get level 1 table */
blt+ cr1, 3f
- rlwinm r11, r10, 16, 0xfff8
-
-0: cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h
- patch_site 0b, patch__fixupdar_linmem_top
/* create physical page address from effective address */
tophys(r11, r10)
- blt- cr7, 201f
mfspr r11, SPRN_M_TWB /* Get level 1 table */
rlwinm r11, r11, 0, 20, 31
oris r11, r11, (swapper_pg_dir - PAGE_OFFSET)@ha
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 43578a8a8cad..d9f205d9a654 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -54,8 +54,6 @@ unsigned long p_block_mapped(phys_addr_t pa)
return 0;
}
-#define LARGE_PAGE_SIZE_8M (1<<23)
-
/*
* MMU_init_hw does the chip-specific initialization of the MMU hardware.
*/
@@ -80,122 +78,20 @@ void __init mmu_mapin_immr(void)
map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG);
}
-static void mmu_patch_cmp_limit(s32 *site, unsigned long mapped)
-{
- modify_instruction_site(site, 0xffff, (unsigned long)__va(mapped) >> 16);
-}
-
-static void mmu_patch_addis(s32 *site, long simm)
-{
- unsigned int instr = *(unsigned int *)patch_site_addr(site);
-
- instr &= 0xffff0000;
- instr |= ((unsigned long)simm) >> 16;
- patch_instruction_site(site, instr);
-}
-
-static void mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, pgprot_t prot)
-{
- unsigned long s = offset;
- unsigned long v = PAGE_OFFSET + s;
- phys_addr_t p = memstart_addr + s;
-
- for (; s < top; s += PAGE_SIZE) {
- map_kernel_page(v, p, prot);
- v += PAGE_SIZE;
- p += PAGE_SIZE;
- }
-}
-
unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
{
- unsigned long mapped;
-
mmu_mapin_immr();
- if (__map_without_ltlbs) {
- mapped = 0;
- if (!IS_ENABLED(CONFIG_PIN_TLB_IMMR))
- patch_instruction_site(&patch__dtlbmiss_immr_jmp, PPC_INST_NOP);
- if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
- mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0);
- } else {
- unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M);
-
- mapped = top & ~(LARGE_PAGE_SIZE_8M - 1);
- if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
- mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, einittext8);
-
- /*
- * Populate page tables to:
- * - have them appear in /sys/kernel/debug/kernel_page_tables
- * - allow the BDI to find the pages when they are not PINNED
- */
- mmu_mapin_ram_chunk(0, einittext8, PAGE_KERNEL_X);
- mmu_mapin_ram_chunk(einittext8, mapped, PAGE_KERNEL);
- }
-
- mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped);
- mmu_patch_cmp_limit(&patch__fixupdar_linmem_top, mapped);
-
- /* If the size of RAM is not an exact power of two, we may not
- * have covered RAM in its entirety with 8 MiB
- * pages. Consequently, restrict the top end of RAM currently
- * allocable so that calls to the MEMBLOCK to allocate PTEs for "tail"
- * coverage with normal-sized pages (or other reasons) do not
- * attempt to allocate outside the allowed range.
- */
- if (mapped)
- memblock_set_current_limit(mapped);
-
- block_mapped_ram = mapped;
-
- return mapped;
+ return 0;
}
void mmu_mark_initmem_nx(void)
{
- if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23)
- mmu_patch_addis(&patch__itlbmiss_linmem_top8,
- -((long)_etext & ~(LARGE_PAGE_SIZE_8M - 1)));
- if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) {
- unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M);
- unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M);
- unsigned long etext = __pa(_etext);
-
- mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, __pa(_etext));
-
- /* Update page tables for PTDUMP and BDI */
- mmu_mapin_ram_chunk(0, einittext8, __pgprot(0));
- if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) {
- mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_TEXT);
- mmu_mapin_ram_chunk(etext, einittext8, PAGE_KERNEL);
- } else {
- mmu_mapin_ram_chunk(0, etext8, PAGE_KERNEL_TEXT);
- mmu_mapin_ram_chunk(etext8, einittext8, PAGE_KERNEL);
- }
- }
- _tlbil_all();
}
#ifdef CONFIG_STRICT_KERNEL_RWX
void mmu_mark_rodata_ro(void)
{
- unsigned long sinittext = __pa(_sinittext);
- unsigned long etext = __pa(_etext);
-
- if (CONFIG_DATA_SHIFT < 23)
- mmu_patch_addis(&patch__dtlbmiss_romem_top8,
- -__pa(((unsigned long)_sinittext) &
- ~(LARGE_PAGE_SIZE_8M - 1)));
- mmu_patch_addis(&patch__dtlbmiss_romem_top, -__pa(_sinittext));
-
- _tlbil_all();
-
- /* Update page tables for PTDUMP and BDI */
- mmu_mapin_ram_chunk(0, sinittext, __pgprot(0));
- mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_ROX);
- mmu_mapin_ram_chunk(etext, sinittext, PAGE_KERNEL_RO);
}
#endif
--
2.25.0
^ permalink raw reply related
* [PATCH v3 27/45] powerpc/8xx: Only 8M pages are hugepte pages now
From: Christophe Leroy @ 2020-05-11 11:25 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1589196133.git.christophe.leroy@csgroup.eu>
512k pages are now standard pages, so only 8M pages
are hugepte.
No more handling of normal page tables through hugepd allocation
and freeing, and hugepte helpers can also be simplified.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h | 7 +++----
arch/powerpc/mm/hugetlbpage.c | 16 +++-------------
2 files changed, 6 insertions(+), 17 deletions(-)
diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
index 785437323576..1c7d4693a78e 100644
--- a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
@@ -13,13 +13,13 @@ static inline pte_t *hugepd_page(hugepd_t hpd)
static inline unsigned int hugepd_shift(hugepd_t hpd)
{
- return ((hpd_val(hpd) & _PMD_PAGE_MASK) >> 1) + 17;
+ return PAGE_SHIFT_8M;
}
static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
unsigned int pdshift)
{
- unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> PAGE_SHIFT;
+ unsigned long idx = (addr & (SZ_4M - 1)) >> PAGE_SHIFT;
return hugepd_page(hpd) + idx;
}
@@ -32,8 +32,7 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma,
static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift)
{
- *hpdp = __hugepd(__pa(new) | _PMD_USER | _PMD_PRESENT |
- (pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M : _PMD_PAGE_512K));
+ *hpdp = __hugepd(__pa(new) | _PMD_USER | _PMD_PRESENT | _PMD_PAGE_8M);
}
static inline int check_and_get_huge_psize(int shift)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 35eb29584b54..243e90db400c 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -54,24 +54,17 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
if (pshift >= pdshift) {
cachep = PGT_CACHE(PTE_T_ORDER);
num_hugepd = 1 << (pshift - pdshift);
- new = NULL;
- } else if (IS_ENABLED(CONFIG_PPC_8xx)) {
- cachep = NULL;
- num_hugepd = 1;
- new = pte_alloc_one(mm);
} else {
cachep = PGT_CACHE(pdshift - pshift);
num_hugepd = 1;
- new = NULL;
}
- if (!cachep && !new) {
+ if (!cachep) {
WARN_ONCE(1, "No page table cache created for hugetlb tables");
return -ENOMEM;
}
- if (cachep)
- new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL));
+ new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL));
BUG_ON(pshift > HUGEPD_SHIFT_MASK);
BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
@@ -102,10 +95,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
if (i < num_hugepd) {
for (i = i - 1 ; i >= 0; i--, hpdp--)
*hpdp = __hugepd(0);
- if (cachep)
- kmem_cache_free(cachep, new);
- else
- pte_free(mm, new);
+ kmem_cache_free(cachep, new);
} else {
kmemleak_ignore(new);
}
--
2.25.0
^ permalink raw reply related
* [PATCH v3 32/45] powerpc/8xx: Always pin TLBs at startup.
From: Christophe Leroy @ 2020-05-11 11:25 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1589196133.git.christophe.leroy@csgroup.eu>
At startup, map 32 Mbytes of memory through 4 pages of 8M,
and PIN them inconditionnaly. They need to be pinned because
KASAN is using page tables early and the TLBs might be
dynamically replaced otherwise.
Remove RSV4I flag after installing mappings unless
CONFIG_PIN_TLB_XXXX is selected.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/kernel/head_8xx.S | 31 +++++++++++++++++--------------
arch/powerpc/mm/nohash/8xx.c | 19 +------------------
2 files changed, 18 insertions(+), 32 deletions(-)
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index d607f4b53e0f..b0cceee6405c 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -765,6 +765,14 @@ start_here:
mtspr SPRN_MD_RPN, r0
lis r0, (MD_TWAM | MD_RSV4I)@h
mtspr SPRN_MD_CTR, r0
+#endif
+#ifndef CONFIG_PIN_TLB_TEXT
+ li r0, 0
+ mtspr SPRN_MI_CTR, r0
+#endif
+#if !defined(CONFIG_PIN_TLB_DATA) && !defined(CONFIG_PIN_TLB_IMMR)
+ lis r0, MD_TWAM@h
+ mtspr SPRN_MD_CTR, r0
#endif
tlbia /* Clear all TLB entries */
sync /* wait for tlbia/tlbie to finish */
@@ -802,10 +810,6 @@ initial_mmu:
mtspr SPRN_MD_CTR, r10 /* remove PINNED DTLB entries */
tlbia /* Invalidate all TLB entries */
-#ifdef CONFIG_PIN_TLB_DATA
- oris r10, r10, MD_RSV4I@h
- mtspr SPRN_MD_CTR, r10 /* Set data TLB control */
-#endif
lis r8, MI_APG_INIT@h /* Set protection modes */
ori r8, r8, MI_APG_INIT@l
@@ -814,33 +818,32 @@ initial_mmu:
ori r8, r8, MD_APG_INIT@l
mtspr SPRN_MD_AP, r8
- /* Now map the lower RAM (up to 32 Mbytes) into the ITLB. */
-#ifdef CONFIG_PIN_TLB_TEXT
+ /* Map the lower RAM (up to 32 Mbytes) into the ITLB and DTLB */
lis r8, MI_RSV4I@h
ori r8, r8, 0x1c00
-#endif
+ oris r12, r10, MD_RSV4I@h
+ ori r12, r12, 0x1c00
li r9, 4 /* up to 4 pages of 8M */
mtctr r9
lis r9, KERNELBASE@h /* Create vaddr for TLB */
li r10, MI_PS8MEG | MI_SVALID /* Set 8M byte page */
li r11, MI_BOOTINIT /* Create RPN for address 0 */
- lis r12, _einittext@h
- ori r12, r12, _einittext@l
1:
-#ifdef CONFIG_PIN_TLB_TEXT
mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */
addi r8, r8, 0x100
-#endif
-
ori r0, r9, MI_EVALID /* Mark it valid */
mtspr SPRN_MI_EPN, r0
mtspr SPRN_MI_TWC, r10
mtspr SPRN_MI_RPN, r11 /* Store TLB entry */
+ mtspr SPRN_MD_CTR, r12
+ addi r12, r12, 0x100
+ mtspr SPRN_MD_EPN, r0
+ mtspr SPRN_MD_TWC, r10
+ mtspr SPRN_MD_RPN, r11
addis r9, r9, 0x80
addis r11, r11, 0x80
- cmpl cr0, r9, r12
- bdnzf gt, 1b
+ bdnz 1b
/* Since the cache is enabled according to the information we
* just loaded into the TLB, invalidate and enable the caches here.
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index d54d395c3378..43578a8a8cad 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -61,23 +61,6 @@ unsigned long p_block_mapped(phys_addr_t pa)
*/
void __init MMU_init_hw(void)
{
- /* PIN up to the 3 first 8Mb after IMMR in DTLB table */
- if (IS_ENABLED(CONFIG_PIN_TLB_DATA)) {
- unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000;
- unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY;
- int i = 28;
- unsigned long addr = 0;
- unsigned long mem = total_lowmem;
-
- for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) {
- mtspr(SPRN_MD_CTR, ctr | (i << 8));
- mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID);
- mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID);
- mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT);
- addr += LARGE_PAGE_SIZE_8M;
- mem -= LARGE_PAGE_SIZE_8M;
- }
- }
}
static bool immr_is_mapped __initdata;
@@ -225,7 +208,7 @@ void __init setup_initial_memory_limit(phys_addr_t first_memblock_base,
BUG_ON(first_memblock_base != 0);
/* 8xx can only access 32MB at the moment */
- memblock_set_current_limit(min_t(u64, first_memblock_size, 0x02000000));
+ memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_32M));
}
/*
--
2.25.0
^ permalink raw reply related
* [PATCH v3 31/45] powerpc/8xx: Don't set IMMR map anymore at boot
From: Christophe Leroy @ 2020-05-11 11:25 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1589196133.git.christophe.leroy@csgroup.eu>
Only early debug requires IMMR to be mapped early.
No need to set it up and pin it in assembly. Map it
through page tables at udbg init when necessary.
If CONFIG_PIN_TLB_IMMR is selected, pin it once we
don't need the 32 Mb pinned RAM anymore.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
v2: Disable TLB reservation to modify entry 31
---
arch/powerpc/kernel/head_8xx.S | 39 +++++++++++++-----------------
arch/powerpc/mm/mmu_decl.h | 4 +++
arch/powerpc/mm/nohash/8xx.c | 15 +++++++++---
arch/powerpc/platforms/8xx/Kconfig | 2 +-
arch/powerpc/sysdev/cpm_common.c | 2 ++
5 files changed, 35 insertions(+), 27 deletions(-)
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index c9e3d54e6a6f..d607f4b53e0f 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -749,6 +749,23 @@ start_here:
rfi
/* Load up the kernel context */
2:
+#ifdef CONFIG_PIN_TLB_IMMR
+ lis r0, MD_TWAM@h
+ oris r0, r0, 0x1f00
+ mtspr SPRN_MD_CTR, r0
+ LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID)
+ tlbie r0
+ mtspr SPRN_MD_EPN, r0
+ LOAD_REG_IMMEDIATE(r0, MD_SVALID | MD_PS512K | MD_GUARDED)
+ mtspr SPRN_MD_TWC, r0
+ mfspr r0, SPRN_IMMR
+ rlwinm r0, r0, 0, 0xfff80000
+ ori r0, r0, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \
+ _PAGE_NO_CACHE | _PAGE_PRESENT
+ mtspr SPRN_MD_RPN, r0
+ lis r0, (MD_TWAM | MD_RSV4I)@h
+ mtspr SPRN_MD_CTR, r0
+#endif
tlbia /* Clear all TLB entries */
sync /* wait for tlbia/tlbie to finish */
@@ -797,28 +814,6 @@ initial_mmu:
ori r8, r8, MD_APG_INIT@l
mtspr SPRN_MD_AP, r8
- /* Map a 512k page for the IMMR to get the processor
- * internal registers (among other things).
- */
-#ifdef CONFIG_PIN_TLB_IMMR
- oris r10, r10, MD_RSV4I@h
- ori r10, r10, 0x1c00
- mtspr SPRN_MD_CTR, r10
-
- mfspr r9, 638 /* Get current IMMR */
- andis. r9, r9, 0xfff8 /* Get 512 kbytes boundary */
-
- lis r8, VIRT_IMMR_BASE@h /* Create vaddr for TLB */
- ori r8, r8, MD_EVALID /* Mark it valid */
- mtspr SPRN_MD_EPN, r8
- li r8, MD_PS512K | MD_GUARDED /* Set 512k byte page */
- ori r8, r8, MD_SVALID /* Make it valid */
- mtspr SPRN_MD_TWC, r8
- mr r8, r9 /* Create paddr for TLB */
- ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */
- mtspr SPRN_MD_RPN, r8
-#endif
-
/* Now map the lower RAM (up to 32 Mbytes) into the ITLB. */
#ifdef CONFIG_PIN_TLB_TEXT
lis r8, MI_RSV4I@h
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 7097e07a209a..1b6d39e9baed 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -182,6 +182,10 @@ static inline void mmu_mark_initmem_nx(void) { }
static inline void mmu_mark_rodata_ro(void) { }
#endif
+#ifdef CONFIG_PPC_8xx
+void __init mmu_mapin_immr(void);
+#endif
+
#ifdef CONFIG_PPC_DEBUG_WX
void ptdump_check_wx(void);
#else
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index d83a12c5bc7f..d54d395c3378 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -65,7 +65,7 @@ void __init MMU_init_hw(void)
if (IS_ENABLED(CONFIG_PIN_TLB_DATA)) {
unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000;
unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY;
- int i = IS_ENABLED(CONFIG_PIN_TLB_IMMR) ? 29 : 28;
+ int i = 28;
unsigned long addr = 0;
unsigned long mem = total_lowmem;
@@ -80,12 +80,19 @@ void __init MMU_init_hw(void)
}
}
-static void __init mmu_mapin_immr(void)
+static bool immr_is_mapped __initdata;
+
+void __init mmu_mapin_immr(void)
{
unsigned long p = PHYS_IMMR_BASE;
unsigned long v = VIRT_IMMR_BASE;
int offset;
+ if (immr_is_mapped)
+ return;
+
+ immr_is_mapped = true;
+
for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE)
map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG);
}
@@ -121,9 +128,10 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
{
unsigned long mapped;
+ mmu_mapin_immr();
+
if (__map_without_ltlbs) {
mapped = 0;
- mmu_mapin_immr();
if (!IS_ENABLED(CONFIG_PIN_TLB_IMMR))
patch_instruction_site(&patch__dtlbmiss_immr_jmp, PPC_INST_NOP);
if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
@@ -142,7 +150,6 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
*/
mmu_mapin_ram_chunk(0, einittext8, PAGE_KERNEL_X);
mmu_mapin_ram_chunk(einittext8, mapped, PAGE_KERNEL);
- mmu_mapin_immr();
}
mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped);
diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig
index 0d036cd868ef..04ea1a8a0bdc 100644
--- a/arch/powerpc/platforms/8xx/Kconfig
+++ b/arch/powerpc/platforms/8xx/Kconfig
@@ -187,7 +187,7 @@ config PIN_TLB_DATA
config PIN_TLB_IMMR
bool "Pinned TLB for IMMR"
- depends on PIN_TLB || PPC_EARLY_DEBUG_CPM
+ depends on PIN_TLB
default y
help
This pins the IMMR area with a 512kbytes page. In case
diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c
index 71660bacb264..7dc1960f8bdb 100644
--- a/arch/powerpc/sysdev/cpm_common.c
+++ b/arch/powerpc/sysdev/cpm_common.c
@@ -68,6 +68,8 @@ static void udbg_putc_cpm(char c)
void __init udbg_init_cpm(void)
{
#ifdef CONFIG_PPC_8xx
+ mmu_mapin_immr();
+
cpm_udbg_txdesc = (u32 __iomem __force *)
(CONFIG_PPC_EARLY_DEBUG_CPM_ADDR - PHYS_IMMR_BASE +
VIRT_IMMR_BASE);
--
2.25.0
^ permalink raw reply related
* [PATCH v3 28/45] powerpc/8xx: MM_SLICE is not needed anymore
From: Christophe Leroy @ 2020-05-11 11:25 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1589196133.git.christophe.leroy@csgroup.eu>
As the 8xx now manages 512k pages in standard page tables,
it doesn't need CONFIG_PPC_MM_SLICES anymore.
Don't select it anymore and remove all related code.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 64 --------------------
arch/powerpc/include/asm/nohash/32/slice.h | 20 ------
arch/powerpc/include/asm/slice.h | 2 -
arch/powerpc/platforms/Kconfig.cputype | 1 -
4 files changed, 87 deletions(-)
delete mode 100644 arch/powerpc/include/asm/nohash/32/slice.h
diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
index 26b7cee34dfe..a092e6434bda 100644
--- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
@@ -176,12 +176,6 @@
*/
#define SPRN_M_TW 799
-#ifdef CONFIG_PPC_MM_SLICES
-#include <asm/nohash/32/slice.h>
-#define SLICE_ARRAY_SIZE (1 << (32 - SLICE_LOW_SHIFT - 1))
-#define LOW_SLICE_ARRAY_SZ SLICE_ARRAY_SIZE
-#endif
-
#if defined(CONFIG_PPC_4K_PAGES)
#define mmu_virtual_psize MMU_PAGE_4K
#elif defined(CONFIG_PPC_16K_PAGES)
@@ -199,71 +193,13 @@
#include <linux/mmdebug.h>
-struct slice_mask {
- u64 low_slices;
- DECLARE_BITMAP(high_slices, 0);
-};
-
typedef struct {
unsigned int id;
unsigned int active;
unsigned long vdso_base;
-#ifdef CONFIG_PPC_MM_SLICES
- u16 user_psize; /* page size index */
- unsigned char low_slices_psize[SLICE_ARRAY_SIZE];
- unsigned char high_slices_psize[0];
- unsigned long slb_addr_limit;
- struct slice_mask mask_base_psize; /* 4k or 16k */
- struct slice_mask mask_512k;
- struct slice_mask mask_8m;
-#endif
void *pte_frag;
} mm_context_t;
-#ifdef CONFIG_PPC_MM_SLICES
-static inline u16 mm_ctx_user_psize(mm_context_t *ctx)
-{
- return ctx->user_psize;
-}
-
-static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize)
-{
- ctx->user_psize = user_psize;
-}
-
-static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx)
-{
- return ctx->low_slices_psize;
-}
-
-static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx)
-{
- return ctx->high_slices_psize;
-}
-
-static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx)
-{
- return ctx->slb_addr_limit;
-}
-
-static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit)
-{
- ctx->slb_addr_limit = limit;
-}
-
-static inline struct slice_mask *slice_mask_for_size(mm_context_t *ctx, int psize)
-{
- if (psize == MMU_PAGE_512K)
- return &ctx->mask_512k;
- if (psize == MMU_PAGE_8M)
- return &ctx->mask_8m;
-
- BUG_ON(psize != mmu_virtual_psize);
-
- return &ctx->mask_base_psize;
-}
-#endif /* CONFIG_PPC_MM_SLICE */
-
#define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000)
#define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE))
diff --git a/arch/powerpc/include/asm/nohash/32/slice.h b/arch/powerpc/include/asm/nohash/32/slice.h
deleted file mode 100644
index 39eb0154ae2d..000000000000
--- a/arch/powerpc/include/asm/nohash/32/slice.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_POWERPC_NOHASH_32_SLICE_H
-#define _ASM_POWERPC_NOHASH_32_SLICE_H
-
-#ifdef CONFIG_PPC_MM_SLICES
-
-#define SLICE_LOW_SHIFT 26 /* 64 slices */
-#define SLICE_LOW_TOP (0x100000000ull)
-#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
-#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
-
-#define SLICE_HIGH_SHIFT 0
-#define SLICE_NUM_HIGH 0ul
-#define GET_HIGH_SLICE_INDEX(addr) (addr & 0)
-
-#define SLB_ADDR_LIMIT_DEFAULT DEFAULT_MAP_WINDOW
-
-#endif /* CONFIG_PPC_MM_SLICES */
-
-#endif /* _ASM_POWERPC_NOHASH_32_SLICE_H */
diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h
index c6f466f4c241..0bdd9c62eca0 100644
--- a/arch/powerpc/include/asm/slice.h
+++ b/arch/powerpc/include/asm/slice.h
@@ -4,8 +4,6 @@
#ifdef CONFIG_PPC_BOOK3S_64
#include <asm/book3s/64/slice.h>
-#elif defined(CONFIG_PPC_MMU_NOHASH_32)
-#include <asm/nohash/32/slice.h>
#endif
#ifndef __ASSEMBLY__
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 27a81c291be8..5774a55a9c58 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -55,7 +55,6 @@ config PPC_8xx
select SYS_SUPPORTS_HUGETLBFS
select PPC_HAVE_KUEP
select PPC_HAVE_KUAP
- select PPC_MM_SLICES if HUGETLB_PAGE
select HAVE_ARCH_VMAP_STACK
config 40x
--
2.25.0
^ permalink raw reply related
* [PATCH v3 29/45] powerpc/8xx: Move PPC_PIN_TLB options into 8xx Kconfig
From: Christophe Leroy @ 2020-05-11 11:25 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1589196133.git.christophe.leroy@csgroup.eu>
PPC_PIN_TLB options are dedicated to the 8xx, move them into
the 8xx Kconfig.
While we are at it, add some text to explain what it does.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/Kconfig | 20 ---------------
arch/powerpc/platforms/8xx/Kconfig | 41 ++++++++++++++++++++++++++++++
2 files changed, 41 insertions(+), 20 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8324d98728db..f552726c9de2 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -1226,26 +1226,6 @@ config TASK_SIZE
hex "Size of user task space" if TASK_SIZE_BOOL
default "0x80000000" if PPC_8xx
default "0xc0000000"
-
-config PIN_TLB
- bool "Pinned Kernel TLBs (860 ONLY)"
- depends on ADVANCED_OPTIONS && PPC_8xx && \
- !DEBUG_PAGEALLOC && !STRICT_KERNEL_RWX
-
-config PIN_TLB_DATA
- bool "Pinned TLB for DATA"
- depends on PIN_TLB
- default y
-
-config PIN_TLB_IMMR
- bool "Pinned TLB for IMMR"
- depends on PIN_TLB || PPC_EARLY_DEBUG_CPM
- default y
-
-config PIN_TLB_TEXT
- bool "Pinned TLB for TEXT"
- depends on PIN_TLB
- default y
endmenu
if PPC64
diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig
index b37de62d7e7f..0d036cd868ef 100644
--- a/arch/powerpc/platforms/8xx/Kconfig
+++ b/arch/powerpc/platforms/8xx/Kconfig
@@ -162,4 +162,45 @@ config UCODE_PATCH
default y
depends on !NO_UCODE_PATCH
+menu "8xx advanced setup"
+ depends on PPC_8xx
+
+config PIN_TLB
+ bool "Pinned Kernel TLBs"
+ depends on ADVANCED_OPTIONS && !DEBUG_PAGEALLOC && !STRICT_KERNEL_RWX
+ help
+ On the 8xx, we have 32 instruction TLBs and 32 data TLBs. In each
+ table 4 TLBs can be pinned.
+
+ It reduces the amount of usable TLBs to 28 (ie by 12%). That's the
+ reason why we make it selectable.
+
+ This option does nothing, it just activate the selection of what
+ to pin.
+
+config PIN_TLB_DATA
+ bool "Pinned TLB for DATA"
+ depends on PIN_TLB
+ default y
+ help
+ This pins the first 32 Mbytes of memory with 8M pages.
+
+config PIN_TLB_IMMR
+ bool "Pinned TLB for IMMR"
+ depends on PIN_TLB || PPC_EARLY_DEBUG_CPM
+ default y
+ help
+ This pins the IMMR area with a 512kbytes page. In case
+ CONFIG_PIN_TLB_DATA is also selected, it will reduce
+ CONFIG_PIN_TLB_DATA to 24 Mbytes.
+
+config PIN_TLB_TEXT
+ bool "Pinned TLB for TEXT"
+ depends on PIN_TLB
+ default y
+ help
+ This pins kernel text with 8M pages.
+
+endmenu
+
endmenu
--
2.25.0
^ permalink raw reply related
* [PATCH v3 30/45] powerpc/8xx: Add function to set pinned TLBs
From: Christophe Leroy @ 2020-05-11 11:25 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1589196133.git.christophe.leroy@csgroup.eu>
Pinned TLBs cannot be modified when the MMU is enabled.
Create a function to rewrite the pinned TLB entries with MMU off.
To set pinned TLB, we have to turn off MMU, disable pinning,
do a TLB flush (Either with tlbie and tlbia) then reprogam
the TLB entries, enable pinning and turn on MMU.
If using tlbie, it cleared entries in both instruction and data
TLB regardless whether pinning is disabled or not.
If using tlbia, it clears all entries of the TLB which has
disabled pinning.
To make it easy, just clear all entries in both TLBs, and
reprogram them.
The function takes two arguments, the top of the memory to
consider and whether data is RO under _sinittext.
When DEBUG_PAGEALLOC is set, the top is the end of kernel rodata.
Otherwise, that's the top of physical RAM.
Everything below _sinittext is set RX, over _sinittext that's RW.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
v2: Function rewritten to manage all entries at once.
---
arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 2 +
arch/powerpc/kernel/head_8xx.S | 103 +++++++++++++++++++
2 files changed, 105 insertions(+)
diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
index a092e6434bda..4d3ef3841b00 100644
--- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
@@ -193,6 +193,8 @@
#include <linux/mmdebug.h>
+void mmu_pin_tlb(unsigned long top, bool readonly);
+
typedef struct {
unsigned int id;
unsigned int active;
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 423465b10c82..c9e3d54e6a6f 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -16,6 +16,7 @@
#include <linux/init.h>
#include <linux/magic.h>
+#include <linux/sizes.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
@@ -866,6 +867,108 @@ initial_mmu:
mtspr SPRN_DER, r8
blr
+#ifdef CONFIG_PIN_TLB
+_GLOBAL(mmu_pin_tlb)
+ lis r9, (1f - PAGE_OFFSET)@h
+ ori r9, r9, (1f - PAGE_OFFSET)@l
+ mfmsr r10
+ mflr r11
+ li r12, MSR_KERNEL & ~(MSR_IR | MSR_DR | MSR_RI)
+ rlwinm r0, r10, 0, ~MSR_RI
+ rlwinm r0, r0, 0, ~MSR_EE
+ mtmsr r0
+ isync
+ .align 4
+ mtspr SPRN_SRR0, r9
+ mtspr SPRN_SRR1, r12
+ rfi
+1:
+ li r5, 0
+ lis r6, MD_TWAM@h
+ mtspr SPRN_MI_CTR, r5
+ mtspr SPRN_MD_CTR, r6
+ tlbia
+
+#ifdef CONFIG_PIN_TLB_TEXT
+ LOAD_REG_IMMEDIATE(r5, 28 << 8)
+ LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET)
+ LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG)
+ LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT)
+ LOAD_REG_ADDR(r9, _sinittext)
+ li r0, 4
+ mtctr r0
+
+2: ori r0, r6, MI_EVALID
+ mtspr SPRN_MI_CTR, r5
+ mtspr SPRN_MI_EPN, r0
+ mtspr SPRN_MI_TWC, r7
+ mtspr SPRN_MI_RPN, r8
+ addi r5, r5, 0x100
+ addis r6, r6, SZ_8M@h
+ addis r8, r8, SZ_8M@h
+ cmplw r6, r9
+ bdnzt lt, 2b
+ lis r0, MI_RSV4I@h
+ mtspr SPRN_MI_CTR, r0
+#endif
+ LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM)
+#ifdef CONFIG_PIN_TLB_DATA
+ LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET)
+ LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG)
+#ifdef CONFIG_PIN_TLB_IMMR
+ li r0, 3
+#else
+ li r0, 4
+#endif
+ mtctr r0
+ cmpwi r4, 0
+ beq 4f
+ LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT)
+ LOAD_REG_ADDR(r9, _sinittext)
+
+2: ori r0, r6, MD_EVALID
+ mtspr SPRN_MD_CTR, r5
+ mtspr SPRN_MD_EPN, r0
+ mtspr SPRN_MD_TWC, r7
+ mtspr SPRN_MD_RPN, r8
+ addi r5, r5, 0x100
+ addis r6, r6, SZ_8M@h
+ addis r8, r8, SZ_8M@h
+ cmplw r6, r9
+ bdnzt lt, 2b
+
+4: LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT)
+2: ori r0, r6, MD_EVALID
+ mtspr SPRN_MD_CTR, r5
+ mtspr SPRN_MD_EPN, r0
+ mtspr SPRN_MD_TWC, r7
+ mtspr SPRN_MD_RPN, r8
+ addi r5, r5, 0x100
+ addis r6, r6, SZ_8M@h
+ addis r8, r8, SZ_8M@h
+ cmplw r6, r3
+ bdnzt lt, 2b
+#endif
+#ifdef CONFIG_PIN_TLB_IMMR
+ LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID)
+ LOAD_REG_IMMEDIATE(r7, MD_SVALID | MD_PS512K | MD_GUARDED)
+ mfspr r8, SPRN_IMMR
+ rlwinm r8, r8, 0, 0xfff80000
+ ori r8, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \
+ _PAGE_NO_CACHE | _PAGE_PRESENT
+ mtspr SPRN_MD_CTR, r5
+ mtspr SPRN_MD_EPN, r0
+ mtspr SPRN_MD_TWC, r7
+ mtspr SPRN_MD_RPN, r8
+#endif
+#if defined(CONFIG_PIN_TLB_IMMR) || defined(CONFIG_PIN_TLB_DATA)
+ lis r0, (MD_RSV4I | MD_TWAM)@h
+ mtspr SPRN_MI_CTR, r0
+#endif
+ mtspr SPRN_SRR1, r10
+ mtspr SPRN_SRR0, r11
+ rfi
+#endif /* CONFIG_PIN_TLB */
/*
* We put a few things here that have to be page-aligned.
--
2.25.0
^ permalink raw reply related
* [PATCH v3 26/45] powerpc/8xx: Manage 512k huge pages as standard pages.
From: Christophe Leroy @ 2020-05-11 11:25 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1589196133.git.christophe.leroy@csgroup.eu>
At the time being, 512k huge pages are handled through hugepd page
tables. The PMD entry is flagged as a hugepd pointer and it
means that only 512k hugepages can be managed in that 4M block.
However, the hugepd table has the same size as a normal page
table, and 512k entries can therefore be nested with normal pages.
On the 8xx, TLB loading is performed by software and allthough the
page tables are organised to match the L1 and L2 level defined by
the HW, all TLB entries have both L1 and L2 independent entries.
It means that even if two TLB entries are associated with the same
PMD entry, they can be loaded with different values in L1 part.
The L1 entry contains the page size (PS field):
- 00 for 4k and 16 pages
- 01 for 512k pages
- 11 for 8M pages
By adding a flag for hugepages in the PTE (_PAGE_HUGE) and copying it
into the lower bit of PS, we can then manage 512k pages with normal
page tables:
- PMD entry has PS=11 for 8M pages
- PMD entry has PS=00 for other pages.
As a PMD entry covers 4M areas, a PMD will either point to a hugepd
table having a single entry to an 8M page, or the PMD will point to
a standard page table which will have either entries to 4k or 16k or
512k pages. For 512k pages, as the L1 entry will not know it is a
512k page before the PTE is read, there will be 128 entries in the
PTE as if it was 4k pages. But when loading the TLB, it will be
flagged as a 512k page.
Note that we can't use pmd_ptr() in asm/nohash/32/pgtable.h because
it is not defined yet.
In ITLB miss, we keep the possibility to opt it out as when kernel
text is pinned and no user hugepages are used, we can save several
instruction by not using r11.
In DTLB miss, that's just one instruction so it's not worth bothering
with it.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/include/asm/nohash/32/pgtable.h | 10 ++++++---
arch/powerpc/include/asm/nohash/32/pte-8xx.h | 4 +++-
arch/powerpc/include/asm/nohash/pgtable.h | 2 +-
arch/powerpc/kernel/head_8xx.S | 12 +++++------
arch/powerpc/mm/hugetlbpage.c | 22 +++++++++++++++++---
arch/powerpc/mm/pgtable.c | 10 ++++++++-
6 files changed, 44 insertions(+), 16 deletions(-)
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 1a86d20b58f3..1504af38a9a8 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -229,8 +229,9 @@ static inline void pmd_clear(pmd_t *pmdp)
* those implementations.
*
* On the 8xx, the page tables are a bit special. For 16k pages, we have
- * 4 identical entries. For other page sizes, we have a single entry in the
- * table.
+ * 4 identical entries. For 512k pages, we have 128 entries as if it was
+ * 4k pages, but they are flagged as 512k pages for the hardware.
+ * For other page sizes, we have a single entry in the table.
*/
#ifdef CONFIG_PPC_8xx
static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p,
@@ -240,13 +241,16 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p
pte_basic_t old = pte_val(*p);
pte_basic_t new = (old & ~(pte_basic_t)clr) | set;
int num, i;
+ pmd_t *pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr);
if (!huge)
num = PAGE_SIZE / SZ_4K;
+ else if ((pmd_val(*pmd) & _PMD_PAGE_MASK) != _PMD_PAGE_8M)
+ num = SZ_512K / SZ_4K;
else
num = 1;
- for (i = 0; i < num; i++, entry++)
+ for (i = 0; i < num; i++, entry++, new += SZ_4K)
*entry = new;
return old;
diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
index c9e4b2d90f65..66f403a7da44 100644
--- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
@@ -46,6 +46,8 @@
#define _PAGE_NA 0x0200 /* Supervisor NA, User no access */
#define _PAGE_RO 0x0600 /* Supervisor RO, User no access */
+#define _PAGE_HUGE 0x0800 /* Copied to L1 PS bit 29 */
+
/* cache related flags non existing on 8xx */
#define _PAGE_COHERENT 0
#define _PAGE_WRITETHRU 0
@@ -128,7 +130,7 @@ static inline pte_t pte_mkuser(pte_t pte)
static inline pte_t pte_mkhuge(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_SPS);
+ return __pte(pte_val(pte) | _PAGE_SPS | _PAGE_HUGE);
}
#define pte_mkhuge pte_mkhuge
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index 7fed9dc0f147..f27c967d9269 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -267,7 +267,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
static inline int hugepd_ok(hugepd_t hpd)
{
#ifdef CONFIG_PPC_8xx
- return ((hpd_val(hpd) & 0x4) != 0);
+ return ((hpd_val(hpd) & _PMD_PAGE_MASK) == _PMD_PAGE_8M);
#else
/* We clear the top bit to indicate hugepd */
return (hpd_val(hpd) && (hpd_val(hpd) & PD_HUGE) == 0);
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index adad8baadcf5..423465b10c82 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -239,7 +239,6 @@ InstructionTLBMiss:
#endif
#ifdef CONFIG_HUGETLBFS
lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */
- mtspr SPRN_MI_TWC, r11 /* Set segment attributes */
mtspr SPRN_MD_TWC, r11
#else
lwz r10, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */
@@ -248,6 +247,10 @@ InstructionTLBMiss:
#endif
mfspr r10, SPRN_MD_TWC
lwz r10, 0(r10) /* Get the pte */
+#ifdef CONFIG_HUGETLBFS
+ rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K
+ mtspr SPRN_MI_TWC, r11
+#endif
#ifdef CONFIG_SWAP
rlwinm r11, r10, 32-5, _PAGE_PRESENT
and r11, r11, r10
@@ -353,6 +356,7 @@ DataStoreTLBMiss:
* above.
*/
rlwimi r11, r10, 0, _PAGE_GUARDED
+ rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K
mtspr SPRN_MD_TWC, r11
/* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set.
@@ -584,7 +588,6 @@ FixupDAR:/* Entry point for dcbx workaround. */
mfspr r11, SPRN_MD_TWC
lwz r11, 0(r11) /* Get the pte */
bt 28,200f /* bit 28 = Large page (8M) */
- bt 29,202f /* bit 29 = Large page (8M or 512K) */
/* concat physical page address(r11) and page offset(r10) */
rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31
201: lwz r11,0(r11)
@@ -611,11 +614,6 @@ FixupDAR:/* Entry point for dcbx workaround. */
rlwimi r11, r10, 0, 32 - PAGE_SHIFT_8M, 31
b 201b
-202:
- /* concat physical page address(r11) and page offset(r10) */
- rlwimi r11, r10, 0, 32 - PAGE_SHIFT_512K, 31
- b 201b
-
144: mfspr r10, SPRN_DSISR
rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */
mtspr SPRN_DSISR, r10
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index edf511c2a30a..35eb29584b54 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -189,6 +189,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
if (!hpdp)
return NULL;
+ if (IS_ENABLED(CONFIG_PPC_8xx) && sz == SZ_512K)
+ return pte_alloc_map(mm, (pmd_t *)hpdp, addr);
+
BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr,
@@ -331,13 +334,20 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
if (shift >= pdshift)
hugepd_free(tlb, hugepte);
- else if (IS_ENABLED(CONFIG_PPC_8xx))
- pgtable_free_tlb(tlb, hugepte, 0);
else
pgtable_free_tlb(tlb, hugepte,
get_hugepd_cache_index(pdshift - shift));
}
+static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long addr)
+{
+ pgtable_t token = pmd_pgtable(*pmd);
+
+ pmd_clear(pmd);
+ pte_free_tlb(tlb, token, addr);
+ mm_dec_nr_ptes(tlb->mm);
+}
+
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
unsigned long addr, unsigned long end,
unsigned long floor, unsigned long ceiling)
@@ -353,11 +363,17 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
pmd = pmd_offset(pud, addr);
next = pmd_addr_end(addr, end);
if (!is_hugepd(__hugepd(pmd_val(*pmd)))) {
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
+
/*
* if it is not hugepd pointer, we should already find
* it cleared.
*/
- WARN_ON(!pmd_none_or_clear_bad(pmd));
+ WARN_ON(!IS_ENABLED(CONFIG_PPC_8xx));
+
+ hugetlb_free_pte_range(tlb, pmd, addr);
+
continue;
}
/*
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 214a5f4beb6c..60c4b8ff046c 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -264,6 +264,12 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
#if defined(CONFIG_PPC_8xx)
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
{
+ pmd_t *pmd = pmd_ptr(mm, addr);
+ pte_basic_t val;
+ pte_basic_t *entry = &ptep->pte;
+ int num = is_hugepd(*((hugepd_t *)pmd)) ? 1 : SZ_512K / SZ_4K;
+ int i;
+
/*
* Make sure hardware valid bit is not set. We don't do
* tlb flush for this update.
@@ -274,7 +280,9 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_
pte = set_pte_filter(pte);
- ptep->pte = pte_val(pte);
+ val = pte_val(pte);
+ for (i = 0; i < num; i++, entry++, val += SZ_4K)
+ *entry = val;
}
#endif
#endif /* CONFIG_HUGETLB_PAGE */
--
2.25.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox