* [PATCH v11 02/13] powerpc: prepare string/mem functions for KASAN
From: Christophe Leroy @ 2019-04-26 16:23 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
Nicholas Piggin, Aneesh Kumar K.V, Andrey Ryabinin,
Alexander Potapenko, Dmitry Vyukov, Daniel Axtens
Cc: linux-mm, linuxppc-dev, linux-kernel, kasan-dev
In-Reply-To: <cover.1556295459.git.christophe.leroy@c-s.fr>
CONFIG_KASAN implements wrappers for memcpy() memmove() and memset()
Those wrappers are doing the verification then call respectively
__memcpy() __memmove() and __memset(). The arches are therefore
expected to rename their optimised functions that way.
For files on which KASAN is inhibited, #defines are used to allow
them to directly call optimised versions of the functions without
going through the KASAN wrappers.
See commit 393f203f5fd5 ("x86_64: kasan: add interceptors for
memset/memmove/memcpy functions") for details.
Other string / mem functions do not (yet) have kasan wrappers,
we therefore have to fallback to the generic versions when
KASAN is active, otherwise KASAN checks will be skipped.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/include/asm/kasan.h | 15 +++++++++++++++
arch/powerpc/include/asm/string.h | 32 +++++++++++++++++++++++++++++---
arch/powerpc/kernel/prom_init_check.sh | 10 +++++++++-
arch/powerpc/lib/Makefile | 11 ++++++++---
arch/powerpc/lib/copy_32.S | 12 +++++++++---
arch/powerpc/lib/mem_64.S | 9 +++++++--
arch/powerpc/lib/memcpy_64.S | 4 +++-
7 files changed, 80 insertions(+), 13 deletions(-)
create mode 100644 arch/powerpc/include/asm/kasan.h
diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h
new file mode 100644
index 000000000000..2c179a39d4ba
--- /dev/null
+++ b/arch/powerpc/include/asm/kasan.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_KASAN_H
+#define __ASM_KASAN_H
+
+#ifdef CONFIG_KASAN
+#define _GLOBAL_KASAN(fn) _GLOBAL(__##fn)
+#define _GLOBAL_TOC_KASAN(fn) _GLOBAL_TOC(__##fn)
+#define EXPORT_SYMBOL_KASAN(fn) EXPORT_SYMBOL(__##fn)
+#else
+#define _GLOBAL_KASAN(fn) _GLOBAL(fn)
+#define _GLOBAL_TOC_KASAN(fn) _GLOBAL_TOC(fn)
+#define EXPORT_SYMBOL_KASAN(fn)
+#endif
+
+#endif
diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
index 1647de15a31e..9bf6dffb4090 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -4,14 +4,17 @@
#ifdef __KERNEL__
+#ifndef CONFIG_KASAN
#define __HAVE_ARCH_STRNCPY
#define __HAVE_ARCH_STRNCMP
+#define __HAVE_ARCH_MEMCHR
+#define __HAVE_ARCH_MEMCMP
+#define __HAVE_ARCH_MEMSET16
+#endif
+
#define __HAVE_ARCH_MEMSET
#define __HAVE_ARCH_MEMCPY
#define __HAVE_ARCH_MEMMOVE
-#define __HAVE_ARCH_MEMCMP
-#define __HAVE_ARCH_MEMCHR
-#define __HAVE_ARCH_MEMSET16
#define __HAVE_ARCH_MEMCPY_FLUSHCACHE
extern char * strcpy(char *,const char *);
@@ -27,7 +30,27 @@ extern int memcmp(const void *,const void *,__kernel_size_t);
extern void * memchr(const void *,int,__kernel_size_t);
extern void * memcpy_flushcache(void *,const void *,__kernel_size_t);
+void *__memset(void *s, int c, __kernel_size_t count);
+void *__memcpy(void *to, const void *from, __kernel_size_t n);
+void *__memmove(void *to, const void *from, __kernel_size_t n);
+
+#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+/*
+ * For files that are not instrumented (e.g. mm/slub.c) we
+ * should use not instrumented version of mem* functions.
+ */
+#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#define memmove(dst, src, len) __memmove(dst, src, len)
+#define memset(s, c, n) __memset(s, c, n)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
+#endif
+
#ifdef CONFIG_PPC64
+#ifndef CONFIG_KASAN
#define __HAVE_ARCH_MEMSET32
#define __HAVE_ARCH_MEMSET64
@@ -49,8 +72,11 @@ static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
{
return __memset64(p, v, n * 8);
}
+#endif
#else
+#ifndef CONFIG_KASAN
#define __HAVE_ARCH_STRLEN
+#endif
extern void *memset16(uint16_t *, uint16_t, __kernel_size_t);
#endif
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index 667df97d2595..181fd10008ef 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -16,8 +16,16 @@
# If you really need to reference something from prom_init.o add
# it to the list below:
+grep "^CONFIG_KASAN=y$" .config >/dev/null
+if [ $? -eq 0 ]
+then
+ MEM_FUNCS="__memcpy __memset"
+else
+ MEM_FUNCS="memcpy memset"
+fi
+
WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush
-_end enter_prom memcpy memset reloc_offset __secondary_hold
+_end enter_prom $MEM_FUNCS reloc_offset __secondary_hold
__secondary_hold_acknowledge __secondary_hold_spinloop __start
strcmp strcpy strlcpy strlen strncmp strstr kstrtobool logo_linux_clut224
reloc_got2 kernstart_addr memstart_addr linux_banner _stext
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 79396e184bca..47a4de434c22 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -8,9 +8,14 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE)
-obj-y += string.o alloc.o code-patching.o feature-fixups.o
+obj-y += alloc.o code-patching.o feature-fixups.o
-obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o strlen_32.o
+ifndef CONFIG_KASAN
+obj-y += string.o memcmp_$(BITS).o
+obj-$(CONFIG_PPC32) += strlen_32.o
+endif
+
+obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
@@ -34,7 +39,7 @@ obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o \
test_emulate_step_exec_instr.o
obj-y += checksum_$(BITS).o checksum_wrappers.o \
- string_$(BITS).o memcmp_$(BITS).o
+ string_$(BITS).o
obj-y += sstep.o ldstfp.o quad.o
obj64-y += quad.o
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index ba66846fe973..d5642481fb98 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -14,6 +14,7 @@
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/code-patching-asm.h>
+#include <asm/kasan.h>
#define COPY_16_BYTES \
lwz r7,4(r4); \
@@ -68,6 +69,7 @@ CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)
+#ifndef CONFIG_KASAN
_GLOBAL(memset16)
rlwinm. r0 ,r5, 31, 1, 31
addi r6, r3, -4
@@ -81,6 +83,7 @@ _GLOBAL(memset16)
sth r4, 4(r6)
blr
EXPORT_SYMBOL(memset16)
+#endif
/*
* Use dcbz on the complete cache lines in the destination
@@ -91,7 +94,7 @@ EXPORT_SYMBOL(memset16)
* We therefore skip the optimised bloc that uses dcbz. This jump is
* replaced by a nop once cache is active. This is done in machine_init()
*/
-_GLOBAL(memset)
+_GLOBAL_KASAN(memset)
cmplwi 0,r5,4
blt 7f
@@ -151,6 +154,7 @@ _GLOBAL(memset)
bdnz 9b
blr
EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL_KASAN(memset)
/*
* This version uses dcbz on the complete cache lines in the
@@ -163,12 +167,12 @@ EXPORT_SYMBOL(memset)
* We therefore jump to generic_memcpy which doesn't use dcbz. This jump is
* replaced by a nop once cache is active. This is done in machine_init()
*/
-_GLOBAL(memmove)
+_GLOBAL_KASAN(memmove)
cmplw 0,r3,r4
bgt backwards_memcpy
/* fall through */
-_GLOBAL(memcpy)
+_GLOBAL_KASAN(memcpy)
1: b generic_memcpy
patch_site 1b, patch__memcpy_nocache
@@ -244,6 +248,8 @@ _GLOBAL(memcpy)
65: blr
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(memmove)
+EXPORT_SYMBOL_KASAN(memcpy)
+EXPORT_SYMBOL_KASAN(memmove)
generic_memcpy:
srwi. r7,r5,3
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
index 3c3be02f33b7..7f6bd031c306 100644
--- a/arch/powerpc/lib/mem_64.S
+++ b/arch/powerpc/lib/mem_64.S
@@ -12,7 +12,9 @@
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
+#include <asm/kasan.h>
+#ifndef CONFIG_KASAN
_GLOBAL(__memset16)
rlwimi r4,r4,16,0,15
/* fall through */
@@ -29,8 +31,9 @@ _GLOBAL(__memset64)
EXPORT_SYMBOL(__memset16)
EXPORT_SYMBOL(__memset32)
EXPORT_SYMBOL(__memset64)
+#endif
-_GLOBAL(memset)
+_GLOBAL_KASAN(memset)
neg r0,r3
rlwimi r4,r4,8,16,23
andi. r0,r0,7 /* # bytes to be 8-byte aligned */
@@ -96,8 +99,9 @@ _GLOBAL(memset)
stb r4,0(r6)
blr
EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL_KASAN(memset)
-_GLOBAL_TOC(memmove)
+_GLOBAL_TOC_KASAN(memmove)
cmplw 0,r3,r4
bgt backwards_memcpy
b memcpy
@@ -139,3 +143,4 @@ _GLOBAL(backwards_memcpy)
mtctr r7
b 1b
EXPORT_SYMBOL(memmove)
+EXPORT_SYMBOL_KASAN(memmove)
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index 273ea67e60a1..25c3772c1dfb 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -11,6 +11,7 @@
#include <asm/export.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
+#include <asm/kasan.h>
#ifndef SELFTEST_CASE
/* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */
@@ -18,7 +19,7 @@
#endif
.align 7
-_GLOBAL_TOC(memcpy)
+_GLOBAL_TOC_KASAN(memcpy)
BEGIN_FTR_SECTION
#ifdef __LITTLE_ENDIAN__
cmpdi cr7,r5,0
@@ -230,3 +231,4 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
blr
#endif
EXPORT_SYMBOL(memcpy)
+EXPORT_SYMBOL_KASAN(memcpy)
--
2.13.3
^ permalink raw reply related
* [PATCH v11 01/13] powerpc/32: Move early_init() in a separate file
From: Christophe Leroy @ 2019-04-26 16:23 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
Nicholas Piggin, Aneesh Kumar K.V, Andrey Ryabinin,
Alexander Potapenko, Dmitry Vyukov, Daniel Axtens
Cc: linux-mm, linuxppc-dev, linux-kernel, kasan-dev
In-Reply-To: <cover.1556295459.git.christophe.leroy@c-s.fr>
In preparation of KASAN, move early_init() into a separate
file in order to allow deactivation of KASAN for that function.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/kernel/Makefile | 2 +-
arch/powerpc/kernel/early_32.c | 36 ++++++++++++++++++++++++++++++++++++
arch/powerpc/kernel/setup_32.c | 28 ----------------------------
3 files changed, 37 insertions(+), 29 deletions(-)
create mode 100644 arch/powerpc/kernel/early_32.c
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index cddadccf551d..45e47752b692 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -93,7 +93,7 @@ extra-y += vmlinux.lds
obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o
-obj-$(CONFIG_PPC32) += entry_32.o setup_32.o
+obj-$(CONFIG_PPC32) += entry_32.o setup_32.o early_32.o
obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_BOOTX_TEXT) += btext.o
diff --git a/arch/powerpc/kernel/early_32.c b/arch/powerpc/kernel/early_32.c
new file mode 100644
index 000000000000..cf3cdd81dc47
--- /dev/null
+++ b/arch/powerpc/kernel/early_32.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Early init before relocation
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <asm/setup.h>
+#include <asm/sections.h>
+#include <asm/asm-prototypes.h>
+
+/*
+ * We're called here very early in the boot.
+ *
+ * Note that the kernel may be running at an address which is different
+ * from the address that it was linked at, so we must use RELOC/PTRRELOC
+ * to access static data (including strings). -- paulus
+ */
+notrace unsigned long __init early_init(unsigned long dt_ptr)
+{
+ unsigned long offset = reloc_offset();
+
+ /* First zero the BSS -- use memset_io, some platforms don't have caches on yet */
+ memset_io((void __iomem *)PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start);
+
+ /*
+ * Identify the CPU type and fix up code sections
+ * that depend on which cpu we have.
+ */
+ identify_cpu(offset, mfspr(SPRN_PVR));
+
+ apply_feature_fixups();
+
+ return KERNELBASE + offset;
+}
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 4a65e08a6042..3fb9f64f88fd 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -64,34 +64,6 @@ EXPORT_SYMBOL(DMA_MODE_READ);
EXPORT_SYMBOL(DMA_MODE_WRITE);
/*
- * We're called here very early in the boot.
- *
- * Note that the kernel may be running at an address which is different
- * from the address that it was linked at, so we must use RELOC/PTRRELOC
- * to access static data (including strings). -- paulus
- */
-notrace unsigned long __init early_init(unsigned long dt_ptr)
-{
- unsigned long offset = reloc_offset();
-
- /* First zero the BSS -- use memset_io, some platforms don't have
- * caches on yet */
- memset_io((void __iomem *)PTRRELOC(&__bss_start), 0,
- __bss_stop - __bss_start);
-
- /*
- * Identify the CPU type and fix up code sections
- * that depend on which cpu we have.
- */
- identify_cpu(offset, mfspr(SPRN_PVR));
-
- apply_feature_fixups();
-
- return KERNELBASE + offset;
-}
-
-
-/*
* This is run before start_kernel(), the kernel has been relocated
* and we are running with enough of the MMU enabled to have our
* proper kernel virtual addresses
--
2.13.3
^ permalink raw reply related
* [PATCH v11 00/13] KASAN for powerpc/32
From: Christophe Leroy @ 2019-04-26 16:23 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
Nicholas Piggin, Aneesh Kumar K.V, Andrey Ryabinin,
Alexander Potapenko, Dmitry Vyukov, Daniel Axtens
Cc: linux-mm, linuxppc-dev, linux-kernel, kasan-dev
This series adds KASAN support to powerpc/32
32 bits tested on nohash/32 (8xx), book3s/32 (mpc832x ie 603) and qemu mac99
Changes in v11:
- Dropped book3e RFC part.
- Rebased on latest powerpc merge branch (b251649c77625b7ad4430e518dc0f1608be9edf4).
Main impact is in head_32.S do to the merge with KUAP functionnality
- Added a fix from Daniel in prom_init: changed a direct struct assignation by a memcpy in patch 5
Changes in v10:
- Prepended the patch which fixes boot on hash32
- Reduced ifdef mess related to CONFIG_CMDLINE in prom_init.c
- Fixed strings preparation macros for ppc64 build (Reported by Daniel)
- Fixed boot failure on hash32 when total amount of memory is above the initial amount mapped with BATs.
- Reordered stuff in kasan.h to have a smoother patch when adding 64bit Book3E
- Split the change to PAGE_READONLY out of the hash32 patch.
- Appended Daniel's series for 64bit Book3E (with a build failure fix and a few cosmetic changes)
Changes in v9:
- Fixed fixmap IMMR alignment issue on 8xx with KASAN enabled.
- Set up final shadow page tables before switching to the final hash table on hash32
- Using PAGE_READONLY instead of PAGE_KERNEL_RO on hash32
- Use flash_tlb_kernel_range() instead of flash_tlb_mm() which doesn't work for kernel on some subarches.
- use __set_pte_at() instead of pte_update() to install final page tables
Changes in v8:
- Fixed circular issue between pgtable.h and fixmap.h
- Added missing includes in ppc64 string files
- Fixed kasan string related macro names for ppc64.
- Fixed most checkpatch messages
- build tested on kisskb (http://kisskb.ellerman.id.au/kisskb/head/6e65827de2fe71d21682dafd9084ed2cc6e06d4f/)
- moved CONFIG_KASAN_SHADOW_OFFSET in Kconfig.debug
Changes in v7:
- split in several smaller patches
- prom_init now has its own string functions
- full deactivation of powerpc-optimised string functions when KASAN is active
- shadow area now at a fixed place on very top of kernel virtual space.
- Early static hash table for hash book3s/32.
- Full support of both inline and outline instrumentation for both hash and nohash ppc32
- Earlier full activation of kasan.
Changes in v6:
- Fixed oops on module loading (due to access to RO shadow zero area).
- Added support for hash book3s/32, thanks to Daniel's patch to differ KASAN activation.
- Reworked handling of optimised string functions (dedicated patch for it)
- Reordered some files to ease adding of book3e/64 support.
Changes in v5:
- Added KASAN_SHADOW_OFFSET in Makefile, otherwise we fallback to KASAN_MINIMAL
and some stuff like stack instrumentation is not performed
- Moved calls to kasan_early_init() in head.S because stack instrumentation
in machine_init was performed before the call to kasan_early_init()
- Mapping kasan_early_shadow_page RW in kasan_early_init() and
remaping RO later in kasan_init()
- Allocating a big memblock() for shadow area, falling back to PAGE_SIZE blocks in case of failure.
Changes in v4:
- Comments from Andrey (DISABLE_BRANCH_PROFILING, Activation of reports)
- Proper initialisation of shadow area in kasan_init()
- Panic in case Hash table is required.
- Added comments in patch one to explain why *t = *s becomes memcpy(t, s, ...)
- Call of kasan_init_tags()
Changes in v3:
- Removed the printk() in kasan_early_init() to avoid build failure (see https://github.com/linuxppc/issues/issues/218)
- Added necessary changes in asm/book3s/32/pgtable.h to get it work on powerpc 603 family
- Added a few KASAN_SANITIZE_xxx.o := n to successfully boot on powerpc 603 family
Changes in v2:
- Rebased.
- Using __set_pte_at() to build the early table.
- Worked around and got rid of the patch adding asm/page.h in asm/pgtable-types.h
==> might be fixed independently but not needed for this serie.
Christophe Leroy (13):
powerpc/32: Move early_init() in a separate file
powerpc: prepare string/mem functions for KASAN
powerpc: remove CONFIG_CMDLINE #ifdef mess
powerpc/prom_init: don't use string functions from lib/
powerpc: don't use direct assignation during early boot.
powerpc/32: use memset() instead of memset_io() to zero BSS
powerpc/32: make KVIRT_TOP dependent on FIXMAP_START
powerpc/32: prepare shadow area for KASAN
powerpc: disable KASAN instrumentation on early/critical files.
powerpc/32: Add KASAN support
powerpc/32s: move hash code patching out of MMU_init_hw()
powerpc/32s: set up an early static hash table for KASAN.
powerpc/32s: map kasan zero shadow with PAGE_READONLY instead of
PAGE_KERNEL_RO
arch/powerpc/Kconfig | 7 +-
arch/powerpc/Kconfig.debug | 5 +
arch/powerpc/include/asm/book3s/32/pgtable.h | 13 +-
arch/powerpc/include/asm/fixmap.h | 5 +
arch/powerpc/include/asm/kasan.h | 40 +++++
arch/powerpc/include/asm/nohash/32/pgtable.h | 13 +-
arch/powerpc/include/asm/string.h | 32 +++-
arch/powerpc/kernel/Makefile | 14 +-
arch/powerpc/kernel/cputable.c | 13 +-
arch/powerpc/kernel/early_32.c | 36 +++++
arch/powerpc/kernel/head_32.S | 76 ++++++---
arch/powerpc/kernel/head_40x.S | 3 +
arch/powerpc/kernel/head_44x.S | 3 +
arch/powerpc/kernel/head_8xx.S | 3 +
arch/powerpc/kernel/head_fsl_booke.S | 3 +
arch/powerpc/kernel/prom_init.c | 228 +++++++++++++++++++++------
arch/powerpc/kernel/prom_init_check.sh | 12 +-
arch/powerpc/kernel/setup-common.c | 3 +
arch/powerpc/kernel/setup_32.c | 28 ----
arch/powerpc/lib/Makefile | 19 ++-
arch/powerpc/lib/copy_32.S | 12 +-
arch/powerpc/lib/mem_64.S | 9 +-
arch/powerpc/lib/memcpy_64.S | 4 +-
arch/powerpc/mm/Makefile | 7 +
arch/powerpc/mm/init_32.c | 3 +
arch/powerpc/mm/kasan/Makefile | 5 +
arch/powerpc/mm/kasan/kasan_init_32.c | 183 +++++++++++++++++++++
arch/powerpc/mm/mem.c | 4 +
arch/powerpc/mm/mmu_decl.h | 2 +
arch/powerpc/mm/ppc_mmu_32.c | 36 +++--
arch/powerpc/mm/ptdump/ptdump.c | 8 +
arch/powerpc/platforms/powermac/Makefile | 6 +
arch/powerpc/purgatory/Makefile | 3 +
arch/powerpc/xmon/Makefile | 1 +
34 files changed, 697 insertions(+), 142 deletions(-)
create mode 100644 arch/powerpc/include/asm/kasan.h
create mode 100644 arch/powerpc/kernel/early_32.c
create mode 100644 arch/powerpc/mm/kasan/Makefile
create mode 100644 arch/powerpc/mm/kasan/kasan_init_32.c
--
2.13.3
^ permalink raw reply
* [PATCH v2 15/15] powerpc/mm: refactor pgd_alloc() and pgd_free() on nohash
From: Christophe Leroy @ 2019-04-26 15:58 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
aneesh.kumar
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1556293738.git.christophe.leroy@c-s.fr>
pgd_alloc() and pgd_free() are identical on nohash 32 and 64.
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/include/asm/nohash/32/pgalloc.h | 11 -----------
arch/powerpc/include/asm/nohash/64/pgalloc.h | 11 -----------
arch/powerpc/include/asm/nohash/pgalloc.h | 12 ++++++++++++
3 files changed, 12 insertions(+), 22 deletions(-)
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 137761b01588..11eac371e7e0 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -5,17 +5,6 @@
#include <linux/threads.h>
#include <linux/slab.h>
-static inline pgd_t *pgd_alloc(struct mm_struct *mm)
-{
- return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
- pgtable_gfp_flags(mm, GFP_KERNEL));
-}
-
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
- kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
-}
-
/*
* We don't have any real pmd's, and this code never triggers because
* the pgd will always be present..
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index 5a0ea63c77c7..62321cd12da9 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -18,17 +18,6 @@ struct vmemmap_backing {
};
extern struct vmemmap_backing *vmemmap_list;
-static inline pgd_t *pgd_alloc(struct mm_struct *mm)
-{
- return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
- pgtable_gfp_flags(mm, GFP_KERNEL));
-}
-
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
- kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
-}
-
#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, (unsigned long)PUD)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h
index 4fccac6af3ad..332b13b4ecdb 100644
--- a/arch/powerpc/include/asm/nohash/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/pgalloc.h
@@ -3,6 +3,7 @@
#define _ASM_POWERPC_NOHASH_PGALLOC_H
#include <linux/mm.h>
+#include <linux/slab.h>
extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
#ifdef CONFIG_PPC64
@@ -16,6 +17,17 @@ static inline void tlb_flush_pgtable(struct mmu_gather *tlb,
}
#endif /* !CONFIG_PPC_BOOK3E */
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+ return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+ kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
+}
+
#ifdef CONFIG_PPC64
#include <asm/nohash/64/pgalloc.h>
#else
--
2.13.3
^ permalink raw reply related
* [PATCH v2 14/15] powerpc/mm: refactor pmd_pgtable()
From: Christophe Leroy @ 2019-04-26 15:58 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
aneesh.kumar
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1556293738.git.christophe.leroy@c-s.fr>
pmd_pgtable() is identical on the 4 subarches, refactor it.
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/include/asm/book3s/32/pgalloc.h | 2 --
arch/powerpc/include/asm/book3s/64/pgalloc.h | 5 -----
arch/powerpc/include/asm/nohash/32/pgalloc.h | 2 --
arch/powerpc/include/asm/nohash/64/pgalloc.h | 2 --
arch/powerpc/include/asm/pgalloc.h | 5 +++++
5 files changed, 5 insertions(+), 11 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 1b9b5c228230..998317702630 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -37,8 +37,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
*pmdp = __pmd(__pa(pte_page) | _PMD_PRESENT);
}
-#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
-
static inline void pgtable_free(void *table, unsigned index_size)
{
if (!index_size) {
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index df2dce6afe14..053a7940504e 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -163,11 +163,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
*pmd = __pmd(__pgtable_ptr_val(pte_page) | PMD_VAL_BITS);
}
-static inline pgtable_t pmd_pgtable(pmd_t pmd)
-{
- return (pgtable_t)pmd_page_vaddr(pmd);
-}
-
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
unsigned long address)
{
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 6c0f5151dc1d..137761b01588 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -43,6 +43,4 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
*pmdp = __pmd(__pa(pte_page) | _PMD_USER | _PMD_PRESENT);
}
-#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
-
#endif /* _ASM_POWERPC_PGALLOC_32_H */
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index c636feced1ff..5a0ea63c77c7 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -59,8 +59,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
pmd_set(pmd, (unsigned long)pte_page);
}
-#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
-
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index 5761bee0f004..2b2c60a1a66d 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -72,4 +72,9 @@ static inline void check_pgt_cache(void) { }
#include <asm/nohash/pgalloc.h>
#endif
+static inline pgtable_t pmd_pgtable(pmd_t pmd)
+{
+ return (pgtable_t)pmd_page_vaddr(pmd);
+}
+
#endif /* _ASM_POWERPC_PGALLOC_H */
--
2.13.3
^ permalink raw reply related
* [PATCH v2 13/15] powerpc/mm: refactor pgtable freeing functions on nohash
From: Christophe Leroy @ 2019-04-26 15:58 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
aneesh.kumar
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1556293738.git.christophe.leroy@c-s.fr>
pgtable_free() and others are identical on nohash/32 and 64,
so move them into asm/nohash/pgalloc.h
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/include/asm/nohash/32/pgalloc.h | 43 ---------------------------
arch/powerpc/include/asm/nohash/64/pgalloc.h | 43 ---------------------------
arch/powerpc/include/asm/nohash/pgalloc.h | 44 ++++++++++++++++++++++++++++
3 files changed, 44 insertions(+), 86 deletions(-)
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 7ee8e27070f4..6c0f5151dc1d 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -45,47 +45,4 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
-static inline void pgtable_free(void *table, unsigned index_size)
-{
- if (!index_size) {
- pte_fragment_free((unsigned long *)table, 0);
- } else {
- BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
- kmem_cache_free(PGT_CACHE(index_size), table);
- }
-}
-
-#define get_hugepd_cache_index(x) (x)
-
-#ifdef CONFIG_SMP
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
- void *table, int shift)
-{
- unsigned long pgf = (unsigned long)table;
- BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
- pgf |= shift;
- tlb_remove_table(tlb, (void *)pgf);
-}
-
-static inline void __tlb_remove_table(void *_table)
-{
- void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
- unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
-
- pgtable_free(table, shift);
-}
-#else
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
- void *table, int shift)
-{
- pgtable_free(table, shift);
-}
-#endif
-
-static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
- unsigned long address)
-{
- tlb_flush_pgtable(tlb, address);
- pgtable_free_tlb(tlb, table, 0);
-}
#endif /* _ASM_POWERPC_PGALLOC_32_H */
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index ffc86d42816d..c636feced1ff 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -72,49 +72,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
}
-static inline void pgtable_free(void *table, int shift)
-{
- if (!shift) {
- pte_fragment_free((unsigned long *)table, 0);
- } else {
- BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
- kmem_cache_free(PGT_CACHE(shift), table);
- }
-}
-
-#define get_hugepd_cache_index(x) (x)
-#ifdef CONFIG_SMP
-static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
-{
- unsigned long pgf = (unsigned long)table;
-
- BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
- pgf |= shift;
- tlb_remove_table(tlb, (void *)pgf);
-}
-
-static inline void __tlb_remove_table(void *_table)
-{
- void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
- unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
-
- pgtable_free(table, shift);
-}
-
-#else
-static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
-{
- pgtable_free(table, shift);
-}
-#endif
-
-static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
- unsigned long address)
-{
- tlb_flush_pgtable(tlb, address);
- pgtable_free_tlb(tlb, table, 0);
-}
-
#define __pmd_free_tlb(tlb, pmd, addr) \
pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX)
#define __pud_free_tlb(tlb, pud, addr) \
diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h
index 0634f2949438..4fccac6af3ad 100644
--- a/arch/powerpc/include/asm/nohash/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/pgalloc.h
@@ -21,4 +21,48 @@ static inline void tlb_flush_pgtable(struct mmu_gather *tlb,
#else
#include <asm/nohash/32/pgalloc.h>
#endif
+
+static inline void pgtable_free(void *table, int shift)
+{
+ if (!shift) {
+ pte_fragment_free((unsigned long *)table, 0);
+ } else {
+ BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+ kmem_cache_free(PGT_CACHE(shift), table);
+ }
+}
+
+#define get_hugepd_cache_index(x) (x)
+
+#ifdef CONFIG_SMP
+static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+{
+ unsigned long pgf = (unsigned long)table;
+
+ BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+ pgf |= shift;
+ tlb_remove_table(tlb, (void *)pgf);
+}
+
+static inline void __tlb_remove_table(void *_table)
+{
+ void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
+ unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+
+ pgtable_free(table, shift);
+}
+
+#else
+static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+{
+ pgtable_free(table, shift);
+}
+#endif
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+ unsigned long address)
+{
+ tlb_flush_pgtable(tlb, address);
+ pgtable_free_tlb(tlb, table, 0);
+}
#endif /* _ASM_POWERPC_NOHASH_PGALLOC_H */
--
2.13.3
^ permalink raw reply related
* [PATCH v2 12/15] powerpc/mm: Only keep one version of pmd_populate() functions on nohash/32
From: Christophe Leroy @ 2019-04-26 15:58 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
aneesh.kumar
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1556293738.git.christophe.leroy@c-s.fr>
Use IS_ENABLED(CONFIG_BOOKE) to make single versions of
pmd_populate() and pmd_populate_kernel()
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/include/asm/nohash/32/pgalloc.h | 28 ++++++++--------------------
1 file changed, 8 insertions(+), 20 deletions(-)
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 4615801aa953..7ee8e27070f4 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -25,37 +25,25 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
#define __pmd_free_tlb(tlb,x,a) do { } while (0)
/* #define pgd_populate(mm, pmd, pte) BUG() */
-#ifndef CONFIG_BOOKE
-
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
pte_t *pte)
{
- *pmdp = __pmd(__pa(pte) | _PMD_PRESENT);
+ if (IS_ENABLED(CONFIG_BOOKE))
+ *pmdp = __pmd((unsigned long)pte | _PMD_PRESENT);
+ else
+ *pmdp = __pmd(__pa(pte) | _PMD_PRESENT);
}
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
pgtable_t pte_page)
{
- *pmdp = __pmd(__pa(pte_page) | _PMD_USER | _PMD_PRESENT);
+ if (IS_ENABLED(CONFIG_BOOKE))
+ *pmdp = __pmd((unsigned long)pte_page | _PMD_PRESENT);
+ else
+ *pmdp = __pmd(__pa(pte_page) | _PMD_USER | _PMD_PRESENT);
}
#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
-#else
-
-static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
- pte_t *pte)
-{
- *pmdp = __pmd((unsigned long)pte | _PMD_PRESENT);
-}
-
-static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pte_page)
-{
- *pmdp = __pmd((unsigned long)pte_page | _PMD_PRESENT);
-}
-
-#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
-#endif
static inline void pgtable_free(void *table, unsigned index_size)
{
--
2.13.3
^ permalink raw reply related
* [PATCH v2 11/15] powerpc/mm: refactor definition of pgtable_cache[]
From: Christophe Leroy @ 2019-04-26 15:58 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
aneesh.kumar
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1556293738.git.christophe.leroy@c-s.fr>
pgtable_cache[] is the same for the 4 subarches, lets make it common.
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/include/asm/book3s/32/pgalloc.h | 21 ---------------------
arch/powerpc/include/asm/book3s/64/pgalloc.h | 22 ----------------------
arch/powerpc/include/asm/nohash/32/pgalloc.h | 21 ---------------------
arch/powerpc/include/asm/nohash/64/pgalloc.h | 22 ----------------------
arch/powerpc/include/asm/pgalloc.h | 21 +++++++++++++++++++++
5 files changed, 21 insertions(+), 86 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 46422309d6e0..1b9b5c228230 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -5,26 +5,6 @@
#include <linux/threads.h>
#include <linux/slab.h>
-/*
- * Functions that deal with pagetables that could be at any level of
- * the table need to be passed an "index_size" so they know how to
- * handle allocation. For PTE pages (which are linked to a struct
- * page for now, and drawn from the main get_free_pages() pool), the
- * allocation size will be (2^index_size * sizeof(pointer)) and
- * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
- *
- * The maximum index size needs to be big enough to allow any
- * pagetable sizes we need, but small enough to fit in the low bits of
- * any page table pointer. In other words all pagetables, even tiny
- * ones, must be aligned to allow at least enough low 0 bits to
- * contain this value. This value is also used as a mask, so it must
- * be one less than a power of two.
- */
-#define MAX_PGTABLE_INDEX_SIZE 0xf
-
-extern struct kmem_cache *pgtable_cache[];
-#define PGT_CACHE(shift) pgtable_cache[shift]
-
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
@@ -69,7 +49,6 @@ static inline void pgtable_free(void *table, unsigned index_size)
}
}
-#define check_pgt_cache() do { } while (0)
#define get_hugepd_cache_index(x) (x)
#ifdef CONFIG_SMP
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index cfd48d8cc055..df2dce6afe14 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -19,26 +19,6 @@ struct vmemmap_backing {
};
extern struct vmemmap_backing *vmemmap_list;
-/*
- * Functions that deal with pagetables that could be at any level of
- * the table need to be passed an "index_size" so they know how to
- * handle allocation. For PTE pages (which are linked to a struct
- * page for now, and drawn from the main get_free_pages() pool), the
- * allocation size will be (2^index_size * sizeof(pointer)) and
- * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
- *
- * The maximum index size needs to be big enough to allow any
- * pagetable sizes we need, but small enough to fit in the low bits of
- * any page table pointer. In other words all pagetables, even tiny
- * ones, must be aligned to allow at least enough low 0 bits to
- * contain this value. This value is also used as a mask, so it must
- * be one less than a power of two.
- */
-#define MAX_PGTABLE_INDEX_SIZE 0xf
-
-extern struct kmem_cache *pgtable_cache[];
-#define PGT_CACHE(shift) pgtable_cache[shift]
-
extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
extern void pmd_fragment_free(unsigned long *);
extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
@@ -199,8 +179,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
pgtable_free_tlb(tlb, table, PTE_INDEX);
}
-#define check_pgt_cache() do { } while (0)
-
extern atomic_long_t direct_pages_count[MMU_PAGE_COUNT];
static inline void update_page_count(int psize, long count)
{
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index e96ef2fde2ca..4615801aa953 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -5,26 +5,6 @@
#include <linux/threads.h>
#include <linux/slab.h>
-/*
- * Functions that deal with pagetables that could be at any level of
- * the table need to be passed an "index_size" so they know how to
- * handle allocation. For PTE pages (which are linked to a struct
- * page for now, and drawn from the main get_free_pages() pool), the
- * allocation size will be (2^index_size * sizeof(pointer)) and
- * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
- *
- * The maximum index size needs to be big enough to allow any
- * pagetable sizes we need, but small enough to fit in the low bits of
- * any page table pointer. In other words all pagetables, even tiny
- * ones, must be aligned to allow at least enough low 0 bits to
- * contain this value. This value is also used as a mask, so it must
- * be one less than a power of two.
- */
-#define MAX_PGTABLE_INDEX_SIZE 0xf
-
-extern struct kmem_cache *pgtable_cache[];
-#define PGT_CACHE(shift) pgtable_cache[shift]
-
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
@@ -87,7 +67,6 @@ static inline void pgtable_free(void *table, unsigned index_size)
}
}
-#define check_pgt_cache() do { } while (0)
#define get_hugepd_cache_index(x) (x)
#ifdef CONFIG_SMP
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index 98de4f3b0306..ffc86d42816d 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -18,26 +18,6 @@ struct vmemmap_backing {
};
extern struct vmemmap_backing *vmemmap_list;
-/*
- * Functions that deal with pagetables that could be at any level of
- * the table need to be passed an "index_size" so they know how to
- * handle allocation. For PTE pages (which are linked to a struct
- * page for now, and drawn from the main get_free_pages() pool), the
- * allocation size will be (2^index_size * sizeof(pointer)) and
- * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
- *
- * The maximum index size needs to be big enough to allow any
- * pagetable sizes we need, but small enough to fit in the low bits of
- * any page table pointer. In other words all pagetables, even tiny
- * ones, must be aligned to allow at least enough low 0 bits to
- * contain this value. This value is also used as a mask, so it must
- * be one less than a power of two.
- */
-#define MAX_PGTABLE_INDEX_SIZE 0xf
-
-extern struct kmem_cache *pgtable_cache[];
-#define PGT_CACHE(shift) pgtable_cache[shift]
-
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
@@ -140,6 +120,4 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
#define __pud_free_tlb(tlb, pud, addr) \
pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
-#define check_pgt_cache() do { } while (0)
-
#endif /* _ASM_POWERPC_PGALLOC_64_H */
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index c2c6fd438840..5761bee0f004 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -45,6 +45,27 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
pte_fragment_free((unsigned long *)ptepage, 0);
}
+/*
+ * Functions that deal with pagetables that could be at any level of
+ * the table need to be passed an "index_size" so they know how to
+ * handle allocation. For PTE pages, the allocation size will be
+ * (2^index_size * sizeof(pointer)) and allocations are drawn from
+ * the kmem_cache in PGT_CACHE(index_size).
+ *
+ * The maximum index size needs to be big enough to allow any
+ * pagetable sizes we need, but small enough to fit in the low bits of
+ * any page table pointer. In other words all pagetables, even tiny
+ * ones, must be aligned to allow at least enough low 0 bits to
+ * contain this value. This value is also used as a mask, so it must
+ * be one less than a power of two.
+ */
+#define MAX_PGTABLE_INDEX_SIZE 0xf
+
+extern struct kmem_cache *pgtable_cache[];
+#define PGT_CACHE(shift) pgtable_cache[shift]
+
+static inline void check_pgt_cache(void) { }
+
#ifdef CONFIG_PPC_BOOK3S
#include <asm/book3s/pgalloc.h>
#else
--
2.13.3
^ permalink raw reply related
* [PATCH v2 10/15] powerpc/mm: refactor pte_alloc_one() and pte_free() families definition.
From: Christophe Leroy @ 2019-04-26 15:58 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
aneesh.kumar
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1556293738.git.christophe.leroy@c-s.fr>
Functions pte_alloc_one(), pte_alloc_one_kernel(), pte_free(),
pte_free_kernel() are identical for the four subarches.
This patch moves their definition in a common place.
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/include/asm/book3s/32/pgalloc.h | 25 -------------------------
arch/powerpc/include/asm/book3s/64/pgalloc.h | 22 ----------------------
arch/powerpc/include/asm/nohash/32/pgalloc.h | 25 -------------------------
arch/powerpc/include/asm/nohash/64/pgalloc.h | 25 -------------------------
arch/powerpc/include/asm/pgalloc.h | 25 +++++++++++++++++++++++++
5 files changed, 25 insertions(+), 97 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 0ed856068bb8..46422309d6e0 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -59,31 +59,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
-pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel);
-
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- return (pte_t *)pte_fragment_alloc(mm, 1);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
- return (pgtable_t)pte_fragment_alloc(mm, 0);
-}
-
-void pte_frag_destroy(void *pte_frag);
-void pte_fragment_free(unsigned long *table, int kernel);
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- pte_fragment_free((unsigned long *)pte, 1);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
- pte_fragment_free((unsigned long *)ptepage, 0);
-}
-
static inline void pgtable_free(void *table, unsigned index_size)
{
if (!index_size) {
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 138bc2ecc0c4..cfd48d8cc055 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -39,9 +39,7 @@ extern struct vmemmap_backing *vmemmap_list;
extern struct kmem_cache *pgtable_cache[];
#define PGT_CACHE(shift) pgtable_cache[shift]
-extern pte_t *pte_fragment_alloc(struct mm_struct *, int);
extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
-extern void pte_fragment_free(unsigned long *, int);
extern void pmd_fragment_free(unsigned long *);
extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
#ifdef CONFIG_SMP
@@ -190,26 +188,6 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd)
return (pgtable_t)pmd_page_vaddr(pmd);
}
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- return (pte_t *)pte_fragment_alloc(mm, 1);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
- return (pgtable_t)pte_fragment_alloc(mm, 0);
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- pte_fragment_free((unsigned long *)pte, 1);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
- pte_fragment_free((unsigned long *)ptepage, 0);
-}
-
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
unsigned long address)
{
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 1d41508f0676..e96ef2fde2ca 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -77,31 +77,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
#endif
-pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel);
-
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- return (pte_t *)pte_fragment_alloc(mm, 1);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
- return (pgtable_t)pte_fragment_alloc(mm, 0);
-}
-
-void pte_frag_destroy(void *pte_frag);
-void pte_fragment_free(unsigned long *table, int kernel);
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- pte_fragment_free((unsigned long *)pte, 1);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
- pte_fragment_free((unsigned long *)ptepage, 0);
-}
-
static inline void pgtable_free(void *table, unsigned index_size)
{
if (!index_size) {
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index 7fb87235f845..98de4f3b0306 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -92,31 +92,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
}
-pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel);
-
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- return (pte_t *)pte_fragment_alloc(mm, 1);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
- return (pgtable_t)pte_fragment_alloc(mm, 0);
-}
-
-void pte_frag_destroy(void *pte_frag);
-void pte_fragment_free(unsigned long *table, int kernel);
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- pte_fragment_free((unsigned long *)pte, 1);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
- pte_fragment_free((unsigned long *)ptepage, 0);
-}
-
static inline void pgtable_free(void *table, int shift)
{
if (!shift) {
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index e11f03007b57..c2c6fd438840 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -20,6 +20,31 @@ static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp)
#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO)
+pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel);
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
+{
+ return (pte_t *)pte_fragment_alloc(mm, 1);
+}
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
+{
+ return (pgtable_t)pte_fragment_alloc(mm, 0);
+}
+
+void pte_frag_destroy(void *pte_frag);
+void pte_fragment_free(unsigned long *table, int kernel);
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+ pte_fragment_free((unsigned long *)pte, 1);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
+{
+ pte_fragment_free((unsigned long *)ptepage, 0);
+}
+
#ifdef CONFIG_PPC_BOOK3S
#include <asm/book3s/pgalloc.h>
#else
--
2.13.3
^ permalink raw reply related
* [PATCH v2 09/15] powerpc/mm: inline pte_alloc_one_kernel() and pte_alloc_one() on PPC32
From: Christophe Leroy @ 2019-04-26 15:58 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
aneesh.kumar
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1556293738.git.christophe.leroy@c-s.fr>
pte_alloc_one_kernel() and pte_alloc_one() are simple calls to
pte_fragment_alloc(), so they are good candidates for inlining as
already done on PPC64.
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/include/asm/book3s/32/pgalloc.h | 15 ++++++++++++---
arch/powerpc/include/asm/nohash/32/pgalloc.h | 15 ++++++++++++---
arch/powerpc/mm/pgtable_32.c | 10 ----------
3 files changed, 24 insertions(+), 16 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 645af86cd072..0ed856068bb8 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -59,10 +59,19 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm);
-void pte_frag_destroy(void *pte_frag);
pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel);
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
+{
+ return (pte_t *)pte_fragment_alloc(mm, 1);
+}
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
+{
+ return (pgtable_t)pte_fragment_alloc(mm, 0);
+}
+
+void pte_frag_destroy(void *pte_frag);
void pte_fragment_free(unsigned long *table, int kernel);
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index ea265a578eb0..1d41508f0676 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -77,10 +77,19 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
#endif
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm);
-void pte_frag_destroy(void *pte_frag);
pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel);
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
+{
+ return (pte_t *)pte_fragment_alloc(mm, 1);
+}
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
+{
+ return (pgtable_t)pte_fragment_alloc(mm, 0);
+}
+
+void pte_frag_destroy(void *pte_frag);
void pte_fragment_free(unsigned long *table, int kernel);
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index a1c3062f0665..d02fe3ce64db 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -43,16 +43,6 @@ EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */
extern char etext[], _stext[], _sinittext[], _einittext[];
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- return (pte_t *)pte_fragment_alloc(mm, 1);
-}
-
-pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
- return (pgtable_t)pte_fragment_alloc(mm, 0);
-}
-
void __iomem *
ioremap(phys_addr_t addr, unsigned long size)
{
--
2.13.3
^ permalink raw reply related
* [PATCH v2 08/15] powerpc/mm: don't use pte_alloc_kernel() until slab is available on PPC32
From: Christophe Leroy @ 2019-04-26 15:58 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
aneesh.kumar
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1556293738.git.christophe.leroy@c-s.fr>
In the same way as PPC64, implement early allocation functions and
avoid calling pte_alloc_kernel() before slab is available.
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/mm/pgtable_32.c | 34 ++++++++++++++++++++++++++++------
1 file changed, 28 insertions(+), 6 deletions(-)
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 6e56a6240bfa..a1c3062f0665 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -43,11 +43,8 @@ EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */
extern char etext[], _stext[], _sinittext[], _einittext[];
-__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
- if (!slab_is_available())
- return memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE);
-
return (pte_t *)pte_fragment_alloc(mm, 1);
}
@@ -205,7 +202,29 @@ void iounmap(volatile void __iomem *addr)
}
EXPORT_SYMBOL(iounmap);
-int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot)
+static void __init *early_alloc_pgtable(unsigned long size)
+{
+ void *ptr = memblock_alloc(size, size);
+
+ if (!ptr)
+ panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+ __func__, size, size);
+
+ return ptr;
+}
+
+static pte_t __init *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va)
+{
+ if (pmd_none(*pmdp)) {
+ pte_t *ptep = early_alloc_pgtable(PTE_FRAG_SIZE);
+
+ pmd_populate_kernel(&init_mm, pmdp, ptep);
+ }
+ return pte_offset_kernel(pmdp, va);
+}
+
+
+int __ref map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot)
{
pmd_t *pd;
pte_t *pg;
@@ -214,7 +233,10 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot)
/* Use upper 10 bits of VA to index the first level map */
pd = pmd_offset(pud_offset(pgd_offset_k(va), va), va);
/* Use middle 10 bits of VA to index the second-level map */
- pg = pte_alloc_kernel(pd, va);
+ if (likely(slab_is_available()))
+ pg = pte_alloc_kernel(pd, va);
+ else
+ pg = early_pte_alloc_kernel(pd, va);
if (pg != 0) {
err = 0;
/* The PTE should never be already set nor present in the
--
2.13.3
^ permalink raw reply related
* [PATCH v2 05/15] powerpc/mm: get rid of nohash/32/mmu.h and nohash/64/mmu.h
From: Christophe Leroy @ 2019-04-26 15:58 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
aneesh.kumar
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1556293738.git.christophe.leroy@c-s.fr>
Those files have no real added values, especially the 64 bit
which only includes the common book3e mmu.h which is also
included from 32 bits side.
So lets do the final inclusion directly from nohash/mmu.h
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/include/asm/nohash/32/mmu.h | 19 -------------------
arch/powerpc/include/asm/nohash/64/mmu.h | 10 ----------
arch/powerpc/include/asm/nohash/mmu-book3e.h | 2 ++
arch/powerpc/include/asm/nohash/mmu.h | 16 ++++++++++++----
4 files changed, 14 insertions(+), 33 deletions(-)
delete mode 100644 arch/powerpc/include/asm/nohash/32/mmu.h
delete mode 100644 arch/powerpc/include/asm/nohash/64/mmu.h
diff --git a/arch/powerpc/include/asm/nohash/32/mmu.h b/arch/powerpc/include/asm/nohash/32/mmu.h
deleted file mode 100644
index af0e8b54876a..000000000000
--- a/arch/powerpc/include/asm/nohash/32/mmu.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_POWERPC_NOHASH_32_MMU_H_
-#define _ASM_POWERPC_NOHASH_32_MMU_H_
-
-#if defined(CONFIG_40x)
-/* 40x-style software loaded TLB */
-#include <asm/nohash/32/mmu-40x.h>
-#elif defined(CONFIG_44x)
-/* 44x-style software loaded TLB */
-#include <asm/nohash/32/mmu-44x.h>
-#elif defined(CONFIG_PPC_BOOK3E_MMU)
-/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */
-#include <asm/nohash/mmu-book3e.h>
-#elif defined (CONFIG_PPC_8xx)
-/* Motorola/Freescale 8xx software loaded TLB */
-#include <asm/nohash/32/mmu-8xx.h>
-#endif
-
-#endif /* _ASM_POWERPC_NOHASH_32_MMU_H_ */
diff --git a/arch/powerpc/include/asm/nohash/64/mmu.h b/arch/powerpc/include/asm/nohash/64/mmu.h
deleted file mode 100644
index e490ecdac012..000000000000
--- a/arch/powerpc/include/asm/nohash/64/mmu.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_POWERPC_NOHASH_64_MMU_H_
-#define _ASM_POWERPC_NOHASH_64_MMU_H_
-
-#define MAX_PHYSMEM_BITS 44
-
-/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */
-#include <asm/nohash/mmu-book3e.h>
-
-#endif /* _ASM_POWERPC_NOHASH_64_MMU_H_ */
diff --git a/arch/powerpc/include/asm/nohash/mmu-book3e.h b/arch/powerpc/include/asm/nohash/mmu-book3e.h
index e20072972e35..4c9777d256fb 100644
--- a/arch/powerpc/include/asm/nohash/mmu-book3e.h
+++ b/arch/powerpc/include/asm/nohash/mmu-book3e.h
@@ -306,6 +306,8 @@ extern int book3e_htw_mode;
#define mmu_cleanup_all NULL
+#define MAX_PHYSMEM_BITS 44
+
#endif
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/nohash/mmu.h b/arch/powerpc/include/asm/nohash/mmu.h
index a037cb1efb57..edc793e5f08f 100644
--- a/arch/powerpc/include/asm/nohash/mmu.h
+++ b/arch/powerpc/include/asm/nohash/mmu.h
@@ -2,10 +2,18 @@
#ifndef _ASM_POWERPC_NOHASH_MMU_H_
#define _ASM_POWERPC_NOHASH_MMU_H_
-#ifdef CONFIG_PPC64
-#include <asm/nohash/64/mmu.h>
-#else
-#include <asm/nohash/32/mmu.h>
+#if defined(CONFIG_40x)
+/* 40x-style software loaded TLB */
+#include <asm/nohash/32/mmu-40x.h>
+#elif defined(CONFIG_44x)
+/* 44x-style software loaded TLB */
+#include <asm/nohash/32/mmu-44x.h>
+#elif defined(CONFIG_PPC_BOOK3E_MMU)
+/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */
+#include <asm/nohash/mmu-book3e.h>
+#elif defined (CONFIG_PPC_8xx)
+/* Motorola/Freescale 8xx software loaded TLB */
+#include <asm/nohash/32/mmu-8xx.h>
#endif
#endif /* _ASM_POWERPC_NOHASH_MMU_H_ */
--
2.13.3
^ permalink raw reply related
* [PATCH v2 06/15] powerpc/Kconfig: select PPC_MM_SLICES from subarch type
From: Christophe Leroy @ 2019-04-26 15:58 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
aneesh.kumar
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1556293738.git.christophe.leroy@c-s.fr>
Lets select PPC_MM_SLICES from the subarch config item instead of
doing it via defaults declaration in the PPC_MM_SLICES item itself.
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/platforms/Kconfig.cputype | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 00b2bb536c74..043c0909c857 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -38,6 +38,7 @@ config PPC_8xx
select SYS_SUPPORTS_HUGETLBFS
select PPC_HAVE_KUEP
select PPC_HAVE_KUAP
+ select PPC_MM_SLICES if HUGETLB_PAGE
config 40x
bool "AMCC 40x"
@@ -79,6 +80,7 @@ config PPC_BOOK3S_64
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select ARCH_SUPPORTS_NUMA_BALANCING
select IRQ_WORK
+ select PPC_MM_SLICES
config PPC_BOOK3E_64
bool "Embedded processors"
@@ -397,8 +399,6 @@ config PPC_BOOK3E_MMU
config PPC_MM_SLICES
bool
- default y if PPC_BOOK3S_64
- default y if PPC_8xx && HUGETLB_PAGE
config PPC_HAVE_PMU_SUPPORT
bool
--
2.13.3
^ permalink raw reply related
* [PATCH v2 07/15] powerpc/book3e: move early_alloc_pgtable() to init section
From: Christophe Leroy @ 2019-04-26 15:58 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
aneesh.kumar
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1556293738.git.christophe.leroy@c-s.fr>
early_alloc_pgtable() is only used during init.
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/mm/pgtable-book3e.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/mm/pgtable-book3e.c b/arch/powerpc/mm/pgtable-book3e.c
index 1032ef7aaf62..f6fc709697ee 100644
--- a/arch/powerpc/mm/pgtable-book3e.c
+++ b/arch/powerpc/mm/pgtable-book3e.c
@@ -55,7 +55,7 @@ void vmemmap_remove_mapping(unsigned long start,
#endif
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
-static __ref void *early_alloc_pgtable(unsigned long size)
+static void __init *early_alloc_pgtable(unsigned long size)
{
void *ptr;
@@ -74,7 +74,7 @@ static __ref void *early_alloc_pgtable(unsigned long size)
* map_kernel_page adds an entry to the ioremap page table
* and adds an entry to the HPT, possibly bolting it
*/
-int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
+int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
{
pgd_t *pgdp;
pud_t *pudp;
--
2.13.3
^ permalink raw reply related
* [PATCH v2 02/15] powerpc/mm: define __pud_free_tlb() at all time on nohash/64
From: Christophe Leroy @ 2019-04-26 15:58 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
aneesh.kumar
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1556293738.git.christophe.leroy@c-s.fr>
CONFIG_PPC_64K_PAGES is not selectable on nohash/64, so get
__pud_free_tlb() defined at all time.
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/include/asm/nohash/64/pgalloc.h | 3 ---
1 file changed, 3 deletions(-)
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index 66d086f85bd5..ded453f9b5a8 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -171,12 +171,9 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
#define __pmd_free_tlb(tlb, pmd, addr) \
pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX)
-#ifndef CONFIG_PPC_64K_PAGES
#define __pud_free_tlb(tlb, pud, addr) \
pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
-#endif /* CONFIG_PPC_64K_PAGES */
-
#define check_pgt_cache() do { } while (0)
#endif /* _ASM_POWERPC_PGALLOC_64_H */
--
2.13.3
^ permalink raw reply related
* [PATCH v2 04/15] powerpc/mm: move pgtable_t in asm/mmu.h
From: Christophe Leroy @ 2019-04-26 15:58 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
aneesh.kumar
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1556293738.git.christophe.leroy@c-s.fr>
pgtable_t is now identical for all subarches, move it to the
top level asm/mmu.h
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/include/asm/book3s/32/mmu-hash.h | 4 ----
arch/powerpc/include/asm/book3s/64/mmu.h | 8 --------
arch/powerpc/include/asm/mmu.h | 3 +++
arch/powerpc/include/asm/nohash/32/mmu.h | 6 ------
arch/powerpc/include/asm/nohash/64/mmu.h | 6 ------
5 files changed, 3 insertions(+), 24 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
index f9eae105a9f4..2e277ca0170f 100644
--- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
@@ -10,8 +10,6 @@
* BATs
*/
-#include <asm/page.h>
-
/* Block size masks */
#define BL_128K 0x000
#define BL_256K 0x001
@@ -49,8 +47,6 @@ struct ppc_bat {
u32 batu;
u32 batl;
};
-
-typedef pte_t *pgtable_t;
#endif /* !__ASSEMBLY__ */
/*
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 230a9dec7677..d4a94c2b29f2 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -25,14 +25,6 @@ struct mmu_psize_def {
};
};
extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
-
-/*
- * For BOOK3s 64 with 4k and 64K linux page size
- * we want to use pointers, because the page table
- * actually store pfn
- */
-typedef pte_t *pgtable_t;
-
#endif /* __ASSEMBLY__ */
/*
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index d86c5641bd97..ba94ce8c22d7 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -129,6 +129,9 @@
#ifndef __ASSEMBLY__
#include <linux/bug.h>
#include <asm/cputable.h>
+#include <asm/page.h>
+
+typedef pte_t *pgtable_t;
#ifdef CONFIG_PPC_FSL_BOOK3E
#include <asm/percpu.h>
diff --git a/arch/powerpc/include/asm/nohash/32/mmu.h b/arch/powerpc/include/asm/nohash/32/mmu.h
index 7d94a36d57d2..af0e8b54876a 100644
--- a/arch/powerpc/include/asm/nohash/32/mmu.h
+++ b/arch/powerpc/include/asm/nohash/32/mmu.h
@@ -2,8 +2,6 @@
#ifndef _ASM_POWERPC_NOHASH_32_MMU_H_
#define _ASM_POWERPC_NOHASH_32_MMU_H_
-#include <asm/page.h>
-
#if defined(CONFIG_40x)
/* 40x-style software loaded TLB */
#include <asm/nohash/32/mmu-40x.h>
@@ -18,8 +16,4 @@
#include <asm/nohash/32/mmu-8xx.h>
#endif
-#ifndef __ASSEMBLY__
-typedef pte_t *pgtable_t;
-#endif
-
#endif /* _ASM_POWERPC_NOHASH_32_MMU_H_ */
diff --git a/arch/powerpc/include/asm/nohash/64/mmu.h b/arch/powerpc/include/asm/nohash/64/mmu.h
index 26e05ce8f5aa..e490ecdac012 100644
--- a/arch/powerpc/include/asm/nohash/64/mmu.h
+++ b/arch/powerpc/include/asm/nohash/64/mmu.h
@@ -4,13 +4,7 @@
#define MAX_PHYSMEM_BITS 44
-#include <asm/page.h>
-
/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */
#include <asm/nohash/mmu-book3e.h>
-#ifndef __ASSEMBLY__
-typedef pte_t *pgtable_t;
-#endif
-
#endif /* _ASM_POWERPC_NOHASH_64_MMU_H_ */
--
2.13.3
^ permalink raw reply related
* [PATCH v2 00/15] Refactor pgalloc stuff
From: Christophe Leroy @ 2019-04-26 15:57 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
aneesh.kumar
Cc: linuxppc-dev, linux-kernel
This series converts book3e64 to pte_fragment and refactor
things that are common among subarches.
Changes in v2:
- Rebased on latest merge branch (b251649c77625b7ad4430e518dc0f1608be9edf4)
- Resolved conflicts, especially due to the addition of MAX_PHYSMEM_BITS in
asm/nohash/64/mmu.h ==> moved it to asm/nohash/mmu-book3e.h
- compilation test at http://kisskb.ellerman.id.au/kisskb/head/cf0b6c1ff5ca934bbe02f88b1b1ba533b386225e/
Christophe Leroy (15):
powerpc/mm: drop __bad_pte()
powerpc/mm: define __pud_free_tlb() at all time on nohash/64
powerpc/mm: convert Book3E 64 to pte_fragment
powerpc/mm: move pgtable_t in asm/mmu.h
powerpc/mm: get rid of nohash/32/mmu.h and nohash/64/mmu.h
powerpc/Kconfig: select PPC_MM_SLICES from subarch type
powerpc/book3e: move early_alloc_pgtable() to init section
powerpc/mm: don't use pte_alloc_kernel() until slab is available on
PPC32
powerpc/mm: inline pte_alloc_one_kernel() and pte_alloc_one() on PPC32
powerpc/mm: refactor pte_alloc_one() and pte_free() families
definition.
powerpc/mm: refactor definition of pgtable_cache[]
powerpc/mm: Only keep one version of pmd_populate() functions on
nohash/32
powerpc/mm: refactor pgtable freeing functions on nohash
powerpc/mm: refactor pmd_pgtable()
powerpc/mm: refactor pgd_alloc() and pgd_free() on nohash
arch/powerpc/include/asm/book3s/32/mmu-hash.h | 4 -
arch/powerpc/include/asm/book3s/32/pgalloc.h | 41 ---------
arch/powerpc/include/asm/book3s/64/mmu.h | 8 --
arch/powerpc/include/asm/book3s/64/pgalloc.h | 49 ----------
arch/powerpc/include/asm/mmu.h | 3 +
arch/powerpc/include/asm/mmu_context.h | 6 --
arch/powerpc/include/asm/nohash/32/mmu.h | 25 ------
arch/powerpc/include/asm/nohash/32/pgalloc.h | 123 ++------------------------
arch/powerpc/include/asm/nohash/64/mmu.h | 14 ---
arch/powerpc/include/asm/nohash/64/pgalloc.h | 117 +-----------------------
arch/powerpc/include/asm/nohash/mmu-book3e.h | 2 +
arch/powerpc/include/asm/nohash/mmu.h | 16 +++-
arch/powerpc/include/asm/nohash/pgalloc.h | 56 ++++++++++++
arch/powerpc/include/asm/pgalloc.h | 51 +++++++++++
arch/powerpc/mm/Makefile | 4 +-
arch/powerpc/mm/mmu_context.c | 2 +-
arch/powerpc/mm/pgtable-book3e.c | 4 +-
arch/powerpc/mm/pgtable_32.c | 42 +++++----
arch/powerpc/platforms/Kconfig.cputype | 4 +-
19 files changed, 167 insertions(+), 404 deletions(-)
delete mode 100644 arch/powerpc/include/asm/nohash/32/mmu.h
delete mode 100644 arch/powerpc/include/asm/nohash/64/mmu.h
--
2.13.3
^ permalink raw reply
* [PATCH v2 01/15] powerpc/mm: drop __bad_pte()
From: Christophe Leroy @ 2019-04-26 15:57 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
aneesh.kumar
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1556293738.git.christophe.leroy@c-s.fr>
This has never been called (since Kernel has been in git at least),
drop it.
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/include/asm/book3s/32/pgalloc.h | 2 --
arch/powerpc/include/asm/nohash/32/pgalloc.h | 2 --
2 files changed, 4 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 3633502e102c..645af86cd072 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -22,8 +22,6 @@
*/
#define MAX_PGTABLE_INDEX_SIZE 0xf
-extern void __bad_pte(pmd_t *pmd);
-
extern struct kmem_cache *pgtable_cache[];
#define PGT_CACHE(shift) pgtable_cache[shift]
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index bd186e85b4f7..ea265a578eb0 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -22,8 +22,6 @@
*/
#define MAX_PGTABLE_INDEX_SIZE 0xf
-extern void __bad_pte(pmd_t *pmd);
-
extern struct kmem_cache *pgtable_cache[];
#define PGT_CACHE(shift) pgtable_cache[shift]
--
2.13.3
^ permalink raw reply related
* [PATCH v2 03/15] powerpc/mm: convert Book3E 64 to pte_fragment
From: Christophe Leroy @ 2019-04-26 15:58 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
aneesh.kumar
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1556293738.git.christophe.leroy@c-s.fr>
Book3E 64 is the only subarch not using pte_fragment. In order
to allow refactorisation, this patch converts it to pte_fragment.
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/include/asm/mmu_context.h | 6 -----
arch/powerpc/include/asm/nohash/64/mmu.h | 4 +++-
arch/powerpc/include/asm/nohash/64/pgalloc.h | 33 ++++++++++------------------
arch/powerpc/mm/Makefile | 4 ++--
arch/powerpc/mm/mmu_context.c | 2 +-
5 files changed, 18 insertions(+), 31 deletions(-)
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 6ee8195a2ffb..66a3805dc935 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -228,13 +228,7 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
#endif
}
-#ifdef CONFIG_PPC_BOOK3E_64
-static inline void arch_exit_mmap(struct mm_struct *mm)
-{
-}
-#else
extern void arch_exit_mmap(struct mm_struct *mm);
-#endif
static inline void arch_unmap(struct mm_struct *mm,
struct vm_area_struct *vma,
diff --git a/arch/powerpc/include/asm/nohash/64/mmu.h b/arch/powerpc/include/asm/nohash/64/mmu.h
index 81cf30c370e5..26e05ce8f5aa 100644
--- a/arch/powerpc/include/asm/nohash/64/mmu.h
+++ b/arch/powerpc/include/asm/nohash/64/mmu.h
@@ -4,11 +4,13 @@
#define MAX_PHYSMEM_BITS 44
+#include <asm/page.h>
+
/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */
#include <asm/nohash/mmu-book3e.h>
#ifndef __ASSEMBLY__
-typedef struct page *pgtable_t;
+typedef pte_t *pgtable_t;
#endif
#endif /* _ASM_POWERPC_NOHASH_64_MMU_H_ */
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index ded453f9b5a8..7fb87235f845 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -76,10 +76,10 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
pgtable_t pte_page)
{
- pmd_set(pmd, (unsigned long)page_address(pte_page));
+ pmd_set(pmd, (unsigned long)pte_page);
}
-#define pmd_pgtable(pmd) pmd_page(pmd)
+#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
@@ -92,44 +92,35 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
}
+pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel);
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
- return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+ return (pte_t *)pte_fragment_alloc(mm, 1);
}
static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
{
- struct page *page;
- pte_t *pte;
-
- pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT);
- if (!pte)
- return NULL;
- page = virt_to_page(pte);
- if (!pgtable_page_ctor(page)) {
- __free_page(page);
- return NULL;
- }
- return page;
+ return (pgtable_t)pte_fragment_alloc(mm, 0);
}
+void pte_frag_destroy(void *pte_frag);
+void pte_fragment_free(unsigned long *table, int kernel);
+
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
- free_page((unsigned long)pte);
+ pte_fragment_free((unsigned long *)pte, 1);
}
static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
{
- pgtable_page_dtor(ptepage);
- __free_page(ptepage);
+ pte_fragment_free((unsigned long *)ptepage, 0);
}
static inline void pgtable_free(void *table, int shift)
{
if (!shift) {
- pgtable_page_dtor(virt_to_page(table));
- free_page((unsigned long)table);
+ pte_fragment_free((unsigned long *)table, 0);
} else {
BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
kmem_cache_free(PGT_CACHE(shift), table);
@@ -166,7 +157,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
unsigned long address)
{
tlb_flush_pgtable(tlb, address);
- pgtable_free_tlb(tlb, page_address(table), 0);
+ pgtable_free_tlb(tlb, table, 0);
}
#define __pmd_free_tlb(tlb, pmd, addr) \
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 3c1bd9fa23cd..138c772d58d1 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -9,6 +9,7 @@ CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE)
obj-y := fault.o mem.o pgtable.o mmap.o \
init_$(BITS).o pgtable_$(BITS).o \
+ pgtable-frag.o \
init-common.o mmu_context.o drmem.o
obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
tlb_nohash_low.o
@@ -17,8 +18,7 @@ hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o
obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o
obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb.o \
$(hash64-y) mmu_context_book3s64.o \
- pgtable-book3s64.o pgtable-frag.o
-obj-$(CONFIG_PPC32) += pgtable-frag.o
+ pgtable-book3s64.o
obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o
obj-$(CONFIG_PPC_BOOK3S_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
obj-$(CONFIG_PPC_BOOK3S) += tlb_hash$(BITS).o
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index bb52320b7369..6b049d82b98a 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -98,7 +98,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
switch_mmu_context(prev, next, tsk);
}
-#ifdef CONFIG_PPC32
+#ifndef CONFIG_PPC_BOOK3S_64
void arch_exit_mmap(struct mm_struct *mm)
{
void *frag = pte_frag_get(&mm->context);
--
2.13.3
^ permalink raw reply related
* Re: BUG: crash in __tlb_remove_page_size with STRICT_KERNEL_RWX on BOOK3S_32
From: Serge Belyshev @ 2019-04-26 14:38 UTC (permalink / raw)
To: Christophe Leroy; +Cc: linuxppc-dev
In-Reply-To: <db886597-8867-93e6-a507-6da4963ce0bd@c-s.fr>
Hi!
> Could you please compile your kernel with CONFIG_PPC_PTDUMP, and
> provide the content of:
>
> /sys/kernel/debug/kernel_page_tables
---[ Start of kernel VM ]---
0xe1000000-0xefffffff 0x21000000 240M rw present dirty accessed
---[ vmalloc() Area ]---
0xf1000000-0xf1000fff 0x80041000 4K rw present guarded dirty accessed no cache
0xf1002000-0xf1003fff 0x80041000 8K rw present guarded dirty accessed no cache
0xf1005000-0xf1005fff 0x80060000 4K rw present guarded dirty accessed no cache
0xf1007000-0xf1007fff 0x80050000 4K rw present guarded dirty accessed no cache
0xf100a000-0xf100bfff 0xf8001000 8K rw present guarded dirty accessed no cache
0xf100d000-0xf100dfff 0x80018000 4K rw present guarded dirty accessed no cache
0xf1010000-0xf1011fff 0xa0006000 8K rw present guarded dirty accessed no cache
0xf101b000-0xf1025fff 0x3fc00000 44K rw present dirty accessed
0xf1027000-0xf1027fff 0xb87ff000 4K rw present guarded dirty accessed no cache
0xf1029000-0xf1029fff 0xba7ff000 4K rw present guarded dirty accessed no cache
0xf102b000-0xf106afff 0x2f300000 256K rw present guarded dirty accessed no cache
0xf106c000-0xf106ffff 0x3fc0b000 16K rw present dirty accessed
0xf1071000-0xf1071fff 0x80020000 4K rw present guarded dirty accessed no cache
0xf1074000-0xf1077fff 0xf5004000 16K rw present guarded dirty accessed no cache
0xf107a000-0xf107bfff 0x80008000 8K rw present guarded dirty accessed no cache
0xf107d000-0xf107dfff 0xf5000000 4K rw present guarded dirty accessed no cache
0xf1080000-0xf118afff 0xb8008000 1068K rw present guarded dirty accessed no cache
0xf1195000-0xf119cfff 0x2f3a0000 32K rw present dirty accessed
0xf119d000-0xf119efff 0x2f3a0000 8K rw present dirty accessed
0xf11a0000-0xf11a5fff 0x2f3a8000 24K rw present dirty accessed
0xf11a6000-0xf11a7fff 0x2f3da000 8K rw present dirty accessed
0xf11a8000-0xf11a9fff 0x2f3a8000 8K rw present dirty accessed
0xf11ab000-0xf11abfff 0xa0004000 4K rw present guarded dirty accessed no cache
0xf11ad000-0xf11adfff 0xa0000000 4K rw present guarded dirty accessed no cache
0xf11af000-0xf11affff 0xa0003000 4K rw present guarded dirty accessed no cache
0xf11b1000-0xf11b1fff 0xa0002000 4K rw present guarded dirty accessed no cache
0xf11b3000-0xf11b3fff 0xa0001000 4K rw present guarded dirty accessed no cache
0xf11b5000-0xf11b5fff 0x80010000 4K rw present guarded dirty accessed no cache
0xf11b7000-0xf11b7fff 0x80008000 4K rw present guarded dirty accessed no cache
0xf11b9000-0xf11b9fff 0x80008000 4K rw present guarded dirty accessed no cache
0xf11c0000-0xf11cffff 0x88000000 64K rw present guarded dirty accessed no cache
0xf11eb000-0xf11ebfff 0x3fc16000 4K rw present dirty accessed
0xf11ec000-0xf11ecfff 0x3fc15000 4K rw present dirty accessed
0xf11ed000-0xf11edfff 0x3fc14000 4K rw present dirty accessed
0xf1200000-0xf1259fff 0xba008000 360K rw present guarded dirty accessed no cache
0xf1280000-0xf147ffff 0xf5200000 2M rw present guarded dirty accessed no cache
---[ vmalloc() End ]---
---[ Early I/O remap start ]---
0xfde2b000-0xfde2cfff 0x80016000 8K rw present guarded dirty accessed no cache
0xfde2d000-0xfde2dfff 0x80000000 4K rw present guarded dirty accessed no cache
0xfde2e000-0xfde2efff 0xf8000000 4K rw present guarded dirty accessed no cache
0xfde2f000-0xfe62efff 0xf4000000 8M rw present guarded dirty accessed no cache
0xfe62f000-0xfe62ffff 0xf4c00000 4K rw present guarded dirty accessed no cache
0xfe630000-0xfe630fff 0xf4800000 4K rw present guarded dirty accessed no cache
0xfe631000-0xfee30fff 0xf2000000 8M rw present guarded dirty accessed no cache
0xfee31000-0xfee31fff 0xf2c00000 4K rw present guarded dirty accessed no cache
0xfee32000-0xfee32fff 0xf2800000 4K rw present guarded dirty accessed no cache
0xfee33000-0xff632fff 0xf0000000 8M rw present guarded dirty accessed no cache
0xff633000-0xff633fff 0xf0c00000 4K rw present guarded dirty accessed no cache
0xff634000-0xff634fff 0xf0800000 4K rw present guarded dirty accessed no cache
0xff635000-0xff73ffff 0xb8008000 1068K rw present dirty accessed no cache
0xff740000-0xff7bffff 0x80000000 512K rw present guarded dirty accessed no cache
0xff7c0000-0xff7fffff 0xf8000000 256K rw present guarded dirty accessed no cache
---[ Early I/O remap end ]---
---[ Highmem PTEs start ]---
0xff801000-0xff801fff 0x3f804000 4K rw present dirty accessed
0xff802000-0xff802fff 0x3f9c6000 4K rw present dirty accessed
0xff803000-0xff803fff 0x3fa22000 4K rw present dirty accessed
0xff804000-0xff804fff 0x3fa3b000 4K rw present dirty accessed
0xff805000-0xff805fff 0x3fa52000 4K rw present dirty accessed
0xff806000-0xff806fff 0x3fb3b000 4K rw present dirty accessed
0xff807000-0xff807fff 0x3fb52000 4K rw present dirty accessed
0xff808000-0xff808fff 0x3fb26000 4K rw present dirty accessed
0xff809000-0xff809fff 0x3fb48000 4K rw present dirty accessed
0xff80a000-0xff80afff 0x3fb79000 4K rw present dirty accessed
0xff80b000-0xff80bfff 0x3f9e4000 4K rw present dirty accessed
0xff80c000-0xff80cfff 0x3f574000 4K rw present dirty accessed
0xff80d000-0xff80dfff 0x3f5c6000 4K rw present dirty accessed
0xff80e000-0xff80efff 0x3f578000 4K rw present dirty accessed
0xff80f000-0xff80ffff 0x3f60a000 4K rw present dirty accessed
0xff810000-0xff810fff 0x3f791000 4K rw present dirty accessed
---[ Highmem PTEs end ]---
---[ Fixmap start ]---
0xfffde000-0xfffdefff 0x3f086000 4K rw present dirty accessed
0xfffdf000-0xfffdffff 0x3f08a000 4K rw present dirty accessed
---[ Fixmap end ]---
> /sys/kernel/debug/powerpc/block_address_translation
---[ Instruction Block Address Translation ]---
0: 0xc0000000-0xc07fffff 0x00000000 Kernel EXEC coherent
1: 0xc0800000-0xc087ffff 0x00800000 Kernel EXEC coherent
2: -
3: -
4: -
5: -
6: -
7: -
---[ Data Block Address Translation ]---
0: 0xc0000000-0xc07fffff 0x00000000 Kernel RO coherent
1: 0xc0800000-0xc0bfffff 0x00800000 Kernel RO coherent
2: 0xc0c00000-0xc13fffff 0x00c00000 Kernel RW coherent
3: 0xc1400000-0xc23fffff 0x01400000 Kernel RW coherent
4: 0xc2400000-0xc43fffff 0x02400000 Kernel RW coherent
5: 0xc4400000-0xc83fffff 0x04400000 Kernel RW coherent
6: 0xc8400000-0xd03fffff 0x08400000 Kernel RW coherent
7: 0xd0400000-0xe03fffff 0x10400000 Kernel RW coherent
> /sys/kernel/debug/powerpc/segment_registers
---[ User Segments ]---
0x00000000-0x0fffffff Kern key 1 User key 1 VSID 0x07aa30
0x10000000-0x1fffffff Kern key 1 User key 1 VSID 0x07ab41
0x20000000-0x2fffffff Kern key 1 User key 1 VSID 0x07ac52
0x30000000-0x3fffffff Kern key 1 User key 1 VSID 0x07ad63
0x40000000-0x4fffffff Kern key 1 User key 1 VSID 0x07ae74
0x50000000-0x5fffffff Kern key 1 User key 1 VSID 0x07af85
0x60000000-0x6fffffff Kern key 1 User key 1 VSID 0x07b096
0x70000000-0x7fffffff Kern key 1 User key 1 VSID 0x07b1a7
0x80000000-0x8fffffff Kern key 1 User key 1 VSID 0x07b2b8
0x90000000-0x9fffffff Kern key 1 User key 1 VSID 0x07b3c9
0xa0000000-0xafffffff Kern key 1 User key 1 VSID 0x07b4da
0xb0000000-0xbfffffff Kern key 1 User key 1 VSID 0x07b5eb
---[ Kernel Segments ]---
0xc0000000-0xcfffffff Kern key 0 User key 1 No Exec VSID 0x000ccc
0xd0000000-0xdfffffff Kern key 0 User key 1 No Exec VSID 0x000ddd
0xe0000000-0xefffffff Kern key 0 User key 1 No Exec VSID 0x000eee
0xf0000000-0xffffffff Kern key 0 User key 1 No Exec VSID 0x000fff
> Please also provide the begining of 'dmesg' that shows the memory layout.
[ 0.000000] Total memory = 1024MB; using 2048kB for hash table (at (ptrval))
[ 0.000000] Linux version 5.1.0-rc6-00072-g8113a85f8720 (ssb@spider) (gcc version 9.0.1 20190409 (experimental) [trunk revision 270234] (GCC)) #1192 PREEMPT Fri Apr 26 16:45:50 MSK 2019
[ 0.000000] Found UniNorth memory controller & host bridge @ 0xf8000000 revision: 0xd2
[ 0.000000] Mapped at 0xff7c0000
[ 0.000000] Found a Intrepid mac-io controller, rev: 0, mapped at 0x(ptrval)
[ 0.000000] Processor NAP mode on idle enabled.
[ 0.000000] PowerMac motherboard: PowerBook G4 15"
[ 0.000000] Using PowerMac machine description
[ 0.000000] printk: bootconsole [udbg0] enabled
[ 0.000000] -----------------------------------------------------
[ 0.000000] Hash_size = 0x200000
[ 0.000000] phys_mem_size = 0x40000000
[ 0.000000] dcache_bsize = 0x20
[ 0.000000] icache_bsize = 0x20
[ 0.000000] cpu_features = 0x000000002510600a
[ 0.000000] possible = 0x000000002f7ff14b
[ 0.000000] always = 0x0000000000000000
[ 0.000000] cpu_user_features = 0x9c000001 0x00000000
[ 0.000000] mmu_features = 0x00010001
[ 0.000000] Hash = 0x(ptrval)
[ 0.000000] Hash_mask = 0x7fff
[ 0.000000] -----------------------------------------------------
[ 0.000000] Found UniNorth PCI host bridge at 0x00000000f0000000. Firmware bus number: 0->1
[ 0.000000] PCI host bridge /pci@f0000000 ranges:
[ 0.000000] MEM 0x00000000f1000000..0x00000000f1ffffff -> 0x00000000f1000000
[ 0.000000] IO 0x00000000f0000000..0x00000000f07fffff -> 0x0000000000000000
[ 0.000000] MEM 0x00000000b0000000..0x00000000bfffffff -> 0x00000000b0000000
[ 0.000000] Found UniNorth PCI host bridge at 0x00000000f2000000. Firmware bus number: 0->1
[ 0.000000] PCI host bridge /pci@f2000000 (primary) ranges:
[ 0.000000] MEM 0x00000000f3000000..0x00000000f3ffffff -> 0x00000000f3000000
[ 0.000000] IO 0x00000000f2000000..0x00000000f27fffff -> 0x0000000000000000
[ 0.000000] MEM 0x0000000080000000..0x00000000afffffff -> 0x0000000080000000
[ 0.000000] Found UniNorth PCI host bridge at 0x00000000f4000000. Firmware bus number: 0->1
[ 0.000000] PCI host bridge /pci@f4000000 ranges:
[ 0.000000] MEM 0x00000000f5000000..0x00000000f5ffffff -> 0x00000000f5000000
[ 0.000000] IO 0x00000000f4000000..0x00000000f47fffff -> 0x0000000000000000
[ 0.000000] via-pmu: Server Mode is disabled
[ 0.000000] PMU driver v2 initialized for Core99, firmware: 0c
[ 0.000000] Top of RAM: 0x40000000, Total RAM: 0x40000000
[ 0.000000] Memory hole size: 0MB
[ 0.000000] Zone ranges:
[ 0.000000] Normal [mem 0x0000000000000000-0x000000002fffffff]
[ 0.000000] HighMem [mem 0x0000000030000000-0x000000003fffffff]
[ 0.000000] Movable zone start for each node
[ 0.000000] Early memory node ranges
[ 0.000000] node 0: [mem 0x0000000000000000-0x000000003fffffff]
[ 0.000000] Initmem setup node 0 [mem 0x0000000000000000-0x000000003fffffff]
[ 0.000000] On node 0 totalpages: 262144
[ 0.000000] Normal zone: 1536 pages used for memmap
[ 0.000000] Normal zone: 0 pages reserved
[ 0.000000] Normal zone: 196608 pages, LIFO batch:63
[ 0.000000] HighMem zone: 65536 pages, LIFO batch:15
[ 0.000000] pcpu-alloc: s0 r0 d32768 u32768 alloc=1*32768
[ 0.000000] pcpu-alloc: [0] 0
[ 0.000000] Built 1 zonelists, mobility grouping on. Total pages: 260608
[ 0.000000] Kernel command line: root=/dev/hda3 ro console=ttyUSB0,115200 console=tty0 init=/boot/init
[ 0.000000] Dentry cache hash table entries: 131072 (order: 7, 524288 bytes)
[ 0.000000] Inode-cache hash table entries: 65536 (order: 6, 262144 bytes)
[ 0.000000] Memory: 1023092K/1048576K available (8704K kernel code, 348K rwdata, 1344K rodata, 212K init, 1176K bss, 25484K reserved, 0K cma-reserved, 262144K highmem)
[ 0.000000] Kernel virtual memory layout:
[ 0.000000] * 0xfffcf000..0xfffff000 : fixmap
[ 0.000000] * 0xff800000..0xffc00000 : highmem PTEs
[ 0.000000] * 0xfde2b000..0xff800000 : early ioremap
[ 0.000000] * 0xf1000000..0xfde2b000 : vmalloc & ioremap
[ 0.000000] SLUB: HWalign=32, Order=0-3, MinObjects=0, CPUs=1, Nodes=1
full dmesg: http://aargh.no-ip.org/dmesg.txt
kernel config: http://aargh.no-ip.org/config.txt
> Thanks
> Christophe
Thanks!
^ permalink raw reply
* Re: [PATCH v4 39/63] Documentation: x86: convert topology.txt to reST
From: Changbin Du @ 2019-04-26 14:23 UTC (permalink / raw)
To: Mauro Carvalho Chehab
Cc: fenghua.yu, x86, Jonathan Corbet, linux-pci, linux-gpio,
linux-doc, rjw, linux-kernel, linux-acpi, mingo, Bjorn Helgaas,
tglx, linuxppc-dev, Changbin Du
In-Reply-To: <20190424144407.0df2c2f5@coco.lan>
On Wed, Apr 24, 2019 at 02:44:07PM -0300, Mauro Carvalho Chehab wrote:
> Em Wed, 24 Apr 2019 00:29:08 +0800
> Changbin Du <changbin.du@gmail.com> escreveu:
>
> > This converts the plain text documentation to reStructuredText format and
> > add it to Sphinx TOC tree. No essential content change.
> >
> > Signed-off-by: Changbin Du <changbin.du@gmail.com>
> > ---
> > Documentation/x86/index.rst | 1 +
> > Documentation/x86/topology.rst | 228 +++++++++++++++++++++++++++++++++
> > Documentation/x86/topology.txt | 217 -------------------------------
> > 3 files changed, 229 insertions(+), 217 deletions(-)
> > create mode 100644 Documentation/x86/topology.rst
> > delete mode 100644 Documentation/x86/topology.txt
>
> Why? Please preserve as much as possible from the original file...
> it is really hard to see what you're doing. Most of those x86
> files are already almost at ReST format (like this one). There's
> absolutely **no reason** why you would do so much radical changes
> that would below the 50% similarity threshold that would make git
> to recognize as a change on the same file!
>
My editor changed the indent. I need to redo this conversion. Thanks.
> I'll give a quick review on this one, but it is really hard to be
> sure that something is missing, when the similarity is too low.
>
> >
> > diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
> > index 8f08caf4fbbb..2033791e53bc 100644
> > --- a/Documentation/x86/index.rst
> > +++ b/Documentation/x86/index.rst
> > @@ -9,3 +9,4 @@ Linux x86 Support
> > :numbered:
> >
> > boot
> > + topology
> > diff --git a/Documentation/x86/topology.rst b/Documentation/x86/topology.rst
> > new file mode 100644
> > index 000000000000..1df5f56f4882
> > --- /dev/null
> > +++ b/Documentation/x86/topology.rst
> > @@ -0,0 +1,228 @@
> > +.. SPDX-License-Identifier: GPL-2.0
> > +
> > +============
> > +x86 Topology
> > +============
> > +
> > +This documents and clarifies the main aspects of x86 topology modelling and
> > +representation in the kernel. Update/change when doing changes to the
> > +respective code.
> > +
> > +The architecture-agnostic topology definitions are in
> > +Documentation/cputopology.txt. This file holds x86-specific
> > +differences/specialities which must not necessarily apply to the generic
> > +definitions. Thus, the way to read up on Linux topology on x86 is to start
> > +with the generic one and look at this one in parallel for the x86 specifics.
> > +
> > +Needless to say, code should use the generic functions - this file is *only*
> > +here to *document* the inner workings of x86 topology.
> > +
> > +Started by Thomas Gleixner <tglx@linutronix.de> and Borislav Petkov <bp@alien8.de>.
> > +
> > +The main aim of the topology facilities is to present adequate interfaces to
> > +code which needs to know/query/use the structure of the running system wrt
> > +threads, cores, packages, etc.
> > +
> > +The kernel does not care about the concept of physical sockets because a
> > +socket has no relevance to software. It's an electromechanical component. In
> > +the past a socket always contained a single package (see below), but with the
> > +advent of Multi Chip Modules (MCM) a socket can hold more than one package. So
> > +there might be still references to sockets in the code, but they are of
> > +historical nature and should be cleaned up.
> > +
> > +The topology of a system is described in the units of:
> > +
> > + - packages
> > + - cores
> > + - threads
> > +
> > +Package
> > +=======
> > +
> > +Packages contain a number of cores plus shared resources, e.g. DRAM
> > +controller, shared caches etc.
> > +
> > +AMD nomenclature for package is 'Node'.
> > +
> > +Package-related topology information in the kernel:
> > +
> > + - cpuinfo_x86.x86_max_cores:
> > +
> > + The number of cores in a package. This information is retrieved via CPUID.
> > +
> > + - cpuinfo_x86.phys_proc_id:
> > +
> > + The physical ID of the package. This information is retrieved via CPUID
> > + and deduced from the APIC IDs of the cores in the package.
> > +
> > + - cpuinfo_x86.logical_id:
> > +
> > + The logical ID of the package. As we do not trust BIOSes to enumerate the
> > + packages in a consistent way, we introduced the concept of logical package
> > + ID so we can sanely calculate the number of maximum possible packages in
> > + the system and have the packages enumerated linearly.
> > +
> > + - topology_max_packages():
> > +
> > + The maximum possible number of packages in the system. Helpful for per
> > + package facilities to preallocate per package information.
> > +
> > + - cpu_llc_id:
> > +
> > + A per-CPU variable containing:
> > +
> > + - On Intel, the first APIC ID of the list of CPUs sharing the Last Level
> > + Cache.
> > +
> > + - On AMD, the Node ID or Core Complex ID containing the Last Level
> > + Cache. In general, it is a number identifying an LLC uniquely on the
> > + system.
> > +
> > +Cores
> > +=====
> > +
> > +A core consists of 1 or more threads. It does not matter whether the threads
> > +are SMT- or CMT-type threads.
> > +
> > +AMDs nomenclature for a CMT core is "Compute Unit". The kernel always uses
> > +"core".
> > +
> > +Core-related topology information in the kernel:
> > +
> > + - smp_num_siblings:
> > +
> > + The number of threads in a core. The number of threads in a package can be
> > + calculated by::
> > +
> > + threads_per_package = cpuinfo_x86.x86_max_cores * smp_num_siblings
> > +
> > +
> > +Threads
> > +=======
> > +
> > +A thread is a single scheduling unit. It's the equivalent to a logical Linux
> > +CPU.
> > +
> > +AMDs nomenclature for CMT threads is "Compute Unit Core". The kernel always
> > +uses "thread".
> > +
> > +Thread-related topology information in the kernel:
> > +
> > + - topology_core_cpumask():
> > +
> > + The cpumask contains all online threads in the package to which a thread
> > + belongs.
> > +
> > + The number of online threads is also printed in /proc/cpuinfo "siblings."
> > +
> > + - topology_sibling_cpumask():
> > +
> > + The cpumask contains all online threads in the core to which a thread
> > + belongs.
> > +
> > + - topology_logical_package_id():
> > +
> > + The logical package ID to which a thread belongs.
> > +
> > + - topology_physical_package_id():
> > +
> > + The physical package ID to which a thread belongs.
> > +
> > + - topology_core_id();
> > +
> > + The ID of the core to which a thread belongs. It is also printed in /proc/cpuinfo
> > + "core_id."
> > +
> > +
> > +
> > +System topology examples
> > +========================
> > +
> > +.. note:: The alternative Linux CPU enumeration depends on how the BIOS
> > + enumerates the threads. Many BIOSes enumerate all threads 0 first and
> > + then all threads 1. That has the "advantage" that the logical Linux CPU
> > + numbers of threads 0 stay the same whether threads are enabled or not.
> > + That's merely an implementation detail and has no practical impact.
> > +
> > +1) Single Package, Single Core
> > +::
>
> I would just place the :: on the above line. Same applies to similar
> cases on this file.
>
Sure.
> > +
> > + [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
> > +
> > +2) Single Package, Dual Core
> > +
> > + a) One thread per core
> > + ::
> > +
> > + [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
> > + -> [core 1] -> [thread 0] -> Linux CPU 1
>
> Something got broken here.
>
> > +
> > + b) Two threads per core
> > + ::
> > +
> > + [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
> > + -> [thread 1] -> Linux CPU 1
> > + -> [core 1] -> [thread 0] -> Linux CPU 2
> > + -> [thread 1] -> Linux CPU 3
>
> And here... This one, for example, should be, instead:
>
> [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
> -> [thread 1] -> Linux CPU 1
> -> [core 1] -> [thread 0] -> Linux CPU 2
> -> [thread 1] -> Linux CPU 3
>
> Clearly there's something that it is messing with tabs on your
> x86 conversion.
>
Sorry for such mistake. I will check them one by one.
> I'll stop my review here, as it sounds pointless to review it,
> as there are too many broken whitespace stuff on your
> conversion.
>
> Thanks,
> Mauro
--
Cheers,
Changbin Du
^ permalink raw reply
* Re: [PATCH v10 15/18] dt-bindings: counter: ftm-quaddec
From: Rob Herring @ 2019-04-26 14:10 UTC (permalink / raw)
To: William Breathitt Gray
Cc: mark.rutland, benjamin.gaignard, linux-pwm, linux-iio,
patrick.havelange, thierry.reding, pmeerw, shawnguo, lars,
daniel.lezcano, linux-arm-kernel, devicetree, david, tglx,
fabrice.gasnier, esben, gregkh, linux-kernel, leoyang.li,
knaack.h, akpm, linuxppc-dev, jic23
In-Reply-To: <c852a16f5828a7e651e710dc95b66a5622a454d0.1554184734.git.vilhelm.gray@gmail.com>
On Tue, Apr 02, 2019 at 03:30:50PM +0900, William Breathitt Gray wrote:
> From: Patrick Havelange <patrick.havelange@essensium.com>
>
> FlexTimer quadrature decoder driver.
>
> Reviewed-by: Esben Haabendal <esben@haabendal.dk>
> Signed-off-by: Patrick Havelange <patrick.havelange@essensium.com>
> Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
> ---
> .../bindings/counter/ftm-quaddec.txt | 18 ++++++++++++++++++
> 1 file changed, 18 insertions(+)
> create mode 100644 Documentation/devicetree/bindings/counter/ftm-quaddec.txt
>
> diff --git a/Documentation/devicetree/bindings/counter/ftm-quaddec.txt b/Documentation/devicetree/bindings/counter/ftm-quaddec.txt
> new file mode 100644
> index 000000000000..4d18cd722074
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/counter/ftm-quaddec.txt
> @@ -0,0 +1,18 @@
> +FlexTimer Quadrature decoder counter
> +
> +This driver exposes a simple counter for the quadrature decoder mode.
> +
> +Required properties:
> +- compatible: Must be "fsl,ftm-quaddec".
This needs an SoC specific compatible.
> +- reg: Must be set to the memory region of the flextimer.
> +
> +Optional property:
> +- big-endian: Access the device registers in big-endian mode.
> +
> +Example:
> + counter0: counter@29d0000 {
> + compatible = "fsl,ftm-quaddec";
> + reg = <0x0 0x29d0000 0x0 0x10000>;
> + big-endian;
> + status = "disabled";
Don't show status in examples.
> + };
> --
> 2.21.0
>
^ permalink raw reply
* [PATCH 3/3] powerpc/powernv: remove dead NPU DMA code
From: Christoph Hellwig @ 2019-04-26 12:49 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman; +Cc: linuxppc-dev
In-Reply-To: <20190426124917.23789-1-hch@lst.de>
None of these routines were ever used since they were added to the
kernel.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
arch/powerpc/include/asm/book3s/64/mmu.h | 3 -
arch/powerpc/include/asm/powernv.h | 22 -
arch/powerpc/mm/mmu_context_book3s64.c | 2 -
arch/powerpc/platforms/powernv/npu-dma.c | 556 -----------------------
4 files changed, 583 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 1ceee000c18d..d35197d376fd 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -120,9 +120,6 @@ typedef struct {
/* Number of users of the external (Nest) MMU */
atomic_t copros;
- /* NPU NMMU context */
- struct npu_context *npu_context;
-
#ifdef CONFIG_PPC_MM_SLICES
/* SLB page size encodings*/
unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h
index 05b552418519..40f868c5e93c 100644
--- a/arch/powerpc/include/asm/powernv.h
+++ b/arch/powerpc/include/asm/powernv.h
@@ -11,35 +11,13 @@
#define _ASM_POWERNV_H
#ifdef CONFIG_PPC_POWERNV
-#define NPU2_WRITE 1
extern void powernv_set_nmmu_ptcr(unsigned long ptcr);
-extern struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
- unsigned long flags,
- void (*cb)(struct npu_context *, void *),
- void *priv);
-extern void pnv_npu2_destroy_context(struct npu_context *context,
- struct pci_dev *gpdev);
-extern int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea,
- unsigned long *flags, unsigned long *status,
- int count);
void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val);
void pnv_tm_init(void);
#else
static inline void powernv_set_nmmu_ptcr(unsigned long ptcr) { }
-static inline struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
- unsigned long flags,
- struct npu_context *(*cb)(struct npu_context *, void *),
- void *priv) { return ERR_PTR(-ENODEV); }
-static inline void pnv_npu2_destroy_context(struct npu_context *context,
- struct pci_dev *gpdev) { }
-
-static inline int pnv_npu2_handle_fault(struct npu_context *context,
- uintptr_t *ea, unsigned long *flags,
- unsigned long *status, int count) {
- return -ENODEV;
-}
static inline void pnv_tm_init(void) { }
#endif
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index f720c5cc0b5e..a5d45a57befa 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -117,8 +117,6 @@ static int radix__init_new_context(struct mm_struct *mm)
*/
asm volatile("ptesync;isync" : : : "memory");
- mm->context.npu_context = NULL;
-
return index;
}
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index dc23d9d2a7d9..5495d5b786aa 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -22,12 +22,6 @@
#include "pci.h"
-/*
- * spinlock to protect initialisation of an npu_context for a particular
- * mm_struct.
- */
-static DEFINE_SPINLOCK(npu_context_lock);
-
static struct pci_dev *get_pci_dev(struct device_node *dn)
{
struct pci_dn *pdn = PCI_DN(dn);
@@ -362,15 +356,6 @@ struct npu_comp {
/* An NPU descriptor, valid for POWER9 only */
struct npu {
int index;
- __be64 *mmio_atsd_regs[NV_NMMU_ATSD_REGS];
- unsigned int mmio_atsd_count;
-
- /* Bitmask for MMIO register usage */
- unsigned long mmio_atsd_usage;
-
- /* Do we need to explicitly flush the nest mmu? */
- bool nmmu_flush;
-
struct npu_comp npucomp;
};
@@ -627,534 +612,8 @@ struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe)
}
#endif /* CONFIG_IOMMU_API */
-/* Maximum number of nvlinks per npu */
-#define NV_MAX_LINKS 6
-
-/* Maximum index of npu2 hosts in the system. Always < NV_MAX_NPUS */
-static int max_npu2_index;
-
-struct npu_context {
- struct mm_struct *mm;
- struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS];
- struct mmu_notifier mn;
- struct kref kref;
- bool nmmu_flush;
-
- /* Callback to stop translation requests on a given GPU */
- void (*release_cb)(struct npu_context *context, void *priv);
-
- /*
- * Private pointer passed to the above callback for usage by
- * device drivers.
- */
- void *priv;
-};
-
-struct mmio_atsd_reg {
- struct npu *npu;
- int reg;
-};
-
-/*
- * Find a free MMIO ATSD register and mark it in use. Return -ENOSPC
- * if none are available.
- */
-static int get_mmio_atsd_reg(struct npu *npu)
-{
- int i;
-
- for (i = 0; i < npu->mmio_atsd_count; i++) {
- if (!test_bit(i, &npu->mmio_atsd_usage))
- if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage))
- return i;
- }
-
- return -ENOSPC;
-}
-
-static void put_mmio_atsd_reg(struct npu *npu, int reg)
-{
- clear_bit_unlock(reg, &npu->mmio_atsd_usage);
-}
-
-/* MMIO ATSD register offsets */
-#define XTS_ATSD_LAUNCH 0
-#define XTS_ATSD_AVA 1
-#define XTS_ATSD_STAT 2
-
-static unsigned long get_atsd_launch_val(unsigned long pid, unsigned long psize)
-{
- unsigned long launch = 0;
-
- if (psize == MMU_PAGE_COUNT) {
- /* IS set to invalidate entire matching PID */
- launch |= PPC_BIT(12);
- } else {
- /* AP set to invalidate region of psize */
- launch |= (u64)mmu_get_ap(psize) << PPC_BITLSHIFT(17);
- }
-
- /* PRS set to process-scoped */
- launch |= PPC_BIT(13);
-
- /* PID */
- launch |= pid << PPC_BITLSHIFT(38);
-
- /* Leave "No flush" (bit 39) 0 so every ATSD performs a flush */
-
- return launch;
-}
-
-static void mmio_atsd_regs_write(struct mmio_atsd_reg
- mmio_atsd_reg[NV_MAX_NPUS], unsigned long offset,
- unsigned long val)
-{
- struct npu *npu;
- int i, reg;
-
- for (i = 0; i <= max_npu2_index; i++) {
- reg = mmio_atsd_reg[i].reg;
- if (reg < 0)
- continue;
-
- npu = mmio_atsd_reg[i].npu;
- __raw_writeq_be(val, npu->mmio_atsd_regs[reg] + offset);
- }
-}
-
-static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
- unsigned long pid)
-{
- unsigned long launch = get_atsd_launch_val(pid, MMU_PAGE_COUNT);
-
- /* Invalidating the entire process doesn't use a va */
- mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_LAUNCH, launch);
-}
-
-static void mmio_invalidate_range(struct mmio_atsd_reg
- mmio_atsd_reg[NV_MAX_NPUS], unsigned long pid,
- unsigned long start, unsigned long psize)
-{
- unsigned long launch = get_atsd_launch_val(pid, psize);
-
- /* Write all VAs first */
- mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_AVA, start);
-
- /* Issue one barrier for all address writes */
- eieio();
-
- /* Launch */
- mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_LAUNCH, launch);
-}
-
-#define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
-
-static void mmio_invalidate_wait(
- struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
-{
- struct npu *npu;
- int i, reg;
-
- /* Wait for all invalidations to complete */
- for (i = 0; i <= max_npu2_index; i++) {
- if (mmio_atsd_reg[i].reg < 0)
- continue;
-
- /* Wait for completion */
- npu = mmio_atsd_reg[i].npu;
- reg = mmio_atsd_reg[i].reg;
- while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
- cpu_relax();
- }
-}
-
-/*
- * Acquires all the address translation shootdown (ATSD) registers required to
- * launch an ATSD on all links this npu_context is active on.
- */
-static void acquire_atsd_reg(struct npu_context *npu_context,
- struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
-{
- int i, j;
- struct npu *npu;
- struct pci_dev *npdev;
-
- for (i = 0; i <= max_npu2_index; i++) {
- mmio_atsd_reg[i].reg = -1;
- for (j = 0; j < NV_MAX_LINKS; j++) {
- /*
- * There are no ordering requirements with respect to
- * the setup of struct npu_context, but to ensure
- * consistent behaviour we need to ensure npdev[][] is
- * only read once.
- */
- npdev = READ_ONCE(npu_context->npdev[i][j]);
- if (!npdev)
- continue;
-
- npu = pci_bus_to_host(npdev->bus)->npu;
- if (!npu)
- continue;
-
- mmio_atsd_reg[i].npu = npu;
- mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
- while (mmio_atsd_reg[i].reg < 0) {
- mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
- cpu_relax();
- }
- break;
- }
- }
-}
-
-/*
- * Release previously acquired ATSD registers. To avoid deadlocks the registers
- * must be released in the same order they were acquired above in
- * acquire_atsd_reg.
- */
-static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
-{
- int i;
-
- for (i = 0; i <= max_npu2_index; i++) {
- /*
- * We can't rely on npu_context->npdev[][] being the same here
- * as when acquire_atsd_reg() was called, hence we use the
- * values stored in mmio_atsd_reg during the acquire phase
- * rather than re-reading npdev[][].
- */
- if (mmio_atsd_reg[i].reg < 0)
- continue;
-
- put_mmio_atsd_reg(mmio_atsd_reg[i].npu, mmio_atsd_reg[i].reg);
- }
-}
-
-/*
- * Invalidate a virtual address range
- */
-static void mmio_invalidate(struct npu_context *npu_context,
- unsigned long start, unsigned long size)
-{
- struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
- unsigned long pid = npu_context->mm->context.id;
- unsigned long atsd_start = 0;
- unsigned long end = start + size - 1;
- int atsd_psize = MMU_PAGE_COUNT;
-
- /*
- * Convert the input range into one of the supported sizes. If the range
- * doesn't fit, use the next larger supported size. Invalidation latency
- * is high, so over-invalidation is preferred to issuing multiple
- * invalidates.
- *
- * A 4K page size isn't supported by NPU/GPU ATS, so that case is
- * ignored.
- */
- if (size == SZ_64K) {
- atsd_start = start;
- atsd_psize = MMU_PAGE_64K;
- } else if (ALIGN_DOWN(start, SZ_2M) == ALIGN_DOWN(end, SZ_2M)) {
- atsd_start = ALIGN_DOWN(start, SZ_2M);
- atsd_psize = MMU_PAGE_2M;
- } else if (ALIGN_DOWN(start, SZ_1G) == ALIGN_DOWN(end, SZ_1G)) {
- atsd_start = ALIGN_DOWN(start, SZ_1G);
- atsd_psize = MMU_PAGE_1G;
- }
-
- if (npu_context->nmmu_flush)
- /*
- * Unfortunately the nest mmu does not support flushing specific
- * addresses so we have to flush the whole mm once before
- * shooting down the GPU translation.
- */
- flush_all_mm(npu_context->mm);
-
- /*
- * Loop over all the NPUs this process is active on and launch
- * an invalidate.
- */
- acquire_atsd_reg(npu_context, mmio_atsd_reg);
-
- if (atsd_psize == MMU_PAGE_COUNT)
- mmio_invalidate_pid(mmio_atsd_reg, pid);
- else
- mmio_invalidate_range(mmio_atsd_reg, pid, atsd_start,
- atsd_psize);
-
- mmio_invalidate_wait(mmio_atsd_reg);
-
- /*
- * The GPU requires two flush ATSDs to ensure all entries have been
- * flushed. We use PID 0 as it will never be used for a process on the
- * GPU.
- */
- mmio_invalidate_pid(mmio_atsd_reg, 0);
- mmio_invalidate_wait(mmio_atsd_reg);
- mmio_invalidate_pid(mmio_atsd_reg, 0);
- mmio_invalidate_wait(mmio_atsd_reg);
-
- release_atsd_reg(mmio_atsd_reg);
-}
-
-static void pnv_npu2_mn_release(struct mmu_notifier *mn,
- struct mm_struct *mm)
-{
- struct npu_context *npu_context = mn_to_npu_context(mn);
-
- /* Call into device driver to stop requests to the NMMU */
- if (npu_context->release_cb)
- npu_context->release_cb(npu_context, npu_context->priv);
-
- /*
- * There should be no more translation requests for this PID, but we
- * need to ensure any entries for it are removed from the TLB.
- */
- mmio_invalidate(npu_context, 0, ~0UL);
-}
-
-static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- struct npu_context *npu_context = mn_to_npu_context(mn);
- mmio_invalidate(npu_context, start, end - start);
-}
-
-static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
- .release = pnv_npu2_mn_release,
- .invalidate_range = pnv_npu2_mn_invalidate_range,
-};
-
-/*
- * Call into OPAL to setup the nmmu context for the current task in
- * the NPU. This must be called to setup the context tables before the
- * GPU issues ATRs. pdev should be a pointed to PCIe GPU device.
- *
- * A release callback should be registered to allow a device driver to
- * be notified that it should not launch any new translation requests
- * as the final TLB invalidate is about to occur.
- *
- * Returns an error if there no contexts are currently available or a
- * npu_context which should be passed to pnv_npu2_handle_fault().
- *
- * mmap_sem must be held in write mode and must not be called from interrupt
- * context.
- */
-struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
- unsigned long flags,
- void (*cb)(struct npu_context *, void *),
- void *priv)
-{
- int rc;
- u32 nvlink_index;
- struct device_node *nvlink_dn;
- struct mm_struct *mm = current->mm;
- struct npu *npu;
- struct npu_context *npu_context;
- struct pci_controller *hose;
-
- /*
- * At present we don't support GPUs connected to multiple NPUs and I'm
- * not sure the hardware does either.
- */
- struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
-
- if (!npdev)
- /* No nvlink associated with this GPU device */
- return ERR_PTR(-ENODEV);
-
- /* We only support DR/PR/HV in pnv_npu2_map_lpar_dev() */
- if (flags & ~(MSR_DR | MSR_PR | MSR_HV))
- return ERR_PTR(-EINVAL);
-
- nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
- if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
- &nvlink_index)))
- return ERR_PTR(-ENODEV);
-
- if (!mm || mm->context.id == 0) {
- /*
- * Kernel thread contexts are not supported and context id 0 is
- * reserved on the GPU.
- */
- return ERR_PTR(-EINVAL);
- }
-
- hose = pci_bus_to_host(npdev->bus);
- npu = hose->npu;
- if (!npu)
- return ERR_PTR(-ENODEV);
-
- /*
- * We store the npu pci device so we can more easily get at the
- * associated npus.
- */
- spin_lock(&npu_context_lock);
- npu_context = mm->context.npu_context;
- if (npu_context) {
- if (npu_context->release_cb != cb ||
- npu_context->priv != priv) {
- spin_unlock(&npu_context_lock);
- return ERR_PTR(-EINVAL);
- }
-
- WARN_ON(!kref_get_unless_zero(&npu_context->kref));
- }
- spin_unlock(&npu_context_lock);
-
- if (!npu_context) {
- /*
- * We can set up these fields without holding the
- * npu_context_lock as the npu_context hasn't been returned to
- * the caller meaning it can't be destroyed. Parallel allocation
- * is protected against by mmap_sem.
- */
- rc = -ENOMEM;
- npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL);
- if (npu_context) {
- kref_init(&npu_context->kref);
- npu_context->mm = mm;
- npu_context->mn.ops = &nv_nmmu_notifier_ops;
- rc = __mmu_notifier_register(&npu_context->mn, mm);
- }
-
- if (rc) {
- kfree(npu_context);
- return ERR_PTR(rc);
- }
-
- mm->context.npu_context = npu_context;
- }
-
- npu_context->release_cb = cb;
- npu_context->priv = priv;
-
- /*
- * npdev is a pci_dev pointer setup by the PCI code. We assign it to
- * npdev[][] to indicate to the mmu notifiers that an invalidation
- * should also be sent over this nvlink. The notifiers don't use any
- * other fields in npu_context, so we just need to ensure that when they
- * deference npu_context->npdev[][] it is either a valid pointer or
- * NULL.
- */
- WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev);
-
- if (!npu->nmmu_flush) {
- /*
- * If we're not explicitly flushing ourselves we need to mark
- * the thread for global flushes
- */
- npu_context->nmmu_flush = false;
- mm_context_add_copro(mm);
- } else
- npu_context->nmmu_flush = true;
-
- return npu_context;
-}
-EXPORT_SYMBOL(pnv_npu2_init_context);
-
-static void pnv_npu2_release_context(struct kref *kref)
-{
- struct npu_context *npu_context =
- container_of(kref, struct npu_context, kref);
-
- if (!npu_context->nmmu_flush)
- mm_context_remove_copro(npu_context->mm);
-
- npu_context->mm->context.npu_context = NULL;
-}
-
-/*
- * Destroy a context on the given GPU. May free the npu_context if it is no
- * longer active on any GPUs. Must not be called from interrupt context.
- */
-void pnv_npu2_destroy_context(struct npu_context *npu_context,
- struct pci_dev *gpdev)
-{
- int removed;
- struct npu *npu;
- struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
- struct device_node *nvlink_dn;
- u32 nvlink_index;
- struct pci_controller *hose;
-
- if (WARN_ON(!npdev))
- return;
-
- hose = pci_bus_to_host(npdev->bus);
- npu = hose->npu;
- if (!npu)
- return;
- nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
- if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
- &nvlink_index)))
- return;
- WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
- spin_lock(&npu_context_lock);
- removed = kref_put(&npu_context->kref, pnv_npu2_release_context);
- spin_unlock(&npu_context_lock);
-
- /*
- * We need to do this outside of pnv_npu2_release_context so that it is
- * outside the spinlock as mmu_notifier_destroy uses SRCU.
- */
- if (removed) {
- mmu_notifier_unregister(&npu_context->mn,
- npu_context->mm);
-
- kfree(npu_context);
- }
-
-}
-EXPORT_SYMBOL(pnv_npu2_destroy_context);
-
-/*
- * Assumes mmap_sem is held for the contexts associated mm.
- */
-int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea,
- unsigned long *flags, unsigned long *status, int count)
-{
- u64 rc = 0, result = 0;
- int i, is_write;
- struct page *page[1];
- const char __user *u;
- char c;
-
- /* mmap_sem should be held so the struct_mm must be present */
- struct mm_struct *mm = context->mm;
-
- WARN_ON(!rwsem_is_locked(&mm->mmap_sem));
-
- for (i = 0; i < count; i++) {
- is_write = flags[i] & NPU2_WRITE;
- rc = get_user_pages_remote(NULL, mm, ea[i], 1,
- is_write ? FOLL_WRITE : 0,
- page, NULL, NULL);
-
- if (rc != 1) {
- status[i] = rc;
- result = -EFAULT;
- continue;
- }
-
- /* Make sure partition scoped tree gets a pte */
- u = page_address(page[0]);
- if (__get_user(c, u))
- result = -EFAULT;
-
- status[i] = 0;
- put_page(page[0]);
- }
-
- return result;
-}
-EXPORT_SYMBOL(pnv_npu2_handle_fault);
-
int pnv_npu2_init(struct pci_controller *hose)
{
- unsigned int i;
- u64 mmio_atsd;
static int npu_index;
struct npu *npu;
int ret;
@@ -1163,33 +622,18 @@ int pnv_npu2_init(struct pci_controller *hose)
if (!npu)
return -ENOMEM;
- npu->nmmu_flush = of_property_read_bool(hose->dn, "ibm,nmmu-flush");
-
- for (i = 0; i < ARRAY_SIZE(npu->mmio_atsd_regs) &&
- !of_property_read_u64_index(hose->dn, "ibm,mmio-atsd",
- i, &mmio_atsd); i++)
- npu->mmio_atsd_regs[i] = ioremap(mmio_atsd, 32);
-
- pr_info("NPU%d: Found %d MMIO ATSD registers", hose->global_number, i);
- npu->mmio_atsd_count = i;
- npu->mmio_atsd_usage = 0;
npu_index++;
if (WARN_ON(npu_index >= NV_MAX_NPUS)) {
ret = -ENOSPC;
goto fail_exit;
}
- max_npu2_index = npu_index;
npu->index = npu_index;
hose->npu = npu;
return 0;
fail_exit:
- for (i = 0; i < npu->mmio_atsd_count; ++i)
- iounmap(npu->mmio_atsd_regs[i]);
-
kfree(npu);
-
return ret;
}
--
2.20.1
^ permalink raw reply related
* [PATCH 1/3] powerpc/powernv: remove the unused pnv_pci_set_p2p function
From: Christoph Hellwig @ 2019-04-26 12:49 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman; +Cc: linuxppc-dev
In-Reply-To: <20190426124917.23789-1-hch@lst.de>
This function has never been used since it has been added to the tree.
We also now have proper PCIe P2P APIs in the core kernel, and any new
P2P support should be using those.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
arch/powerpc/include/asm/opal.h | 2 -
arch/powerpc/include/asm/pnv-pci.h | 2 -
arch/powerpc/platforms/powernv/opal-call.c | 1 -
arch/powerpc/platforms/powernv/pci.c | 74 ----------------------
arch/powerpc/platforms/powernv/pci.h | 5 --
5 files changed, 84 deletions(-)
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index a55b01c90bb1..16c67f4eeb71 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -279,8 +279,6 @@ int64_t opal_xive_allocate_irq(uint32_t chip_id);
int64_t opal_xive_free_irq(uint32_t girq);
int64_t opal_xive_sync(uint32_t type, uint32_t id);
int64_t opal_xive_dump(uint32_t type, uint32_t id);
-int64_t opal_pci_set_p2p(uint64_t phb_init, uint64_t phb_target,
- uint64_t desc, uint16_t pe_number);
int64_t opal_imc_counters_init(uint32_t type, uint64_t address,
uint64_t cpu_pir);
diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h
index 630eb8b1b7ed..9fcb0bc462c6 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -26,8 +26,6 @@ extern int pnv_pci_get_presence_state(uint64_t id, uint8_t *state);
extern int pnv_pci_get_power_state(uint64_t id, uint8_t *state);
extern int pnv_pci_set_power_state(uint64_t id, uint8_t state,
struct opal_msg *msg);
-extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target,
- u64 desc);
extern int pnv_pci_enable_tunnel(struct pci_dev *dev, uint64_t *asnind);
extern int pnv_pci_disable_tunnel(struct pci_dev *dev);
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
index daad8c45c8e7..a89f0e31b468 100644
--- a/arch/powerpc/platforms/powernv/opal-call.c
+++ b/arch/powerpc/platforms/powernv/opal-call.c
@@ -267,7 +267,6 @@ OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR);
OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT);
OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START);
OPAL_CALL(opal_imc_counters_stop, OPAL_IMC_COUNTERS_STOP);
-OPAL_CALL(opal_pci_set_p2p, OPAL_PCI_SET_P2P);
OPAL_CALL(opal_get_powercap, OPAL_GET_POWERCAP);
OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP);
OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index ef9448a907c6..8d28f2932c3b 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -38,7 +38,6 @@
#include "powernv.h"
#include "pci.h"
-static DEFINE_MUTEX(p2p_mutex);
static DEFINE_MUTEX(tunnel_mutex);
int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id)
@@ -861,79 +860,6 @@ void pnv_pci_dma_bus_setup(struct pci_bus *bus)
}
}
-int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target, u64 desc)
-{
- struct pci_controller *hose;
- struct pnv_phb *phb_init, *phb_target;
- struct pnv_ioda_pe *pe_init;
- int rc;
-
- if (!opal_check_token(OPAL_PCI_SET_P2P))
- return -ENXIO;
-
- hose = pci_bus_to_host(initiator->bus);
- phb_init = hose->private_data;
-
- hose = pci_bus_to_host(target->bus);
- phb_target = hose->private_data;
-
- pe_init = pnv_ioda_get_pe(initiator);
- if (!pe_init)
- return -ENODEV;
-
- /*
- * Configuring the initiator's PHB requires to adjust its
- * TVE#1 setting. Since the same device can be an initiator
- * several times for different target devices, we need to keep
- * a reference count to know when we can restore the default
- * bypass setting on its TVE#1 when disabling. Opal is not
- * tracking PE states, so we add a reference count on the PE
- * in linux.
- *
- * For the target, the configuration is per PHB, so we keep a
- * target reference count on the PHB.
- */
- mutex_lock(&p2p_mutex);
-
- if (desc & OPAL_PCI_P2P_ENABLE) {
- /* always go to opal to validate the configuration */
- rc = opal_pci_set_p2p(phb_init->opal_id, phb_target->opal_id,
- desc, pe_init->pe_number);
-
- if (rc != OPAL_SUCCESS) {
- rc = -EIO;
- goto out;
- }
-
- pe_init->p2p_initiator_count++;
- phb_target->p2p_target_count++;
- } else {
- if (!pe_init->p2p_initiator_count ||
- !phb_target->p2p_target_count) {
- rc = -EINVAL;
- goto out;
- }
-
- if (--pe_init->p2p_initiator_count == 0)
- pnv_pci_ioda2_set_bypass(pe_init, true);
-
- if (--phb_target->p2p_target_count == 0) {
- rc = opal_pci_set_p2p(phb_init->opal_id,
- phb_target->opal_id, desc,
- pe_init->pe_number);
- if (rc != OPAL_SUCCESS) {
- rc = -EIO;
- goto out;
- }
- }
- }
- rc = 0;
-out:
- mutex_unlock(&p2p_mutex);
- return rc;
-}
-EXPORT_SYMBOL_GPL(pnv_pci_set_p2p);
-
struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev)
{
struct pci_controller *hose = pci_bus_to_host(dev->bus);
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 8e36da379252..fbec1c76d7a7 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -78,9 +78,6 @@ struct pnv_ioda_pe {
struct pnv_ioda_pe *master;
struct list_head slaves;
- /* PCI peer-to-peer*/
- int p2p_initiator_count;
-
/* Link in list of PE#s */
struct list_head list;
};
@@ -171,8 +168,6 @@ struct pnv_phb {
/* PHB and hub diagnostics */
unsigned int diag_data_size;
u8 *diag_data;
-
- int p2p_target_count;
};
extern struct pci_ops pnv_pci_ops;
--
2.20.1
^ permalink raw reply related
* remove dead powernv code
From: Christoph Hellwig @ 2019-04-26 12:49 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman; +Cc: linuxppc-dev
Hi all,
the powerpc powernv port has a fairly large chunk of code that never
had any upstream user. We generally strive to not keep dead code
around, and this was affirmed at least years Maintainer summit.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox