* [PATCH v8 11/12] mm/vmalloc: Hugepage vmalloc mappings
From: Nicholas Piggin @ 2020-11-28 15:25 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, linux-kernel, Nicholas Piggin, Christoph Hellwig,
Zefan Li, Jonathan Cameron, linuxppc-dev
In-Reply-To: <20201128152559.999540-1-npiggin@gmail.com>
Support huge page vmalloc mappings. Config option HAVE_ARCH_HUGE_VMALLOC
enables support on architectures that define HAVE_ARCH_HUGE_VMAP and
supports PMD sized vmap mappings.
vmalloc will attempt to allocate PMD-sized pages if allocating PMD size
or larger, and fall back to small pages if that was unsuccessful.
Allocations that do not use PAGE_KERNEL prot are not permitted to use
huge pages, because not all callers expect this (e.g., module
allocations vs strict module rwx).
When hugepage vmalloc mappings are enabled in the next patch, this
reduces TLB misses by nearly 30x on a `git diff` workload on a 2-node
POWER9 (59,800 -> 2,100) and reduces CPU cycles by 0.54%.
This can result in more internal fragmentation and memory overhead for a
given allocation, an option nohugevmalloc is added to disable at boot.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/Kconfig | 4 +
include/linux/vmalloc.h | 1 +
mm/page_alloc.c | 5 +-
mm/vmalloc.c | 190 ++++++++++++++++++++++++++++++----------
4 files changed, 154 insertions(+), 46 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 56b6ccc0e32d..94f5093fa6f5 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -662,6 +662,10 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
config HAVE_ARCH_HUGE_VMAP
bool
+config HAVE_ARCH_HUGE_VMALLOC
+ depends on HAVE_ARCH_HUGE_VMAP
+ bool
+
config ARCH_WANT_HUGE_PMD_SHARE
bool
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index a5ae791dc1e0..72b106c2b4da 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -59,6 +59,7 @@ struct vm_struct {
unsigned long size;
unsigned long flags;
struct page **pages;
+ unsigned int page_order;
unsigned int nr_pages;
phys_addr_t phys_addr;
const void *caller;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index eaa227a479e4..35bdc411bd49 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -70,6 +70,7 @@
#include <linux/psi.h>
#include <linux/padata.h>
#include <linux/khugepaged.h>
+#include <linux/vmalloc.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@@ -8171,6 +8172,7 @@ void *__init alloc_large_system_hash(const char *tablename,
void *table = NULL;
gfp_t gfp_flags;
bool virt;
+ bool huge;
/* allow the kernel cmdline to have a say */
if (!numentries) {
@@ -8238,6 +8240,7 @@ void *__init alloc_large_system_hash(const char *tablename,
} else if (get_order(size) >= MAX_ORDER || hashdist) {
table = __vmalloc(size, gfp_flags);
virt = true;
+ huge = (find_vm_area(table)->page_order > 0);
} else {
/*
* If bucketsize is not a power-of-two, we may free
@@ -8254,7 +8257,7 @@ void *__init alloc_large_system_hash(const char *tablename,
pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
- virt ? "vmalloc" : "linear");
+ virt ? (huge ? "vmalloc hugepage" : "vmalloc") : "linear");
if (_hash_shift)
*_hash_shift = log2qty;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ee9c3bee67f5..f650b26a0e83 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -42,6 +42,19 @@
#include "internal.h"
#include "pgalloc-track.h"
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
+static bool __ro_after_init vmap_allow_huge = true;
+
+static int __init set_nohugevmalloc(char *str)
+{
+ vmap_allow_huge = false;
+ return 0;
+}
+early_param("nohugevmalloc", set_nohugevmalloc);
+#else /* CONFIG_HAVE_ARCH_HUGE_VMALLOC */
+static const bool vmap_allow_huge = false;
+#endif /* CONFIG_HAVE_ARCH_HUGE_VMALLOC */
+
bool is_vmalloc_addr(const void *x)
{
unsigned long addr = (unsigned long)x;
@@ -477,31 +490,12 @@ static int vmap_pages_p4d_range(pgd_t *pgd, unsigned long addr,
return 0;
}
-/**
- * map_kernel_range_noflush - map kernel VM area with the specified pages
- * @addr: start of the VM area to map
- * @size: size of the VM area to map
- * @prot: page protection flags to use
- * @pages: pages to map
- *
- * Map PFN_UP(@size) pages at @addr. The VM area @addr and @size specify should
- * have been allocated using get_vm_area() and its friends.
- *
- * NOTE:
- * This function does NOT do any cache flushing. The caller is responsible for
- * calling flush_cache_vmap() on to-be-mapped areas before calling this
- * function.
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int map_kernel_range_noflush(unsigned long addr, unsigned long size,
- pgprot_t prot, struct page **pages)
+static int vmap_small_pages_range_noflush(unsigned long addr, unsigned long end,
+ pgprot_t prot, struct page **pages)
{
unsigned long start = addr;
- unsigned long end = addr + size;
- unsigned long next;
pgd_t *pgd;
+ unsigned long next;
int err = 0;
int nr = 0;
pgtbl_mod_mask mask = 0;
@@ -523,6 +517,65 @@ int map_kernel_range_noflush(unsigned long addr, unsigned long size,
return 0;
}
+static int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
+ pgprot_t prot, struct page **pages, unsigned int page_shift)
+{
+ unsigned int i, nr = (end - addr) >> PAGE_SHIFT;
+
+ WARN_ON(page_shift < PAGE_SHIFT);
+
+ if (page_shift == PAGE_SHIFT)
+ return vmap_small_pages_range_noflush(addr, end, prot, pages);
+
+ for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) {
+ int err;
+
+ err = vmap_range_noflush(addr, addr + (1UL << page_shift),
+ __pa(page_address(pages[i])), prot,
+ page_shift);
+ if (err)
+ return err;
+
+ addr += 1UL << page_shift;
+ }
+
+ return 0;
+}
+
+static int vmap_pages_range(unsigned long addr, unsigned long end,
+ pgprot_t prot, struct page **pages, unsigned int page_shift)
+{
+ int err;
+
+ err = vmap_pages_range_noflush(addr, end, prot, pages, page_shift);
+ flush_cache_vmap(addr, end);
+ return err;
+}
+
+/**
+ * map_kernel_range_noflush - map kernel VM area with the specified pages
+ * @addr: start of the VM area to map
+ * @size: size of the VM area to map
+ * @prot: page protection flags to use
+ * @pages: pages to map
+ *
+ * Map PFN_UP(@size) pages at @addr. The VM area @addr and @size specify should
+ * have been allocated using get_vm_area() and its friends.
+ *
+ * NOTE:
+ * This function does NOT do any cache flushing. The caller is responsible for
+ * calling flush_cache_vmap() on to-be-mapped areas before calling this
+ * function.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int map_kernel_range_noflush(unsigned long addr, unsigned long size,
+ pgprot_t prot, struct page **pages)
+{
+ return vmap_pages_range_noflush(addr, addr + size, prot, pages, PAGE_SHIFT);
+}
+
int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot,
struct page **pages)
{
@@ -2400,6 +2453,7 @@ static inline void set_area_direct_map(const struct vm_struct *area,
{
int i;
+ /* HUGE_VMALLOC passes small pages to set_direct_map */
for (i = 0; i < area->nr_pages; i++)
if (page_address(area->pages[i]))
set_direct_map(area->pages[i]);
@@ -2433,11 +2487,12 @@ static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages)
* map. Find the start and end range of the direct mappings to make sure
* the vm_unmap_aliases() flush includes the direct map.
*/
- for (i = 0; i < area->nr_pages; i++) {
+ for (i = 0; i < area->nr_pages; i += 1U << area->page_order) {
unsigned long addr = (unsigned long)page_address(area->pages[i]);
if (addr) {
+ unsigned long page_size = PAGE_SIZE << area->page_order;
start = min(addr, start);
- end = max(addr + PAGE_SIZE, end);
+ end = max(addr + page_size, end);
flush_dmap = 1;
}
}
@@ -2480,11 +2535,11 @@ static void __vunmap(const void *addr, int deallocate_pages)
if (deallocate_pages) {
int i;
- for (i = 0; i < area->nr_pages; i++) {
+ for (i = 0; i < area->nr_pages; i += 1U << area->page_order) {
struct page *page = area->pages[i];
BUG_ON(!page);
- __free_pages(page, 0);
+ __free_pages(page, area->page_order);
}
atomic_long_sub(area->nr_pages, &nr_vmalloc_pages);
@@ -2674,12 +2729,17 @@ EXPORT_SYMBOL_GPL(vmap_pfn);
#endif /* CONFIG_VMAP_PFN */
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
- pgprot_t prot, int node)
+ pgprot_t prot, unsigned int page_shift,
+ int node)
{
const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
- unsigned int nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
- unsigned int array_size = nr_pages * sizeof(struct page *), i;
+ unsigned int page_order = page_shift - PAGE_SHIFT;
+ unsigned long addr = (unsigned long)area->addr;
+ unsigned long size = get_vm_area_size(area);
+ unsigned int nr_small_pages = size >> PAGE_SHIFT;
+ unsigned int array_size = nr_small_pages * sizeof(struct page *);
struct page **pages;
+ unsigned int i;
gfp_mask |= __GFP_NOWARN;
if (!(gfp_mask & (GFP_DMA | GFP_DMA32)))
@@ -2700,30 +2760,35 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
}
area->pages = pages;
- area->nr_pages = nr_pages;
+ area->nr_pages = nr_small_pages;
+ area->page_order = page_order;
- for (i = 0; i < area->nr_pages; i++) {
+ /*
+ * Careful, we allocate and map page_order pages, but tracking is done
+ * per PAGE_SIZE page so as to keep the vm_struct APIs independent of
+ * the physical/mapped size.
+ */
+ for (i = 0; i < area->nr_pages; i += 1U << page_order) {
struct page *page;
+ int p;
- if (node == NUMA_NO_NODE)
- page = alloc_page(gfp_mask);
- else
- page = alloc_pages_node(node, gfp_mask, 0);
-
+ page = alloc_pages_node(node, gfp_mask, page_order);
if (unlikely(!page)) {
/* Successfully allocated i pages, free them in __vfree() */
area->nr_pages = i;
atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
goto fail;
}
- area->pages[i] = page;
+
+ for (p = 0; p < (1U << page_order); p++)
+ area->pages[i + p] = page + p;
+
if (gfpflags_allow_blocking(gfp_mask))
cond_resched();
}
atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
- if (map_kernel_range((unsigned long)area->addr, get_vm_area_size(area),
- prot, pages) < 0)
+ if (vmap_pages_range(addr, addr + size, prot, pages, page_shift) < 0)
goto fail;
return area->addr;
@@ -2731,7 +2796,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
fail:
warn_alloc(gfp_mask, NULL,
"vmalloc: allocation failure, allocated %ld of %ld bytes",
- (area->nr_pages*PAGE_SIZE), area->size);
+ (area->nr_pages*PAGE_SIZE), size);
__vfree(area->addr);
return NULL;
}
@@ -2762,19 +2827,43 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
struct vm_struct *area;
void *addr;
unsigned long real_size = size;
+ unsigned long real_align = align;
+ unsigned int shift = PAGE_SHIFT;
- size = PAGE_ALIGN(size);
if (!size || (size >> PAGE_SHIFT) > totalram_pages())
goto fail;
- area = __get_vm_area_node(real_size, align, VM_ALLOC | VM_UNINITIALIZED |
+ if (vmap_allow_huge && arch_vmap_pmd_supported(prot) &&
+ (pgprot_val(prot) == pgprot_val(PAGE_KERNEL))) {
+ unsigned long size_per_node;
+
+ /*
+ * Try huge pages. Only try for PAGE_KERNEL allocations,
+ * others like modules don't yet expect huge pages in
+ * their allocations due to apply_to_page_range not
+ * supporting them.
+ */
+
+ size_per_node = size;
+ if (node == NUMA_NO_NODE)
+ size_per_node /= num_online_nodes();
+ if (size_per_node >= PMD_SIZE) {
+ shift = PMD_SHIFT;
+ align = max(real_align, 1UL << shift);
+ size = ALIGN(real_size, 1UL << shift);
+ }
+ }
+
+again:
+ size = PAGE_ALIGN(size);
+ area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED |
vm_flags, start, end, node, gfp_mask, caller);
if (!area)
goto fail;
- addr = __vmalloc_area_node(area, gfp_mask, prot, node);
+ addr = __vmalloc_area_node(area, gfp_mask, prot, shift, node);
if (!addr)
- return NULL;
+ goto fail;
/*
* In this function, newly allocated vm_struct has VM_UNINITIALIZED
@@ -2788,8 +2877,19 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
return addr;
fail:
- warn_alloc(gfp_mask, NULL,
+ if (shift > PAGE_SHIFT) {
+ free_vm_area(area);
+ shift = PAGE_SHIFT;
+ align = real_align;
+ size = real_size;
+ goto again;
+ }
+
+ if (!area) {
+ /* Warn for area allocation, page allocations already warn */
+ warn_alloc(gfp_mask, NULL,
"vmalloc: allocation failure: %lu bytes", real_size);
+ }
return NULL;
}
--
2.23.0
^ permalink raw reply related
* [PATCH v8 08/12] x86: inline huge vmap supported functions
From: Nicholas Piggin @ 2020-11-28 15:25 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, H. Peter Anvin, x86, linux-kernel, Nicholas Piggin,
Christoph Hellwig, Zefan Li, Borislav Petkov, Jonathan Cameron,
Thomas Gleixner, linuxppc-dev, Ingo Molnar
In-Reply-To: <20201128152559.999540-1-npiggin@gmail.com>
This allows unsupported levels to be constant folded away, and so
p4d_free_pud_page can be removed because it's no longer linked to.
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: x86@kernel.org
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/x86/include/asm/vmalloc.h | 22 +++++++++++++++++++---
arch/x86/mm/ioremap.c | 19 -------------------
arch/x86/mm/pgtable.c | 13 -------------
3 files changed, 19 insertions(+), 35 deletions(-)
diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h
index 094ea2b565f3..e714b00fc0ca 100644
--- a/arch/x86/include/asm/vmalloc.h
+++ b/arch/x86/include/asm/vmalloc.h
@@ -1,13 +1,29 @@
#ifndef _ASM_X86_VMALLOC_H
#define _ASM_X86_VMALLOC_H
+#include <asm/cpufeature.h>
#include <asm/page.h>
#include <asm/pgtable_areas.h>
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
-bool arch_vmap_p4d_supported(pgprot_t prot);
-bool arch_vmap_pud_supported(pgprot_t prot);
-bool arch_vmap_pmd_supported(pgprot_t prot);
+static inline bool arch_vmap_p4d_supported(pgprot_t prot)
+{
+ return false;
+}
+
+static inline bool arch_vmap_pud_supported(pgprot_t prot)
+{
+#ifdef CONFIG_X86_64
+ return boot_cpu_has(X86_FEATURE_GBPAGES);
+#else
+ return false;
+#endif
+}
+
+static inline bool arch_vmap_pmd_supported(pgprot_t prot)
+{
+ return boot_cpu_has(X86_FEATURE_PSE);
+}
#endif
#endif /* _ASM_X86_VMALLOC_H */
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 762b5ff4edad..12c686c65ea9 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -481,25 +481,6 @@ void iounmap(volatile void __iomem *addr)
}
EXPORT_SYMBOL(iounmap);
-bool arch_vmap_p4d_supported(pgprot_t prot)
-{
- return false;
-}
-
-bool arch_vmap_pud_supported(pgprot_t prot)
-{
-#ifdef CONFIG_X86_64
- return boot_cpu_has(X86_FEATURE_GBPAGES);
-#else
- return false;
-#endif
-}
-
-bool arch_vmap_pmd_supported(pgprot_t prot)
-{
- return boot_cpu_has(X86_FEATURE_PSE);
-}
-
/*
* Convert a physical pointer to a virtual kernel pointer for /dev/mem
* access
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index dfd82f51ba66..801c418ee97d 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -780,14 +780,6 @@ int pmd_clear_huge(pmd_t *pmd)
return 0;
}
-/*
- * Until we support 512GB pages, skip them in the vmap area.
- */
-int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
-{
- return 0;
-}
-
#ifdef CONFIG_X86_64
/**
* pud_free_pmd_page - Clear pud entry and free pmd page.
@@ -859,11 +851,6 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
#else /* !CONFIG_X86_64 */
-int pud_free_pmd_page(pud_t *pud, unsigned long addr)
-{
- return pud_none(*pud);
-}
-
/*
* Disable free page handling on x86-PAE. This assures that ioremap()
* does not update sync'd pmd entries. See vmalloc_sync_one().
--
2.23.0
^ permalink raw reply related
* [PATCH v8 10/12] mm/vmalloc: add vmap_range_noflush variant
From: Nicholas Piggin @ 2020-11-28 15:25 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, linux-kernel, Nicholas Piggin, Christoph Hellwig,
Zefan Li, Jonathan Cameron, linuxppc-dev
In-Reply-To: <20201128152559.999540-1-npiggin@gmail.com>
As a side-effect, the order of flush_cache_vmap() and
arch_sync_kernel_mappings() calls are switched, but that now matches
the other callers in this file.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
mm/vmalloc.c | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 2f236aeeac24..ee9c3bee67f5 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -235,7 +235,7 @@ static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
return 0;
}
-int vmap_range(unsigned long addr, unsigned long end,
+static int vmap_range_noflush(unsigned long addr, unsigned long end,
phys_addr_t phys_addr, pgprot_t prot,
unsigned int max_page_shift)
{
@@ -257,14 +257,24 @@ int vmap_range(unsigned long addr, unsigned long end,
break;
} while (pgd++, phys_addr += (next - addr), addr = next, addr != end);
- flush_cache_vmap(start, end);
-
if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
arch_sync_kernel_mappings(start, end);
return err;
}
+int vmap_range(unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift)
+{
+ int err;
+
+ err = vmap_range_noflush(addr, end, phys_addr, prot, max_page_shift);
+ flush_cache_vmap(addr, end);
+
+ return err;
+}
+
static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pgtbl_mod_mask *mask)
{
--
2.23.0
^ permalink raw reply related
* [PATCH v8 05/12] mm: HUGE_VMAP arch support cleanup
From: Nicholas Piggin @ 2020-11-28 15:25 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, H. Peter Anvin, Will Deacon, Catalin Marinas, x86,
linux-kernel, Nicholas Piggin, Christoph Hellwig, Zefan Li,
Borislav Petkov, Jonathan Cameron, Thomas Gleixner, linuxppc-dev,
Ingo Molnar, linux-arm-kernel
In-Reply-To: <20201128152559.999540-1-npiggin@gmail.com>
This changes the awkward approach where architectures provide init
functions to determine which levels they can provide large mappings for,
to one where the arch is queried for each call.
This removes code and indirection, and allows constant-folding of dead
code for unsupported levels.
This also adds a prot argument to the arch query. This is unused
currently but could help with some architectures (e.g., some powerpc
processors can't map uncacheable memory with large pages).
Cc: linuxppc-dev@lists.ozlabs.org
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: x86@kernel.org
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/arm64/include/asm/vmalloc.h | 8 +++
arch/arm64/mm/mmu.c | 10 +--
arch/powerpc/include/asm/vmalloc.h | 8 +++
arch/powerpc/mm/book3s64/radix_pgtable.c | 8 +--
arch/x86/include/asm/vmalloc.h | 7 ++
arch/x86/mm/ioremap.c | 10 +--
include/linux/io.h | 9 ---
include/linux/vmalloc.h | 6 ++
init/main.c | 1 -
mm/ioremap.c | 88 +++++++++---------------
10 files changed, 77 insertions(+), 78 deletions(-)
diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h
index 2ca708ab9b20..597b40405319 100644
--- a/arch/arm64/include/asm/vmalloc.h
+++ b/arch/arm64/include/asm/vmalloc.h
@@ -1,4 +1,12 @@
#ifndef _ASM_ARM64_VMALLOC_H
#define _ASM_ARM64_VMALLOC_H
+#include <asm/page.h>
+
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+bool arch_vmap_p4d_supported(pgprot_t prot);
+bool arch_vmap_pud_supported(pgprot_t prot);
+bool arch_vmap_pmd_supported(pgprot_t prot);
+#endif
+
#endif /* _ASM_ARM64_VMALLOC_H */
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index ca692a815731..1b60079c1cef 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1315,12 +1315,12 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
return dt_virt;
}
-int __init arch_ioremap_p4d_supported(void)
+bool arch_vmap_p4d_supported(pgprot_t prot)
{
- return 0;
+ return false;
}
-int __init arch_ioremap_pud_supported(void)
+bool arch_vmap_pud_supported(pgprot_t prot);
{
/*
* Only 4k granule supports level 1 block mappings.
@@ -1330,9 +1330,9 @@ int __init arch_ioremap_pud_supported(void)
!IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
}
-int __init arch_ioremap_pmd_supported(void)
+bool arch_vmap_pmd_supported(pgprot_t prot)
{
- /* See arch_ioremap_pud_supported() */
+ /* See arch_vmap_pud_supported() */
return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
}
diff --git a/arch/powerpc/include/asm/vmalloc.h b/arch/powerpc/include/asm/vmalloc.h
index b992dfaaa161..105abb73f075 100644
--- a/arch/powerpc/include/asm/vmalloc.h
+++ b/arch/powerpc/include/asm/vmalloc.h
@@ -1,4 +1,12 @@
#ifndef _ASM_POWERPC_VMALLOC_H
#define _ASM_POWERPC_VMALLOC_H
+#include <asm/page.h>
+
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+bool arch_vmap_p4d_supported(pgprot_t prot);
+bool arch_vmap_pud_supported(pgprot_t prot);
+bool arch_vmap_pmd_supported(pgprot_t prot);
+#endif
+
#endif /* _ASM_POWERPC_VMALLOC_H */
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 3adcf730f478..ab426fc0cd4b 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -1121,13 +1121,13 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
set_pte_at(mm, addr, ptep, pte);
}
-int __init arch_ioremap_pud_supported(void)
+bool arch_vmap_pud_supported(pgprot_t prot)
{
/* HPT does not cope with large pages in the vmalloc area */
return radix_enabled();
}
-int __init arch_ioremap_pmd_supported(void)
+bool arch_vmap_pmd_supported(pgprot_t prot)
{
return radix_enabled();
}
@@ -1221,7 +1221,7 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
return 1;
}
-int __init arch_ioremap_p4d_supported(void)
+bool arch_vmap_p4d_supported(pgprot_t prot)
{
- return 0;
+ return false;
}
diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h
index 29837740b520..094ea2b565f3 100644
--- a/arch/x86/include/asm/vmalloc.h
+++ b/arch/x86/include/asm/vmalloc.h
@@ -1,6 +1,13 @@
#ifndef _ASM_X86_VMALLOC_H
#define _ASM_X86_VMALLOC_H
+#include <asm/page.h>
#include <asm/pgtable_areas.h>
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+bool arch_vmap_p4d_supported(pgprot_t prot);
+bool arch_vmap_pud_supported(pgprot_t prot);
+bool arch_vmap_pmd_supported(pgprot_t prot);
+#endif
+
#endif /* _ASM_X86_VMALLOC_H */
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 9e5ccc56f8e0..762b5ff4edad 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -481,21 +481,21 @@ void iounmap(volatile void __iomem *addr)
}
EXPORT_SYMBOL(iounmap);
-int __init arch_ioremap_p4d_supported(void)
+bool arch_vmap_p4d_supported(pgprot_t prot)
{
- return 0;
+ return false;
}
-int __init arch_ioremap_pud_supported(void)
+bool arch_vmap_pud_supported(pgprot_t prot)
{
#ifdef CONFIG_X86_64
return boot_cpu_has(X86_FEATURE_GBPAGES);
#else
- return 0;
+ return false;
#endif
}
-int __init arch_ioremap_pmd_supported(void)
+bool arch_vmap_pmd_supported(pgprot_t prot)
{
return boot_cpu_has(X86_FEATURE_PSE);
}
diff --git a/include/linux/io.h b/include/linux/io.h
index 8394c56babc2..f1effd4d7a3c 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -31,15 +31,6 @@ static inline int ioremap_page_range(unsigned long addr, unsigned long end,
}
#endif
-#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
-void __init ioremap_huge_init(void);
-int arch_ioremap_p4d_supported(void);
-int arch_ioremap_pud_supported(void);
-int arch_ioremap_pmd_supported(void);
-#else
-static inline void ioremap_huge_init(void) { }
-#endif
-
/*
* Managed iomap interface
*/
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 938eaf9517e2..b3218ba0904d 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -85,6 +85,12 @@ struct vmap_area {
};
};
+#ifndef CONFIG_HAVE_ARCH_HUGE_VMAP
+static inline bool arch_vmap_p4d_supported(pgprot_t prot) { return false; }
+static inline bool arch_vmap_pud_supported(pgprot_t prot) { return false; }
+static inline bool arch_vmap_pmd_supported(pgprot_t prot) { return false; }
+#endif
+
/*
* Highlevel APIs for driver use
*/
diff --git a/init/main.c b/init/main.c
index 20baced721ad..5bd2f4f41d30 100644
--- a/init/main.c
+++ b/init/main.c
@@ -833,7 +833,6 @@ static void __init mm_init(void)
pgtable_init();
debug_objects_mem_init();
vmalloc_init();
- ioremap_huge_init();
/* Should be run before the first non-init thread is created */
init_espfix_bsp();
/* Should be run after espfix64 is set up. */
diff --git a/mm/ioremap.c b/mm/ioremap.c
index 3f4d36f9745a..c67f91164401 100644
--- a/mm/ioremap.c
+++ b/mm/ioremap.c
@@ -16,49 +16,16 @@
#include "pgalloc-track.h"
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
-static int __read_mostly ioremap_p4d_capable;
-static int __read_mostly ioremap_pud_capable;
-static int __read_mostly ioremap_pmd_capable;
-static int __read_mostly ioremap_huge_disabled;
+static bool __ro_after_init iomap_max_page_shift = PAGE_SHIFT;
static int __init set_nohugeiomap(char *str)
{
- ioremap_huge_disabled = 1;
+ iomap_max_page_shift = P4D_SHIFT;
return 0;
}
early_param("nohugeiomap", set_nohugeiomap);
-
-void __init ioremap_huge_init(void)
-{
- if (!ioremap_huge_disabled) {
- if (arch_ioremap_p4d_supported())
- ioremap_p4d_capable = 1;
- if (arch_ioremap_pud_supported())
- ioremap_pud_capable = 1;
- if (arch_ioremap_pmd_supported())
- ioremap_pmd_capable = 1;
- }
-}
-
-static inline int ioremap_p4d_enabled(void)
-{
- return ioremap_p4d_capable;
-}
-
-static inline int ioremap_pud_enabled(void)
-{
- return ioremap_pud_capable;
-}
-
-static inline int ioremap_pmd_enabled(void)
-{
- return ioremap_pmd_capable;
-}
-
-#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
-static inline int ioremap_p4d_enabled(void) { return 0; }
-static inline int ioremap_pud_enabled(void) { return 0; }
-static inline int ioremap_pmd_enabled(void) { return 0; }
+#else /* CONFIG_HAVE_ARCH_HUGE_VMAP */
+static const bool iomap_max_page_shift = PAGE_SHIFT;
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
@@ -82,9 +49,13 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
}
static int vmap_try_huge_pmd(pmd_t *pmd, unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot)
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift)
{
- if (!ioremap_pmd_enabled())
+ if (max_page_shift < PMD_SHIFT)
+ return 0;
+
+ if (!arch_vmap_pmd_supported(prot))
return 0;
if ((end - addr) != PMD_SIZE)
@@ -104,7 +75,7 @@ static int vmap_try_huge_pmd(pmd_t *pmd, unsigned long addr, unsigned long end,
static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
phys_addr_t phys_addr, pgprot_t prot,
- pgtbl_mod_mask *mask)
+ unsigned int max_page_shift, pgtbl_mod_mask *mask)
{
pmd_t *pmd;
unsigned long next;
@@ -115,7 +86,7 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
do {
next = pmd_addr_end(addr, end);
- if (vmap_try_huge_pmd(pmd, addr, next, phys_addr, prot)) {
+ if (vmap_try_huge_pmd(pmd, addr, next, phys_addr, prot, max_page_shift)) {
*mask |= PGTBL_PMD_MODIFIED;
continue;
}
@@ -127,9 +98,13 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
}
static int vmap_try_huge_pud(pud_t *pud, unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot)
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift)
{
- if (!ioremap_pud_enabled())
+ if (max_page_shift < PUD_SHIFT)
+ return 0;
+
+ if (!arch_vmap_pud_supported(prot))
return 0;
if ((end - addr) != PUD_SIZE)
@@ -149,7 +124,7 @@ static int vmap_try_huge_pud(pud_t *pud, unsigned long addr, unsigned long end,
static int vmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
phys_addr_t phys_addr, pgprot_t prot,
- pgtbl_mod_mask *mask)
+ unsigned int max_page_shift, pgtbl_mod_mask *mask)
{
pud_t *pud;
unsigned long next;
@@ -160,21 +135,25 @@ static int vmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
do {
next = pud_addr_end(addr, end);
- if (vmap_try_huge_pud(pud, addr, next, phys_addr, prot)) {
+ if (vmap_try_huge_pud(pud, addr, next, phys_addr, prot, max_page_shift)) {
*mask |= PGTBL_PUD_MODIFIED;
continue;
}
- if (vmap_pmd_range(pud, addr, next, phys_addr, prot, mask))
+ if (vmap_pmd_range(pud, addr, next, phys_addr, prot, max_page_shift, mask))
return -ENOMEM;
} while (pud++, phys_addr += (next - addr), addr = next, addr != end);
return 0;
}
static int vmap_try_huge_p4d(p4d_t *p4d, unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot)
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift)
{
- if (!ioremap_p4d_enabled())
+ if (max_page_shift < P4D_SHIFT)
+ return 0;
+
+ if (!arch_vmap_p4d_supported(prot))
return 0;
if ((end - addr) != P4D_SIZE)
@@ -194,7 +173,7 @@ static int vmap_try_huge_p4d(p4d_t *p4d, unsigned long addr, unsigned long end,
static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
phys_addr_t phys_addr, pgprot_t prot,
- pgtbl_mod_mask *mask)
+ unsigned int max_page_shift, pgtbl_mod_mask *mask)
{
p4d_t *p4d;
unsigned long next;
@@ -205,19 +184,20 @@ static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
do {
next = p4d_addr_end(addr, end);
- if (vmap_try_huge_p4d(p4d, addr, next, phys_addr, prot)) {
+ if (vmap_try_huge_p4d(p4d, addr, next, phys_addr, prot, max_page_shift)) {
*mask |= PGTBL_P4D_MODIFIED;
continue;
}
- if (vmap_pud_range(p4d, addr, next, phys_addr, prot, mask))
+ if (vmap_pud_range(p4d, addr, next, phys_addr, prot, max_page_shift, mask))
return -ENOMEM;
} while (p4d++, phys_addr += (next - addr), addr = next, addr != end);
return 0;
}
static int vmap_range(unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot)
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift)
{
pgd_t *pgd;
unsigned long start;
@@ -232,7 +212,7 @@ static int vmap_range(unsigned long addr, unsigned long end,
pgd = pgd_offset_k(addr);
do {
next = pgd_addr_end(addr, end);
- err = vmap_p4d_range(pgd, addr, next, phys_addr, prot, &mask);
+ err = vmap_p4d_range(pgd, addr, next, phys_addr, prot, max_page_shift, &mask);
if (err)
break;
} while (pgd++, phys_addr += (next - addr), addr = next, addr != end);
@@ -248,7 +228,7 @@ static int vmap_range(unsigned long addr, unsigned long end,
int ioremap_page_range(unsigned long addr,
unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
{
- return vmap_range(addr, end, phys_addr, prot);
+ return vmap_range(addr, end, phys_addr, prot, iomap_max_page_shift);
}
#ifdef CONFIG_GENERIC_IOREMAP
--
2.23.0
^ permalink raw reply related
* [PATCH v8 02/12] mm: apply_to_pte_range warn and fail if a large pte is encountered
From: Nicholas Piggin @ 2020-11-28 15:25 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, linux-kernel, Nicholas Piggin, Christoph Hellwig,
Zefan Li, Jonathan Cameron, linuxppc-dev
In-Reply-To: <20201128152559.999540-1-npiggin@gmail.com>
apply_to_pte_range might mistake a large pte for bad, or treat it as a
page table, resulting in a crash or corruption. Add a test to warn and
return error if large entries are found.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
mm/memory.c | 66 +++++++++++++++++++++++++++++++++++++++--------------
1 file changed, 49 insertions(+), 17 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index c48f8df6e502..3d0f0bc5d573 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2429,13 +2429,21 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
}
do {
next = pmd_addr_end(addr, end);
- if (create || !pmd_none_or_clear_bad(pmd)) {
- err = apply_to_pte_range(mm, pmd, addr, next, fn, data,
- create, mask);
- if (err)
- break;
+ if (pmd_none(*pmd) && !create)
+ continue;
+ if (WARN_ON_ONCE(pmd_leaf(*pmd)))
+ return -EINVAL;
+ if (!pmd_none(*pmd) && WARN_ON_ONCE(pmd_bad(*pmd))) {
+ if (!create)
+ continue;
+ pmd_clear_bad(pmd);
}
+ err = apply_to_pte_range(mm, pmd, addr, next,
+ fn, data, create, mask);
+ if (err)
+ break;
} while (pmd++, addr = next, addr != end);
+
return err;
}
@@ -2457,13 +2465,21 @@ static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
}
do {
next = pud_addr_end(addr, end);
- if (create || !pud_none_or_clear_bad(pud)) {
- err = apply_to_pmd_range(mm, pud, addr, next, fn, data,
- create, mask);
- if (err)
- break;
+ if (pud_none(*pud) && !create)
+ continue;
+ if (WARN_ON_ONCE(pud_leaf(*pud)))
+ return -EINVAL;
+ if (!pud_none(*pud) && WARN_ON_ONCE(pud_bad(*pud))) {
+ if (!create)
+ continue;
+ pud_clear_bad(pud);
}
+ err = apply_to_pmd_range(mm, pud, addr, next,
+ fn, data, create, mask);
+ if (err)
+ break;
} while (pud++, addr = next, addr != end);
+
return err;
}
@@ -2485,13 +2501,21 @@ static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd,
}
do {
next = p4d_addr_end(addr, end);
- if (create || !p4d_none_or_clear_bad(p4d)) {
- err = apply_to_pud_range(mm, p4d, addr, next, fn, data,
- create, mask);
- if (err)
- break;
+ if (p4d_none(*p4d) && !create)
+ continue;
+ if (WARN_ON_ONCE(p4d_leaf(*p4d)))
+ return -EINVAL;
+ if (!p4d_none(*p4d) && WARN_ON_ONCE(p4d_bad(*p4d))) {
+ if (!create)
+ continue;
+ p4d_clear_bad(p4d);
}
+ err = apply_to_pud_range(mm, p4d, addr, next,
+ fn, data, create, mask);
+ if (err)
+ break;
} while (p4d++, addr = next, addr != end);
+
return err;
}
@@ -2511,9 +2535,17 @@ static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr,
pgd = pgd_offset(mm, addr);
do {
next = pgd_addr_end(addr, end);
- if (!create && pgd_none_or_clear_bad(pgd))
+ if (pgd_none(*pgd) && !create)
continue;
- err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create, &mask);
+ if (WARN_ON_ONCE(pgd_leaf(*pgd)))
+ return -EINVAL;
+ if (!pgd_none(*pgd) && WARN_ON_ONCE(pgd_bad(*pgd))) {
+ if (!create)
+ continue;
+ pgd_clear_bad(pgd);
+ }
+ err = apply_to_p4d_range(mm, pgd, addr, next,
+ fn, data, create, &mask);
if (err)
break;
} while (pgd++, addr = next, addr != end);
--
2.23.0
^ permalink raw reply related
* [PATCH v8 06/12] powerpc: inline huge vmap supported functions
From: Nicholas Piggin @ 2020-11-28 15:25 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, linux-kernel, Nicholas Piggin, Christoph Hellwig,
Zefan Li, Jonathan Cameron, linuxppc-dev
In-Reply-To: <20201128152559.999540-1-npiggin@gmail.com>
This allows unsupported levels to be constant folded away, and so
p4d_free_pud_page can be removed because it's no longer linked to.
Cc: linuxppc-dev@lists.ozlabs.org
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/vmalloc.h | 19 ++++++++++++++++---
arch/powerpc/mm/book3s64/radix_pgtable.c | 21 ---------------------
2 files changed, 16 insertions(+), 24 deletions(-)
diff --git a/arch/powerpc/include/asm/vmalloc.h b/arch/powerpc/include/asm/vmalloc.h
index 105abb73f075..3f0c153befb0 100644
--- a/arch/powerpc/include/asm/vmalloc.h
+++ b/arch/powerpc/include/asm/vmalloc.h
@@ -1,12 +1,25 @@
#ifndef _ASM_POWERPC_VMALLOC_H
#define _ASM_POWERPC_VMALLOC_H
+#include <asm/mmu.h>
#include <asm/page.h>
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
-bool arch_vmap_p4d_supported(pgprot_t prot);
-bool arch_vmap_pud_supported(pgprot_t prot);
-bool arch_vmap_pmd_supported(pgprot_t prot);
+static inline bool arch_vmap_p4d_supported(pgprot_t prot)
+{
+ return false;
+}
+
+static inline bool arch_vmap_pud_supported(pgprot_t prot)
+{
+ /* HPT does not cope with large pages in the vmalloc area */
+ return radix_enabled();
+}
+
+static inline bool arch_vmap_pmd_supported(pgprot_t prot)
+{
+ return radix_enabled();
+}
#endif
#endif /* _ASM_POWERPC_VMALLOC_H */
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index ab426fc0cd4b..de6b558dc187 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -1121,22 +1121,6 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
set_pte_at(mm, addr, ptep, pte);
}
-bool arch_vmap_pud_supported(pgprot_t prot)
-{
- /* HPT does not cope with large pages in the vmalloc area */
- return radix_enabled();
-}
-
-bool arch_vmap_pmd_supported(pgprot_t prot)
-{
- return radix_enabled();
-}
-
-int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
-{
- return 0;
-}
-
int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
pte_t *ptep = (pte_t *)pud;
@@ -1220,8 +1204,3 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
return 1;
}
-
-bool arch_vmap_p4d_supported(pgprot_t prot)
-{
- return false;
-}
--
2.23.0
^ permalink raw reply related
* [PATCH v8 07/12] arm64: inline huge vmap supported functions
From: Nicholas Piggin @ 2020-11-28 15:25 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, Will Deacon, Catalin Marinas, linux-kernel,
Nicholas Piggin, Christoph Hellwig, Zefan Li, Jonathan Cameron,
linuxppc-dev, linux-arm-kernel
In-Reply-To: <20201128152559.999540-1-npiggin@gmail.com>
This allows unsupported levels to be constant folded away, and so
p4d_free_pud_page can be removed because it's no longer linked to.
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/arm64/include/asm/vmalloc.h | 23 ++++++++++++++++++++---
arch/arm64/mm/mmu.c | 26 --------------------------
2 files changed, 20 insertions(+), 29 deletions(-)
diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h
index 597b40405319..fc9a12d6cc1a 100644
--- a/arch/arm64/include/asm/vmalloc.h
+++ b/arch/arm64/include/asm/vmalloc.h
@@ -4,9 +4,26 @@
#include <asm/page.h>
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
-bool arch_vmap_p4d_supported(pgprot_t prot);
-bool arch_vmap_pud_supported(pgprot_t prot);
-bool arch_vmap_pmd_supported(pgprot_t prot);
+static inline bool arch_vmap_p4d_supported(pgprot_t prot)
+{
+ return false;
+}
+
+static inline bool arch_vmap_pud_supported(pgprot_t prot)
+{
+ /*
+ * Only 4k granule supports level 1 block mappings.
+ * SW table walks can't handle removal of intermediate entries.
+ */
+ return IS_ENABLED(CONFIG_ARM64_4K_PAGES) &&
+ !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
+}
+
+static inline bool arch_vmap_pmd_supported(pgprot_t prot)
+{
+ /* See arch_vmap_pud_supported() */
+ return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
+}
#endif
#endif /* _ASM_ARM64_VMALLOC_H */
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 1b60079c1cef..0af5b5cfb9c6 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1315,27 +1315,6 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
return dt_virt;
}
-bool arch_vmap_p4d_supported(pgprot_t prot)
-{
- return false;
-}
-
-bool arch_vmap_pud_supported(pgprot_t prot);
-{
- /*
- * Only 4k granule supports level 1 block mappings.
- * SW table walks can't handle removal of intermediate entries.
- */
- return IS_ENABLED(CONFIG_ARM64_4K_PAGES) &&
- !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
-}
-
-bool arch_vmap_pmd_supported(pgprot_t prot)
-{
- /* See arch_vmap_pud_supported() */
- return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
-}
-
int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
{
pud_t new_pud = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot));
@@ -1427,11 +1406,6 @@ int pud_free_pmd_page(pud_t *pudp, unsigned long addr)
return 1;
}
-int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
-{
- return 0; /* Don't attempt a block mapping */
-}
-
#ifdef CONFIG_MEMORY_HOTPLUG
static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size)
{
--
2.23.0
^ permalink raw reply related
* [PATCH v8 03/12] mm/vmalloc: rename vmap_*_range vmap_pages_*_range
From: Nicholas Piggin @ 2020-11-28 15:25 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, linux-kernel, Nicholas Piggin, Christoph Hellwig,
Zefan Li, Jonathan Cameron, linuxppc-dev
In-Reply-To: <20201128152559.999540-1-npiggin@gmail.com>
The vmalloc mapper operates on a struct page * array rather than a
linear physical address, re-name it to make this distinction clear.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
mm/vmalloc.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f85124e88bdb..42326dbffaf0 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -189,7 +189,7 @@ void unmap_kernel_range_noflush(unsigned long start, unsigned long size)
arch_sync_kernel_mappings(start, end);
}
-static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
+static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long end, pgprot_t prot, struct page **pages, int *nr,
pgtbl_mod_mask *mask)
{
@@ -217,7 +217,7 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
return 0;
}
-static int vmap_pmd_range(pud_t *pud, unsigned long addr,
+static int vmap_pages_pmd_range(pud_t *pud, unsigned long addr,
unsigned long end, pgprot_t prot, struct page **pages, int *nr,
pgtbl_mod_mask *mask)
{
@@ -229,13 +229,13 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr,
return -ENOMEM;
do {
next = pmd_addr_end(addr, end);
- if (vmap_pte_range(pmd, addr, next, prot, pages, nr, mask))
+ if (vmap_pages_pte_range(pmd, addr, next, prot, pages, nr, mask))
return -ENOMEM;
} while (pmd++, addr = next, addr != end);
return 0;
}
-static int vmap_pud_range(p4d_t *p4d, unsigned long addr,
+static int vmap_pages_pud_range(p4d_t *p4d, unsigned long addr,
unsigned long end, pgprot_t prot, struct page **pages, int *nr,
pgtbl_mod_mask *mask)
{
@@ -247,13 +247,13 @@ static int vmap_pud_range(p4d_t *p4d, unsigned long addr,
return -ENOMEM;
do {
next = pud_addr_end(addr, end);
- if (vmap_pmd_range(pud, addr, next, prot, pages, nr, mask))
+ if (vmap_pages_pmd_range(pud, addr, next, prot, pages, nr, mask))
return -ENOMEM;
} while (pud++, addr = next, addr != end);
return 0;
}
-static int vmap_p4d_range(pgd_t *pgd, unsigned long addr,
+static int vmap_pages_p4d_range(pgd_t *pgd, unsigned long addr,
unsigned long end, pgprot_t prot, struct page **pages, int *nr,
pgtbl_mod_mask *mask)
{
@@ -265,7 +265,7 @@ static int vmap_p4d_range(pgd_t *pgd, unsigned long addr,
return -ENOMEM;
do {
next = p4d_addr_end(addr, end);
- if (vmap_pud_range(p4d, addr, next, prot, pages, nr, mask))
+ if (vmap_pages_pud_range(p4d, addr, next, prot, pages, nr, mask))
return -ENOMEM;
} while (p4d++, addr = next, addr != end);
return 0;
@@ -306,7 +306,7 @@ int map_kernel_range_noflush(unsigned long addr, unsigned long size,
next = pgd_addr_end(addr, end);
if (pgd_bad(*pgd))
mask |= PGTBL_PGD_MODIFIED;
- err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr, &mask);
+ err = vmap_pages_p4d_range(pgd, addr, next, prot, pages, &nr, &mask);
if (err)
return err;
} while (pgd++, addr = next, addr != end);
--
2.23.0
^ permalink raw reply related
* [PATCH v8 01/12] mm/vmalloc: fix vmalloc_to_page for huge vmap mappings
From: Nicholas Piggin @ 2020-11-28 15:25 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, linux-kernel, Nicholas Piggin, Christoph Hellwig,
Zefan Li, Jonathan Cameron, linuxppc-dev
In-Reply-To: <20201128152559.999540-1-npiggin@gmail.com>
vmalloc_to_page returns NULL for addresses mapped by larger pages[*].
Whether or not a vmap is huge depends on the architecture details,
alignments, boot options, etc., which the caller can not be expected
to know. Therefore HUGE_VMAP is a regression for vmalloc_to_page.
This change teaches vmalloc_to_page about larger pages, and returns
the struct page that corresponds to the offset within the large page.
This makes the API agnostic to mapping implementation details.
[*] As explained by commit 029c54b095995 ("mm/vmalloc.c: huge-vmap:
fail gracefully on unexpected huge vmap mappings")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
mm/vmalloc.c | 41 ++++++++++++++++++++++++++---------------
1 file changed, 26 insertions(+), 15 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 6ae491a8b210..f85124e88bdb 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -34,7 +34,7 @@
#include <linux/bitops.h>
#include <linux/rbtree_augmented.h>
#include <linux/overflow.h>
-
+#include <linux/pgtable.h>
#include <linux/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/shmparam.h>
@@ -343,7 +343,9 @@ int is_vmalloc_or_module_addr(const void *x)
}
/*
- * Walk a vmap address to the struct page it maps.
+ * Walk a vmap address to the struct page it maps. Huge vmap mappings will
+ * return the tail page that corresponds to the base page address, which
+ * matches small vmap mappings.
*/
struct page *vmalloc_to_page(const void *vmalloc_addr)
{
@@ -363,25 +365,33 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
if (pgd_none(*pgd))
return NULL;
+ if (WARN_ON_ONCE(pgd_leaf(*pgd)))
+ return NULL; /* XXX: no allowance for huge pgd */
+ if (WARN_ON_ONCE(pgd_bad(*pgd)))
+ return NULL;
+
p4d = p4d_offset(pgd, addr);
if (p4d_none(*p4d))
return NULL;
- pud = pud_offset(p4d, addr);
+ if (p4d_leaf(*p4d))
+ return p4d_page(*p4d) + ((addr & ~P4D_MASK) >> PAGE_SHIFT);
+ if (WARN_ON_ONCE(p4d_bad(*p4d)))
+ return NULL;
- /*
- * Don't dereference bad PUD or PMD (below) entries. This will also
- * identify huge mappings, which we may encounter on architectures
- * that define CONFIG_HAVE_ARCH_HUGE_VMAP=y. Such regions will be
- * identified as vmalloc addresses by is_vmalloc_addr(), but are
- * not [unambiguously] associated with a struct page, so there is
- * no correct value to return for them.
- */
- WARN_ON_ONCE(pud_bad(*pud));
- if (pud_none(*pud) || pud_bad(*pud))
+ pud = pud_offset(p4d, addr);
+ if (pud_none(*pud))
+ return NULL;
+ if (pud_leaf(*pud))
+ return pud_page(*pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+ if (WARN_ON_ONCE(pud_bad(*pud)))
return NULL;
+
pmd = pmd_offset(pud, addr);
- WARN_ON_ONCE(pmd_bad(*pmd));
- if (pmd_none(*pmd) || pmd_bad(*pmd))
+ if (pmd_none(*pmd))
+ return NULL;
+ if (pmd_leaf(*pmd))
+ return pmd_page(*pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+ if (WARN_ON_ONCE(pmd_bad(*pmd)))
return NULL;
ptep = pte_offset_map(pmd, addr);
@@ -389,6 +399,7 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
if (pte_present(pte))
page = pte_page(pte);
pte_unmap(ptep);
+
return page;
}
EXPORT_SYMBOL(vmalloc_to_page);
--
2.23.0
^ permalink raw reply related
* [PATCH v8 00/12] huge vmalloc mappings
From: Nicholas Piggin @ 2020-11-28 15:25 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, linux-kernel, Nicholas Piggin, Christoph Hellwig,
Zefan Li, Jonathan Cameron, linuxppc-dev
Hi Andrew,
Please consider this for -mm.
Thanks,
Nick
Since v7:
- Rebase, added some acks, compile fix
- Removed "order=" from vmallocinfo, it's a bit confusing (nr_pages
is in small page size for compatibility).
- Added arch_vmap_pmd_supported() test before starting to allocate
the large page, rather than only testing it when doing the map, to
avoid unsupported configs trying to allocate huge pages for no
reason.
Since v6:
- Fixed a false positive warning introduced in patch 2, found by
kbuild test robot.
Since v5:
- Split arch changes out better and make the constant folding work
- Avoid most of the 80 column wrap, fix a reference to lib/ioremap.c
- Fix compile error on some archs
Since v4:
- Fixed an off-by-page-order bug in v4
- Several minor cleanups.
- Added page order to /proc/vmallocinfo
- Added hugepage to alloc_large_system_hage output.
- Made an architecture config option, powerpc only for now.
Since v3:
- Fixed an off-by-one bug in a loop
- Fix !CONFIG_HAVE_ARCH_HUGE_VMAP build fail
- Hopefully this time fix the arm64 vmap stack bug, thanks Jonathan
Cameron for debugging the cause of this (hopefully).
Since v2:
- Rebased on vmalloc cleanups, split series into simpler pieces.
- Fixed several compile errors and warnings
- Keep the page array and accounting in small page units because
struct vm_struct is an interface (this should fix x86 vmap stack debug
assert). [Thanks Zefan]
*** BLURB HERE ***
Nicholas Piggin (12):
mm/vmalloc: fix vmalloc_to_page for huge vmap mappings
mm: apply_to_pte_range warn and fail if a large pte is encountered
mm/vmalloc: rename vmap_*_range vmap_pages_*_range
mm/ioremap: rename ioremap_*_range to vmap_*_range
mm: HUGE_VMAP arch support cleanup
powerpc: inline huge vmap supported functions
arm64: inline huge vmap supported functions
x86: inline huge vmap supported functions
mm: Move vmap_range from mm/ioremap.c to mm/vmalloc.c
mm/vmalloc: add vmap_range_noflush variant
mm/vmalloc: Hugepage vmalloc mappings
powerpc/64s/radix: Enable huge vmalloc mappings
.../admin-guide/kernel-parameters.txt | 2 +
arch/Kconfig | 4 +
arch/arm64/include/asm/vmalloc.h | 25 +
arch/arm64/mm/mmu.c | 26 -
arch/powerpc/Kconfig | 1 +
arch/powerpc/include/asm/vmalloc.h | 21 +
arch/powerpc/mm/book3s64/radix_pgtable.c | 21 -
arch/x86/include/asm/vmalloc.h | 23 +
arch/x86/mm/ioremap.c | 19 -
arch/x86/mm/pgtable.c | 13 -
include/linux/io.h | 9 -
include/linux/vmalloc.h | 10 +
init/main.c | 1 -
mm/ioremap.c | 225 +--------
mm/memory.c | 66 ++-
mm/page_alloc.c | 5 +-
mm/vmalloc.c | 453 +++++++++++++++---
17 files changed, 529 insertions(+), 395 deletions(-)
--
2.23.0
^ permalink raw reply
* [PATCH v3 19/19] powerpc/64s: power4 nap fixup in C
From: Nicholas Piggin @ 2020-11-28 14:41 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20201128144114.944000-1-npiggin@gmail.com>
There is no need for this to be in asm, use the new intrrupt entry wrapper.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 15 +++++++++
arch/powerpc/include/asm/processor.h | 1 +
arch/powerpc/include/asm/thread_info.h | 6 ++++
arch/powerpc/kernel/exceptions-64s.S | 45 --------------------------
arch/powerpc/kernel/idle_book3s.S | 4 +++
5 files changed, 26 insertions(+), 45 deletions(-)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 655fb668e2da..29afc7443d0f 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -8,6 +8,16 @@
#include <asm/ftrace.h>
#include <asm/runlatch.h>
+static inline void nap_adjust_return(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC_970_NAP
+ if (unlikely(test_thread_local_flags(_TLF_NAPPING))) {
+ clear_thread_local_flags(_TLF_NAPPING);
+ regs->nip = (unsigned long)power4_idle_nap_return;
+ }
+#endif
+}
+
struct interrupt_state {
#ifdef CONFIG_PPC_BOOK3E_64
enum ctx_state ctx_state;
@@ -99,6 +109,9 @@ static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct int
{
irq_exit();
interrupt_exit_prepare(regs, state);
+
+ /* Adjust at exit so the main handler sees the true NIA */
+ nap_adjust_return(regs);
}
struct interrupt_nmi_state {
@@ -150,6 +163,8 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter
radix_enabled() || (mfmsr() & MSR_DR))
nmi_exit();
+ nap_adjust_return(regs);
+
#ifdef CONFIG_PPC64
this_cpu_set_ftrace_enabled(state->ftrace_enabled);
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index c61c859b51a8..d845850f75e2 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -418,6 +418,7 @@ extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
extern unsigned long isa206_idle_insn_mayloss(unsigned long type);
#ifdef CONFIG_PPC_970_NAP
extern void power4_idle_nap(void);
+void power4_idle_nap_return(void);
#endif
extern unsigned long cpuidle_disable;
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index c9443c16e5fb..e7ee220b88ea 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -151,6 +151,12 @@ void arch_setup_new_exec(void);
#ifndef __ASSEMBLY__
+static inline void clear_thread_local_flags(unsigned int flags)
+{
+ struct thread_info *ti = current_thread_info();
+ ti->local_flags &= ~flags;
+}
+
static inline bool test_thread_local_flags(unsigned int flags)
{
struct thread_info *ti = current_thread_info();
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index e75849a60578..869ad412517b 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -692,25 +692,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
ld r1,GPR1(r1)
.endm
-/*
- * When the idle code in power4_idle puts the CPU into NAP mode,
- * it has to do so in a loop, and relies on the external interrupt
- * and decrementer interrupt entry code to get it out of the loop.
- * It sets the _TLF_NAPPING bit in current_thread_info()->local_flags
- * to signal that it is in the loop and needs help to get out.
- */
-#ifdef CONFIG_PPC_970_NAP
-#define FINISH_NAP \
-BEGIN_FTR_SECTION \
- ld r11, PACA_THREAD_INFO(r13); \
- ld r9,TI_LOCAL_FLAGS(r11); \
- andi. r10,r9,_TLF_NAPPING; \
- bnel power4_fixup_nap; \
-END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
-#else
-#define FINISH_NAP
-#endif
-
/*
* There are a few constraints to be concerned with.
* - Real mode exceptions code/data must be located at their physical location.
@@ -1248,7 +1229,6 @@ EXC_COMMON_BEGIN(machine_check_common)
*/
GEN_COMMON machine_check
- FINISH_NAP
/* Enable MSR_RI when finished with PACA_EXMC */
li r10,MSR_RI
mtmsrd r10,1
@@ -1573,7 +1553,6 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
EXC_COMMON_BEGIN(hardware_interrupt_common)
GEN_COMMON hardware_interrupt
- FINISH_NAP
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_IRQ
b interrupt_return
@@ -1758,7 +1737,6 @@ EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80)
EXC_VIRT_END(decrementer, 0x4900, 0x80)
EXC_COMMON_BEGIN(decrementer_common)
GEN_COMMON decrementer
- FINISH_NAP
addi r3,r1,STACK_FRAME_OVERHEAD
bl timer_interrupt
b interrupt_return
@@ -1843,7 +1821,6 @@ EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100)
EXC_VIRT_END(doorbell_super, 0x4a00, 0x100)
EXC_COMMON_BEGIN(doorbell_super_common)
GEN_COMMON doorbell_super
- FINISH_NAP
addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_DOORBELL
bl doorbell_exception
@@ -2197,7 +2174,6 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
EXC_COMMON_BEGIN(hmi_exception_common)
GEN_COMMON hmi_exception
- FINISH_NAP
addi r3,r1,STACK_FRAME_OVERHEAD
bl handle_hmi_exception
b interrupt_return
@@ -2226,7 +2202,6 @@ EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20)
EXC_VIRT_END(h_doorbell, 0x4e80, 0x20)
EXC_COMMON_BEGIN(h_doorbell_common)
GEN_COMMON h_doorbell
- FINISH_NAP
addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_DOORBELL
bl doorbell_exception
@@ -2259,7 +2234,6 @@ EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20)
EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
EXC_COMMON_BEGIN(h_virt_irq_common)
GEN_COMMON h_virt_irq
- FINISH_NAP
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_IRQ
b interrupt_return
@@ -2305,7 +2279,6 @@ EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20)
EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
EXC_COMMON_BEGIN(performance_monitor_common)
GEN_COMMON performance_monitor
- FINISH_NAP
addi r3,r1,STACK_FRAME_OVERHEAD
bl performance_monitor_exception
b interrupt_return
@@ -3037,24 +3010,6 @@ USE_FIXED_SECTION(virt_trampolines)
__end_interrupts:
DEFINE_FIXED_SYMBOL(__end_interrupts)
-#ifdef CONFIG_PPC_970_NAP
- /*
- * Called by exception entry code if _TLF_NAPPING was set, this clears
- * the NAPPING flag, and redirects the exception exit to
- * power4_fixup_nap_return.
- */
- .globl power4_fixup_nap
-EXC_COMMON_BEGIN(power4_fixup_nap)
- andc r9,r9,r10
- std r9,TI_LOCAL_FLAGS(r11)
- LOAD_REG_ADDR(r10, power4_idle_nap_return)
- std r10,_NIP(r1)
- blr
-
-power4_idle_nap_return:
- blr
-#endif
-
CLOSE_FIXED_SECTION(real_vectors);
CLOSE_FIXED_SECTION(real_trampolines);
CLOSE_FIXED_SECTION(virt_vectors);
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 22f249b6f58d..27d2e6a72ec9 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -201,4 +201,8 @@ _GLOBAL(power4_idle_nap)
mtmsrd r7
isync
b 1b
+
+ .globl power4_idle_nap_return
+power4_idle_nap_return:
+ blr
#endif
--
2.23.0
^ permalink raw reply related
* [PATCH v3 18/19] powerpc/64s: runlatch interrupt handling in C
From: Nicholas Piggin @ 2020-11-28 14:41 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20201128144114.944000-1-npiggin@gmail.com>
There is no need for this to be in asm, use the new intrrupt entry wrapper.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 7 +++++++
arch/powerpc/kernel/exceptions-64s.S | 18 ------------------
2 files changed, 7 insertions(+), 18 deletions(-)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 5da7b0de545e..655fb668e2da 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -6,6 +6,7 @@
#include <linux/hardirq.h>
#include <asm/cputime.h>
#include <asm/ftrace.h>
+#include <asm/runlatch.h>
struct interrupt_state {
#ifdef CONFIG_PPC_BOOK3E_64
@@ -84,6 +85,12 @@ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt
static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (cpu_has_feature(CPU_FTR_CTRL) &&
+ !test_thread_local_flags(_TLF_RUNLATCH))
+ __ppc64_runlatch_on();
+#endif
+
interrupt_enter_prepare(regs, state);
irq_enter();
}
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index b6d914db01da..e75849a60578 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -692,14 +692,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
ld r1,GPR1(r1)
.endm
-#define RUNLATCH_ON \
-BEGIN_FTR_SECTION \
- ld r3, PACA_THREAD_INFO(r13); \
- ld r4,TI_LOCAL_FLAGS(r3); \
- andi. r0,r4,_TLF_RUNLATCH; \
- beql ppc64_runlatch_on_trampoline; \
-END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
-
/*
* When the idle code in power4_idle puts the CPU into NAP mode,
* it has to do so in a loop, and relies on the external interrupt
@@ -1582,7 +1574,6 @@ EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
EXC_COMMON_BEGIN(hardware_interrupt_common)
GEN_COMMON hardware_interrupt
FINISH_NAP
- RUNLATCH_ON
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_IRQ
b interrupt_return
@@ -1768,7 +1759,6 @@ EXC_VIRT_END(decrementer, 0x4900, 0x80)
EXC_COMMON_BEGIN(decrementer_common)
GEN_COMMON decrementer
FINISH_NAP
- RUNLATCH_ON
addi r3,r1,STACK_FRAME_OVERHEAD
bl timer_interrupt
b interrupt_return
@@ -1854,7 +1844,6 @@ EXC_VIRT_END(doorbell_super, 0x4a00, 0x100)
EXC_COMMON_BEGIN(doorbell_super_common)
GEN_COMMON doorbell_super
FINISH_NAP
- RUNLATCH_ON
addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_DOORBELL
bl doorbell_exception
@@ -2209,7 +2198,6 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
EXC_COMMON_BEGIN(hmi_exception_common)
GEN_COMMON hmi_exception
FINISH_NAP
- RUNLATCH_ON
addi r3,r1,STACK_FRAME_OVERHEAD
bl handle_hmi_exception
b interrupt_return
@@ -2239,7 +2227,6 @@ EXC_VIRT_END(h_doorbell, 0x4e80, 0x20)
EXC_COMMON_BEGIN(h_doorbell_common)
GEN_COMMON h_doorbell
FINISH_NAP
- RUNLATCH_ON
addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_DOORBELL
bl doorbell_exception
@@ -2273,7 +2260,6 @@ EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
EXC_COMMON_BEGIN(h_virt_irq_common)
GEN_COMMON h_virt_irq
FINISH_NAP
- RUNLATCH_ON
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_IRQ
b interrupt_return
@@ -2320,7 +2306,6 @@ EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
EXC_COMMON_BEGIN(performance_monitor_common)
GEN_COMMON performance_monitor
FINISH_NAP
- RUNLATCH_ON
addi r3,r1,STACK_FRAME_OVERHEAD
bl performance_monitor_exception
b interrupt_return
@@ -3035,9 +3020,6 @@ kvmppc_skip_Hinterrupt:
* come here.
*/
-EXC_COMMON_BEGIN(ppc64_runlatch_on_trampoline)
- b __ppc64_runlatch_on
-
USE_FIXED_SECTION(virt_trampolines)
/*
* All code below __end_interrupts is treated as soft-masked. If
--
2.23.0
^ permalink raw reply related
* [PATCH v3 17/19] powerpc/64s: move NMI soft-mask handling to C
From: Nicholas Piggin @ 2020-11-28 14:41 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20201128144114.944000-1-npiggin@gmail.com>
Saving and restoring soft-mask state can now be done in C using the
interrupt handler wrapper functions.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 26 ++++++++++++
arch/powerpc/kernel/exceptions-64s.S | 60 ----------------------------
2 files changed, 26 insertions(+), 60 deletions(-)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 8508b99d3455..5da7b0de545e 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -96,6 +96,10 @@ static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct int
struct interrupt_nmi_state {
#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S_64
+ u8 irq_soft_mask;
+ u8 irq_happened;
+#endif
u8 ftrace_enabled;
#endif
};
@@ -103,6 +107,21 @@ struct interrupt_nmi_state {
static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state)
{
#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S_64
+ state->irq_soft_mask = local_paca->irq_soft_mask;
+ state->irq_happened = local_paca->irq_happened;
+
+ /*
+ * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does
+ * the right thing, and set IRQ_HARD_DIS. We do not want to reconcile
+ * because that goes through irq tracing which we don't want in NMI.
+ */
+ local_paca->irq_soft_mask = IRQS_ALL_DISABLED;
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+ /* Don't do any per-CPU operations until interrupt state is fixed */
+ state->ftrace_enabled = this_cpu_get_ftrace_enabled();
+#endif
state->ftrace_enabled = this_cpu_get_ftrace_enabled();
this_cpu_set_ftrace_enabled(0);
#endif
@@ -126,6 +145,13 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter
#ifdef CONFIG_PPC64
this_cpu_set_ftrace_enabled(state->ftrace_enabled);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* Check we didn't change the pending interrupt mask. */
+ WARN_ON_ONCE((state->irq_happened | PACA_IRQ_HARD_DIS) != local_paca->irq_happened);
+ local_paca->irq_happened = state->irq_happened;
+ local_paca->irq_soft_mask = state->irq_soft_mask;
+#endif
#endif
}
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 2b447dd15d4c..b6d914db01da 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1008,20 +1008,6 @@ EXC_COMMON_BEGIN(system_reset_common)
ld r1,PACA_NMI_EMERG_SP(r13)
subi r1,r1,INT_FRAME_SIZE
__GEN_COMMON_BODY system_reset
- /*
- * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does
- * the right thing. We do not want to reconcile because that goes
- * through irq tracing which we don't want in NMI.
- *
- * Save PACAIRQHAPPENED to RESULT (otherwise unused), and set HARD_DIS
- * as we are running with MSR[EE]=0.
- */
- li r10,IRQS_ALL_DISABLED
- stb r10,PACAIRQSOFTMASK(r13)
- lbz r10,PACAIRQHAPPENED(r13)
- std r10,RESULT(r1)
- ori r10,r10,PACA_IRQ_HARD_DIS
- stb r10,PACAIRQHAPPENED(r13)
addi r3,r1,STACK_FRAME_OVERHEAD
bl system_reset_exception
@@ -1037,14 +1023,6 @@ EXC_COMMON_BEGIN(system_reset_common)
subi r10,r10,1
sth r10,PACA_IN_NMI(r13)
- /*
- * Restore soft mask settings.
- */
- ld r10,RESULT(r1)
- stb r10,PACAIRQHAPPENED(r13)
- ld r10,SOFTE(r1)
- stb r10,PACAIRQSOFTMASK(r13)
-
kuap_restore_amr r9, r10
EXCEPTION_RESTORE_REGS
RFI_TO_USER_OR_KERNEL
@@ -1190,30 +1168,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
li r10,MSR_RI
mtmsrd r10,1
- /*
- * Set IRQS_ALL_DISABLED and save PACAIRQHAPPENED (see
- * system_reset_common)
- */
- li r10,IRQS_ALL_DISABLED
- stb r10,PACAIRQSOFTMASK(r13)
- lbz r10,PACAIRQHAPPENED(r13)
- std r10,RESULT(r1)
- ori r10,r10,PACA_IRQ_HARD_DIS
- stb r10,PACAIRQHAPPENED(r13)
-
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_early
std r3,RESULT(r1) /* Save result */
ld r12,_MSR(r1)
- /*
- * Restore soft mask settings.
- */
- ld r10,RESULT(r1)
- stb r10,PACAIRQHAPPENED(r13)
- ld r10,SOFTE(r1)
- stb r10,PACAIRQSOFTMASK(r13)
-
#ifdef CONFIG_PPC_P7_NAP
/*
* Check if thread was in power saving mode. We come here when any
@@ -2815,17 +2774,6 @@ EXC_COMMON_BEGIN(soft_nmi_common)
subi r1,r1,INT_FRAME_SIZE
__GEN_COMMON_BODY soft_nmi
- /*
- * Set IRQS_ALL_DISABLED and save PACAIRQHAPPENED (see
- * system_reset_common)
- */
- li r10,IRQS_ALL_DISABLED
- stb r10,PACAIRQSOFTMASK(r13)
- lbz r10,PACAIRQHAPPENED(r13)
- std r10,RESULT(r1)
- ori r10,r10,PACA_IRQ_HARD_DIS
- stb r10,PACAIRQHAPPENED(r13)
-
addi r3,r1,STACK_FRAME_OVERHEAD
bl soft_nmi_interrupt
@@ -2833,14 +2781,6 @@ EXC_COMMON_BEGIN(soft_nmi_common)
li r9,0
mtmsrd r9,1
- /*
- * Restore soft mask settings.
- */
- ld r10,RESULT(r1)
- stb r10,PACAIRQHAPPENED(r13)
- ld r10,SOFTE(r1)
- stb r10,PACAIRQSOFTMASK(r13)
-
kuap_restore_amr r9, r10
EXCEPTION_RESTORE_REGS hsrr=0
RFI_TO_KERNEL
--
2.23.0
^ permalink raw reply related
* [PATCH v3 16/19] powerpc: move NMI entry/exit code into wrapper
From: Nicholas Piggin @ 2020-11-28 14:41 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20201128144114.944000-1-npiggin@gmail.com>
This moves the common NMI entry and exit code into the interrupt handler
wrappers.
This changes the behaviour of soft-NMI (watchdog) and HMI interrupts, and
also MCE interrupts on 64e, by adding missing parts of the NMI entry to
them. It fixes a bug with sreset on pseries HPT guests which shouldn't
call nmi_enter().
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 24 ++++++++++++++++
arch/powerpc/kernel/mce.c | 11 --------
arch/powerpc/kernel/traps.c | 42 +++++-----------------------
arch/powerpc/kernel/watchdog.c | 10 +++----
4 files changed, 35 insertions(+), 52 deletions(-)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 26aa897d251b..8508b99d3455 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -95,14 +95,38 @@ static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct int
}
struct interrupt_nmi_state {
+#ifdef CONFIG_PPC64
+ u8 ftrace_enabled;
+#endif
};
static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state)
{
+#ifdef CONFIG_PPC64
+ state->ftrace_enabled = this_cpu_get_ftrace_enabled();
+ this_cpu_set_ftrace_enabled(0);
+#endif
+
+ /*
+ * Do not use nmi_enter() for pseries hash guest taking a real-mode
+ * NMI because not everything it touches is within the RMA limit.
+ */
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) ||
+ !firmware_has_feature(FW_FEATURE_LPAR) ||
+ radix_enabled() || (mfmsr() & MSR_DR))
+ nmi_enter();
}
static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state)
{
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) ||
+ !firmware_has_feature(FW_FEATURE_LPAR) ||
+ radix_enabled() || (mfmsr() & MSR_DR))
+ nmi_exit();
+
+#ifdef CONFIG_PPC64
+ this_cpu_set_ftrace_enabled(state->ftrace_enabled);
+#endif
}
/**
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index b84459f45b1a..9f39deed4fca 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -592,12 +592,6 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info);
DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early)
{
long handled = 0;
- u8 ftrace_enabled = this_cpu_get_ftrace_enabled();
-
- this_cpu_set_ftrace_enabled(0);
- /* Do not use nmi_enter/exit for pseries hpte guest */
- if (radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR))
- nmi_enter();
hv_nmi_check_nonrecoverable(regs);
@@ -607,11 +601,6 @@ DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early)
if (ppc_md.machine_check_early)
handled = ppc_md.machine_check_early(regs);
- if (radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR))
- nmi_exit();
-
- this_cpu_set_ftrace_enabled(ftrace_enabled);
-
return handled;
}
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 06b638a7f0b0..1b881ef5f07b 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -441,11 +441,6 @@ DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception)
{
unsigned long hsrr0, hsrr1;
bool saved_hsrrs = false;
- u8 ftrace_enabled = this_cpu_get_ftrace_enabled();
-
- this_cpu_set_ftrace_enabled(0);
-
- nmi_enter();
/*
* System reset can interrupt code where HSRRs are live and MSR[RI]=1.
@@ -517,10 +512,6 @@ DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception)
mtspr(SPRN_HSRR1, hsrr1);
}
- nmi_exit();
-
- this_cpu_set_ftrace_enabled(ftrace_enabled);
-
/* What should we do here? We could issue a shutdown or hard reset. */
return 0;
@@ -823,6 +814,12 @@ int machine_check_generic(struct pt_regs *regs)
#endif /* everything else */
+/*
+ * BOOK3S_64 does not call this handler as a non-maskable interrupt
+ * (it uses its own early real-mode handler to handle the MCE proper
+ * and then raises irq_work to call this handler when interrupts are
+ * enabled).
+ */
#ifdef CONFIG_PPC_BOOK3S_64
DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception)
#else
@@ -831,20 +828,6 @@ DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception)
{
int recover = 0;
- /*
- * BOOK3S_64 does not call this handler as a non-maskable interrupt
- * (it uses its own early real-mode handler to handle the MCE proper
- * and then raises irq_work to call this handler when interrupts are
- * enabled).
- *
- * This is silly. The BOOK3S_64 should just call a different function
- * rather than expecting semantics to magically change. Something
- * like 'non_nmi_machine_check_exception()', perhaps?
- */
- const bool nmi = !IS_ENABLED(CONFIG_PPC_BOOK3S_64);
-
- if (nmi) nmi_enter();
-
__this_cpu_inc(irq_stat.mce_exceptions);
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
@@ -869,24 +852,17 @@ DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception)
if (check_io_access(regs))
goto bail;
- if (nmi) nmi_exit();
-
die("Machine check", regs, SIGBUS);
/* Must die if the interrupt is not recoverable */
if (!(regs->msr & MSR_RI))
die("Unrecoverable Machine check", regs, SIGBUS);
-#ifdef CONFIG_PPC_BOOK3S_64
bail:
+#ifdef CONFIG_PPC_BOOK3S_64
return;
#else
return 0;
-
-bail:
- if (nmi) nmi_exit();
-
- return 0;
#endif
}
NOKPROBE_SYMBOL(machine_check_exception);
@@ -1902,14 +1878,10 @@ DEFINE_INTERRUPT_HANDLER(vsx_unavailable_tm)
#ifdef CONFIG_PPC64
DEFINE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi)
{
- nmi_enter();
-
__this_cpu_inc(irq_stat.pmu_irqs);
perf_irq(regs);
- nmi_exit();
-
return 0;
}
#endif
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 824b9376ac35..dc39534836a3 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -254,11 +254,12 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
int cpu = raw_smp_processor_id();
u64 tb;
+ /* should only arrive from kernel, with irqs disabled */
+ WARN_ON_ONCE(!arch_irq_disabled_regs(regs));
+
if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
return 0;
- nmi_enter();
-
__this_cpu_inc(irq_stat.soft_nmi_irqs);
tb = get_tb();
@@ -266,7 +267,7 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
wd_smp_lock(&flags);
if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) {
wd_smp_unlock(&flags);
- goto out;
+ return 0;
}
set_cpu_stuck(cpu, tb);
@@ -290,9 +291,6 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
if (wd_panic_timeout_tb < 0x7fffffff)
mtspr(SPRN_DEC, wd_panic_timeout_tb);
-out:
- nmi_exit();
-
return 0;
}
--
2.23.0
^ permalink raw reply related
* [PATCH v3 15/19] powerpc/64: entry cpu time accounting in C
From: Nicholas Piggin @ 2020-11-28 14:41 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20201128144114.944000-1-npiggin@gmail.com>
There is no need for this to be in asm, use the new intrrupt entry wrapper.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 7 +++++++
arch/powerpc/include/asm/ppc_asm.h | 24 ------------------------
arch/powerpc/kernel/exceptions-64e.S | 1 -
arch/powerpc/kernel/exceptions-64s.S | 5 -----
4 files changed, 7 insertions(+), 30 deletions(-)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index c31fa26b3f60..26aa897d251b 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -4,6 +4,7 @@
#include <linux/context_tracking.h>
#include <linux/hardirq.h>
+#include <asm/cputime.h>
#include <asm/ftrace.h>
struct interrupt_state {
@@ -25,6 +26,9 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
if (user_mode(regs)) {
CT_WARN_ON(ct_state() != CONTEXT_USER);
user_exit_irqoff();
+
+ account_cpu_user_entry();
+ account_stolen_time();
} else {
/*
* CT_WARN_ON comes here via program_check_exception,
@@ -38,6 +42,9 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
#ifdef CONFIG_PPC_BOOK3E_64
state->ctx_state = exception_enter();
#endif
+
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) && user_mode(regs))
+ account_cpu_user_entry();
}
/*
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 511786f0e40d..c1199f6c75a3 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -25,7 +25,6 @@
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb)
#define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb)
-#define ACCOUNT_STOLEN_TIME
#else
#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb) \
MFTB(ra); /* get timebase */ \
@@ -44,29 +43,6 @@
PPC_LL ra, ACCOUNT_SYSTEM_TIME(ptr); \
add ra,ra,rb; /* add on to system time */ \
PPC_STL ra, ACCOUNT_SYSTEM_TIME(ptr)
-
-#ifdef CONFIG_PPC_SPLPAR
-#define ACCOUNT_STOLEN_TIME \
-BEGIN_FW_FTR_SECTION; \
- beq 33f; \
- /* from user - see if there are any DTL entries to process */ \
- ld r10,PACALPPACAPTR(r13); /* get ptr to VPA */ \
- ld r11,PACA_DTL_RIDX(r13); /* get log read index */ \
- addi r10,r10,LPPACA_DTLIDX; \
- LDX_BE r10,0,r10; /* get log write index */ \
- cmpd cr1,r11,r10; \
- beq+ cr1,33f; \
- bl accumulate_stolen_time; \
- ld r12,_MSR(r1); \
- andi. r10,r12,MSR_PR; /* Restore cr0 (coming from user) */ \
-33: \
-END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
-
-#else /* CONFIG_PPC_SPLPAR */
-#define ACCOUNT_STOLEN_TIME
-
-#endif /* CONFIG_PPC_SPLPAR */
-
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
/*
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index dc728bb1c89a..e6b5a362fc91 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -398,7 +398,6 @@ exc_##n##_common: \
std r10,_NIP(r1); /* save SRR0 to stackframe */ \
std r11,_MSR(r1); /* save SRR1 to stackframe */ \
beq 2f; /* if from kernel mode */ \
- ACCOUNT_CPU_USER_ENTRY(r13,r10,r11);/* accounting (uses cr0+eq) */ \
2: ld r3,excf+EX_R10(r13); /* get back r10 */ \
ld r4,excf+EX_R11(r13); /* get back r11 */ \
mfspr r5,scratch; /* get back r13 */ \
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 647b7743bc9c..2b447dd15d4c 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -577,7 +577,6 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
kuap_save_amr_and_lock r9, r10, cr1, cr0
.endif
beq 101f /* if from kernel mode */
- ACCOUNT_CPU_USER_ENTRY(r13, r9, r10)
BEGIN_FTR_SECTION
ld r9,IAREA+EX_PPR(r13) /* Read PPR from paca */
std r9,_PPR(r1)
@@ -645,10 +644,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
ld r11,exception_marker@toc(r2)
std r10,RESULT(r1) /* clear regs->result */
std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame */
-
- .if ISTACK
- ACCOUNT_STOLEN_TIME
- .endif
.endm
/*
--
2.23.0
^ permalink raw reply related
* [PATCH v3 14/19] powerpc/64: move account_stolen_time into its own function
From: Nicholas Piggin @ 2020-11-28 14:41 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20201128144114.944000-1-npiggin@gmail.com>
This will be used by interrupt entry as well.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/cputime.h | 15 +++++++++++++++
arch/powerpc/kernel/syscall_64.c | 10 +---------
2 files changed, 16 insertions(+), 9 deletions(-)
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index ed75d1c318e3..3f61604e1fcf 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -87,6 +87,18 @@ static notrace inline void account_cpu_user_exit(void)
acct->starttime_user = tb;
}
+static notrace inline void account_stolen_time(void)
+{
+#ifdef CONFIG_PPC_SPLPAR
+ if (IS_ENABLED(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) &&
+ firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ struct lppaca *lp = local_paca->lppaca_ptr;
+
+ if (unlikely(local_paca->dtl_ridx != be64_to_cpu(lp->dtl_idx)))
+ accumulate_stolen_time();
+ }
+#endif
+}
#endif /* __KERNEL__ */
#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
@@ -96,5 +108,8 @@ static inline void account_cpu_user_entry(void)
static inline void account_cpu_user_exit(void)
{
}
+static notrace inline void account_stolen_time(void)
+{
+}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#endif /* __POWERPC_CPUTIME_H */
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
index 668a4557fc3f..8ab5e16d640c 100644
--- a/arch/powerpc/kernel/syscall_64.c
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -45,15 +45,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
account_cpu_user_entry();
-#ifdef CONFIG_PPC_SPLPAR
- if (IS_ENABLED(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) &&
- firmware_has_feature(FW_FEATURE_SPLPAR)) {
- struct lppaca *lp = local_paca->lppaca_ptr;
-
- if (unlikely(local_paca->dtl_ridx != be64_to_cpu(lp->dtl_idx)))
- accumulate_stolen_time();
- }
-#endif
+ account_stolen_time();
/*
* This is not required for the syscall exit path, but makes the
--
2.23.0
^ permalink raw reply related
* [PATCH v3 12/19] powerpc/64s: move context tracking exit to interrupt exit path
From: Nicholas Piggin @ 2020-11-28 14:41 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20201128144114.944000-1-npiggin@gmail.com>
The interrupt handler wrapper functions are not the ideal place to
maintain context tracking because after they return, the low level exit
code must then determine if there are interrupts to replay, or if the
task should be preempted, etc. Those paths (e.g., schedule_user) include
their own exception_enter/exit pairs to fix this up but it's a bit hacky
(see schedule_user() comments).
Ideally context tracking will go to user mode only when there are no
more interrupts or context switches or other exit processing work to
handle.
64e can not do this because it does not use the C interrupt exit code.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 34 +++++++++++++++++++++++++---
arch/powerpc/kernel/syscall_64.c | 9 ++++++++
2 files changed, 40 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 21acf0d0154f..542ad8e9c313 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -7,16 +7,30 @@
#include <asm/ftrace.h>
struct interrupt_state {
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3E_64
enum ctx_state ctx_state;
#endif
};
static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3E_64
state->ctx_state = exception_enter();
#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (user_mode(regs)) {
+ CT_WARN_ON(ct_state() != CONTEXT_USER);
+ user_exit_irqoff();
+ } else {
+ /*
+ * CT_WARN_ON comes here via program_check_exception,
+ * so avoid recursion.
+ */
+ if (TRAP(regs) != 0x700)
+ CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
+ }
+#endif
}
/*
@@ -35,9 +49,23 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
*/
static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3E_64
exception_exit(state->ctx_state);
#endif
+
+ /*
+ * Book3S exits to user via interrupt_exit_user_prepare(), which does
+ * context tracking, which is a cleaner way to handle PREEMPT=y
+ * and avoid context entry/exit in e.g., preempt_schedule_irq()),
+ * which is likely to be where the core code wants to end up.
+ *
+ * The above comment explains why we can't do the
+ *
+ * if (user_mode(regs))
+ * user_exit_irqoff();
+ *
+ * sequence here.
+ */
}
static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
index 9e855b25bd15..668a4557fc3f 100644
--- a/arch/powerpc/kernel/syscall_64.c
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -275,6 +275,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
BUG_ON(!(regs->msr & MSR_PR));
BUG_ON(!FULL_REGS(regs));
BUG_ON(regs->softe != IRQS_ENABLED);
+ CT_WARN_ON(ct_state() == CONTEXT_USER);
/*
* We don't need to restore AMR on the way back to userspace for KUAP.
@@ -317,7 +318,9 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
}
}
+ user_enter_irqoff();
if (unlikely(!prep_irq_for_enabled_exit(true))) {
+ user_exit_irqoff();
local_irq_enable();
local_irq_disable();
goto again;
@@ -358,6 +361,12 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
unrecoverable_exception(regs);
BUG_ON(regs->msr & MSR_PR);
BUG_ON(!FULL_REGS(regs));
+ /*
+ * CT_WARN_ON comes here via program_check_exception,
+ * so avoid recursion.
+ */
+ if (TRAP(regs) != 0x700)
+ CT_WARN_ON(ct_state() == CONTEXT_USER);
amr = kuap_get_and_check_amr();
--
2.23.0
^ permalink raw reply related
* [PATCH v3 11/19] powerpc: handle irq_enter/irq_exit in interrupt handler wrappers
From: Nicholas Piggin @ 2020-11-28 14:41 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20201128144114.944000-1-npiggin@gmail.com>
Move irq_enter/irq_exit into asynchronous interrupt handler wrappers.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 2 ++
arch/powerpc/kernel/dbell.c | 3 +--
arch/powerpc/kernel/irq.c | 4 ----
arch/powerpc/kernel/tau_6xx.c | 3 ---
arch/powerpc/kernel/time.c | 4 ++--
arch/powerpc/kernel/traps.c | 6 ------
6 files changed, 5 insertions(+), 17 deletions(-)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 8f7db1c8cbe0..21acf0d0154f 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -43,10 +43,12 @@ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt
static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
interrupt_enter_prepare(regs, state);
+ irq_enter();
}
static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
+ irq_exit();
interrupt_exit_prepare(regs, state);
}
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index c0f99f8ffa7d..84ee9c511459 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -22,7 +22,6 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
#ifdef CONFIG_SMP
struct pt_regs *old_regs = set_irq_regs(regs);
- irq_enter();
trace_doorbell_entry(regs);
ppc_msgsync();
@@ -35,7 +34,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
smp_ipi_demux_relaxed(); /* already performed the barrier */
trace_doorbell_exit(regs);
- irq_exit();
+
set_irq_regs(old_regs);
#else /* CONFIG_SMP */
printk(KERN_WARNING "Received doorbell on non-smp system\n");
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 2055d204d08e..681abb7c0507 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -641,8 +641,6 @@ void __do_irq(struct pt_regs *regs)
{
unsigned int irq;
- irq_enter();
-
trace_irq_entry(regs);
/*
@@ -662,8 +660,6 @@ void __do_irq(struct pt_regs *regs)
generic_handle_irq(irq);
trace_irq_exit(regs);
-
- irq_exit();
}
DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ)
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index 46b2e5de4ef5..d864f07bab74 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -104,12 +104,9 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(TAUException)
{
int cpu = smp_processor_id();
- irq_enter();
tau[cpu].interrupts++;
TAUupdate(cpu);
-
- irq_exit();
}
#endif /* CONFIG_TAU_INT */
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 0089df57bb49..7dd0ab55e700 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -585,7 +585,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
#endif
old_regs = set_irq_regs(regs);
- irq_enter();
+
trace_timer_interrupt_entry(regs);
if (test_irq_work_pending()) {
@@ -610,7 +610,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
}
trace_timer_interrupt_exit(regs);
- irq_exit();
+
set_irq_regs(old_regs);
}
EXPORT_SYMBOL(timer_interrupt);
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index f40b45086b93..06b638a7f0b0 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1082,7 +1082,6 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception)
struct pt_regs *old_regs;
old_regs = set_irq_regs(regs);
- irq_enter();
#ifdef CONFIG_VSX
/* Real mode flagged P9 special emu is needed */
@@ -1102,7 +1101,6 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception)
if (ppc_md.handle_hmi_exception)
ppc_md.handle_hmi_exception(regs);
- irq_exit();
set_irq_regs(old_regs);
}
@@ -1918,13 +1916,9 @@ DEFINE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi)
DEFINE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async)
{
- irq_enter();
-
__this_cpu_inc(irq_stat.pmu_irqs);
perf_irq(regs);
-
- irq_exit();
}
DEFINE_INTERRUPT_HANDLER_RAW(performance_monitor_exception)
--
2.23.0
^ permalink raw reply related
* [PATCH v3 13/19] powerpc/64s: reconcile interrupts in C
From: Nicholas Piggin @ 2020-11-28 14:41 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20201128144114.944000-1-npiggin@gmail.com>
There is no need for this to be in asm, use the new intrrupt entry wrapper.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 15 +++++++++++----
arch/powerpc/kernel/exceptions-64s.S | 26 --------------------------
2 files changed, 11 insertions(+), 30 deletions(-)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 542ad8e9c313..c31fa26b3f60 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -14,11 +14,14 @@ struct interrupt_state {
static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
-#ifdef CONFIG_PPC_BOOK3E_64
- state->ctx_state = exception_enter();
-#endif
-
+ /*
+ * Book3E reconciles irq soft mask in asm
+ */
#ifdef CONFIG_PPC_BOOK3S_64
+ if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED)
+ trace_hardirqs_off();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
if (user_mode(regs)) {
CT_WARN_ON(ct_state() != CONTEXT_USER);
user_exit_irqoff();
@@ -31,6 +34,10 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
}
#endif
+
+#ifdef CONFIG_PPC_BOOK3E_64
+ state->ctx_state = exception_enter();
+#endif
}
/*
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index f6ec455db13c..647b7743bc9c 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -139,7 +139,6 @@ name:
#define IKVM_VIRT .L_IKVM_VIRT_\name\() /* Virt entry tests KVM */
#define ISTACK .L_ISTACK_\name\() /* Set regular kernel stack */
#define __ISTACK(name) .L_ISTACK_ ## name
-#define IRECONCILE .L_IRECONCILE_\name\() /* Do RECONCILE_IRQ_STATE */
#define IKUAP .L_IKUAP_\name\() /* Do KUAP lock */
#define INT_DEFINE_BEGIN(n) \
@@ -203,9 +202,6 @@ do_define_int n
.ifndef ISTACK
ISTACK=1
.endif
- .ifndef IRECONCILE
- IRECONCILE=1
- .endif
.ifndef IKUAP
IKUAP=1
.endif
@@ -653,10 +649,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
.if ISTACK
ACCOUNT_STOLEN_TIME
.endif
-
- .if IRECONCILE
- RECONCILE_IRQ_STATE(r10, r11)
- .endif
.endm
/*
@@ -935,7 +927,6 @@ INT_DEFINE_BEGIN(system_reset)
*/
ISET_RI=0
ISTACK=0
- IRECONCILE=0
IKVM_REAL=1
INT_DEFINE_END(system_reset)
@@ -1123,7 +1114,6 @@ INT_DEFINE_BEGIN(machine_check_early)
ISTACK=0
IDAR=1
IDSISR=1
- IRECONCILE=0
IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */
INT_DEFINE_END(machine_check_early)
@@ -1476,7 +1466,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
INT_DEFINE_BEGIN(data_access_slb)
IVEC=0x380
IAREA=PACA_EXSLB
- IRECONCILE=0
IDAR=1
IKVM_SKIP=1
IKVM_REAL=1
@@ -1503,7 +1492,6 @@ MMU_FTR_SECTION_ELSE
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_bad_slb_fault
b interrupt_return
@@ -1565,7 +1553,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
INT_DEFINE_BEGIN(instruction_access_slb)
IVEC=0x480
IAREA=PACA_EXSLB
- IRECONCILE=0
IISIDE=1
IDAR=1
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
@@ -1594,7 +1581,6 @@ MMU_FTR_SECTION_ELSE
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_bad_slb_fault
b interrupt_return
@@ -1754,7 +1740,6 @@ EXC_COMMON_BEGIN(program_check_common)
*/
INT_DEFINE_BEGIN(fp_unavailable)
IVEC=0x800
- IRECONCILE=0
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
@@ -1769,7 +1754,6 @@ EXC_VIRT_END(fp_unavailable, 0x4800, 0x100)
EXC_COMMON_BEGIN(fp_unavailable_common)
GEN_COMMON fp_unavailable
bne 1f /* if from user, just load it up */
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl kernel_fp_unavailable_exception
0: trap
@@ -1788,7 +1772,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
b fast_interrupt_return
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl fp_unavailable_tm
b interrupt_return
@@ -1853,7 +1836,6 @@ INT_DEFINE_BEGIN(hdecrementer)
IVEC=0x980
IHSRR=1
ISTACK=0
- IRECONCILE=0
IKVM_REAL=1
IKVM_VIRT=1
INT_DEFINE_END(hdecrementer)
@@ -2227,7 +2209,6 @@ INT_DEFINE_BEGIN(hmi_exception_early)
IHSRR=1
IREALMODE_COMMON=1
ISTACK=0
- IRECONCILE=0
IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */
IKVM_REAL=1
INT_DEFINE_END(hmi_exception_early)
@@ -2401,7 +2382,6 @@ EXC_COMMON_BEGIN(performance_monitor_common)
*/
INT_DEFINE_BEGIN(altivec_unavailable)
IVEC=0xf20
- IRECONCILE=0
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
@@ -2431,7 +2411,6 @@ BEGIN_FTR_SECTION
b fast_interrupt_return
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl altivec_unavailable_tm
b interrupt_return
@@ -2439,7 +2418,6 @@ BEGIN_FTR_SECTION
1:
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl altivec_unavailable_exception
b interrupt_return
@@ -2455,7 +2433,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
*/
INT_DEFINE_BEGIN(vsx_unavailable)
IVEC=0xf40
- IRECONCILE=0
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
@@ -2484,7 +2461,6 @@ BEGIN_FTR_SECTION
b load_up_vsx
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl vsx_unavailable_tm
b interrupt_return
@@ -2492,7 +2468,6 @@ BEGIN_FTR_SECTION
1:
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl vsx_unavailable_exception
b interrupt_return
@@ -2827,7 +2802,6 @@ EXC_VIRT_NONE(0x5800, 0x100)
INT_DEFINE_BEGIN(soft_nmi)
IVEC=0x900
ISTACK=0
- IRECONCILE=0 /* Soft-NMI may fire under local_irq_disable */
INT_DEFINE_END(soft_nmi)
/*
--
2.23.0
^ permalink raw reply related
* [PATCH v3 10/19] powerpc/64: add context tracking to asynchronous interrupts
From: Nicholas Piggin @ 2020-11-28 14:41 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20201128144114.944000-1-npiggin@gmail.com>
Previously context tracking was not done for asynchronous interrupts,
(those that run in interrupt context), and if those would cause a
reschedule when they exit, then scheduling functions (schedule_user,
preempt_schedule_irq) call exception_enter/exit to fix this up and
exit user context.
This is a hack we would like to get away from, so do context tracking
for asynchronous interrupts too.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index e2410f54c6e9..8f7db1c8cbe0 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -42,10 +42,12 @@ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt
static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
+ interrupt_enter_prepare(regs, state);
}
static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
+ interrupt_exit_prepare(regs, state);
}
struct interrupt_nmi_state {
--
2.23.0
^ permalink raw reply related
* [PATCH v3 09/19] powerpc/64: context tracking move to interrupt wrappers
From: Nicholas Piggin @ 2020-11-28 14:41 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20201128144114.944000-1-npiggin@gmail.com>
This moves exception_enter/exit calls to wrapper functions for
synchronous interrupts. More interrupt handlers are covered by
this than previously.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 9 ++++
arch/powerpc/kernel/traps.c | 74 ++++++---------------------
arch/powerpc/mm/book3s64/hash_utils.c | 2 -
arch/powerpc/mm/fault.c | 3 --
4 files changed, 26 insertions(+), 62 deletions(-)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 2ab0675c27aa..e2410f54c6e9 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -7,10 +7,16 @@
#include <asm/ftrace.h>
struct interrupt_state {
+#ifdef CONFIG_PPC64
+ enum ctx_state ctx_state;
+#endif
};
static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
+#ifdef CONFIG_PPC64
+ state->ctx_state = exception_enter();
+#endif
}
/*
@@ -29,6 +35,9 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
*/
static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
+#ifdef CONFIG_PPC64
+ exception_exit(state->ctx_state);
+#endif
}
static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 8a6cb2838ce6..f40b45086b93 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1108,41 +1108,28 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception)
DEFINE_INTERRUPT_HANDLER(unknown_exception)
{
- enum ctx_state prev_state = exception_enter();
-
printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
regs->nip, regs->msr, regs->trap);
_exception(SIGTRAP, regs, TRAP_UNK, 0);
-
- exception_exit(prev_state);
}
DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception)
{
- enum ctx_state prev_state = exception_enter();
-
printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
regs->nip, regs->msr, regs->trap);
_exception(SIGTRAP, regs, TRAP_UNK, 0);
-
- exception_exit(prev_state);
}
DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception)
{
- enum ctx_state prev_state = exception_enter();
-
if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5,
5, SIGTRAP) == NOTIFY_STOP)
- goto bail;
+ return;
if (debugger_iabr_match(regs))
- goto bail;
+ return;
_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
-
-bail:
- exception_exit(prev_state);
}
DEFINE_INTERRUPT_HANDLER(RunModeException)
@@ -1152,8 +1139,6 @@ DEFINE_INTERRUPT_HANDLER(RunModeException)
DEFINE_INTERRUPT_HANDLER(single_step_exception)
{
- enum ctx_state prev_state = exception_enter();
-
clear_single_step(regs);
clear_br_trace(regs);
@@ -1162,14 +1147,11 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception)
if (notify_die(DIE_SSTEP, "single_step", regs, 5,
5, SIGTRAP) == NOTIFY_STOP)
- goto bail;
+ return;
if (debugger_sstep(regs))
- goto bail;
+ return;
_exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
-
-bail:
- exception_exit(prev_state);
}
NOKPROBE_SYMBOL(single_step_exception);
@@ -1495,7 +1477,6 @@ static inline int emulate_math(struct pt_regs *regs) { return -1; }
DEFINE_INTERRUPT_HANDLER(program_check_exception)
{
- enum ctx_state prev_state = exception_enter();
unsigned int reason = get_reason(regs);
/* We can now get here via a FP Unavailable exception if the core
@@ -1504,22 +1485,22 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
if (reason & REASON_FP) {
/* IEEE FP exception */
parse_fpe(regs);
- goto bail;
+ return;
}
if (reason & REASON_TRAP) {
unsigned long bugaddr;
/* Debugger is first in line to stop recursive faults in
* rcu_lock, notify_die, or atomic_notifier_call_chain */
if (debugger_bpt(regs))
- goto bail;
+ return;
if (kprobe_handler(regs))
- goto bail;
+ return;
/* trap exception */
if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP)
== NOTIFY_STOP)
- goto bail;
+ return;
bugaddr = regs->nip;
/*
@@ -1531,10 +1512,10 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
if (!(regs->msr & MSR_PR) && /* not user-mode */
report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
regs->nip += 4;
- goto bail;
+ return;
}
_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
- goto bail;
+ return;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (reason & REASON_TM) {
@@ -1555,7 +1536,7 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
*/
if (user_mode(regs)) {
_exception(SIGILL, regs, ILL_ILLOPN, regs->nip);
- goto bail;
+ return;
} else {
printk(KERN_EMERG "Unexpected TM Bad Thing exception "
"at %lx (msr 0x%lx) tm_scratch=%llx\n",
@@ -1586,7 +1567,7 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
* pattern to occurrences etc. -dgibson 31/Mar/2003
*/
if (!emulate_math(regs))
- goto bail;
+ return;
/* Try to emulate it if we should. */
if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) {
@@ -1594,10 +1575,10 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
case 0:
regs->nip += 4;
emulate_single_step(regs);
- goto bail;
+ return;
case -EFAULT:
_exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
- goto bail;
+ return;
}
}
@@ -1606,9 +1587,6 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
_exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
else
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
-
-bail:
- exception_exit(prev_state);
}
NOKPROBE_SYMBOL(program_check_exception);
@@ -1625,14 +1603,12 @@ NOKPROBE_SYMBOL(emulation_assist_interrupt);
DEFINE_INTERRUPT_HANDLER(alignment_exception)
{
- enum ctx_state prev_state = exception_enter();
int sig, code, fixed = 0;
unsigned long reason;
interrupt_cond_local_irq_enable(regs);
reason = get_reason(regs);
-
if (reason & REASON_BOUNDARY) {
sig = SIGBUS;
code = BUS_ADRALN;
@@ -1640,7 +1616,7 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception)
}
if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT))
- goto bail;
+ return;
/* we don't implement logging of alignment exceptions */
if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS))
@@ -1650,7 +1626,7 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception)
/* skip over emulated instruction */
regs->nip += inst_length(reason);
emulate_single_step(regs);
- goto bail;
+ return;
}
/* Operand address was bad */
@@ -1666,9 +1642,6 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception)
_exception(sig, regs, code, regs->dar);
else
bad_page_fault(regs, sig);
-
-bail:
- exception_exit(prev_state);
}
DEFINE_INTERRUPT_HANDLER(StackOverflow)
@@ -1682,41 +1655,28 @@ DEFINE_INTERRUPT_HANDLER(StackOverflow)
DEFINE_INTERRUPT_HANDLER(stack_overflow_exception)
{
- enum ctx_state prev_state = exception_enter();
-
die("Kernel stack overflow", regs, SIGSEGV);
-
- exception_exit(prev_state);
}
DEFINE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception)
{
- enum ctx_state prev_state = exception_enter();
-
printk(KERN_EMERG "Unrecoverable FP Unavailable Exception "
"%lx at %lx\n", regs->trap, regs->nip);
die("Unrecoverable FP Unavailable Exception", regs, SIGABRT);
-
- exception_exit(prev_state);
}
DEFINE_INTERRUPT_HANDLER(altivec_unavailable_exception)
{
- enum ctx_state prev_state = exception_enter();
-
if (user_mode(regs)) {
/* A user program has executed an altivec instruction,
but this kernel doesn't support altivec. */
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
- goto bail;
+ return;
}
printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception "
"%lx at %lx\n", regs->trap, regs->nip);
die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT);
-
-bail:
- exception_exit(prev_state);
}
DEFINE_INTERRUPT_HANDLER(vsx_unavailable_exception)
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index a48c484b9e9b..39dcfaf7ba36 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1289,7 +1289,6 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
unsigned long flags)
{
bool is_thp;
- enum ctx_state prev_state = exception_enter();
pgd_t *pgdir;
unsigned long vsid;
pte_t *ptep;
@@ -1489,7 +1488,6 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
DBG_LOW(" -> rc=%d\n", rc);
bail:
- exception_exit(prev_state);
return rc;
}
EXPORT_SYMBOL_GPL(hash_page_mm);
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 4896f44c8374..2778fe0a41d5 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -550,7 +550,6 @@ NOKPROBE_SYMBOL(__do_page_fault);
DEFINE_INTERRUPT_HANDLER_RET(do_page_fault)
{
- enum ctx_state prev_state = exception_enter();
unsigned long address = regs->dar;
unsigned long error_code = regs->dsisr;
long err;
@@ -573,8 +572,6 @@ DEFINE_INTERRUPT_HANDLER_RET(do_page_fault)
}
#endif
- exception_exit(prev_state);
-
return err;
}
NOKPROBE_SYMBOL(do_page_fault);
--
2.23.0
^ permalink raw reply related
* [PATCH v3 08/19] powerpc/64: context tracking remove _TIF_NOHZ
From: Nicholas Piggin @ 2020-11-28 14:41 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20201128144114.944000-1-npiggin@gmail.com>
Add context tracking to the system call handler explicitly, and remove
_TIF_NOHZ.
This saves 35 cycles on gettid system call cost on POWER9 with a
CONFIG_NOHZ_FULL kernel.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/Kconfig | 6 ------
arch/powerpc/Kconfig | 1 -
arch/powerpc/include/asm/thread_info.h | 4 +---
arch/powerpc/kernel/ptrace/ptrace.c | 4 ----
arch/powerpc/kernel/signal.c | 4 ----
arch/powerpc/kernel/syscall_64.c | 10 ++++++++++
6 files changed, 11 insertions(+), 18 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 56b6ccc0e32d..a0b6213f7820 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -618,12 +618,6 @@ config HAVE_CONTEXT_TRACKING
protected inside rcu_irq_enter/rcu_irq_exit() but preemption or signal
handling on irq exit still need to be protected.
-config HAVE_TIF_NOHZ
- bool
- help
- Arch relies on TIF_NOHZ and syscall slow path to implement context
- tracking calls to user_enter()/user_exit().
-
config HAVE_VIRT_CPU_ACCOUNTING
bool
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e9f13fe08492..6eaf12a504f8 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -193,7 +193,6 @@ config PPC
select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
select HAVE_CONTEXT_TRACKING if PPC64
- select HAVE_TIF_NOHZ if PPC64
select HAVE_DEBUG_KMEMLEAK
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DYNAMIC_FTRACE
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 46a210b03d2b..c9443c16e5fb 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -95,7 +95,6 @@ void arch_setup_new_exec(void);
#define TIF_PATCH_PENDING 6 /* pending live patching update */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SINGLESTEP 8 /* singlestepping active */
-#define TIF_NOHZ 9 /* in adaptive nohz mode */
#define TIF_SECCOMP 10 /* secure computing */
#define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */
#define TIF_NOERROR 12 /* Force successful syscall return */
@@ -128,11 +127,10 @@ void arch_setup_new_exec(void);
#define _TIF_UPROBE (1<<TIF_UPROBE)
#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
#define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE)
-#define _TIF_NOHZ (1<<TIF_NOHZ)
#define _TIF_SYSCALL_EMU (1<<TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
- _TIF_NOHZ | _TIF_SYSCALL_EMU)
+ _TIF_SYSCALL_EMU)
#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
_TIF_NOTIFY_RESUME | _TIF_UPROBE | \
diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
index f6e51be47c6e..8970400e521c 100644
--- a/arch/powerpc/kernel/ptrace/ptrace.c
+++ b/arch/powerpc/kernel/ptrace/ptrace.c
@@ -290,8 +290,6 @@ long do_syscall_trace_enter(struct pt_regs *regs)
{
u32 flags;
- user_exit();
-
flags = READ_ONCE(current_thread_info()->flags) &
(_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
@@ -368,8 +366,6 @@ void do_syscall_trace_leave(struct pt_regs *regs)
step = test_thread_flag(TIF_SINGLESTEP);
if (step || test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall_exit(regs, step);
-
- user_enter();
}
void __init pt_regs_check(void);
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index d2c356f37077..44ec7b34b27e 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -310,8 +310,6 @@ static void do_signal(struct task_struct *tsk)
void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
{
- user_exit();
-
if (thread_info_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
@@ -327,8 +325,6 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
tracehook_notify_resume(regs);
rseq_handle_notify_resume(NULL, regs);
}
-
- user_enter();
}
unsigned long get_tm_stackpointer(struct task_struct *tsk)
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
index 16eb2b5dafde..9e855b25bd15 100644
--- a/arch/powerpc/kernel/syscall_64.c
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -1,9 +1,11 @@
// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/context_tracking.h>
#include <linux/err.h>
#include <asm/asm-prototypes.h>
#include <asm/kup.h>
#include <asm/cputime.h>
+#include <asm/interrupt.h>
#include <asm/hw_irq.h>
#include <asm/interrupt.h>
#include <asm/kprobes.h>
@@ -28,6 +30,9 @@ notrace long system_call_exception(long r3, long r4, long r5,
if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
+ CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
+ user_exit_irqoff();
+
trace_hardirqs_off(); /* finish reconciling */
if (IS_ENABLED(CONFIG_PPC_BOOK3S))
@@ -158,6 +163,8 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
unsigned long ti_flags;
unsigned long ret = 0;
+ CT_WARN_ON(ct_state() == CONTEXT_USER);
+
kuap_check_amr();
regs->result = r3;
@@ -234,8 +241,11 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
}
}
+ user_enter_irqoff();
+
/* scv need not set RI=0 because SRRs are not used */
if (unlikely(!prep_irq_for_enabled_exit(!scv))) {
+ user_exit_irqoff();
local_irq_enable();
goto again;
}
--
2.23.0
^ permalink raw reply related
* [PATCH v3 07/19] powerpc: add interrupt_cond_local_irq_enable helper
From: Nicholas Piggin @ 2020-11-28 14:41 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20201128144114.944000-1-npiggin@gmail.com>
Simple helper for synchronous interrupt handlers to use to enable
interrupts if they were taken in interrupt-enabled context.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 7 +++++++
arch/powerpc/kernel/traps.c | 24 +++++++-----------------
arch/powerpc/mm/fault.c | 4 +---
3 files changed, 15 insertions(+), 20 deletions(-)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 516b32f5b3cc..2ab0675c27aa 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -3,6 +3,7 @@
#define _ASM_POWERPC_INTERRUPT_H
#include <linux/context_tracking.h>
+#include <linux/hardirq.h>
#include <asm/ftrace.h>
struct interrupt_state {
@@ -268,4 +269,10 @@ DECLARE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception);
void replay_system_reset(void);
void replay_soft_interrupts(void);
+static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs)
+{
+ if (!arch_irq_disabled_regs(regs))
+ local_irq_enable();
+}
+
#endif /* _ASM_POWERPC_INTERRUPT_H */
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index e726fe5c384d..8a6cb2838ce6 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -343,8 +343,8 @@ static bool exception_common(int signr, struct pt_regs *regs, int code,
show_signal_msg(signr, regs, code, addr);
- if (arch_irqs_disabled() && !arch_irq_disabled_regs(regs))
- local_irq_enable();
+ if (arch_irqs_disabled())
+ interrupt_cond_local_irq_enable(regs);
current->thread.trap_nr = code;
@@ -1575,9 +1575,7 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
if (!user_mode(regs))
goto sigill;
- /* We restore the interrupt state now */
- if (!arch_irq_disabled_regs(regs))
- local_irq_enable();
+ interrupt_cond_local_irq_enable(regs);
/* (reason & REASON_ILLEGAL) would be the obvious thing here,
* but there seems to be a hardware bug on the 405GP (RevD)
@@ -1631,9 +1629,7 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception)
int sig, code, fixed = 0;
unsigned long reason;
- /* We restore the interrupt state now */
- if (!arch_irq_disabled_regs(regs))
- local_irq_enable();
+ interrupt_cond_local_irq_enable(regs);
reason = get_reason(regs);
@@ -1794,9 +1790,7 @@ DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception)
die("Unexpected facility unavailable exception", regs, SIGABRT);
}
- /* We restore the interrupt state now */
- if (!arch_irq_disabled_regs(regs))
- local_irq_enable();
+ interrupt_cond_local_irq_enable(regs);
if (status == FSCR_DSCR_LG) {
/*
@@ -2176,9 +2170,7 @@ void SPEFloatingPointException(struct pt_regs *regs)
int code = FPE_FLTUNK;
int err;
- /* We restore the interrupt state now */
- if (!arch_irq_disabled_regs(regs))
- local_irq_enable();
+ interrupt_cond_local_irq_enable(regs);
flush_spe_to_thread(current);
@@ -2225,9 +2217,7 @@ void SPEFloatingPointRoundException(struct pt_regs *regs)
extern int speround_handler(struct pt_regs *regs);
int err;
- /* We restore the interrupt state now */
- if (!arch_irq_disabled_regs(regs))
- local_irq_enable();
+ interrupt_cond_local_irq_enable(regs);
preempt_disable();
if (regs->msr & MSR_SPE)
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 93663f7fbe9e..4896f44c8374 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -443,9 +443,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
return bad_area_nosemaphore(regs, address);
}
- /* We restore the interrupt state now */
- if (!arch_irq_disabled_regs(regs))
- local_irq_enable();
+ interrupt_cond_local_irq_enable(regs);
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
--
2.23.0
^ permalink raw reply related
* [PATCH v3 06/19] powerpc: add interrupt wrapper entry / exit stub functions
From: Nicholas Piggin @ 2020-11-28 14:41 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20201128144114.944000-1-npiggin@gmail.com>
These will be used by subsequent patches.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 66 ++++++++++++++++++++++++++++
1 file changed, 66 insertions(+)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 5f6eeb6d43f2..516b32f5b3cc 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -5,6 +5,50 @@
#include <linux/context_tracking.h>
#include <asm/ftrace.h>
+struct interrupt_state {
+};
+
+static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
+{
+}
+
+/*
+ * Care should be taken to note that interrupt_exit_prepare and
+ * interrupt_async_exit_prepare do not necessarily return immediately to
+ * regs context (e.g., if regs is usermode, we don't necessarily return to
+ * user mode). Other interrupts might be taken between here and return,
+ * context switch / preemption may occur in the exit path after this, or a
+ * signal may be delivered, etc.
+ *
+ * The real interrupt exit code is platform specific, e.g.,
+ * interrupt_exit_user_prepare / interrupt_exit_kernel_prepare for 64s.
+ *
+ * However interrupt_nmi_exit_prepare does return directly to regs, because
+ * NMIs do not do "exit work" or replay soft-masked interrupts.
+ */
+static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
+{
+}
+
+static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
+{
+}
+
+static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
+{
+}
+
+struct interrupt_nmi_state {
+};
+
+static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state)
+{
+}
+
+static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state)
+{
+}
+
/**
* DECLARE_INTERRUPT_HANDLER_RAW - Declare raw interrupt handler function
* @func: Function name of the entry point
@@ -59,7 +103,13 @@ static __always_inline void ___##func(struct pt_regs *regs); \
\
__visible noinstr void func(struct pt_regs *regs) \
{ \
+ struct interrupt_state state; \
+ \
+ interrupt_enter_prepare(regs, &state); \
+ \
___##func (regs); \
+ \
+ interrupt_exit_prepare(regs, &state); \
} \
\
static __always_inline void ___##func(struct pt_regs *regs)
@@ -87,10 +137,15 @@ static __always_inline long ___##func(struct pt_regs *regs); \
\
__visible noinstr long func(struct pt_regs *regs) \
{ \
+ struct interrupt_state state; \
long ret; \
\
+ interrupt_enter_prepare(regs, &state); \
+ \
ret = ___##func (regs); \
\
+ interrupt_exit_prepare(regs, &state); \
+ \
return ret; \
} \
\
@@ -117,7 +172,13 @@ static __always_inline void ___##func(struct pt_regs *regs); \
\
__visible noinstr void func(struct pt_regs *regs) \
{ \
+ struct interrupt_state state; \
+ \
+ interrupt_async_enter_prepare(regs, &state); \
+ \
___##func (regs); \
+ \
+ interrupt_async_exit_prepare(regs, &state); \
} \
\
static __always_inline void ___##func(struct pt_regs *regs)
@@ -145,10 +206,15 @@ static __always_inline long ___##func(struct pt_regs *regs); \
\
__visible noinstr long func(struct pt_regs *regs) \
{ \
+ struct interrupt_nmi_state state; \
long ret; \
\
+ interrupt_nmi_enter_prepare(regs, &state); \
+ \
ret = ___##func (regs); \
\
+ interrupt_nmi_exit_prepare(regs, &state); \
+ \
return ret; \
} \
\
--
2.23.0
^ permalink raw reply related
* [PATCH v3 05/19] powerpc: interrupt handler wrapper functions
From: Nicholas Piggin @ 2020-11-28 14:41 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20201128144114.944000-1-npiggin@gmail.com>
Add wrapper functions (derived from x86 macros) for interrupt handler
functions. This allows interrupt entry code to be written in C.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/asm-prototypes.h | 29 ---
arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 -
arch/powerpc/include/asm/hw_irq.h | 9 -
arch/powerpc/include/asm/interrupt.h | 205 ++++++++++++++++++
arch/powerpc/include/asm/time.h | 2 +
arch/powerpc/kernel/dbell.c | 12 +-
arch/powerpc/kernel/exceptions-64s.S | 7 +-
arch/powerpc/kernel/head_book3s_32.S | 6 +-
arch/powerpc/kernel/irq.c | 3 +-
arch/powerpc/kernel/mce.c | 5 +-
arch/powerpc/kernel/syscall_64.c | 1 +
arch/powerpc/kernel/tau_6xx.c | 2 +-
arch/powerpc/kernel/time.c | 3 +-
arch/powerpc/kernel/traps.c | 90 +++++---
arch/powerpc/kernel/watchdog.c | 7 +-
arch/powerpc/kvm/book3s_hv.c | 1 +
arch/powerpc/kvm/book3s_hv_builtin.c | 1 +
arch/powerpc/kvm/booke.c | 1 +
arch/powerpc/mm/book3s64/hash_utils.c | 3 +-
arch/powerpc/mm/book3s64/slb.c | 5 +-
arch/powerpc/mm/fault.c | 10 +-
arch/powerpc/platforms/powernv/idle.c | 1 +
22 files changed, 311 insertions(+), 93 deletions(-)
create mode 100644 arch/powerpc/include/asm/interrupt.h
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 22c9d08fa3a4..939f3c94c8f3 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -56,35 +56,6 @@ int exit_vmx_usercopy(void);
int enter_vmx_ops(void);
void *exit_vmx_ops(void *dest);
-/* Traps */
-long machine_check_early(struct pt_regs *regs);
-long hmi_exception_realmode(struct pt_regs *regs);
-void SMIException(struct pt_regs *regs);
-void handle_hmi_exception(struct pt_regs *regs);
-void instruction_breakpoint_exception(struct pt_regs *regs);
-void RunModeException(struct pt_regs *regs);
-void single_step_exception(struct pt_regs *regs);
-void program_check_exception(struct pt_regs *regs);
-void alignment_exception(struct pt_regs *regs);
-void StackOverflow(struct pt_regs *regs);
-void stack_overflow_exception(struct pt_regs *regs);
-void kernel_fp_unavailable_exception(struct pt_regs *regs);
-void altivec_unavailable_exception(struct pt_regs *regs);
-void vsx_unavailable_exception(struct pt_regs *regs);
-void fp_unavailable_tm(struct pt_regs *regs);
-void altivec_unavailable_tm(struct pt_regs *regs);
-void vsx_unavailable_tm(struct pt_regs *regs);
-void facility_unavailable_exception(struct pt_regs *regs);
-void TAUException(struct pt_regs *regs);
-void altivec_assist_exception(struct pt_regs *regs);
-void unrecoverable_exception(struct pt_regs *regs);
-void kernel_bad_stack(struct pt_regs *regs);
-void system_reset_exception(struct pt_regs *regs);
-void machine_check_exception(struct pt_regs *regs);
-void emulation_assist_interrupt(struct pt_regs *regs);
-long do_slb_fault(struct pt_regs *regs);
-void do_bad_slb_fault(struct pt_regs *regs);
-
/* signals, syscalls and interrupts */
long sys_swapcontext(struct ucontext __user *old_ctx,
struct ucontext __user *new_ctx,
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index e843d0b193d3..683a9c7d1b03 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -453,7 +453,6 @@ static inline unsigned long hpt_hash(unsigned long vpn,
#define HPTE_LOCAL_UPDATE 0x1
#define HPTE_NOHPTE_UPDATE 0x2
-long do_hash_fault(struct pt_regs *regs);
extern int __hash_page_4K(unsigned long ea, unsigned long access,
unsigned long vsid, pte_t *ptep, unsigned long trap,
unsigned long flags, int ssize, int subpage_prot);
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 0363734ff56e..614957f74cee 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -50,15 +50,6 @@
#ifndef __ASSEMBLY__
-extern void replay_system_reset(void);
-extern void replay_soft_interrupts(void);
-
-extern void timer_interrupt(struct pt_regs *);
-extern void timer_broadcast_interrupt(void);
-extern void performance_monitor_exception(struct pt_regs *regs);
-extern void WatchdogException(struct pt_regs *regs);
-extern void unknown_exception(struct pt_regs *regs);
-
#ifdef CONFIG_PPC64
#include <asm/paca.h>
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
new file mode 100644
index 000000000000..5f6eeb6d43f2
--- /dev/null
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_INTERRUPT_H
+#define _ASM_POWERPC_INTERRUPT_H
+
+#include <linux/context_tracking.h>
+#include <asm/ftrace.h>
+
+/**
+ * DECLARE_INTERRUPT_HANDLER_RAW - Declare raw interrupt handler function
+ * @func: Function name of the entry point
+ * @returns: Returns a value back to asm caller
+ */
+#define DECLARE_INTERRUPT_HANDLER_RAW(func) \
+ __visible long func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER_RAW - Define raw interrupt handler function
+ * @func: Function name of the entry point
+ * @returns: Returns a value back to asm caller
+ *
+ * @func is called from ASM entry code.
+ *
+ * This is a plain function which does no tracing, reconciling, etc.
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ */
+#define DEFINE_INTERRUPT_HANDLER_RAW(func) \
+static __always_inline long ___##func(struct pt_regs *regs); \
+ \
+__visible noinstr long func(struct pt_regs *regs) \
+{ \
+ long ret; \
+ \
+ ret = ___##func (regs); \
+ \
+ return ret; \
+} \
+ \
+static __always_inline long ___##func(struct pt_regs *regs)
+
+/**
+ * DECLARE_INTERRUPT_HANDLER - Declare synchronous interrupt handler function
+ * @func: Function name of the entry point
+ */
+#define DECLARE_INTERRUPT_HANDLER(func) \
+ __visible void func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER - Define synchronous interrupt handler function
+ * @func: Function name of the entry point
+ *
+ * @func is called from ASM entry code.
+ *
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ */
+#define DEFINE_INTERRUPT_HANDLER(func) \
+static __always_inline void ___##func(struct pt_regs *regs); \
+ \
+__visible noinstr void func(struct pt_regs *regs) \
+{ \
+ ___##func (regs); \
+} \
+ \
+static __always_inline void ___##func(struct pt_regs *regs)
+
+/**
+ * DECLARE_INTERRUPT_HANDLER_RET - Declare synchronous interrupt handler function
+ * @func: Function name of the entry point
+ * @returns: Returns a value back to asm caller
+ */
+#define DECLARE_INTERRUPT_HANDLER_RET(func) \
+ __visible long func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER_RET - Define synchronous interrupt handler function
+ * @func: Function name of the entry point
+ * @returns: Returns a value back to asm caller
+ *
+ * @func is called from ASM entry code.
+ *
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ */
+#define DEFINE_INTERRUPT_HANDLER_RET(func) \
+static __always_inline long ___##func(struct pt_regs *regs); \
+ \
+__visible noinstr long func(struct pt_regs *regs) \
+{ \
+ long ret; \
+ \
+ ret = ___##func (regs); \
+ \
+ return ret; \
+} \
+ \
+static __always_inline long ___##func(struct pt_regs *regs)
+
+/**
+ * DECLARE_INTERRUPT_HANDLER_ASYNC - Declare asynchronous interrupt handler function
+ * @func: Function name of the entry point
+ */
+#define DECLARE_INTERRUPT_HANDLER_ASYNC(func) \
+ __visible void func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER_ASYNC - Define asynchronous interrupt handler function
+ * @func: Function name of the entry point
+ *
+ * @func is called from ASM entry code.
+ *
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ */
+#define DEFINE_INTERRUPT_HANDLER_ASYNC(func) \
+static __always_inline void ___##func(struct pt_regs *regs); \
+ \
+__visible noinstr void func(struct pt_regs *regs) \
+{ \
+ ___##func (regs); \
+} \
+ \
+static __always_inline void ___##func(struct pt_regs *regs)
+
+/**
+ * DECLARE_INTERRUPT_HANDLER_NMI - Declare NMI interrupt handler function
+ * @func: Function name of the entry point
+ * @returns: Returns a value back to asm caller
+ */
+#define DECLARE_INTERRUPT_HANDLER_NMI(func) \
+ __visible long func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER_NMI - Define NMI interrupt handler function
+ * @func: Function name of the entry point
+ * @returns: Returns a value back to asm caller
+ *
+ * @func is called from ASM entry code.
+ *
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ */
+#define DEFINE_INTERRUPT_HANDLER_NMI(func) \
+static __always_inline long ___##func(struct pt_regs *regs); \
+ \
+__visible noinstr long func(struct pt_regs *regs) \
+{ \
+ long ret; \
+ \
+ ret = ___##func (regs); \
+ \
+ return ret; \
+} \
+ \
+static __always_inline long ___##func(struct pt_regs *regs)
+
+
+/* Interrupt handlers */
+DECLARE_INTERRUPT_HANDLER_NMI(machine_check_early);
+DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode);
+DECLARE_INTERRUPT_HANDLER(SMIException);
+DECLARE_INTERRUPT_HANDLER(handle_hmi_exception);
+DECLARE_INTERRUPT_HANDLER(instruction_breakpoint_exception);
+DECLARE_INTERRUPT_HANDLER(RunModeException);
+DECLARE_INTERRUPT_HANDLER(single_step_exception);
+DECLARE_INTERRUPT_HANDLER(program_check_exception);
+DECLARE_INTERRUPT_HANDLER(alignment_exception);
+DECLARE_INTERRUPT_HANDLER(StackOverflow);
+DECLARE_INTERRUPT_HANDLER(stack_overflow_exception);
+DECLARE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception);
+DECLARE_INTERRUPT_HANDLER(altivec_unavailable_exception);
+DECLARE_INTERRUPT_HANDLER(vsx_unavailable_exception);
+DECLARE_INTERRUPT_HANDLER(fp_unavailable_tm);
+DECLARE_INTERRUPT_HANDLER(altivec_unavailable_tm);
+DECLARE_INTERRUPT_HANDLER(vsx_unavailable_tm);
+DECLARE_INTERRUPT_HANDLER(facility_unavailable_exception);
+DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException);
+DECLARE_INTERRUPT_HANDLER(altivec_assist_exception);
+DECLARE_INTERRUPT_HANDLER(unrecoverable_exception);
+DECLARE_INTERRUPT_HANDLER(kernel_bad_stack);
+DECLARE_INTERRUPT_HANDLER_NMI(system_reset_exception);
+#ifdef CONFIG_PPC_BOOK3S_64
+DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception);
+#else
+DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception);
+#endif
+DECLARE_INTERRUPT_HANDLER(emulation_assist_interrupt);
+DECLARE_INTERRUPT_HANDLER_RAW(do_slb_fault);
+DECLARE_INTERRUPT_HANDLER(do_bad_slb_fault);
+DECLARE_INTERRUPT_HANDLER_RET(do_hash_fault);
+DECLARE_INTERRUPT_HANDLER_RET(do_page_fault);
+DECLARE_INTERRUPT_HANDLER(do_bad_page_fault);
+
+DECLARE_INTERRUPT_HANDLER_ASYNC(timer_interrupt);
+DECLARE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi);
+DECLARE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async);
+DECLARE_INTERRUPT_HANDLER_RAW(performance_monitor_exception);
+DECLARE_INTERRUPT_HANDLER(WatchdogException);
+DECLARE_INTERRUPT_HANDLER(unknown_exception);
+DECLARE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception);
+
+void replay_system_reset(void);
+void replay_soft_interrupts(void);
+
+#endif /* _ASM_POWERPC_INTERRUPT_H */
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 2f566c1a754c..335d6fd589a7 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -131,6 +131,8 @@ DECLARE_PER_CPU(u64, decrementers_next_tb);
/* Convert timebase ticks to nanoseconds */
unsigned long long tb_to_ns(unsigned long long tb_ticks);
+void timer_broadcast_interrupt(void);
+
/* SPLPAR */
void accumulate_stolen_time(void);
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 52680cf07c9d..c0f99f8ffa7d 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -12,14 +12,14 @@
#include <linux/hardirq.h>
#include <asm/dbell.h>
+#include <asm/interrupt.h>
#include <asm/irq_regs.h>
#include <asm/kvm_ppc.h>
#include <asm/trace.h>
-#ifdef CONFIG_SMP
-
-void doorbell_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
{
+#ifdef CONFIG_SMP
struct pt_regs *old_regs = set_irq_regs(regs);
irq_enter();
@@ -37,11 +37,7 @@ void doorbell_exception(struct pt_regs *regs)
trace_doorbell_exit(regs);
irq_exit();
set_irq_regs(old_regs);
-}
#else /* CONFIG_SMP */
-void doorbell_exception(struct pt_regs *regs)
-{
printk(KERN_WARNING "Received doorbell on non-smp system\n");
-}
#endif /* CONFIG_SMP */
-
+}
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 77b730f515c4..f6ec455db13c 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1923,7 +1923,7 @@ EXC_COMMON_BEGIN(doorbell_super_common)
#ifdef CONFIG_PPC_DOORBELL
bl doorbell_exception
#else
- bl unknown_exception
+ bl unknown_async_exception
#endif
b interrupt_return
@@ -2136,8 +2136,7 @@ EXC_COMMON_BEGIN(h_data_storage_common)
GEN_COMMON h_data_storage
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
- li r4,SIGSEGV
- bl bad_page_fault
+ bl do_bad_page_fault
MMU_FTR_SECTION_ELSE
bl unknown_exception
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
@@ -2310,7 +2309,7 @@ EXC_COMMON_BEGIN(h_doorbell_common)
#ifdef CONFIG_PPC_DOORBELL
bl doorbell_exception
#else
- bl unknown_exception
+ bl unknown_async_exception
#endif
b interrupt_return
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 5875f8795d5b..2185c6c89f33 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -240,8 +240,8 @@ __secondary_hold_acknowledge:
/* System reset */
/* core99 pmac starts the seconary here by changing the vector, and
- putting it back to what it was (unknown_exception) when done. */
- EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD)
+ putting it back to what it was (unknown_async_exception) when done. */
+ EXCEPTION(0x100, Reset, unknown_async_exception, EXC_XFER_STD)
/* Machine check */
/*
@@ -642,7 +642,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
#endif
#ifndef CONFIG_TAU_INT
-#define TAUException unknown_exception
+#define TAUException unknown_async_exception
#endif
EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD)
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 6b1eca53e36c..2055d204d08e 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -54,6 +54,7 @@
#include <linux/pgtable.h>
#include <linux/uaccess.h>
+#include <asm/interrupt.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/cache.h>
@@ -665,7 +666,7 @@ void __do_irq(struct pt_regs *regs)
irq_exit();
}
-void do_IRQ(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ)
{
struct pt_regs *old_regs = set_irq_regs(regs);
void *cursp, *irqsp, *sirqsp;
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 63702c0badb9..b84459f45b1a 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -18,6 +18,7 @@
#include <linux/extable.h>
#include <linux/ftrace.h>
+#include <asm/interrupt.h>
#include <asm/machdep.h>
#include <asm/mce.h>
#include <asm/nmi.h>
@@ -588,7 +589,7 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info);
*
* regs->nip and regs->msr contains srr0 and ssr1.
*/
-long notrace machine_check_early(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early)
{
long handled = 0;
u8 ftrace_enabled = this_cpu_get_ftrace_enabled();
@@ -722,7 +723,7 @@ long hmi_handle_debugtrig(struct pt_regs *regs)
/*
* Return values:
*/
-long hmi_exception_realmode(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode)
{
int ret;
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
index 310bcd768cd5..16eb2b5dafde 100644
--- a/arch/powerpc/kernel/syscall_64.c
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -5,6 +5,7 @@
#include <asm/kup.h>
#include <asm/cputime.h>
#include <asm/hw_irq.h>
+#include <asm/interrupt.h>
#include <asm/kprobes.h>
#include <asm/paca.h>
#include <asm/ptrace.h>
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index 0b4694b8d248..46b2e5de4ef5 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -100,7 +100,7 @@ static void TAUupdate(int cpu)
* with interrupts disabled
*/
-void TAUException(struct pt_regs * regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(TAUException)
{
int cpu = smp_processor_id();
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 7d372ff3504b..0089df57bb49 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -56,6 +56,7 @@
#include <linux/processor.h>
#include <asm/trace.h>
+#include <asm/interrupt.h>
#include <asm/io.h>
#include <asm/nvram.h>
#include <asm/cache.h>
@@ -545,7 +546,7 @@ void arch_irq_work_raise(void)
* timer_interrupt - gets called when the decrementer overflows,
* with interrupts disabled.
*/
-void timer_interrupt(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
{
struct clock_event_device *evt = this_cpu_ptr(&decrementers);
u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 7dda72eb97cc..e726fe5c384d 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -41,6 +41,7 @@
#include <asm/emulated_ops.h>
#include <linux/uaccess.h>
#include <asm/debugfs.h>
+#include <asm/interrupt.h>
#include <asm/io.h>
#include <asm/machdep.h>
#include <asm/rtas.h>
@@ -436,8 +437,7 @@ void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
regs->msr &= ~MSR_RI;
#endif
}
-
-void system_reset_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception)
{
unsigned long hsrr0, hsrr1;
bool saved_hsrrs = false;
@@ -522,7 +522,10 @@ void system_reset_exception(struct pt_regs *regs)
this_cpu_set_ftrace_enabled(ftrace_enabled);
/* What should we do here? We could issue a shutdown or hard reset. */
+
+ return 0;
}
+NOKPROBE_SYMBOL(system_reset_exception);
/*
* I/O accesses can cause machine checks on powermacs.
@@ -819,7 +822,12 @@ int machine_check_generic(struct pt_regs *regs)
}
#endif /* everything else */
-void machine_check_exception(struct pt_regs *regs)
+
+#ifdef CONFIG_PPC_BOOK3S_64
+DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception)
+#else
+DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception)
+#endif
{
int recover = 0;
@@ -869,13 +877,21 @@ void machine_check_exception(struct pt_regs *regs)
if (!(regs->msr & MSR_RI))
die("Unrecoverable Machine check", regs, SIGBUS);
+#ifdef CONFIG_PPC_BOOK3S_64
+bail:
return;
+#else
+ return 0;
bail:
if (nmi) nmi_exit();
+
+ return 0;
+#endif
}
+NOKPROBE_SYMBOL(machine_check_exception);
-void SMIException(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(SMIException) /* async? */
{
die("System Management Interrupt", regs, SIGABRT);
}
@@ -1061,7 +1077,7 @@ static void p9_hmi_special_emu(struct pt_regs *regs)
}
#endif /* CONFIG_VSX */
-void handle_hmi_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception)
{
struct pt_regs *old_regs;
@@ -1090,7 +1106,7 @@ void handle_hmi_exception(struct pt_regs *regs)
set_irq_regs(old_regs);
}
-void unknown_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(unknown_exception)
{
enum ctx_state prev_state = exception_enter();
@@ -1102,7 +1118,19 @@ void unknown_exception(struct pt_regs *regs)
exception_exit(prev_state);
}
-void instruction_breakpoint_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception)
+{
+ enum ctx_state prev_state = exception_enter();
+
+ printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
+ regs->nip, regs->msr, regs->trap);
+
+ _exception(SIGTRAP, regs, TRAP_UNK, 0);
+
+ exception_exit(prev_state);
+}
+
+DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception)
{
enum ctx_state prev_state = exception_enter();
@@ -1117,12 +1145,12 @@ void instruction_breakpoint_exception(struct pt_regs *regs)
exception_exit(prev_state);
}
-void RunModeException(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(RunModeException)
{
_exception(SIGTRAP, regs, TRAP_UNK, 0);
}
-void single_step_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(single_step_exception)
{
enum ctx_state prev_state = exception_enter();
@@ -1465,7 +1493,7 @@ static int emulate_math(struct pt_regs *regs)
static inline int emulate_math(struct pt_regs *regs) { return -1; }
#endif
-void program_check_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(program_check_exception)
{
enum ctx_state prev_state = exception_enter();
unsigned int reason = get_reason(regs);
@@ -1590,14 +1618,14 @@ NOKPROBE_SYMBOL(program_check_exception);
* This occurs when running in hypervisor mode on POWER6 or later
* and an illegal instruction is encountered.
*/
-void emulation_assist_interrupt(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(emulation_assist_interrupt)
{
regs->msr |= REASON_ILLEGAL;
program_check_exception(regs);
}
NOKPROBE_SYMBOL(emulation_assist_interrupt);
-void alignment_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(alignment_exception)
{
enum ctx_state prev_state = exception_enter();
int sig, code, fixed = 0;
@@ -1647,7 +1675,7 @@ void alignment_exception(struct pt_regs *regs)
exception_exit(prev_state);
}
-void StackOverflow(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(StackOverflow)
{
pr_crit("Kernel stack overflow in process %s[%d], r1=%lx\n",
current->comm, task_pid_nr(current), regs->gpr[1]);
@@ -1656,7 +1684,7 @@ void StackOverflow(struct pt_regs *regs)
panic("kernel stack overflow");
}
-void stack_overflow_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(stack_overflow_exception)
{
enum ctx_state prev_state = exception_enter();
@@ -1665,7 +1693,7 @@ void stack_overflow_exception(struct pt_regs *regs)
exception_exit(prev_state);
}
-void kernel_fp_unavailable_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception)
{
enum ctx_state prev_state = exception_enter();
@@ -1676,7 +1704,7 @@ void kernel_fp_unavailable_exception(struct pt_regs *regs)
exception_exit(prev_state);
}
-void altivec_unavailable_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(altivec_unavailable_exception)
{
enum ctx_state prev_state = exception_enter();
@@ -1695,7 +1723,7 @@ void altivec_unavailable_exception(struct pt_regs *regs)
exception_exit(prev_state);
}
-void vsx_unavailable_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(vsx_unavailable_exception)
{
if (user_mode(regs)) {
/* A user program has executed an vsx instruction,
@@ -1726,7 +1754,7 @@ static void tm_unavailable(struct pt_regs *regs)
die("Unrecoverable TM Unavailable Exception", regs, SIGABRT);
}
-void facility_unavailable_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception)
{
static char *facility_strings[] = {
[FSCR_FP_LG] = "FPU",
@@ -1846,7 +1874,7 @@ void facility_unavailable_exception(struct pt_regs *regs)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-void fp_unavailable_tm(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(fp_unavailable_tm)
{
/* Note: This does not handle any kind of FP laziness. */
@@ -1879,7 +1907,7 @@ void fp_unavailable_tm(struct pt_regs *regs)
tm_recheckpoint(¤t->thread);
}
-void altivec_unavailable_tm(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(altivec_unavailable_tm)
{
/* See the comments in fp_unavailable_tm(). This function operates
* the same way.
@@ -1894,7 +1922,7 @@ void altivec_unavailable_tm(struct pt_regs *regs)
current->thread.used_vr = 1;
}
-void vsx_unavailable_tm(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(vsx_unavailable_tm)
{
/* See the comments in fp_unavailable_tm(). This works similarly,
* though we're loading both FP and VEC registers in here.
@@ -1919,7 +1947,8 @@ void vsx_unavailable_tm(struct pt_regs *regs)
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-static void performance_monitor_exception_nmi(struct pt_regs *regs)
+#ifdef CONFIG_PPC64
+DEFINE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi)
{
nmi_enter();
@@ -1928,9 +1957,12 @@ static void performance_monitor_exception_nmi(struct pt_regs *regs)
perf_irq(regs);
nmi_exit();
+
+ return 0;
}
+#endif
-static void performance_monitor_exception_async(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async)
{
irq_enter();
@@ -1941,7 +1973,7 @@ static void performance_monitor_exception_async(struct pt_regs *regs)
irq_exit();
}
-void performance_monitor_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_RAW(performance_monitor_exception)
{
/*
* On 64-bit, if perf interrupts hit in a local_irq_disable
@@ -1953,6 +1985,8 @@ void performance_monitor_exception(struct pt_regs *regs)
performance_monitor_exception_nmi(regs);
else
performance_monitor_exception_async(regs);
+
+ return 0;
}
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
@@ -2086,7 +2120,7 @@ NOKPROBE_SYMBOL(DebugException);
#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
#ifdef CONFIG_ALTIVEC
-void altivec_assist_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(altivec_assist_exception)
{
int err;
@@ -2228,7 +2262,7 @@ void SPEFloatingPointRoundException(struct pt_regs *regs)
* in the MSR is 0. This indicates that SRR0/1 are live, and that
* we therefore lost state by taking this exception.
*/
-void unrecoverable_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(unrecoverable_exception)
{
pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n",
regs->trap, regs->nip, regs->msr);
@@ -2248,7 +2282,7 @@ void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs)
return;
}
-void WatchdogException(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(WatchdogException) /* XXX NMI? async? */
{
printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n");
WatchdogHandler(regs);
@@ -2259,7 +2293,7 @@ void WatchdogException(struct pt_regs *regs)
* We enter here if we discover during exception entry that we are
* running in supervisor mode with a userspace value in the stack pointer.
*/
-void kernel_bad_stack(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(kernel_bad_stack)
{
printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n",
regs->gpr[1], regs->nip);
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index af3c15a1d41e..824b9376ac35 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -26,6 +26,7 @@
#include <linux/delay.h>
#include <linux/smp.h>
+#include <asm/interrupt.h>
#include <asm/paca.h>
/*
@@ -247,14 +248,14 @@ static void watchdog_timer_interrupt(int cpu)
watchdog_smp_panic(cpu, tb);
}
-void soft_nmi_interrupt(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
{
unsigned long flags;
int cpu = raw_smp_processor_id();
u64 tb;
if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
- return;
+ return 0;
nmi_enter();
@@ -291,6 +292,8 @@ void soft_nmi_interrupt(struct pt_regs *regs)
out:
nmi_exit();
+
+ return 0;
}
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index e3b1839fc251..01816de0e0ec 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -53,6 +53,7 @@
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <linux/uaccess.h>
+#include <asm/interrupt.h>
#include <asm/io.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 8f58dd20b362..a2f3e6e70361 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -17,6 +17,7 @@
#include <asm/asm-prototypes.h>
#include <asm/cputable.h>
+#include <asm/interrupt.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/archrandom.h>
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index b1abcb816439..2a9817311ac1 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -20,6 +20,7 @@
#include <asm/cputable.h>
#include <linux/uaccess.h>
+#include <asm/interrupt.h>
#include <asm/kvm_ppc.h>
#include <asm/cacheflush.h>
#include <asm/dbell.h>
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 731518e7d56f..a48c484b9e9b 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -38,6 +38,7 @@
#include <linux/pgtable.h>
#include <asm/debugfs.h>
+#include <asm/interrupt.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
@@ -1510,7 +1511,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
}
EXPORT_SYMBOL_GPL(hash_page);
-long do_hash_fault(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_RET(do_hash_fault)
{
unsigned long ea = regs->dar;
unsigned long dsisr = regs->dsisr;
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index ae89ad516247..efac69e73ca8 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -10,6 +10,7 @@
*/
#include <asm/asm-prototypes.h>
+#include <asm/interrupt.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/paca.h>
@@ -837,7 +838,7 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
return slb_insert_entry(ea, context, flags, ssize, false);
}
-long do_slb_fault(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault)
{
unsigned long ea = regs->dar;
unsigned long id = get_region_id(ea);
@@ -890,7 +891,7 @@ long do_slb_fault(struct pt_regs *regs)
}
}
-void do_bad_slb_fault(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(do_bad_slb_fault)
{
int err = regs->result;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index e11989be8f1c..93663f7fbe9e 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -34,6 +34,7 @@
#include <linux/uaccess.h>
#include <asm/firmware.h>
+#include <asm/interrupt.h>
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
@@ -549,7 +550,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
}
NOKPROBE_SYMBOL(__do_page_fault);
-long do_page_fault(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_RET(do_page_fault)
{
enum ctx_state prev_state = exception_enter();
unsigned long address = regs->dar;
@@ -656,3 +657,10 @@ void bad_page_fault(struct pt_regs *regs, int sig)
die("Kernel access of bad area", regs, sig);
}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+DEFINE_INTERRUPT_HANDLER(do_bad_page_fault)
+{
+ bad_page_fault(regs, SIGSEGV);
+}
+#endif
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 1ed7c5286487..2fc7049fef53 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -14,6 +14,7 @@
#include <asm/asm-prototypes.h>
#include <asm/firmware.h>
+#include <asm/interrupt.h>
#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/cputhreads.h>
--
2.23.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox