* [PATCH v10 09/12] mm: Move vmap_range from mm/ioremap.c to mm/vmalloc.c
From: Nicholas Piggin @ 2021-01-24 8:22 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, Ding Tianhong, linux-kernel, Nicholas Piggin,
Christoph Hellwig, Zefan Li, Jonathan Cameron, Rick Edgecombe,
linuxppc-dev
In-Reply-To: <20210124082230.2118861-1-npiggin@gmail.com>
This is a generic kernel virtual memory mapper, not specific to ioremap.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
include/linux/vmalloc.h | 3 +
mm/ioremap.c | 197 ----------------------------------------
mm/vmalloc.c | 196 +++++++++++++++++++++++++++++++++++++++
3 files changed, 199 insertions(+), 197 deletions(-)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 00bd62bd701e..40649c4bb5a2 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -178,6 +178,9 @@ extern struct vm_struct *remove_vm_area(const void *addr);
extern struct vm_struct *find_vm_area(const void *addr);
#ifdef CONFIG_MMU
+int vmap_range(unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift);
extern int map_kernel_range_noflush(unsigned long start, unsigned long size,
pgprot_t prot, struct page **pages);
int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot,
diff --git a/mm/ioremap.c b/mm/ioremap.c
index c67f91164401..d1dcc7e744ac 100644
--- a/mm/ioremap.c
+++ b/mm/ioremap.c
@@ -28,203 +28,6 @@ early_param("nohugeiomap", set_nohugeiomap);
static const bool iomap_max_page_shift = PAGE_SHIFT;
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
-static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- pgtbl_mod_mask *mask)
-{
- pte_t *pte;
- u64 pfn;
-
- pfn = phys_addr >> PAGE_SHIFT;
- pte = pte_alloc_kernel_track(pmd, addr, mask);
- if (!pte)
- return -ENOMEM;
- do {
- BUG_ON(!pte_none(*pte));
- set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
- pfn++;
- } while (pte++, addr += PAGE_SIZE, addr != end);
- *mask |= PGTBL_PTE_MODIFIED;
- return 0;
-}
-
-static int vmap_try_huge_pmd(pmd_t *pmd, unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- unsigned int max_page_shift)
-{
- if (max_page_shift < PMD_SHIFT)
- return 0;
-
- if (!arch_vmap_pmd_supported(prot))
- return 0;
-
- if ((end - addr) != PMD_SIZE)
- return 0;
-
- if (!IS_ALIGNED(addr, PMD_SIZE))
- return 0;
-
- if (!IS_ALIGNED(phys_addr, PMD_SIZE))
- return 0;
-
- if (pmd_present(*pmd) && !pmd_free_pte_page(pmd, addr))
- return 0;
-
- return pmd_set_huge(pmd, phys_addr, prot);
-}
-
-static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- unsigned int max_page_shift, pgtbl_mod_mask *mask)
-{
- pmd_t *pmd;
- unsigned long next;
-
- pmd = pmd_alloc_track(&init_mm, pud, addr, mask);
- if (!pmd)
- return -ENOMEM;
- do {
- next = pmd_addr_end(addr, end);
-
- if (vmap_try_huge_pmd(pmd, addr, next, phys_addr, prot, max_page_shift)) {
- *mask |= PGTBL_PMD_MODIFIED;
- continue;
- }
-
- if (vmap_pte_range(pmd, addr, next, phys_addr, prot, mask))
- return -ENOMEM;
- } while (pmd++, phys_addr += (next - addr), addr = next, addr != end);
- return 0;
-}
-
-static int vmap_try_huge_pud(pud_t *pud, unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- unsigned int max_page_shift)
-{
- if (max_page_shift < PUD_SHIFT)
- return 0;
-
- if (!arch_vmap_pud_supported(prot))
- return 0;
-
- if ((end - addr) != PUD_SIZE)
- return 0;
-
- if (!IS_ALIGNED(addr, PUD_SIZE))
- return 0;
-
- if (!IS_ALIGNED(phys_addr, PUD_SIZE))
- return 0;
-
- if (pud_present(*pud) && !pud_free_pmd_page(pud, addr))
- return 0;
-
- return pud_set_huge(pud, phys_addr, prot);
-}
-
-static int vmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- unsigned int max_page_shift, pgtbl_mod_mask *mask)
-{
- pud_t *pud;
- unsigned long next;
-
- pud = pud_alloc_track(&init_mm, p4d, addr, mask);
- if (!pud)
- return -ENOMEM;
- do {
- next = pud_addr_end(addr, end);
-
- if (vmap_try_huge_pud(pud, addr, next, phys_addr, prot, max_page_shift)) {
- *mask |= PGTBL_PUD_MODIFIED;
- continue;
- }
-
- if (vmap_pmd_range(pud, addr, next, phys_addr, prot, max_page_shift, mask))
- return -ENOMEM;
- } while (pud++, phys_addr += (next - addr), addr = next, addr != end);
- return 0;
-}
-
-static int vmap_try_huge_p4d(p4d_t *p4d, unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- unsigned int max_page_shift)
-{
- if (max_page_shift < P4D_SHIFT)
- return 0;
-
- if (!arch_vmap_p4d_supported(prot))
- return 0;
-
- if ((end - addr) != P4D_SIZE)
- return 0;
-
- if (!IS_ALIGNED(addr, P4D_SIZE))
- return 0;
-
- if (!IS_ALIGNED(phys_addr, P4D_SIZE))
- return 0;
-
- if (p4d_present(*p4d) && !p4d_free_pud_page(p4d, addr))
- return 0;
-
- return p4d_set_huge(p4d, phys_addr, prot);
-}
-
-static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- unsigned int max_page_shift, pgtbl_mod_mask *mask)
-{
- p4d_t *p4d;
- unsigned long next;
-
- p4d = p4d_alloc_track(&init_mm, pgd, addr, mask);
- if (!p4d)
- return -ENOMEM;
- do {
- next = p4d_addr_end(addr, end);
-
- if (vmap_try_huge_p4d(p4d, addr, next, phys_addr, prot, max_page_shift)) {
- *mask |= PGTBL_P4D_MODIFIED;
- continue;
- }
-
- if (vmap_pud_range(p4d, addr, next, phys_addr, prot, max_page_shift, mask))
- return -ENOMEM;
- } while (p4d++, phys_addr += (next - addr), addr = next, addr != end);
- return 0;
-}
-
-static int vmap_range(unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- unsigned int max_page_shift)
-{
- pgd_t *pgd;
- unsigned long start;
- unsigned long next;
- int err;
- pgtbl_mod_mask mask = 0;
-
- might_sleep();
- BUG_ON(addr >= end);
-
- start = addr;
- pgd = pgd_offset_k(addr);
- do {
- next = pgd_addr_end(addr, end);
- err = vmap_p4d_range(pgd, addr, next, phys_addr, prot, max_page_shift, &mask);
- if (err)
- break;
- } while (pgd++, phys_addr += (next - addr), addr = next, addr != end);
-
- flush_cache_vmap(start, end);
-
- if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
- arch_sync_kernel_mappings(start, end);
-
- return err;
-}
-
int ioremap_page_range(unsigned long addr,
unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
{
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 7f2f36116980..5d79148b7fa7 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -68,6 +68,202 @@ static void free_work(struct work_struct *w)
}
/*** Page table manipulation functions ***/
+static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ pgtbl_mod_mask *mask)
+{
+ pte_t *pte;
+ u64 pfn;
+
+ pfn = phys_addr >> PAGE_SHIFT;
+ pte = pte_alloc_kernel_track(pmd, addr, mask);
+ if (!pte)
+ return -ENOMEM;
+ do {
+ BUG_ON(!pte_none(*pte));
+ set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
+ pfn++;
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ *mask |= PGTBL_PTE_MODIFIED;
+ return 0;
+}
+
+static int vmap_try_huge_pmd(pmd_t *pmd, unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift)
+{
+ if (max_page_shift < PMD_SHIFT)
+ return 0;
+
+ if (!arch_vmap_pmd_supported(prot))
+ return 0;
+
+ if ((end - addr) != PMD_SIZE)
+ return 0;
+
+ if (!IS_ALIGNED(addr, PMD_SIZE))
+ return 0;
+
+ if (!IS_ALIGNED(phys_addr, PMD_SIZE))
+ return 0;
+
+ if (pmd_present(*pmd) && !pmd_free_pte_page(pmd, addr))
+ return 0;
+
+ return pmd_set_huge(pmd, phys_addr, prot);
+}
+
+static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift, pgtbl_mod_mask *mask)
+{
+ pmd_t *pmd;
+ unsigned long next;
+
+ pmd = pmd_alloc_track(&init_mm, pud, addr, mask);
+ if (!pmd)
+ return -ENOMEM;
+ do {
+ next = pmd_addr_end(addr, end);
+
+ if (vmap_try_huge_pmd(pmd, addr, next, phys_addr, prot, max_page_shift)) {
+ *mask |= PGTBL_PMD_MODIFIED;
+ continue;
+ }
+
+ if (vmap_pte_range(pmd, addr, next, phys_addr, prot, mask))
+ return -ENOMEM;
+ } while (pmd++, phys_addr += (next - addr), addr = next, addr != end);
+ return 0;
+}
+
+static int vmap_try_huge_pud(pud_t *pud, unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift)
+{
+ if (max_page_shift < PUD_SHIFT)
+ return 0;
+
+ if (!arch_vmap_pud_supported(prot))
+ return 0;
+
+ if ((end - addr) != PUD_SIZE)
+ return 0;
+
+ if (!IS_ALIGNED(addr, PUD_SIZE))
+ return 0;
+
+ if (!IS_ALIGNED(phys_addr, PUD_SIZE))
+ return 0;
+
+ if (pud_present(*pud) && !pud_free_pmd_page(pud, addr))
+ return 0;
+
+ return pud_set_huge(pud, phys_addr, prot);
+}
+
+static int vmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift, pgtbl_mod_mask *mask)
+{
+ pud_t *pud;
+ unsigned long next;
+
+ pud = pud_alloc_track(&init_mm, p4d, addr, mask);
+ if (!pud)
+ return -ENOMEM;
+ do {
+ next = pud_addr_end(addr, end);
+
+ if (vmap_try_huge_pud(pud, addr, next, phys_addr, prot, max_page_shift)) {
+ *mask |= PGTBL_PUD_MODIFIED;
+ continue;
+ }
+
+ if (vmap_pmd_range(pud, addr, next, phys_addr, prot, max_page_shift, mask))
+ return -ENOMEM;
+ } while (pud++, phys_addr += (next - addr), addr = next, addr != end);
+ return 0;
+}
+
+static int vmap_try_huge_p4d(p4d_t *p4d, unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift)
+{
+ if (max_page_shift < P4D_SHIFT)
+ return 0;
+
+ if (!arch_vmap_p4d_supported(prot))
+ return 0;
+
+ if ((end - addr) != P4D_SIZE)
+ return 0;
+
+ if (!IS_ALIGNED(addr, P4D_SIZE))
+ return 0;
+
+ if (!IS_ALIGNED(phys_addr, P4D_SIZE))
+ return 0;
+
+ if (p4d_present(*p4d) && !p4d_free_pud_page(p4d, addr))
+ return 0;
+
+ return p4d_set_huge(p4d, phys_addr, prot);
+}
+
+static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift, pgtbl_mod_mask *mask)
+{
+ p4d_t *p4d;
+ unsigned long next;
+
+ p4d = p4d_alloc_track(&init_mm, pgd, addr, mask);
+ if (!p4d)
+ return -ENOMEM;
+ do {
+ next = p4d_addr_end(addr, end);
+
+ if (vmap_try_huge_p4d(p4d, addr, next, phys_addr, prot, max_page_shift)) {
+ *mask |= PGTBL_P4D_MODIFIED;
+ continue;
+ }
+
+ if (vmap_pud_range(p4d, addr, next, phys_addr, prot, max_page_shift, mask))
+ return -ENOMEM;
+ } while (p4d++, phys_addr += (next - addr), addr = next, addr != end);
+ return 0;
+}
+
+int vmap_range(unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift)
+{
+ pgd_t *pgd;
+ unsigned long start;
+ unsigned long next;
+ int err;
+ pgtbl_mod_mask mask = 0;
+
+ might_sleep();
+ BUG_ON(addr >= end);
+
+ start = addr;
+ pgd = pgd_offset_k(addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ err = vmap_p4d_range(pgd, addr, next, phys_addr, prot, max_page_shift, &mask);
+ if (err)
+ break;
+ } while (pgd++, phys_addr += (next - addr), addr = next, addr != end);
+
+ flush_cache_vmap(start, end);
+
+ if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
+ arch_sync_kernel_mappings(start, end);
+
+ return err;
+}
static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pgtbl_mod_mask *mask)
--
2.23.0
^ permalink raw reply related
* [PATCH v10 10/12] mm/vmalloc: add vmap_range_noflush variant
From: Nicholas Piggin @ 2021-01-24 8:22 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, Ding Tianhong, linux-kernel, Nicholas Piggin,
Christoph Hellwig, Zefan Li, Jonathan Cameron, Rick Edgecombe,
linuxppc-dev
In-Reply-To: <20210124082230.2118861-1-npiggin@gmail.com>
As a side-effect, the order of the flush_cache_vmap() and
arch_sync_kernel_mappings() calls is switched, but that now matches
the other callers in this file.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
mm/vmalloc.c | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 5d79148b7fa7..0377e1d059e5 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -235,7 +235,7 @@ static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
return 0;
}
-int vmap_range(unsigned long addr, unsigned long end,
+static int vmap_range_noflush(unsigned long addr, unsigned long end,
phys_addr_t phys_addr, pgprot_t prot,
unsigned int max_page_shift)
{
@@ -257,14 +257,24 @@ int vmap_range(unsigned long addr, unsigned long end,
break;
} while (pgd++, phys_addr += (next - addr), addr = next, addr != end);
- flush_cache_vmap(start, end);
-
if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
arch_sync_kernel_mappings(start, end);
return err;
}
+int vmap_range(unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift)
+{
+ int err;
+
+ err = vmap_range_noflush(addr, end, phys_addr, prot, max_page_shift);
+ flush_cache_vmap(addr, end);
+
+ return err;
+}
+
static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pgtbl_mod_mask *mask)
{
--
2.23.0
^ permalink raw reply related
* [PATCH v10 11/12] mm/vmalloc: Hugepage vmalloc mappings
From: Nicholas Piggin @ 2021-01-24 8:22 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, Ding Tianhong, linux-kernel, Nicholas Piggin,
Christoph Hellwig, Zefan Li, Jonathan Cameron, Rick Edgecombe,
linuxppc-dev
In-Reply-To: <20210124082230.2118861-1-npiggin@gmail.com>
Support huge page vmalloc mappings. Config option HAVE_ARCH_HUGE_VMALLOC
enables support on architectures that define HAVE_ARCH_HUGE_VMAP and
support PMD-sized vmap mappings.
vmalloc will attempt to allocate PMD-sized pages if allocating PMD size
or larger, and fall back to small pages if that was unsuccessful.
Architectures must ensure that any arch specific vmalloc allocations
that require PAGE_SIZE mappings (e.g., module allocations vs strict
module rwx) use the VM_NOHUGE flag to inhibit larger mappings.
When hugepage vmalloc mappings are enabled in the next patch, this
reduces TLB misses by nearly 30x on a `git diff` workload on a 2-node
POWER9 (59,800 -> 2,100) and reduces CPU cycles by 0.54%.
This can result in more internal fragmentation and memory overhead for a
given allocation, so a boot option, nohugevmalloc, is added to disable it.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/Kconfig | 10 +++
include/linux/vmalloc.h | 18 ++++
mm/page_alloc.c | 5 +-
mm/vmalloc.c | 192 ++++++++++++++++++++++++++++++----------
4 files changed, 177 insertions(+), 48 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 24862d15f3a3..f87feb616184 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -724,6 +724,16 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
config HAVE_ARCH_HUGE_VMAP
bool
+config HAVE_ARCH_HUGE_VMALLOC
+ depends on HAVE_ARCH_HUGE_VMAP
+ bool
+ help
+ Archs that select this would be capable of PMD-sized vmaps (i.e.,
+ arch_vmap_pmd_supported() returns true), and they must make no
+ assumptions that vmalloc memory is mapped with PAGE_SIZE ptes. The
+ VM_NOHUGE flag can be used to prohibit arch-specific allocations from
+ using hugepages to help with this (e.g., modules may require it).
+
config ARCH_WANT_HUGE_PMD_SHARE
bool
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 40649c4bb5a2..2ba023daf188 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -25,6 +25,7 @@ struct notifier_block; /* in notifier.h */
#define VM_NO_GUARD 0x00000040 /* don't add guard page */
#define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */
#define VM_MAP_PUT_PAGES 0x00000100 /* put pages and free array in vfree */
+#define VM_NOHUGE 0x00000200 /* force PAGE_SIZE pte mapping */
/*
* VM_KASAN is used slighly differently depending on CONFIG_KASAN_VMALLOC.
@@ -59,6 +60,7 @@ struct vm_struct {
unsigned long size;
unsigned long flags;
struct page **pages;
+ unsigned int page_order;
unsigned int nr_pages;
phys_addr_t phys_addr;
const void *caller;
@@ -194,6 +196,18 @@ static inline void set_vm_flush_reset_perms(void *addr)
if (vm)
vm->flags |= VM_FLUSH_RESET_PERMS;
}
+
+static inline bool is_vm_area_hugepages(const void *addr)
+{
+	/*
+	 * Only reports the size of the physical pages allocated by the
+	 * vmalloc layer; the architecture may still decide to map them with
+	 * PAGE_SIZE ptes. An address with no vm area is reported as false.
+	 */
+	const struct vm_struct *area = find_vm_area(addr);
+
+	return area && area->page_order > 0;
+}
#else
static inline int
map_kernel_range_noflush(unsigned long start, unsigned long size,
@@ -210,6 +224,10 @@ unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
static inline void set_vm_flush_reset_perms(void *addr)
{
}
+static inline bool is_vm_area_hugepages(const void *addr)
+{
+ return false;
+}
#endif
/* for /dev/kmem */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 027f6481ba59..b7a9661fa232 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -72,6 +72,7 @@
#include <linux/padata.h>
#include <linux/khugepaged.h>
#include <linux/buffer_head.h>
+#include <linux/vmalloc.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@@ -8238,6 +8239,7 @@ void *__init alloc_large_system_hash(const char *tablename,
void *table = NULL;
gfp_t gfp_flags;
bool virt;
+ bool huge;
/* allow the kernel cmdline to have a say */
if (!numentries) {
@@ -8305,6 +8307,7 @@ void *__init alloc_large_system_hash(const char *tablename,
} else if (get_order(size) >= MAX_ORDER || hashdist) {
table = __vmalloc(size, gfp_flags);
virt = true;
+			huge = table && is_vm_area_hugepages(table);
} else {
/*
* If bucketsize is not a power-of-two, we may free
@@ -8321,7 +8324,7 @@ void *__init alloc_large_system_hash(const char *tablename,
pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
- virt ? "vmalloc" : "linear");
+ virt ? (huge ? "vmalloc hugepage" : "vmalloc") : "linear");
if (_hash_shift)
*_hash_shift = log2qty;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 0377e1d059e5..eef61e0f5170 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -42,6 +42,19 @@
#include "internal.h"
#include "pgalloc-track.h"
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
+static bool __ro_after_init vmap_allow_huge = true;
+
+static int __init set_nohugevmalloc(char *str)
+{
+ vmap_allow_huge = false;
+ return 0;
+}
+early_param("nohugevmalloc", set_nohugevmalloc);
+#else /* CONFIG_HAVE_ARCH_HUGE_VMALLOC */
+static const bool vmap_allow_huge = false;
+#endif /* CONFIG_HAVE_ARCH_HUGE_VMALLOC */
+
bool is_vmalloc_addr(const void *x)
{
unsigned long addr = (unsigned long)x;
@@ -477,31 +490,12 @@ static int vmap_pages_p4d_range(pgd_t *pgd, unsigned long addr,
return 0;
}
-/**
- * map_kernel_range_noflush - map kernel VM area with the specified pages
- * @addr: start of the VM area to map
- * @size: size of the VM area to map
- * @prot: page protection flags to use
- * @pages: pages to map
- *
- * Map PFN_UP(@size) pages at @addr. The VM area @addr and @size specify should
- * have been allocated using get_vm_area() and its friends.
- *
- * NOTE:
- * This function does NOT do any cache flushing. The caller is responsible for
- * calling flush_cache_vmap() on to-be-mapped areas before calling this
- * function.
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int map_kernel_range_noflush(unsigned long addr, unsigned long size,
- pgprot_t prot, struct page **pages)
+static int vmap_small_pages_range_noflush(unsigned long addr, unsigned long end,
+ pgprot_t prot, struct page **pages)
{
unsigned long start = addr;
- unsigned long end = addr + size;
- unsigned long next;
pgd_t *pgd;
+ unsigned long next;
int err = 0;
int nr = 0;
pgtbl_mod_mask mask = 0;
@@ -523,6 +517,65 @@ int map_kernel_range_noflush(unsigned long addr, unsigned long size,
return 0;
}
+static int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
+ pgprot_t prot, struct page **pages, unsigned int page_shift)
+{
+ unsigned int i, nr = (end - addr) >> PAGE_SHIFT;
+
+ WARN_ON(page_shift < PAGE_SHIFT);
+
+ if (page_shift == PAGE_SHIFT)
+ return vmap_small_pages_range_noflush(addr, end, prot, pages);
+
+ for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) {
+ int err;
+
+ err = vmap_range_noflush(addr, addr + (1UL << page_shift),
+ __pa(page_address(pages[i])), prot,
+ page_shift);
+ if (err)
+ return err;
+
+ addr += 1UL << page_shift;
+ }
+
+ return 0;
+}
+
+static int vmap_pages_range(unsigned long addr, unsigned long end,
+ pgprot_t prot, struct page **pages, unsigned int page_shift)
+{
+ int err;
+
+ err = vmap_pages_range_noflush(addr, end, prot, pages, page_shift);
+ flush_cache_vmap(addr, end);
+ return err;
+}
+
+/**
+ * map_kernel_range_noflush - map kernel VM area with the specified pages
+ * @addr: start of the VM area to map
+ * @size: size of the VM area to map
+ * @prot: page protection flags to use
+ * @pages: pages to map
+ *
+ * Map PFN_UP(@size) pages at @addr. The VM area @addr and @size specify should
+ * have been allocated using get_vm_area() and its friends.
+ *
+ * NOTE:
+ * This function does NOT do any cache flushing. The caller is responsible for
+ * calling flush_cache_vmap() on to-be-mapped areas before calling this
+ * function.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int map_kernel_range_noflush(unsigned long addr, unsigned long size,
+ pgprot_t prot, struct page **pages)
+{
+ return vmap_pages_range_noflush(addr, addr + size, prot, pages, PAGE_SHIFT);
+}
+
int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot,
struct page **pages)
{
@@ -2416,6 +2469,7 @@ static inline void set_area_direct_map(const struct vm_struct *area,
{
int i;
+ /* HUGE_VMALLOC passes small pages to set_direct_map */
for (i = 0; i < area->nr_pages; i++)
if (page_address(area->pages[i]))
set_direct_map(area->pages[i]);
@@ -2449,11 +2503,12 @@ static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages)
* map. Find the start and end range of the direct mappings to make sure
* the vm_unmap_aliases() flush includes the direct map.
*/
- for (i = 0; i < area->nr_pages; i++) {
+ for (i = 0; i < area->nr_pages; i += 1U << area->page_order) {
unsigned long addr = (unsigned long)page_address(area->pages[i]);
if (addr) {
+ unsigned long page_size = PAGE_SIZE << area->page_order;
start = min(addr, start);
- end = max(addr + PAGE_SIZE, end);
+ end = max(addr + page_size, end);
flush_dmap = 1;
}
}
@@ -2496,11 +2551,11 @@ static void __vunmap(const void *addr, int deallocate_pages)
if (deallocate_pages) {
int i;
- for (i = 0; i < area->nr_pages; i++) {
+ for (i = 0; i < area->nr_pages; i += 1U << area->page_order) {
struct page *page = area->pages[i];
BUG_ON(!page);
- __free_pages(page, 0);
+ __free_pages(page, area->page_order);
}
atomic_long_sub(area->nr_pages, &nr_vmalloc_pages);
@@ -2691,15 +2746,18 @@ EXPORT_SYMBOL_GPL(vmap_pfn);
#endif /* CONFIG_VMAP_PFN */
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
- pgprot_t prot, int node)
+ pgprot_t prot, unsigned int page_shift,
+ int node)
{
const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
-	unsigned int nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
-	unsigned long array_size;
-	unsigned int i;
+	unsigned int page_order = page_shift - PAGE_SHIFT;
+	unsigned long addr = (unsigned long)area->addr;
+	unsigned long size = get_vm_area_size(area);
+	unsigned int i, nr_small_pages = size >> PAGE_SHIFT;
 	struct page **pages;
+	unsigned long array_size;
- array_size = (unsigned long)nr_pages * sizeof(struct page *);
+ array_size = (unsigned long)nr_small_pages * sizeof(struct page *);
gfp_mask |= __GFP_NOWARN;
if (!(gfp_mask & (GFP_DMA | GFP_DMA32)))
gfp_mask |= __GFP_HIGHMEM;
@@ -2718,30 +2776,35 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
}
area->pages = pages;
- area->nr_pages = nr_pages;
+ area->nr_pages = nr_small_pages;
+ area->page_order = page_order;
- for (i = 0; i < area->nr_pages; i++) {
+ /*
+ * Careful, we allocate and map page_order pages, but tracking is done
+ * per PAGE_SIZE page so as to keep the vm_struct APIs independent of
+ * the physical/mapped size.
+ */
+ for (i = 0; i < area->nr_pages; i += 1U << page_order) {
struct page *page;
+ int p;
- if (node == NUMA_NO_NODE)
- page = alloc_page(gfp_mask);
- else
- page = alloc_pages_node(node, gfp_mask, 0);
-
+ page = alloc_pages_node(node, gfp_mask, page_order);
if (unlikely(!page)) {
/* Successfully allocated i pages, free them in __vfree() */
area->nr_pages = i;
atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
goto fail;
}
- area->pages[i] = page;
+
+ for (p = 0; p < (1U << page_order); p++)
+ area->pages[i + p] = page + p;
+
if (gfpflags_allow_blocking(gfp_mask))
cond_resched();
}
atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
- if (map_kernel_range((unsigned long)area->addr, get_vm_area_size(area),
- prot, pages) < 0)
+ if (vmap_pages_range(addr, addr + size, prot, pages, page_shift) < 0)
goto fail;
return area->addr;
@@ -2749,7 +2812,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
fail:
warn_alloc(gfp_mask, NULL,
"vmalloc: allocation failure, allocated %ld of %ld bytes",
- (area->nr_pages*PAGE_SIZE), area->size);
+ (area->nr_pages*PAGE_SIZE), size);
__vfree(area->addr);
return NULL;
}
@@ -2780,19 +2843,44 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
-	struct vm_struct *area;
+	struct vm_struct *area = NULL;
void *addr;
unsigned long real_size = size;
+ unsigned long real_align = align;
+ unsigned int shift = PAGE_SHIFT;
- size = PAGE_ALIGN(size);
if (!size || (size >> PAGE_SHIFT) > totalram_pages())
goto fail;
- area = __get_vm_area_node(real_size, align, VM_ALLOC | VM_UNINITIALIZED |
+ if (vmap_allow_huge && !(vm_flags & VM_NOHUGE) &&
+ arch_vmap_pmd_supported(prot) &&
+ (pgprot_val(prot) == pgprot_val(PAGE_KERNEL))) {
+ unsigned long size_per_node;
+
+ /*
+ * Try huge pages. Only try for PAGE_KERNEL allocations,
+ * others like modules don't yet expect huge pages in
+ * their allocations due to apply_to_page_range not
+ * supporting them.
+ */
+
+ size_per_node = size;
+ if (node == NUMA_NO_NODE)
+ size_per_node /= num_online_nodes();
+ if (size_per_node >= PMD_SIZE) {
+ shift = PMD_SHIFT;
+ align = max(real_align, 1UL << shift);
+ size = ALIGN(real_size, 1UL << shift);
+ }
+ }
+
+again:
+ size = PAGE_ALIGN(size);
+ area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED |
vm_flags, start, end, node, gfp_mask, caller);
if (!area)
goto fail;
- addr = __vmalloc_area_node(area, gfp_mask, prot, node);
+ addr = __vmalloc_area_node(area, gfp_mask, prot, shift, node);
if (!addr)
- return NULL;
+ goto fail;
/*
* In this function, newly allocated vm_struct has VM_UNINITIALIZED
@@ -2806,8 +2894,18 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
return addr;
fail:
- warn_alloc(gfp_mask, NULL,
+ if (shift > PAGE_SHIFT) {
+ shift = PAGE_SHIFT;
+ align = real_align;
+ size = real_size;
+ goto again;
+ }
+
+ if (!area) {
+ /* Warn for area allocation, page allocations already warn */
+ warn_alloc(gfp_mask, NULL,
"vmalloc: allocation failure: %lu bytes", real_size);
+ }
return NULL;
}
--
2.23.0
^ permalink raw reply related
* [PATCH v10 12/12] powerpc/64s/radix: Enable huge vmalloc mappings
From: Nicholas Piggin @ 2021-01-24 8:22 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, Ding Tianhong, linux-kernel, Nicholas Piggin,
Christoph Hellwig, Zefan Li, Jonathan Cameron, Rick Edgecombe,
linuxppc-dev
In-Reply-To: <20210124082230.2118861-1-npiggin@gmail.com>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
Documentation/admin-guide/kernel-parameters.txt | 2 ++
arch/powerpc/Kconfig | 1 +
arch/powerpc/kernel/module.c | 13 +++++++++++--
3 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a10b545c2070..d62df53e5200 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3225,6 +3225,8 @@
nohugeiomap [KNL,X86,PPC,ARM64] Disable kernel huge I/O mappings.
+ nohugevmalloc [PPC] Disable kernel huge vmalloc mappings.
+
nosmt [KNL,S390] Disable symmetric multithreading (SMT).
Equivalent to smt=1.
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 107bb4319e0e..781da6829ab7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -181,6 +181,7 @@ config PPC
select GENERIC_GETTIMEOFDAY
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU
+ select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14
select HAVE_ARCH_KASAN_VMALLOC if PPC32 && PPC_PAGE_SHIFT <= 14
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index a211b0253cdb..bc2695eeeb4c 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -92,8 +92,17 @@ void *module_alloc(unsigned long size)
{
BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR);
- return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL,
- PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
+ /*
+ * Don't do huge page allocations for modules yet until more testing
+ * is done. STRICT_MODULE_RWX may require extra work to support this
+ * too.
+ */
+
+ return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+ GFP_KERNEL,
+ PAGE_KERNEL_EXEC,
+ VM_NOHUGE | VM_FLUSH_RESET_PERMS,
+ NUMA_NO_NODE,
__builtin_return_address(0));
}
#endif
--
2.23.0
^ permalink raw reply related
* Re: [PATCH v10 01/12] mm/vmalloc: fix vmalloc_to_page for huge vmap mappings
From: Christoph Hellwig @ 2021-01-24 11:31 UTC (permalink / raw)
To: Nicholas Piggin
Cc: linux-arch, Ding Tianhong, linux-kernel, Christoph Hellwig,
linux-mm, Zefan Li, Jonathan Cameron, Andrew Morton,
Rick Edgecombe, linuxppc-dev
In-Reply-To: <20210124082230.2118861-2-npiggin@gmail.com>
On Sun, Jan 24, 2021 at 06:22:19PM +1000, Nicholas Piggin wrote:
> vmalloc_to_page returns NULL for addresses mapped by larger pages[*].
> Whether or not a vmap is huge depends on the architecture details,
> alignments, boot options, etc., which the caller can not be expected
> to know. Therefore HUGE_VMAP is a regression for vmalloc_to_page.
>
> This change teaches vmalloc_to_page about larger pages, and returns
> the struct page that corresponds to the offset within the large page.
> This makes the API agnostic to mapping implementation details.
Maybe enable instead of fix would be better in the subject line?
Otherwise this looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply
* Re: [PATCH v10 02/12] mm: apply_to_pte_range warn and fail if a large pte is encountered
From: Christoph Hellwig @ 2021-01-24 11:32 UTC (permalink / raw)
To: Nicholas Piggin
Cc: linux-arch, Ding Tianhong, linux-kernel, Christoph Hellwig,
linux-mm, Zefan Li, Jonathan Cameron, Andrew Morton,
Rick Edgecombe, linuxppc-dev
In-Reply-To: <20210124082230.2118861-3-npiggin@gmail.com>
On Sun, Jan 24, 2021 at 06:22:20PM +1000, Nicholas Piggin wrote:
> apply_to_pte_range might mistake a large pte for bad, or treat it as a
> page table, resulting in a crash or corruption. Add a test to warn and
> return error if large entries are found.
Looks good,
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply
* Re: [PATCH v10 03/12] mm/vmalloc: rename vmap_*_range vmap_pages_*_range
From: Christoph Hellwig @ 2021-01-24 11:34 UTC (permalink / raw)
To: Nicholas Piggin
Cc: linux-arch, Ding Tianhong, linux-kernel, Christoph Hellwig,
linux-mm, Zefan Li, Jonathan Cameron, Andrew Morton,
Rick Edgecombe, linuxppc-dev
In-Reply-To: <20210124082230.2118861-4-npiggin@gmail.com>
On Sun, Jan 24, 2021 at 06:22:21PM +1000, Nicholas Piggin wrote:
> The vmalloc mapper operates on a struct page * array rather than a
> linear physical address, re-name it to make this distinction clear.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Looks good,
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply
* Re: [PATCH v10 04/12] mm/ioremap: rename ioremap_*_range to vmap_*_range
From: Christoph Hellwig @ 2021-01-24 11:36 UTC (permalink / raw)
To: Nicholas Piggin
Cc: linux-arch, Ding Tianhong, linux-kernel, Christoph Hellwig,
linux-mm, Zefan Li, Jonathan Cameron, Andrew Morton,
Rick Edgecombe, linuxppc-dev
In-Reply-To: <20210124082230.2118861-5-npiggin@gmail.com>
On Sun, Jan 24, 2021 at 06:22:22PM +1000, Nicholas Piggin wrote:
> This will be used as a generic kernel virtual mapping function, so
> re-name it in preparation.
The new name looks ok, but shouldn't it also move to vmalloc.c with
the more generic name and purpose?
^ permalink raw reply
* Re: [PATCH v10 05/12] mm: HUGE_VMAP arch support cleanup
From: Christoph Hellwig @ 2021-01-24 11:40 UTC (permalink / raw)
To: Nicholas Piggin
Cc: linux-arch, x86, H. Peter Anvin, Will Deacon, Ingo Molnar,
Catalin Marinas, Ding Tianhong, linux-kernel, Christoph Hellwig,
linux-mm, Zefan Li, Borislav Petkov, Jonathan Cameron,
Andrew Morton, Rick Edgecombe, linuxppc-dev, Thomas Gleixner,
linux-arm-kernel
In-Reply-To: <20210124082230.2118861-6-npiggin@gmail.com>
> diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h
> index 2ca708ab9b20..597b40405319 100644
> --- a/arch/arm64/include/asm/vmalloc.h
> +++ b/arch/arm64/include/asm/vmalloc.h
> @@ -1,4 +1,12 @@
> #ifndef _ASM_ARM64_VMALLOC_H
> #define _ASM_ARM64_VMALLOC_H
>
> +#include <asm/page.h>
> +
> +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
> +bool arch_vmap_p4d_supported(pgprot_t prot);
> +bool arch_vmap_pud_supported(pgprot_t prot);
> +bool arch_vmap_pmd_supported(pgprot_t prot);
> +#endif
Shouldn't these be inlines or macros? Also it would be useful
if the architectures did not have to override all functions,
but just those that they actually implement?
Also lots of > 80 char lines in the patch.
^ permalink raw reply
* Re: [PATCH] powerpc/64s: fix scv entry fallback flush vs interrupt
From: Michael Ellerman @ 2021-01-24 11:54 UTC (permalink / raw)
To: linuxppc-dev, Nicholas Piggin
Cc: Tulio Magno Quites Machado Filho, Daniel Axtens
In-Reply-To: <20210111062408.287092-1-npiggin@gmail.com>
On Mon, 11 Jan 2021 16:24:08 +1000, Nicholas Piggin wrote:
> The L1D flush fallback functions are not recoverable vs interrupts,
> yet the scv entry flush runs with MSR[EE]=1. This can result in a
> timer (soft-NMI) or MCE or SRESET interrupt hitting here and overwriting
> the EXRFI save area, which ends up corrupting userspace registers for
> scv return.
>
> Fix this by disabling RI and EE for the scv entry fallback flush.
Applied to powerpc/fixes.
[1/1] powerpc/64s: fix scv entry fallback flush vs interrupt
https://git.kernel.org/powerpc/c/08685be7761d69914f08c3d6211c543a385a5b9c
cheers
^ permalink raw reply
* Re: [PATCH v10 04/12] mm/ioremap: rename ioremap_*_range to vmap_*_range
From: Nicholas Piggin @ 2021-01-24 12:04 UTC (permalink / raw)
To: Christoph Hellwig
Cc: linux-arch, Ding Tianhong, linux-kernel, linux-mm, Zefan Li,
Jonathan Cameron, Andrew Morton, Rick Edgecombe, linuxppc-dev
In-Reply-To: <20210124113636.GD694255@infradead.org>
Excerpts from Christoph Hellwig's message of January 24, 2021 9:36 pm:
> On Sun, Jan 24, 2021 at 06:22:22PM +1000, Nicholas Piggin wrote:
>> This will be used as a generic kernel virtual mapping function, so
>> re-name it in preparation.
>
> The new name looks ok, but shouldn't it also move to vmalloc.c with
> the more generic name and purpose?
>
Yes, I moved it in a later patch to make reviewing easier. Rename in
this one then the move patch is cut and paste.
Thanks,
Nick
^ permalink raw reply
* [GIT PULL] Please pull powerpc/linux.git powerpc-5.11-5 tag
From: Michael Ellerman @ 2021-01-24 12:15 UTC (permalink / raw)
To: Linus Torvalds; +Cc: linuxppc-dev, linux-kernel, npiggin, sandipan
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA256
Hi Linus,
Please pull some more powerpc fixes for 5.11:
The following changes since commit 41131a5e54ae7ba5a2bb8d7b30d1818b3f5b13d2:
powerpc/vdso: Fix clock_gettime_fallback for vdso32 (2021-01-14 15:56:44 +1100)
are available in the git repository at:
https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git tags/powerpc-5.11-5
for you to fetch changes up to 08685be7761d69914f08c3d6211c543a385a5b9c:
powerpc/64s: fix scv entry fallback flush vs interrupt (2021-01-20 15:58:19 +1100)
- ------------------------------------------------------------------
powerpc fixes for 5.11 #5
Fix a bad interaction between the scv handling and the fallback L1D flush, which
could lead to user register corruption. Only affects people using scv (~no one)
on machines with old firmware that are missing the L1D flush.
Two small selftest fixes.
Thanks to Eirik Fuller, Libor Pechacek, Nicholas Piggin, Sandipan Das, Tulio
Magno Quites Machado Filho.
- ------------------------------------------------------------------
Michael Ellerman (1):
selftests/powerpc: Only test lwm/stmw on big endian
Nicholas Piggin (1):
powerpc/64s: fix scv entry fallback flush vs interrupt
Sandipan Das (1):
selftests/powerpc: Fix exit status of pkey tests
arch/powerpc/include/asm/exception-64s.h | 13 +++++++++++
arch/powerpc/include/asm/feature-fixups.h | 10 ++++++++
arch/powerpc/kernel/entry_64.S | 2 +-
arch/powerpc/kernel/exceptions-64s.S | 19 ++++++++++++++++
arch/powerpc/kernel/vmlinux.lds.S | 7 ++++++
arch/powerpc/lib/feature-fixups.c | 24 +++++++++++++++++---
tools/testing/selftests/powerpc/alignment/alignment_handler.c | 5 +++-
tools/testing/selftests/powerpc/mm/pkey_exec_prot.c | 2 +-
tools/testing/selftests/powerpc/mm/pkey_siginfo.c | 2 +-
9 files changed, 77 insertions(+), 7 deletions(-)
-----BEGIN PGP SIGNATURE-----
iQIzBAEBCAAdFiEEJFGtCPCthwEv2Y/bUevqPMjhpYAFAmANYaoACgkQUevqPMjh
pYDyFRAAqwsxxbbCe+AlggURQi7nap5JL4qHV0bEYPR34IEIPs9blDOb5ECQNbNt
fbxDK9y3ij5ceETsdzM6d3gkocBo/O8JMa9scfmHNFpQLWQk013MUg3YJQnycDkE
vpmaXPMdkcZv82VXdYe4DonhlS3FBTpbL1jPVZn6KIJGpiWfuS7vgptLeBqtMMZz
Mz4lAkzMKbSw/NmKe+Iq3Rc8zsw4C6gXPIhkNsD32s5U+lVMKLpFpxtwhxcGFxDy
sTUBWXJn+mW4+XJVNHQOvLN3gTPNgEcg2xoKkQiwB5/y+GKgPco24Ep6bUalYfNG
dViUAEgzpyhwTfkBxwwV8bpxSaw9HAQRjVC18QJ7sLM+ogHEJm7ejipAOmAfAzuf
+BwQgkSZ2I/peJJDNvVjC3vRIDl29LEA73ZORcp4ynDP/cKuhgvaYBTPCVCzcc0r
+bPXFEfS0OofLBkLekHIdSRfCLQjmQF/TB3CVkDAlDKjiMwTJk/khTn0+0RD6DRK
i/iBkCXjOBuizXkIzRUAit6YMMoO6Yt/nuyrPhDetBFpMPmZgAuLZCs1UI+qUR/L
lS4jOSUQnZqLXsDJqT7uUIdaWZPODdV1U8XEl1+C9xAZ5A4Juy9fFr2K91OtBa2e
/45tUCpDCmtt5aXZXWgwghJeQteBI0Ng5U4NH0asH2W8oVDFyRM=
=f+xY
-----END PGP SIGNATURE-----
^ permalink raw reply
* Re: [PATCH v10 05/12] mm: HUGE_VMAP arch support cleanup
From: Nicholas Piggin @ 2021-01-24 12:22 UTC (permalink / raw)
To: Christoph Hellwig
Cc: linux-arch, x86, Thomas Gleixner, Will Deacon, Catalin Marinas,
Ding Tianhong, linux-kernel, linux-mm, Zefan Li, Borislav Petkov,
Jonathan Cameron, H. Peter Anvin, Andrew Morton, Rick Edgecombe,
linuxppc-dev, Ingo Molnar, linux-arm-kernel
In-Reply-To: <20210124114008.GE694255@infradead.org>
Excerpts from Christoph Hellwig's message of January 24, 2021 9:40 pm:
>> diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h
>> index 2ca708ab9b20..597b40405319 100644
>> --- a/arch/arm64/include/asm/vmalloc.h
>> +++ b/arch/arm64/include/asm/vmalloc.h
>> @@ -1,4 +1,12 @@
>> #ifndef _ASM_ARM64_VMALLOC_H
>> #define _ASM_ARM64_VMALLOC_H
>>
>> +#include <asm/page.h>
>> +
>> +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
>> +bool arch_vmap_p4d_supported(pgprot_t prot);
>> +bool arch_vmap_pud_supported(pgprot_t prot);
>> +bool arch_vmap_pmd_supported(pgprot_t prot);
>> +#endif
>
> Shouldn't the be inlines or macros? Also it would be useful
> if the architectures would not have to override all functions
> but just those that are it actually implements?
It gets better in the next patches. I did it this way again to avoid
moving a lot of code at the same time as changing name / prototype
slightly.
I didn't see individual generic fallbacks being all that useful really
at this scale. I don't mind keeping the explicit false.
> Also lots of > 80 char lines in the patch.
Yeah there's a few, I can reduce those.
Thanks,
Nick
^ permalink raw reply
* Re: [PATCH v10 09/12] mm: Move vmap_range from mm/ioremap.c to mm/vmalloc.c
From: Christoph Hellwig @ 2021-01-24 14:49 UTC (permalink / raw)
To: Nicholas Piggin
Cc: linux-arch, Ding Tianhong, linux-kernel, Christoph Hellwig,
linux-mm, Zefan Li, Jonathan Cameron, Andrew Morton,
Rick Edgecombe, linuxppc-dev
In-Reply-To: <20210124082230.2118861-10-npiggin@gmail.com>
On Sun, Jan 24, 2021 at 06:22:27PM +1000, Nicholas Piggin wrote:
> This is a generic kernel virtual memory mapper, not specific to ioremap.
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
Although it would be nice if you could fix up the > 80 char lines while
you're at it.
^ permalink raw reply
* Re: [PATCH v10 10/12] mm/vmalloc: add vmap_range_noflush variant
From: Christoph Hellwig @ 2021-01-24 14:51 UTC (permalink / raw)
To: Nicholas Piggin
Cc: linux-arch, Ding Tianhong, linux-kernel, Christoph Hellwig,
linux-mm, Zefan Li, Jonathan Cameron, Andrew Morton,
Rick Edgecombe, linuxppc-dev
In-Reply-To: <20210124082230.2118861-11-npiggin@gmail.com>
On Sun, Jan 24, 2021 at 06:22:28PM +1000, Nicholas Piggin wrote:
> As a side-effect, the order of flush_cache_vmap() and
> arch_sync_kernel_mappings() calls are switched, but that now matches
> the other callers in this file.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Looks good,
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply
* Re: [PATCH v10 11/12] mm/vmalloc: Hugepage vmalloc mappings
From: Christoph Hellwig @ 2021-01-24 15:07 UTC (permalink / raw)
To: Nicholas Piggin
Cc: linux-arch, Ding Tianhong, linux-kernel, Christoph Hellwig,
linux-mm, Zefan Li, Jonathan Cameron, Andrew Morton,
Rick Edgecombe, linuxppc-dev
In-Reply-To: <20210124082230.2118861-12-npiggin@gmail.com>
On Sun, Jan 24, 2021 at 06:22:29PM +1000, Nicholas Piggin wrote:
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 24862d15f3a3..f87feb616184 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -724,6 +724,16 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
> config HAVE_ARCH_HUGE_VMAP
> bool
>
> +config HAVE_ARCH_HUGE_VMALLOC
> + depends on HAVE_ARCH_HUGE_VMAP
> + bool
> + help
> + Archs that select this would be capable of PMD-sized vmaps (i.e.,
> + arch_vmap_pmd_supported() returns true), and they must make no
> + assumptions that vmalloc memory is mapped with PAGE_SIZE ptes. The
> + VM_NOHUGE flag can be used to prohibit arch-specific allocations from
> + using hugepages to help with this (e.g., modules may require it).
help texts don't make sense for options that aren't user visible.
More importantly, is there any good reason to keep the option and not
just go the extra step and enable huge page vmalloc for arm64 and x86
as well?
> +static inline bool is_vm_area_hugepages(const void *addr)
> +{
> + /*
> + * This may not 100% tell if the area is mapped with > PAGE_SIZE
> + * page table entries, if for some reason the architecture indicates
> + * larger sizes are available but decides not to use them, nothing
> + * prevents that. This only indicates the size of the physical page
> + * allocated in the vmalloc layer.
> + */
> + return (find_vm_area(addr)->page_order > 0);
No need for the parentheses here.
> }
>
> +static int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
> + pgprot_t prot, struct page **pages, unsigned int page_shift)
> +{
> + unsigned int i, nr = (end - addr) >> PAGE_SHIFT;
> +
> + WARN_ON(page_shift < PAGE_SHIFT);
> +
> + if (page_shift == PAGE_SHIFT)
> + return vmap_small_pages_range_noflush(addr, end, prot, pages);
This begs for a IS_ENABLED check to disable the hugepage code for
architectures that don't need it.
> +int map_kernel_range_noflush(unsigned long addr, unsigned long size,
> + pgprot_t prot, struct page **pages)
> +{
> + return vmap_pages_range_noflush(addr, addr + size, prot, pages, PAGE_SHIFT);
> +}
Please just kill off map_kernel_range_noflush and map_kernel_range
entirely in favor of the vmap versions.
> + for (i = 0; i < area->nr_pages; i += 1U << area->page_order) {
Maybe using a helper that takes the vm_area_struct and either returns
area->page_order or always 0 based on IS_ENABLED?
^ permalink raw reply
* Re: [PATCH v10 11/12] mm/vmalloc: Hugepage vmalloc mappings
From: Randy Dunlap @ 2021-01-24 18:06 UTC (permalink / raw)
To: Christoph Hellwig, Nicholas Piggin
Cc: linux-arch, Ding Tianhong, linux-kernel, linux-mm, Zefan Li,
Jonathan Cameron, Andrew Morton, Rick Edgecombe, linuxppc-dev
In-Reply-To: <20210124150729.GC733865@infradead.org>
On 1/24/21 7:07 AM, Christoph Hellwig wrote:
>> +config HAVE_ARCH_HUGE_VMALLOC
>> + depends on HAVE_ARCH_HUGE_VMAP
>> + bool
>> + help
>> + Archs that select this would be capable of PMD-sized vmaps (i.e.,
>> + arch_vmap_pmd_supported() returns true), and they must make no
>> + assumptions that vmalloc memory is mapped with PAGE_SIZE ptes. The
>> + VM_NOHUGE flag can be used to prohibit arch-specific allocations from
>> + using hugepages to help with this (e.g., modules may require it).
> help texts don't make sense for options that aren't user visible.
It's good that the Kconfig symbol is documented and it's better here
than having to dig thru git commit logs IMO.
It could be done as "# Archs that select" style comments instead
of Kconfig help text.
--
~Randy
^ permalink raw reply
* Re: [GIT PULL] Please pull powerpc/linux.git powerpc-5.11-5 tag
From: pr-tracker-bot @ 2021-01-24 18:34 UTC (permalink / raw)
To: Michael Ellerman
Cc: linuxppc-dev, Linus Torvalds, linux-kernel, npiggin, sandipan
In-Reply-To: <87tur6pm2v.fsf@mpe.ellerman.id.au>
The pull request you sent on Sun, 24 Jan 2021 23:15:52 +1100:
> https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git tags/powerpc-5.11-5
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/14c50a66183856672d822f25dbb73ad26d1e8f11
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
^ permalink raw reply
* Re: [PATCH v4 2/2] powerpc/mce: Remove per cpu variables from MCE handlers
From: kernel test robot @ 2021-01-24 19:45 UTC (permalink / raw)
To: Ganesh Goudar, linuxppc-dev, mpe
Cc: clang-built-linux, Ganesh Goudar, kbuild-all, mahesh, npiggin
In-Reply-To: <20210122123244.34033-2-ganeshgr@linux.ibm.com>
[-- Attachment #1: Type: text/plain, Size: 5011 bytes --]
Hi Ganesh,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on powerpc/next]
[also build test ERROR on v5.11-rc4 next-20210122]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Ganesh-Goudar/powerpc-mce-Reduce-the-size-of-event-arrays/20210124-191230
base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-randconfig-r005-20210124 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project bd3a387ee76f58caa0d7901f3f84e9bb3d006f27)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# install powerpc cross compiling tool for clang build
# apt-get install binutils-powerpc-linux-gnu
# https://github.com/0day-ci/linux/commit/fab6401db419da33d1757ebf519f030ab758ae7a
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Ganesh-Goudar/powerpc-mce-Reduce-the-size-of-event-arrays/20210124-191230
git checkout fab6401db419da33d1757ebf519f030ab758ae7a
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=powerpc
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
>> arch/powerpc/kernel/setup-common.c:940:2: error: implicit declaration of function 'mce_init' [-Werror,-Wimplicit-function-declaration]
mce_init();
^
1 error generated.
vim +/mce_init +940 arch/powerpc/kernel/setup-common.c
847
848 /*
849 * Called into from start_kernel this initializes memblock, which is used
850 * to manage page allocation until mem_init is called.
851 */
852 void __init setup_arch(char **cmdline_p)
853 {
854 kasan_init();
855
856 *cmdline_p = boot_command_line;
857
858 /* Set a half-reasonable default so udelay does something sensible */
859 loops_per_jiffy = 500000000 / HZ;
860
861 /* Unflatten the device-tree passed by prom_init or kexec */
862 unflatten_device_tree();
863
864 /*
865 * Initialize cache line/block info from device-tree (on ppc64) or
866 * just cputable (on ppc32).
867 */
868 initialize_cache_info();
869
870 /* Initialize RTAS if available. */
871 rtas_initialize();
872
873 /* Check if we have an initrd provided via the device-tree. */
874 check_for_initrd();
875
876 /* Probe the machine type, establish ppc_md. */
877 probe_machine();
878
879 /* Setup panic notifier if requested by the platform. */
880 setup_panic();
881
882 /*
883 * Configure ppc_md.power_save (ppc32 only, 64-bit machines do
884 * it from their respective probe() function.
885 */
886 setup_power_save();
887
888 /* Discover standard serial ports. */
889 find_legacy_serial_ports();
890
891 /* Register early console with the printk subsystem. */
892 register_early_udbg_console();
893
894 /* Setup the various CPU maps based on the device-tree. */
895 smp_setup_cpu_maps();
896
897 /* Initialize xmon. */
898 xmon_setup();
899
900 /* Check the SMT related command line arguments (ppc64). */
901 check_smt_enabled();
902
903 /* Parse memory topology */
904 mem_topology_setup();
905
906 /*
907 * Release secondary cpus out of their spinloops at 0x60 now that
908 * we can map physical -> logical CPU ids.
909 *
910 * Freescale Book3e parts spin in a loop provided by firmware,
911 * so smp_release_cpus() does nothing for them.
912 */
913 #ifdef CONFIG_SMP
914 smp_setup_pacas();
915
916 /* On BookE, setup per-core TLB data structures. */
917 setup_tlb_core_data();
918 #endif
919 /* Print various info about the machine that has been gathered so far. */
920 print_system_info();
921
922 /* Reserve large chunks of memory for use by CMA for KVM. */
923 kvm_cma_reserve();
924
925 /* Reserve large chunks of memory for us by CMA for hugetlb */
926 gigantic_hugetlb_cma_reserve();
927
928 klp_init_thread_info(&init_task);
929
930 init_mm.start_code = (unsigned long)_stext;
931 init_mm.end_code = (unsigned long) _etext;
932 init_mm.end_data = (unsigned long) _edata;
933 init_mm.brk = klimit;
934
935 mm_iommu_init(&init_mm);
936 irqstack_early_init();
937 exc_lvl_early_init();
938 emergency_stack_init();
939
> 940 mce_init();
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 25713 bytes --]
^ permalink raw reply
* [PATCH] powerpc: Update broken clean & mrproper
From: Andrew Delgadillo @ 2021-01-24 22:30 UTC (permalink / raw)
To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras,
linuxppc-dev
Cc: Andrew Delgadillo
commit cc8a51ca6f05 ("kbuild: always create directories of targets") breaks
mrproper on ppc. arch/powerpc/boot/arch/ is not cleaned up when running
mrproper. Before this patch:
$ make ARCH=powerpc
$ make ARCH=powerpc mrproper
$ git clean -ndxf
Would remove arch/powerpc/boot/arch
After this patch, the directory reported by git clean is no longer
reported.
Fixes: cc8a51ca6f05 ("kbuild: always create directories of targets")
Signed-off-by: Andrew Delgadillo <adelg@google.com>
---
arch/powerpc/boot/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 2b8da923ceca..3cc762cce1a4 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -454,7 +454,7 @@ PHONY += install zInstall
clean-files += $(image-) $(initrd-) cuImage.* dtbImage.* treeImage.* \
zImage zImage.initrd zImage.chrp zImage.coff zImage.holly \
zImage.miboot zImage.pmac zImage.pseries \
- zImage.maple simpleImage.* otheros.bld
+ zImage.maple simpleImage.* otheros.bld arch
# clean up files cached by wrapper
clean-kernel-base := vmlinux.strip vmlinux.bin
--
2.30.0.280.ga3ce27912f-goog
^ permalink raw reply related
* Re: [PATCH v10 11/12] mm/vmalloc: Hugepage vmalloc mappings
From: Nicholas Piggin @ 2021-01-24 23:17 UTC (permalink / raw)
To: Christoph Hellwig
Cc: linux-arch, Randy Dunlap, Ding Tianhong, linux-kernel, linux-mm,
Zefan Li, Jonathan Cameron, Andrew Morton, Rick Edgecombe,
linuxppc-dev
In-Reply-To: <20210124150729.GC733865@infradead.org>
Excerpts from Christoph Hellwig's message of January 25, 2021 1:07 am:
> On Sun, Jan 24, 2021 at 06:22:29PM +1000, Nicholas Piggin wrote:
>> diff --git a/arch/Kconfig b/arch/Kconfig
>> index 24862d15f3a3..f87feb616184 100644
>> --- a/arch/Kconfig
>> +++ b/arch/Kconfig
>> @@ -724,6 +724,16 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
>> config HAVE_ARCH_HUGE_VMAP
>> bool
>>
>> +config HAVE_ARCH_HUGE_VMALLOC
>> + depends on HAVE_ARCH_HUGE_VMAP
>> + bool
>> + help
>> + Archs that select this would be capable of PMD-sized vmaps (i.e.,
>> + arch_vmap_pmd_supported() returns true), and they must make no
>> + assumptions that vmalloc memory is mapped with PAGE_SIZE ptes. The
>> + VM_NOHUGE flag can be used to prohibit arch-specific allocations from
>> + using hugepages to help with this (e.g., modules may require it).
>
> help texts don't make sense for options that aren't user visible.
Yeah it was supposed to just be a comment but if it was user visible
then similar kind of thing would not make sense in help text, so I'll
just turn it into a real comment as per Randy's suggestion.
> More importantly, is there any good reason to keep the option and not
> just go the extra step and enable huge page vmalloc for arm64 and x86
> as well?
Yes they need to ensure they exclude vmallocs that can't be huge one
way or another (VM_ flag or prot arg).
After they're converted we can fold it into HUGE_VMAP.
>> +static inline bool is_vm_area_hugepages(const void *addr)
>> +{
>> + /*
>> + * This may not 100% tell if the area is mapped with > PAGE_SIZE
>> + * page table entries, if for some reason the architecture indicates
>> + * larger sizes are available but decides not to use them, nothing
>> + * prevents that. This only indicates the size of the physical page
>> + * allocated in the vmalloc layer.
>> + */
>> + return (find_vm_area(addr)->page_order > 0);
>
> No need for the braces here.
>
>> }
>>
>> +static int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
>> + pgprot_t prot, struct page **pages, unsigned int page_shift)
>> +{
>> + unsigned int i, nr = (end - addr) >> PAGE_SHIFT;
>> +
>> + WARN_ON(page_shift < PAGE_SHIFT);
>> +
>> + if (page_shift == PAGE_SHIFT)
>> + return vmap_small_pages_range_noflush(addr, end, prot, pages);
>
> This begs for a IS_ENABLED check to disable the hugepage code for
> architectures that don't need it.
Yeah good point.
>> +int map_kernel_range_noflush(unsigned long addr, unsigned long size,
>> + pgprot_t prot, struct page **pages)
>> +{
>> + return vmap_pages_range_noflush(addr, addr + size, prot, pages, PAGE_SHIFT);
>> +}
>
> Please just kill off map_kernel_range_noflush and map_kernel_range
> off entirely in favor of the vmap versions.
I can do a cleanup patch on top of it.
>> + for (i = 0; i < area->nr_pages; i += 1U << area->page_order) {
>
> Maybe using a helper that takes the vm_area_struct and either returns
> area->page_order or always 0 based on IS_ENABLED?
I'll see how it looks.
Thanks,
Nick
^ permalink raw reply
* [powerpc:fixes-test] BUILD SUCCESS 4025c784c573cab7e3f84746cc82b8033923ec62
From: kernel test robot @ 2021-01-25 3:57 UTC (permalink / raw)
To: Michael Ellerman; +Cc: linuxppc-dev
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git fixes-test
branch HEAD: 4025c784c573cab7e3f84746cc82b8033923ec62 powerpc/64s: prevent recursive replay_soft_interrupts causing superfluous interrupt
elapsed time: 956m
configs tested: 147
configs skipped: 2
The following configs have been built successfully.
More configs may be tested in the coming days.
gcc tested configs:
arm defconfig
arm64 allyesconfig
arm64 defconfig
arm allyesconfig
arm allmodconfig
mips tb0287_defconfig
mips mpc30x_defconfig
arm h5000_defconfig
sh rsk7264_defconfig
powerpc linkstation_defconfig
arm pxa255-idp_defconfig
arm am200epdkit_defconfig
mips pistachio_defconfig
xtensa cadence_csp_defconfig
arm aspeed_g5_defconfig
powerpc mpc832x_mds_defconfig
arm pcm027_defconfig
mips qi_lb60_defconfig
mips decstation_64_defconfig
powerpc chrp32_defconfig
arm mxs_defconfig
mips cu1000-neo_defconfig
powerpc tqm8560_defconfig
powerpc64 alldefconfig
sh sh7757lcr_defconfig
sh kfr2r09_defconfig
arm cns3420vb_defconfig
powerpc ppa8548_defconfig
m68k multi_defconfig
sh rts7751r2d1_defconfig
mips tb0219_defconfig
mips ip27_defconfig
m68k apollo_defconfig
arc nsimosci_defconfig
powerpc mpc885_ads_defconfig
s390 debug_defconfig
arm iop32x_defconfig
arm tango4_defconfig
mips nlm_xlr_defconfig
arm pxa3xx_defconfig
arm hackkit_defconfig
sh shmin_defconfig
powerpc mpc512x_defconfig
arm integrator_defconfig
arm cm_x300_defconfig
powerpc mpc8540_ads_defconfig
sh r7785rp_defconfig
arm sunxi_defconfig
h8300 h8s-sim_defconfig
powerpc mgcoge_defconfig
sh sh7710voipgw_defconfig
arm imote2_defconfig
mips loongson1b_defconfig
arm dove_defconfig
arm mps2_defconfig
sh rts7751r2dplus_defconfig
mips workpad_defconfig
powerpc walnut_defconfig
arm sama5_defconfig
mips ath79_defconfig
sh se7751_defconfig
mips bigsur_defconfig
csky alldefconfig
arm pxa168_defconfig
ia64 allmodconfig
ia64 defconfig
ia64 allyesconfig
m68k allmodconfig
m68k defconfig
m68k allyesconfig
nios2 defconfig
arc allyesconfig
nds32 allnoconfig
c6x allyesconfig
nds32 defconfig
nios2 allyesconfig
csky defconfig
alpha defconfig
alpha allyesconfig
xtensa allyesconfig
h8300 allyesconfig
arc defconfig
sh allmodconfig
parisc defconfig
s390 allyesconfig
parisc allyesconfig
s390 defconfig
i386 allyesconfig
sparc allyesconfig
sparc defconfig
i386 tinyconfig
i386 defconfig
mips allmodconfig
mips allyesconfig
powerpc allyesconfig
powerpc allmodconfig
powerpc allnoconfig
i386 randconfig-a001-20210124
i386 randconfig-a002-20210124
i386 randconfig-a003-20210124
i386 randconfig-a004-20210124
i386 randconfig-a006-20210124
i386 randconfig-a005-20210124
i386 randconfig-a001-20210125
i386 randconfig-a002-20210125
i386 randconfig-a004-20210125
i386 randconfig-a006-20210125
i386 randconfig-a005-20210125
i386 randconfig-a003-20210125
x86_64 randconfig-a012-20210124
x86_64 randconfig-a016-20210124
x86_64 randconfig-a015-20210124
x86_64 randconfig-a011-20210124
x86_64 randconfig-a013-20210124
x86_64 randconfig-a014-20210124
i386 randconfig-a013-20210124
i386 randconfig-a011-20210124
i386 randconfig-a012-20210124
i386 randconfig-a015-20210124
i386 randconfig-a014-20210124
i386 randconfig-a016-20210124
x86_64 randconfig-a003-20210125
x86_64 randconfig-a002-20210125
x86_64 randconfig-a001-20210125
x86_64 randconfig-a005-20210125
x86_64 randconfig-a006-20210125
x86_64 randconfig-a004-20210125
riscv nommu_k210_defconfig
riscv allyesconfig
riscv nommu_virt_defconfig
riscv allnoconfig
riscv defconfig
riscv rv32_defconfig
riscv allmodconfig
x86_64 rhel
x86_64 allyesconfig
x86_64 rhel-7.6-kselftests
x86_64 defconfig
x86_64 rhel-8.3
x86_64 rhel-8.3-kbuiltin
x86_64 kexec
clang tested configs:
x86_64 randconfig-a003-20210124
x86_64 randconfig-a002-20210124
x86_64 randconfig-a001-20210124
x86_64 randconfig-a005-20210124
x86_64 randconfig-a006-20210124
x86_64 randconfig-a004-20210124
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply
* [powerpc:merge] BUILD SUCCESS 44158b256b30415079588d0fcb1bccbdc2ccd009
From: kernel test robot @ 2021-01-25 3:58 UTC (permalink / raw)
To: Michael Ellerman; +Cc: linuxppc-dev
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git merge
branch HEAD: 44158b256b30415079588d0fcb1bccbdc2ccd009 Automatic merge of 'fixes' into merge (2021-01-24 09:52)
elapsed time: 954m
configs tested: 140
configs skipped: 2
The following configs have been built successfully.
More configs may be tested in the coming days.
gcc tested configs:
arm defconfig
arm64 allyesconfig
arm64 defconfig
arm allyesconfig
arm allmodconfig
mips tb0287_defconfig
mips mpc30x_defconfig
arm h5000_defconfig
sh rsk7264_defconfig
powerpc linkstation_defconfig
arm pxa255-idp_defconfig
arm am200epdkit_defconfig
mips pistachio_defconfig
xtensa cadence_csp_defconfig
powerpc chrp32_defconfig
arm mxs_defconfig
mips cu1000-neo_defconfig
powerpc tqm8560_defconfig
powerpc64 alldefconfig
sh sh7757lcr_defconfig
sh kfr2r09_defconfig
arm cns3420vb_defconfig
powerpc ppa8548_defconfig
m68k multi_defconfig
sh rts7751r2d1_defconfig
mips tb0219_defconfig
mips ip27_defconfig
m68k apollo_defconfig
arc nsimosci_defconfig
powerpc mpc885_ads_defconfig
s390 debug_defconfig
arm iop32x_defconfig
arm tango4_defconfig
mips nlm_xlr_defconfig
arm pxa3xx_defconfig
arm hackkit_defconfig
arm pcm027_defconfig
sh shmin_defconfig
powerpc mpc512x_defconfig
arm integrator_defconfig
h8300 h8s-sim_defconfig
powerpc mgcoge_defconfig
arm aspeed_g5_defconfig
sh sh7710voipgw_defconfig
arm imote2_defconfig
mips loongson1b_defconfig
arm dove_defconfig
arm mps2_defconfig
sh rts7751r2dplus_defconfig
mips workpad_defconfig
powerpc walnut_defconfig
arm sama5_defconfig
mips ath79_defconfig
sh se7751_defconfig
mips bigsur_defconfig
csky alldefconfig
arm pxa168_defconfig
ia64 allmodconfig
ia64 defconfig
ia64 allyesconfig
m68k allmodconfig
m68k defconfig
m68k allyesconfig
nios2 defconfig
arc allyesconfig
nds32 allnoconfig
c6x allyesconfig
nds32 defconfig
nios2 allyesconfig
csky defconfig
alpha defconfig
alpha allyesconfig
xtensa allyesconfig
h8300 allyesconfig
arc defconfig
sh allmodconfig
parisc defconfig
s390 allyesconfig
parisc allyesconfig
s390 defconfig
i386 allyesconfig
sparc allyesconfig
sparc defconfig
i386 tinyconfig
i386 defconfig
mips allyesconfig
mips allmodconfig
powerpc allyesconfig
powerpc allmodconfig
powerpc allnoconfig
i386 randconfig-a001-20210124
i386 randconfig-a002-20210124
i386 randconfig-a003-20210124
i386 randconfig-a001-20210125
i386 randconfig-a002-20210125
i386 randconfig-a004-20210125
i386 randconfig-a006-20210125
i386 randconfig-a005-20210125
i386 randconfig-a003-20210125
i386 randconfig-a004-20210124
i386 randconfig-a006-20210124
i386 randconfig-a005-20210124
x86_64 randconfig-a012-20210124
x86_64 randconfig-a016-20210124
x86_64 randconfig-a015-20210124
x86_64 randconfig-a011-20210124
x86_64 randconfig-a013-20210124
x86_64 randconfig-a014-20210124
i386 randconfig-a013-20210124
i386 randconfig-a011-20210124
i386 randconfig-a012-20210124
i386 randconfig-a015-20210124
i386 randconfig-a014-20210124
i386 randconfig-a016-20210124
x86_64 randconfig-a003-20210125
x86_64 randconfig-a002-20210125
x86_64 randconfig-a001-20210125
x86_64 randconfig-a005-20210125
x86_64 randconfig-a006-20210125
x86_64 randconfig-a004-20210125
riscv nommu_k210_defconfig
riscv allyesconfig
riscv nommu_virt_defconfig
riscv allnoconfig
riscv defconfig
riscv rv32_defconfig
riscv allmodconfig
x86_64 rhel
x86_64 allyesconfig
x86_64 rhel-7.6-kselftests
x86_64 defconfig
x86_64 rhel-8.3
x86_64 rhel-8.3-kbuiltin
x86_64 kexec
clang tested configs:
x86_64 randconfig-a003-20210124
x86_64 randconfig-a002-20210124
x86_64 randconfig-a001-20210124
x86_64 randconfig-a005-20210124
x86_64 randconfig-a006-20210124
x86_64 randconfig-a004-20210124
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply
* Re: [PATCH] lib/sstep: Fix incorrect return from analyze_instr()
From: Ananth N Mavinakayanahalli @ 2021-01-25 4:52 UTC (permalink / raw)
To: Michael Ellerman, linuxppc-dev
Cc: naveen.n.rao, ravi.bangoria, paulus, sandipan
In-Reply-To: <87zh10pk50.fsf@mpe.ellerman.id.au>
On 1/23/21 6:03 AM, Michael Ellerman wrote:
> Ananth N Mavinakayanahalli <ananth@linux.ibm.com> writes:
>> We currently just percolate the return value from analyse_instr()
>> to the caller of emulate_step(), especially if it is a -1.
>>
>> For one particular case (opcode = 4) for instructions that
>> aren't currently emulated, we are returning 'should not be
>> single-stepped' while we should have returned 0 which says
>> 'did not emulate, may have to single-step'.
>>
>> Signed-off-by: Ananth N Mavinakayanahalli <ananth@linux.ibm.com>
>> Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
>> ---
>> arch/powerpc/lib/sstep.c | 49 +++++++++++++++++++++++++---------------------
>> 1 file changed, 27 insertions(+), 22 deletions(-)
>>
>> diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
>> index 5a425a4a1d88..a3a0373843cd 100644
>> --- a/arch/powerpc/lib/sstep.c
>> +++ b/arch/powerpc/lib/sstep.c
>> @@ -1445,34 +1445,39 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
>>
>> #ifdef __powerpc64__
>> case 4:
>> - if (!cpu_has_feature(CPU_FTR_ARCH_300))
>> - return -1;
>> -
>> - switch (word & 0x3f) {
>> - case 48: /* maddhd */
>> - asm volatile(PPC_MADDHD(%0, %1, %2, %3) :
>> - "=r" (op->val) : "r" (regs->gpr[ra]),
>> - "r" (regs->gpr[rb]), "r" (regs->gpr[rc]));
>> - goto compute_done;
>> + /*
>> + * There are very many instructions with this primary opcode
>> + * introduced in the ISA as early as v2.03. However, the ones
>> + * we currently emulate were all introduced with ISA 3.0
>> + */
>> + if (cpu_has_feature(CPU_FTR_ARCH_300)) {
>> + switch (word & 0x3f) {
>> + case 48: /* maddhd */
>> + asm volatile(PPC_MADDHD(%0, %1, %2, %3) :
>> + "=r" (op->val) : "r" (regs->gpr[ra]),
>> + "r" (regs->gpr[rb]), "r" (regs->gpr[rc]));
>> + goto compute_done;
>
> Indenting everything makes this patch harder to read, and I think makes
> the resulting code harder to read too. We already have two levels of
> switch here, and we're inside a ~1700 line function, so keeping things
> simple is important I think.
>
> Doesn't this achieve the same result?
>
> diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
> index bf7a7d62ae8b..d631baaf1da2 100644
> --- a/arch/powerpc/lib/sstep.c
> +++ b/arch/powerpc/lib/sstep.c
> @@ -1443,8 +1443,10 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
>
> #ifdef __powerpc64__
> case 4:
> - if (!cpu_has_feature(CPU_FTR_ARCH_300))
> - return -1;
> + if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
> + op->type = UNKNOWN;
> + return 0;
> + }
>
> switch (word & 0x3f) {
> case 48: /* maddhd */
> @@ -1470,7 +1472,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
> * There are other instructions from ISA 3.0 with the same
> * primary opcode which do not have emulation support yet.
> */
> - return -1;
> + op->type = UNKNOWN;
> + return 0;
> #endif
>
> case 7: /* mulli */
>
Looks good to me.
Acked-by: Ananth N Mavinakayanahalli <ananth@linux.ibm.com>
--
Ananth
^ permalink raw reply
* Re: [RFC PATCH v3 2/6] swiotlb: Add restricted DMA pool
From: Jon Masters @ 2021-01-25 5:26 UTC (permalink / raw)
To: Florian Fainelli, Konrad Rzeszutek Wilk, Claire Chang
Cc: heikki.krogerus, peterz, grant.likely, paulus, will,
Christoph Hellwig, Marek Szyprowski, sstabellini, Saravana Kannan,
Frank Rowand, Joerg Roedel, rafael.j.wysocki, mingo,
Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
dan.j.williams, Robin Murphy, Rob Herring, boris.ostrovsky,
Andy Shevchenko, jgross, Nicolas Boichat, Greg KH, rdunlap, lkml,
Tomasz Figa, iommu, xypron.glpk, linuxppc-dev, bauerman
In-Reply-To: <aa5af7d1-779e-f0f6-e6ba-8040e603523f@gmail.com>
On 1/7/21 1:09 PM, Florian Fainelli wrote:
> On 1/7/21 9:57 AM, Konrad Rzeszutek Wilk wrote:
>> On Fri, Jan 08, 2021 at 01:39:18AM +0800, Claire Chang wrote:
>>> Hi Greg and Konrad,
>>>
>>> This change is intended to be non-arch specific. Any arch that lacks DMA access
>>> control and has devices not behind an IOMMU can make use of it. Could you share
>>> why you think this should be arch specific?
>>
>> The idea behind non-arch specific code is for it to be generic. The devicetree
>> is specific to PowerPC, Sparc, and ARM, and not to x86 - hence it should
>> be in arch specific code.
>
> In principle the same code could be used with an ACPI enabled system with
> an appropriate service to identify the restricted DMA regions and unlock
> them.
>
> More than 1 architecture requiring this function (ARM and ARM64 are the
> two I can think of needing this immediately) sort of calls for making
> the code architecture agnostic since past 2, you need something that scales.
>
> There is already code today under kernel/dma/contiguous.c that is only
> activated on a CONFIG_OF=y && CONFIG_OF_RESERVED_MEM=y system, this is
> no different.
<unrelated to these patches, which are useful for the case cited>
Just a note for history/archives that this approach would not be
appropriate on general purpose Arm systems, such as SystemReady-ES
edge/non-server platforms seeking to run general purpose distros. I want
to have that in the record before someone at Arm (or NVidia, or a bunch
of others that come to mind who have memory firewalls) gets an idea.
If you're working at an Arm vendor and come looking at this later
thinking "wow, what a great idea!", please fix your hardware to have a
real IOMMU/SMMU and real PCIe. You'll be pointed at this reply.
Jon.
--
Computer Architect
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox