* [RFC 1/2] mm/sparsemem: Add vmem_altmap support in vmemmap_populate_basepages()
2019-06-28 4:44 [RFC 0/2] arm64: Enable vmemmap from device memory Anshuman Khandual
@ 2019-06-28 4:44 ` Anshuman Khandual
2019-07-31 16:10 ` Will Deacon
2019-06-28 4:44 ` [RFC 2/2] arm64/mm: Enable device memory allocation and free for vmemmap mapping Anshuman Khandual
1 sibling, 1 reply; 7+ messages in thread
From: Anshuman Khandual @ 2019-06-28 4:44 UTC (permalink / raw)
To: linux-mm
Cc: Anshuman Khandual, Catalin Marinas, Will Deacon, Tony Luck,
Fenghua Yu, Dave Hansen, Andy Lutomirski, Andrew Morton,
linux-arm-kernel, linux-ia64, x86, linux-kernel
The generic vmemmap_populate_basepages() is used across platforms for
vmemmap, either as the standard path or as a fallback when huge page
mapping fails. On arm64 it is used for configs where
ARM64_SWAPPER_USES_SECTION_MAPS is not set, i.e. both ARM64_16K_PAGES and
ARM64_64K_PAGES, which cannot use huge pages because of alignment
requirements.
This prevents those configs from allocating from device memory for vmemmap
mapping, as vmemmap_populate_basepages() does not support vmem_altmap. This
patch adds the required support. Each architecture should evaluate and
decide on enabling device based base page allocation when appropriate.
Hence this keeps it disabled for all architectures to preserve the existing
semantics.
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-ia64@vger.kernel.org
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
---
arch/arm64/mm/mmu.c | 2 +-
arch/ia64/mm/discontig.c | 2 +-
arch/x86/mm/init_64.c | 4 ++--
include/linux/mm.h | 5 +++--
mm/sparse-vmemmap.c | 16 +++++++++++-----
5 files changed, 18 insertions(+), 11 deletions(-)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 194c84e..39e18d1 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -982,7 +982,7 @@ static void remove_pagetable(unsigned long start, unsigned long end,
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{
- return vmemmap_populate_basepages(start, end, node);
+ return vmemmap_populate_basepages(start, end, node, NULL);
}
#else /* !ARM64_SWAPPER_USES_SECTION_MAPS */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 05490dd..faefd7e 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -660,7 +660,7 @@ void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{
- return vmemmap_populate_basepages(start, end, node);
+ return vmemmap_populate_basepages(start, end, node, NULL);
}
void vmemmap_free(unsigned long start, unsigned long end,
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 8335ac6..c67ad5d 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1509,7 +1509,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
vmemmap_verify((pte_t *)pmd, node, addr, next);
continue;
}
- if (vmemmap_populate_basepages(addr, next, node))
+ if (vmemmap_populate_basepages(addr, next, node, NULL))
return -ENOMEM;
}
return 0;
@@ -1527,7 +1527,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
__func__);
err = -ENOMEM;
} else
- err = vmemmap_populate_basepages(start, end, node);
+ err = vmemmap_populate_basepages(start, end, node, NULL);
if (!err)
sync_global_pgds(start, end - 1);
return err;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c6ae9eb..dda9bd4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2758,14 +2758,15 @@ pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
-pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
+pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
+ struct vmem_altmap *altmap);
void *vmemmap_alloc_block(unsigned long size, int node);
struct vmem_altmap;
void *vmemmap_alloc_block_buf(unsigned long size, int node);
void *altmap_alloc_block_buf(unsigned long size, struct vmem_altmap *altmap);
void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
int vmemmap_populate_basepages(unsigned long start, unsigned long end,
- int node);
+ int node, struct vmem_altmap *altmap);
int vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap);
void vmemmap_populate_print_last(void);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 7fec057..d333b75 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -140,12 +140,18 @@ void __meminit vmemmap_verify(pte_t *pte, int node,
start, end - 1);
}
-pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
+pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
+ struct vmem_altmap *altmap)
{
pte_t *pte = pte_offset_kernel(pmd, addr);
if (pte_none(*pte)) {
pte_t entry;
- void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
+ void *p;
+
+ if (altmap)
+ p = altmap_alloc_block_buf(PAGE_SIZE, altmap);
+ else
+ p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
if (!p)
return NULL;
entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
@@ -213,8 +219,8 @@ pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
return pgd;
}
-int __meminit vmemmap_populate_basepages(unsigned long start,
- unsigned long end, int node)
+int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
+ int node, struct vmem_altmap *altmap)
{
unsigned long addr = start;
pgd_t *pgd;
@@ -236,7 +242,7 @@ int __meminit vmemmap_populate_basepages(unsigned long start,
pmd = vmemmap_pmd_populate(pud, addr, node);
if (!pmd)
return -ENOMEM;
- pte = vmemmap_pte_populate(pmd, addr, node);
+ pte = vmemmap_pte_populate(pmd, addr, node, altmap);
if (!pte)
return -ENOMEM;
vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
--
2.7.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [RFC 2/2] arm64/mm: Enable device memory allocation and free for vmemmap mapping
2019-06-28 4:44 [RFC 0/2] arm64: Enable vmemmap from device memory Anshuman Khandual
2019-06-28 4:44 ` [RFC 1/2] mm/sparsemem: Add vmem_altmap support in vmemmap_populate_basepages() Anshuman Khandual
@ 2019-06-28 4:44 ` Anshuman Khandual
2019-07-31 16:11 ` Will Deacon
1 sibling, 1 reply; 7+ messages in thread
From: Anshuman Khandual @ 2019-06-28 4:44 UTC (permalink / raw)
To: linux-mm
Cc: Anshuman Khandual, Catalin Marinas, Will Deacon, Mark Rutland,
linux-arm-kernel, linux-kernel
This enables the vmemmap_populate() and vmemmap_free() functions to handle
struct vmem_altmap based device memory allocation and free requests. With
this, device memory with a specific altmap configuration can be hot plugged
and hot removed as ZONE_DEVICE memory on arm64 platforms.
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
---
arch/arm64/mm/mmu.c | 57 ++++++++++++++++++++++++++++++++++-------------------
1 file changed, 37 insertions(+), 20 deletions(-)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 39e18d1..8867bbd 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -735,15 +735,26 @@ int kern_addr_valid(unsigned long addr)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-static void free_hotplug_page_range(struct page *page, size_t size)
+static void free_hotplug_page_range(struct page *page, size_t size,
+ struct vmem_altmap *altmap)
{
- WARN_ON(!page || PageReserved(page));
- free_pages((unsigned long)page_address(page), get_order(size));
+ if (altmap) {
+ /*
+ * vmemmap_populate() creates vmemmap mapping either at pte
+ * or pmd level. Unmapping request at any other level would
+ * be a problem.
+ */
+ WARN_ON((size != PAGE_SIZE) && (size != PMD_SIZE));
+ vmem_altmap_free(altmap, size >> PAGE_SHIFT);
+ } else {
+ WARN_ON(!page || PageReserved(page));
+ free_pages((unsigned long)page_address(page), get_order(size));
+ }
}
static void free_hotplug_pgtable_page(struct page *page)
{
- free_hotplug_page_range(page, PAGE_SIZE);
+ free_hotplug_page_range(page, PAGE_SIZE, NULL);
}
static void free_pte_table(pmd_t *pmdp, unsigned long addr)
@@ -807,7 +818,8 @@ static void free_pud_table(pgd_t *pgdp, unsigned long addr)
}
static void unmap_hotplug_pte_range(pmd_t *pmdp, unsigned long addr,
- unsigned long end, bool sparse_vmap)
+ unsigned long end, bool sparse_vmap,
+ struct vmem_altmap *altmap)
{
struct page *page;
pte_t *ptep, pte;
@@ -823,12 +835,13 @@ static void unmap_hotplug_pte_range(pmd_t *pmdp, unsigned long addr,
pte_clear(&init_mm, addr, ptep);
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
if (sparse_vmap)
- free_hotplug_page_range(page, PAGE_SIZE);
+ free_hotplug_page_range(page, PAGE_SIZE, altmap);
} while (addr += PAGE_SIZE, addr < end);
}
static void unmap_hotplug_pmd_range(pud_t *pudp, unsigned long addr,
- unsigned long end, bool sparse_vmap)
+ unsigned long end, bool sparse_vmap,
+ struct vmem_altmap *altmap)
{
unsigned long next;
struct page *page;
@@ -847,16 +860,17 @@ static void unmap_hotplug_pmd_range(pud_t *pudp, unsigned long addr,
pmd_clear(pmdp);
flush_tlb_kernel_range(addr, next);
if (sparse_vmap)
- free_hotplug_page_range(page, PMD_SIZE);
+ free_hotplug_page_range(page, PMD_SIZE, altmap);
continue;
}
WARN_ON(!pmd_table(pmd));
- unmap_hotplug_pte_range(pmdp, addr, next, sparse_vmap);
+ unmap_hotplug_pte_range(pmdp, addr, next, sparse_vmap, altmap);
} while (addr = next, addr < end);
}
static void unmap_hotplug_pud_range(pgd_t *pgdp, unsigned long addr,
- unsigned long end, bool sparse_vmap)
+ unsigned long end, bool sparse_vmap,
+ struct vmem_altmap *altmap)
{
unsigned long next;
struct page *page;
@@ -875,16 +889,16 @@ static void unmap_hotplug_pud_range(pgd_t *pgdp, unsigned long addr,
pud_clear(pudp);
flush_tlb_kernel_range(addr, next);
if (sparse_vmap)
- free_hotplug_page_range(page, PUD_SIZE);
+ free_hotplug_page_range(page, PUD_SIZE, altmap);
continue;
}
WARN_ON(!pud_table(pud));
- unmap_hotplug_pmd_range(pudp, addr, next, sparse_vmap);
+ unmap_hotplug_pmd_range(pudp, addr, next, sparse_vmap, altmap);
} while (addr = next, addr < end);
}
static void unmap_hotplug_range(unsigned long addr, unsigned long end,
- bool sparse_vmap)
+ bool sparse_vmap, struct vmem_altmap *altmap)
{
unsigned long next;
pgd_t *pgdp, pgd;
@@ -897,7 +911,7 @@ static void unmap_hotplug_range(unsigned long addr, unsigned long end,
continue;
WARN_ON(!pgd_present(pgd));
- unmap_hotplug_pud_range(pgdp, addr, next, sparse_vmap);
+ unmap_hotplug_pud_range(pgdp, addr, next, sparse_vmap, altmap);
} while (addr = next, addr < end);
}
@@ -970,9 +984,9 @@ static void free_empty_tables(unsigned long addr, unsigned long end)
}
static void remove_pagetable(unsigned long start, unsigned long end,
- bool sparse_vmap)
+ bool sparse_vmap, struct vmem_altmap *altmap)
{
- unmap_hotplug_range(start, end, sparse_vmap);
+ unmap_hotplug_range(start, end, sparse_vmap, altmap);
free_empty_tables(start, end);
}
#endif
@@ -982,7 +996,7 @@ static void remove_pagetable(unsigned long start, unsigned long end,
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{
- return vmemmap_populate_basepages(start, end, node, NULL);
+ return vmemmap_populate_basepages(start, end, node, altmap);
}
#else /* !ARM64_SWAPPER_USES_SECTION_MAPS */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
@@ -1009,7 +1023,10 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
if (pmd_none(READ_ONCE(*pmdp))) {
void *p = NULL;
- p = vmemmap_alloc_block_buf(PMD_SIZE, node);
+ if (altmap)
+ p = altmap_alloc_block_buf(PMD_SIZE, altmap);
+ else
+ p = vmemmap_alloc_block_buf(PMD_SIZE, node);
if (!p)
return -ENOMEM;
@@ -1043,7 +1060,7 @@ void vmemmap_free(unsigned long start, unsigned long end,
* given vmemmap range being hot-removed. Just unmap and free the
* range instead.
*/
- unmap_hotplug_range(start, end, true);
+ unmap_hotplug_range(start, end, true, altmap);
#endif
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
@@ -1336,7 +1353,7 @@ static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size)
unsigned long end = start + size;
WARN_ON(pgdir != init_mm.pgd);
- remove_pagetable(start, end, false);
+ remove_pagetable(start, end, false, NULL);
}
int arch_add_memory(int nid, u64 start, u64 size,
--
2.7.4
^ permalink raw reply related [flat|nested] 7+ messages in thread