linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/6] x86: 5-level paging enabling for v4.12, Part 2
@ 2017-03-17 18:55 Kirill A. Shutemov
  2017-03-17 18:55 ` [PATCH 1/6] x86/kexec: Add 5-level paging support Kirill A. Shutemov
                   ` (5 more replies)
  0 siblings, 6 replies; 14+ messages in thread
From: Kirill A. Shutemov @ 2017-03-17 18:55 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton, x86, Thomas Gleixner, Ingo Molnar,
	Arnd Bergmann, H. Peter Anvin
  Cc: Andi Kleen, Dave Hansen, Andy Lutomirski, Michal Hocko,
	linux-arch, linux-mm, linux-kernel, Kirill A. Shutemov

Here's the second bunch of patches of 5-level patchset.

These patches finish switching x86 from <asm-generic/5level-fixup.h>
to <asm-generic/pgtable-nop4d.h>.

Please review and consider applying.

Kirill A. Shutemov (6):
  x86/kexec: Add 5-level paging support
  x86/efi: Add 5-level paging support
  x86/mm/pat: Add 5-level paging support
  x86/kasan: Prepare clear_pgds() to switch to
    <asm-generic/pgtable-nop4d.h>
  x86/xen: Change __xen_pgd_walk() and xen_cleanmfnmap() to support p4d
  x86: Convert the rest of the code to support p4d_t

 arch/x86/include/asm/kexec.h          |   1 +
 arch/x86/include/asm/paravirt.h       |  33 ++-
 arch/x86/include/asm/paravirt_types.h |  12 +-
 arch/x86/include/asm/pgalloc.h        |  35 ++-
 arch/x86/include/asm/pgtable.h        |  59 ++++-
 arch/x86/include/asm/pgtable_64.h     |  12 +-
 arch/x86/include/asm/pgtable_types.h  |  10 +-
 arch/x86/include/asm/xen/page.h       |   8 +-
 arch/x86/kernel/machine_kexec_32.c    |   4 +-
 arch/x86/kernel/machine_kexec_64.c    |  14 +-
 arch/x86/kernel/paravirt.c            |  10 +-
 arch/x86/mm/init_64.c                 | 183 ++++++++++++----
 arch/x86/mm/kasan_init_64.c           |  15 +-
 arch/x86/mm/pageattr.c                |  54 +++--
 arch/x86/platform/efi/efi_64.c        |  36 ++-
 arch/x86/xen/mmu.c                    | 397 ++++++++++++++++++++--------------
 arch/x86/xen/mmu.h                    |   1 +
 include/trace/events/xen.h            |  28 +--
 18 files changed, 646 insertions(+), 266 deletions(-)

-- 
2.11.0

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/6] x86/kexec: Add 5-level paging support
  2017-03-17 18:55 [PATCH 0/6] x86: 5-level paging enabling for v4.12, Part 2 Kirill A. Shutemov
@ 2017-03-17 18:55 ` Kirill A. Shutemov
  2017-03-17 18:55 ` [PATCH 2/6] x86/efi: " Kirill A. Shutemov
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 14+ messages in thread
From: Kirill A. Shutemov @ 2017-03-17 18:55 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton, x86, Thomas Gleixner, Ingo Molnar,
	Arnd Bergmann, H. Peter Anvin
  Cc: Andi Kleen, Dave Hansen, Andy Lutomirski, Michal Hocko,
	linux-arch, linux-mm, linux-kernel, Kirill A. Shutemov

Handle additional page table level in kexec code.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/include/asm/kexec.h       |  1 +
 arch/x86/kernel/machine_kexec_32.c |  4 +++-
 arch/x86/kernel/machine_kexec_64.c | 14 ++++++++++++--
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 282630e4c6ea..70ef205489f0 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -164,6 +164,7 @@ struct kimage_arch {
 };
 #else
 struct kimage_arch {
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 469b23d6acc2..5f43cec296c5 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -103,6 +103,7 @@ static void machine_kexec_page_table_set_one(
 	pgd_t *pgd, pmd_t *pmd, pte_t *pte,
 	unsigned long vaddr, unsigned long paddr)
 {
+	p4d_t *p4d;
 	pud_t *pud;
 
 	pgd += pgd_index(vaddr);
@@ -110,7 +111,8 @@ static void machine_kexec_page_table_set_one(
 	if (!(pgd_val(*pgd) & _PAGE_PRESENT))
 		set_pgd(pgd, __pgd(__pa(pmd) | _PAGE_PRESENT));
 #endif
-	pud = pud_offset(pgd, vaddr);
+	p4d = p4d_offset(pgd, vaddr);
+	pud = pud_offset(p4d, vaddr);
 	pmd = pmd_offset(pud, vaddr);
 	if (!(pmd_val(*pmd) & _PAGE_PRESENT))
 		set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 857cdbd02867..085c3b300d32 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -36,6 +36,7 @@ static struct kexec_file_ops *kexec_file_loaders[] = {
 
 static void free_transition_pgtable(struct kimage *image)
 {
+	free_page((unsigned long)image->arch.p4d);
 	free_page((unsigned long)image->arch.pud);
 	free_page((unsigned long)image->arch.pmd);
 	free_page((unsigned long)image->arch.pte);
@@ -43,6 +44,7 @@ static void free_transition_pgtable(struct kimage *image)
 
 static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
 {
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
@@ -53,13 +55,21 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
 	paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);
 	pgd += pgd_index(vaddr);
 	if (!pgd_present(*pgd)) {
+		p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL);
+		if (!p4d)
+			goto err;
+		image->arch.p4d = p4d;
+		set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
+	}
+	p4d = p4d_offset(pgd, vaddr);
+	if (!p4d_present(*p4d)) {
 		pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
 		if (!pud)
 			goto err;
 		image->arch.pud = pud;
-		set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+		set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
 	}
-	pud = pud_offset(pgd, vaddr);
+	pud = pud_offset(p4d, vaddr);
 	if (!pud_present(*pud)) {
 		pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
 		if (!pmd)
-- 
2.11.0

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/6] x86/efi: Add 5-level paging support
  2017-03-17 18:55 [PATCH 0/6] x86: 5-level paging enabling for v4.12, Part 2 Kirill A. Shutemov
  2017-03-17 18:55 ` [PATCH 1/6] x86/kexec: Add 5-level paging support Kirill A. Shutemov
@ 2017-03-17 18:55 ` Kirill A. Shutemov
  2017-03-17 18:55 ` [PATCH 3/6] x86/mm/pat: " Kirill A. Shutemov
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 14+ messages in thread
From: Kirill A. Shutemov @ 2017-03-17 18:55 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton, x86, Thomas Gleixner, Ingo Molnar,
	Arnd Bergmann, H. Peter Anvin
  Cc: Andi Kleen, Dave Hansen, Andy Lutomirski, Michal Hocko,
	linux-arch, linux-mm, linux-kernel, Kirill A. Shutemov

Allocate additional page table level and ajdust efi_sync_low_kernel_mappings()
to work with additional page table level.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Matt Fleming <matt@codeblueprint.co.uk>
---
 arch/x86/platform/efi/efi_64.c | 36 ++++++++++++++++++++++++++----------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 7b5202bc7b39..6b6b8e8d4ae7 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -135,6 +135,7 @@ static pgd_t *efi_pgd;
 int __init efi_alloc_page_tables(void)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	gfp_t gfp_mask;
 
@@ -147,15 +148,20 @@ int __init efi_alloc_page_tables(void)
 		return -ENOMEM;
 
 	pgd = efi_pgd + pgd_index(EFI_VA_END);
+	p4d = p4d_alloc(&init_mm, pgd, EFI_VA_END);
+	if (!p4d) {
+		free_page((unsigned long)efi_pgd);
+		return -ENOMEM;
+	}
 
-	pud = pud_alloc_one(NULL, 0);
+	pud = pud_alloc(&init_mm, p4d, EFI_VA_END);
 	if (!pud) {
+		if (CONFIG_PGTABLE_LEVELS > 4)
+			free_page((unsigned long) pgd_page_vaddr(*pgd));
 		free_page((unsigned long)efi_pgd);
 		return -ENOMEM;
 	}
 
-	pgd_populate(NULL, pgd, pud);
-
 	return 0;
 }
 
@@ -191,26 +197,36 @@ void efi_sync_low_kernel_mappings(void)
 	memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries);
 
 	/*
+	 * As with PGDs, we share all P4D entries apart from the one entry
+	 * that covers the EFI runtime mapping space.
+	 */
+	BUILD_BUG_ON(p4d_index(EFI_VA_END) != p4d_index(MODULES_END));
+	BUILD_BUG_ON((EFI_VA_START & P4D_MASK) != (EFI_VA_END & P4D_MASK));
+
+	pgd_efi = efi_pgd + pgd_index(EFI_VA_END);
+	pgd_k = pgd_offset_k(EFI_VA_END);
+	p4d_efi = p4d_offset(pgd_efi, 0);
+	p4d_k = p4d_offset(pgd_k, 0);
+
+	num_entries = p4d_index(EFI_VA_END);
+	memcpy(p4d_efi, p4d_k, sizeof(p4d_t) * num_entries);
+
+	/*
 	 * We share all the PUD entries apart from those that map the
 	 * EFI regions. Copy around them.
 	 */
 	BUILD_BUG_ON((EFI_VA_START & ~PUD_MASK) != 0);
 	BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0);
 
-	pgd_efi = efi_pgd + pgd_index(EFI_VA_END);
-	p4d_efi = p4d_offset(pgd_efi, 0);
+	p4d_efi = p4d_offset(pgd_efi, EFI_VA_END);
+	p4d_k = p4d_offset(pgd_k, EFI_VA_END);
 	pud_efi = pud_offset(p4d_efi, 0);
-
-	pgd_k = pgd_offset_k(EFI_VA_END);
-	p4d_k = p4d_offset(pgd_k, 0);
 	pud_k = pud_offset(p4d_k, 0);
 
 	num_entries = pud_index(EFI_VA_END);
 	memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
 
-	p4d_efi = p4d_offset(pgd_efi, EFI_VA_START);
 	pud_efi = pud_offset(p4d_efi, EFI_VA_START);
-	p4d_k = p4d_offset(pgd_k, EFI_VA_START);
 	pud_k = pud_offset(p4d_k, EFI_VA_START);
 
 	num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START);
-- 
2.11.0

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 3/6] x86/mm/pat: Add 5-level paging support
  2017-03-17 18:55 [PATCH 0/6] x86: 5-level paging enabling for v4.12, Part 2 Kirill A. Shutemov
  2017-03-17 18:55 ` [PATCH 1/6] x86/kexec: Add 5-level paging support Kirill A. Shutemov
  2017-03-17 18:55 ` [PATCH 2/6] x86/efi: " Kirill A. Shutemov
@ 2017-03-17 18:55 ` Kirill A. Shutemov
  2017-03-17 19:49   ` Thomas Gleixner
  2017-03-17 18:55 ` [PATCH 4/6] x86/kasan: Prepare clear_pgds() to switch to <asm-generic/pgtable-nop4d.h> Kirill A. Shutemov
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 14+ messages in thread
From: Kirill A. Shutemov @ 2017-03-17 18:55 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton, x86, Thomas Gleixner, Ingo Molnar,
	Arnd Bergmann, H. Peter Anvin
  Cc: Andi Kleen, Dave Hansen, Andy Lutomirski, Michal Hocko,
	linux-arch, linux-mm, linux-kernel, Kirill A. Shutemov

Straight-forward extension of existing code to support additional page
table level.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/mm/pageattr.c | 54 +++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 40 insertions(+), 14 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a57e8e02f457..56b22fa504df 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -346,6 +346,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
 pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
 			     unsigned int *level)
 {
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 
@@ -354,7 +355,15 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
 	if (pgd_none(*pgd))
 		return NULL;
 
-	pud = pud_offset(pgd, address);
+	p4d = p4d_offset(pgd, address);
+	if (p4d_none(*p4d))
+		return NULL;
+
+	*level = PG_LEVEL_512G;
+	if (p4d_large(*p4d) || !p4d_present(*p4d))
+		return (pte_t *)p4d;
+
+	pud = pud_offset(p4d, address);
 	if (pud_none(*pud))
 		return NULL;
 
@@ -406,13 +415,18 @@ static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
 pmd_t *lookup_pmd_address(unsigned long address)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 
 	pgd = pgd_offset_k(address);
 	if (pgd_none(*pgd))
 		return NULL;
 
-	pud = pud_offset(pgd, address);
+	p4d = p4d_offset(pgd, address);
+	if (p4d_none(*p4d) || p4d_large(*p4d) || !p4d_present(*p4d))
+		return NULL;
+
+	pud = pud_offset(p4d, address);
 	if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud))
 		return NULL;
 
@@ -477,11 +491,13 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 
 		list_for_each_entry(page, &pgd_list, lru) {
 			pgd_t *pgd;
+			p4d_t *p4d;
 			pud_t *pud;
 			pmd_t *pmd;
 
 			pgd = (pgd_t *)page_address(page) + pgd_index(address);
-			pud = pud_offset(pgd, address);
+			p4d = p4d_offset(pgd, address);
+			pud = pud_offset(p4d, address);
 			pmd = pmd_offset(pud, address);
 			set_pte_atomic((pte_t *)pmd, pte);
 		}
@@ -836,9 +852,9 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
 			pud_clear(pud);
 }
 
-static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
+static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end)
 {
-	pud_t *pud = pud_offset(pgd, start);
+	pud_t *pud = pud_offset(p4d, start);
 
 	/*
 	 * Not on a GB page boundary?
@@ -1004,8 +1020,8 @@ static long populate_pmd(struct cpa_data *cpa,
 	return num_pages;
 }
 
-static long populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
-			 pgprot_t pgprot)
+static int populate_pud(struct cpa_data *cpa, unsigned long start, p4d_t *p4d,
+			pgprot_t pgprot)
 {
 	pud_t *pud;
 	unsigned long end;
@@ -1026,7 +1042,7 @@ static long populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
 		cur_pages = (pre_end - start) >> PAGE_SHIFT;
 		cur_pages = min_t(int, (int)cpa->numpages, cur_pages);
 
-		pud = pud_offset(pgd, start);
+		pud = pud_offset(p4d, start);
 
 		/*
 		 * Need a PMD page?
@@ -1047,7 +1063,7 @@ static long populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
 	if (cpa->numpages == cur_pages)
 		return cur_pages;
 
-	pud = pud_offset(pgd, start);
+	pud = pud_offset(p4d, start);
 	pud_pgprot = pgprot_4k_2_large(pgprot);
 
 	/*
@@ -1067,7 +1083,7 @@ static long populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
 	if (start < end) {
 		long tmp;
 
-		pud = pud_offset(pgd, start);
+		pud = pud_offset(p4d, start);
 		if (pud_none(*pud))
 			if (alloc_pmd_page(pud))
 				return -1;
@@ -1090,33 +1106,43 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
 {
 	pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
 	pud_t *pud = NULL;	/* shut up gcc */
+	p4d_t *p4d;
 	pgd_t *pgd_entry;
 	long ret;
 
 	pgd_entry = cpa->pgd + pgd_index(addr);
 
+	if (pgd_none(*pgd_entry)) {
+		p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
+		if (!p4d)
+			return -1;
+
+		set_pgd(pgd_entry, __pgd(__pa(p4d) | _KERNPG_TABLE));
+	}
+
 	/*
 	 * Allocate a PUD page and hand it down for mapping.
 	 */
-	if (pgd_none(*pgd_entry)) {
+	p4d = p4d_offset(pgd_entry, addr);
+	if (p4d_none(*p4d)) {
 		pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
 		if (!pud)
 			return -1;
 
-		set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
+		set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
 	}
 
 	pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
 	pgprot_val(pgprot) |=  pgprot_val(cpa->mask_set);
 
-	ret = populate_pud(cpa, addr, pgd_entry, pgprot);
+	ret = populate_pud(cpa, addr, p4d, pgprot);
 	if (ret < 0) {
 		/*
 		 * Leave the PUD page in place in case some other CPU or thread
 		 * already found it, but remove any useless entries we just
 		 * added to it.
 		 */
-		unmap_pud_range(pgd_entry, addr,
+		unmap_pud_range(p4d, addr,
 				addr + (cpa->numpages << PAGE_SHIFT));
 		return ret;
 	}
-- 
2.11.0

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 4/6] x86/kasan: Prepare clear_pgds() to switch to <asm-generic/pgtable-nop4d.h>
  2017-03-17 18:55 [PATCH 0/6] x86: 5-level paging enabling for v4.12, Part 2 Kirill A. Shutemov
                   ` (2 preceding siblings ...)
  2017-03-17 18:55 ` [PATCH 3/6] x86/mm/pat: " Kirill A. Shutemov
@ 2017-03-17 18:55 ` Kirill A. Shutemov
  2017-03-20 16:21   ` Andrey Ryabinin
  2017-03-17 18:55 ` [PATCH 5/6] x86/xen: Change __xen_pgd_walk() and xen_cleanmfnmap() to support p4d Kirill A. Shutemov
  2017-03-17 18:55 ` [PATCH 6/6] x86: Convert the rest of the code to support p4d_t Kirill A. Shutemov
  5 siblings, 1 reply; 14+ messages in thread
From: Kirill A. Shutemov @ 2017-03-17 18:55 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton, x86, Thomas Gleixner, Ingo Molnar,
	Arnd Bergmann, H. Peter Anvin
  Cc: Andi Kleen, Dave Hansen, Andy Lutomirski, Michal Hocko,
	linux-arch, linux-mm, linux-kernel, Kirill A. Shutemov,
	Dmitry Vyukov

With folded p4d, pgd_clear() is nop. Change clear_pgds() to use
p4d_clear() instead.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
---
 arch/x86/mm/kasan_init_64.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 0a56059a95c7..b775ffd7989d 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -35,8 +35,19 @@ static int __init map_range(struct range *range)
 static void __init clear_pgds(unsigned long start,
 			unsigned long end)
 {
-	for (; start < end; start += PGDIR_SIZE)
-		pgd_clear(pgd_offset_k(start));
+	pgd_t *pgd;
+
+	for (; start < end; start += PGDIR_SIZE) {
+		pgd = pgd_offset_k(start);
+		/*
+		 * With folded p4d, pgd_clear() is nop, use p4d_clear()
+		 * instead.
+		 */
+		if (CONFIG_PGTABLE_LEVELS < 5)
+			p4d_clear(p4d_offset(pgd, start));
+		else
+			pgd_clear(pgd);
+	}
 }
 
 static void __init kasan_map_early_shadow(pgd_t *pgd)
-- 
2.11.0

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 5/6] x86/xen: Change __xen_pgd_walk() and xen_cleanmfnmap() to support p4d
  2017-03-17 18:55 [PATCH 0/6] x86: 5-level paging enabling for v4.12, Part 2 Kirill A. Shutemov
                   ` (3 preceding siblings ...)
  2017-03-17 18:55 ` [PATCH 4/6] x86/kasan: Prepare clear_pgds() to switch to <asm-generic/pgtable-nop4d.h> Kirill A. Shutemov
@ 2017-03-17 18:55 ` Kirill A. Shutemov
  2017-03-27  6:34   ` Ingo Molnar
  2017-03-17 18:55 ` [PATCH 6/6] x86: Convert the rest of the code to support p4d_t Kirill A. Shutemov
  5 siblings, 1 reply; 14+ messages in thread
From: Kirill A. Shutemov @ 2017-03-17 18:55 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton, x86, Thomas Gleixner, Ingo Molnar,
	Arnd Bergmann, H. Peter Anvin
  Cc: Andi Kleen, Dave Hansen, Andy Lutomirski, Michal Hocko,
	linux-arch, linux-mm, linux-kernel, Kirill A. Shutemov,
	Xiong Zhang

Split these helpers into few per-level functions and add support of
additional page table level.

Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.com>
[kirill.shutemov@linux.intel.com: split off into separate patch]
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/xen/mmu.c | 245 ++++++++++++++++++++++++++++++++---------------------
 arch/x86/xen/mmu.h |   1 +
 2 files changed, 150 insertions(+), 96 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 7adda9be6189..b3c4f13e48a4 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -593,6 +593,64 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val)
 }
 #endif	/* CONFIG_PGTABLE_LEVELS == 4 */
 
+static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
+		int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
+		bool last, unsigned long limit)
+{
+	int i, nr, flush = 0;
+
+	nr = last ? pmd_index(limit) + 1 : PTRS_PER_PMD;
+	for (i = 0; i < nr; i++) {
+		if (!pmd_none(pmd[i]))
+			flush |= (*func)(mm, pmd_page(pmd[i]), PT_PTE);
+	}
+	return flush;
+}
+
+static int xen_pud_walk(struct mm_struct *mm, pud_t *pud,
+		int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
+		bool last, unsigned long limit)
+{
+	int i, nr, flush = 0;
+
+	nr = last ? pud_index(limit) + 1 : PTRS_PER_PUD;
+	for (i = 0; i < nr; i++) {
+		pmd_t *pmd;
+
+		if (pud_none(pud[i]))
+			continue;
+
+		pmd = pmd_offset(&pud[i], 0);
+		if (PTRS_PER_PMD > 1)
+			flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
+		flush |= xen_pmd_walk(mm, pmd, func,
+				last && i == nr - 1, limit);
+	}
+	return flush;
+}
+
+static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
+		int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
+		bool last, unsigned long limit)
+{
+	int i, nr, flush = 0;
+
+	nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D;
+	for (i = 0; i < nr; i++) {
+		pud_t *pud;
+
+		if (p4d_none(p4d[i]))
+			continue;
+
+		pud = pud_offset(&p4d[i], 0);
+		if (PTRS_PER_PUD > 1)
+			flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
+		flush |= xen_pud_walk(mm, pud, func,
+				last && i == nr - 1, limit);
+	}
+	return flush;
+}
+
 /*
  * (Yet another) pagetable walker.  This one is intended for pinning a
  * pagetable.  This means that it walks a pagetable and calls the
@@ -613,10 +671,8 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
 				      enum pt_level),
 			  unsigned long limit)
 {
-	int flush = 0;
+	int i, nr, flush = 0;
 	unsigned hole_low, hole_high;
-	unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
-	unsigned pgdidx, pudidx, pmdidx;
 
 	/* The limit is the last byte to be touched */
 	limit--;
@@ -633,65 +689,22 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
 	hole_low = pgd_index(USER_LIMIT);
 	hole_high = pgd_index(PAGE_OFFSET);
 
-	pgdidx_limit = pgd_index(limit);
-#if PTRS_PER_PUD > 1
-	pudidx_limit = pud_index(limit);
-#else
-	pudidx_limit = 0;
-#endif
-#if PTRS_PER_PMD > 1
-	pmdidx_limit = pmd_index(limit);
-#else
-	pmdidx_limit = 0;
-#endif
-
-	for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
-		pud_t *pud;
+	nr = pgd_index(limit) + 1;
+	for (i = 0; i < nr; i++) {
+		p4d_t *p4d;
 
-		if (pgdidx >= hole_low && pgdidx < hole_high)
+		if (i >= hole_low && i < hole_high)
 			continue;
 
-		if (!pgd_val(pgd[pgdidx]))
+		if (pgd_none(pgd[i]))
 			continue;
 
-		pud = pud_offset(&pgd[pgdidx], 0);
-
-		if (PTRS_PER_PUD > 1) /* not folded */
-			flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
-
-		for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
-			pmd_t *pmd;
-
-			if (pgdidx == pgdidx_limit &&
-			    pudidx > pudidx_limit)
-				goto out;
-
-			if (pud_none(pud[pudidx]))
-				continue;
-
-			pmd = pmd_offset(&pud[pudidx], 0);
-
-			if (PTRS_PER_PMD > 1) /* not folded */
-				flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
-
-			for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
-				struct page *pte;
-
-				if (pgdidx == pgdidx_limit &&
-				    pudidx == pudidx_limit &&
-				    pmdidx > pmdidx_limit)
-					goto out;
-
-				if (pmd_none(pmd[pmdidx]))
-					continue;
-
-				pte = pmd_page(pmd[pmdidx]);
-				flush |= (*func)(mm, pte, PT_PTE);
-			}
-		}
+		p4d = p4d_offset(&pgd[i], 0);
+		if (PTRS_PER_P4D > 1)
+			flush |= (*func)(mm, virt_to_page(p4d), PT_P4D);
+		flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
 	}
 
-out:
 	/* Do the top level last, so that the callbacks can use it as
 	   a cue to do final things like tlb flushes. */
 	flush |= (*func)(mm, virt_to_page(pgd), PT_PGD);
@@ -1150,57 +1163,97 @@ static void __init xen_cleanmfnmap_free_pgtbl(void *pgtbl, bool unpin)
 	xen_free_ro_pages(pa, PAGE_SIZE);
 }
 
+static void __init xen_cleanmfnmap_pmd(pmd_t *pmd, bool unpin)
+{
+	unsigned long pa;
+	pte_t *pte_tbl;
+	int i;
+
+	if (pmd_large(*pmd)) {
+		pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK;
+		xen_free_ro_pages(pa, PMD_SIZE);
+		return;
+	}
+
+	pte_tbl = pte_offset_kernel(pmd, 0);
+	for (i = 0; i < PTRS_PER_PTE; i++) {
+		if (pte_none(pte_tbl[i]))
+			continue;
+		pa = pte_pfn(pte_tbl[i]) << PAGE_SHIFT;
+		xen_free_ro_pages(pa, PAGE_SIZE);
+	}
+	set_pmd(pmd, __pmd(0));
+	xen_cleanmfnmap_free_pgtbl(pte_tbl, unpin);
+}
+
+static void __init xen_cleanmfnmap_pud(pud_t *pud, bool unpin)
+{
+	unsigned long pa;
+	pmd_t *pmd_tbl;
+	int i;
+
+	if (pud_large(*pud)) {
+		pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
+		xen_free_ro_pages(pa, PUD_SIZE);
+		return;
+	}
+
+	pmd_tbl = pmd_offset(pud, 0);
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		if (pmd_none(pmd_tbl[i]))
+			continue;
+		xen_cleanmfnmap_pmd(pmd_tbl + i, unpin);
+	}
+	set_pud(pud, __pud(0));
+	xen_cleanmfnmap_free_pgtbl(pmd_tbl, unpin);
+}
+
+static void __init xen_cleanmfnmap_p4d(p4d_t *p4d, bool unpin)
+{
+	unsigned long pa;
+	pud_t *pud_tbl;
+	int i;
+
+	if (p4d_large(*p4d)) {
+		pa = p4d_val(*p4d) & PHYSICAL_PAGE_MASK;
+		xen_free_ro_pages(pa, P4D_SIZE);
+		return;
+	}
+
+	pud_tbl = pud_offset(p4d, 0);
+	for (i = 0; i < PTRS_PER_PUD; i++) {
+		if (pud_none(pud_tbl[i]))
+			continue;
+		xen_cleanmfnmap_pud(pud_tbl + i, unpin);
+	}
+	set_p4d(p4d, __p4d(0));
+	xen_cleanmfnmap_free_pgtbl(pud_tbl, unpin);
+}
+
 /*
  * Since it is well isolated we can (and since it is perhaps large we should)
  * also free the page tables mapping the initial P->M table.
  */
 static void __init xen_cleanmfnmap(unsigned long vaddr)
 {
-	unsigned long va = vaddr & PMD_MASK;
-	unsigned long pa;
-	pgd_t *pgd = pgd_offset_k(va);
-	pud_t *pud_page = pud_offset(pgd, 0);
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
+	pgd_t *pgd;
+	p4d_t *p4d;
 	unsigned int i;
 	bool unpin;
 
 	unpin = (vaddr == 2 * PGDIR_SIZE);
-	set_pgd(pgd, __pgd(0));
-	do {
-		pud = pud_page + pud_index(va);
-		if (pud_none(*pud)) {
-			va += PUD_SIZE;
-		} else if (pud_large(*pud)) {
-			pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
-			xen_free_ro_pages(pa, PUD_SIZE);
-			va += PUD_SIZE;
-		} else {
-			pmd = pmd_offset(pud, va);
-			if (pmd_large(*pmd)) {
-				pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK;
-				xen_free_ro_pages(pa, PMD_SIZE);
-			} else if (!pmd_none(*pmd)) {
-				pte = pte_offset_kernel(pmd, va);
-				set_pmd(pmd, __pmd(0));
-				for (i = 0; i < PTRS_PER_PTE; ++i) {
-					if (pte_none(pte[i]))
-						break;
-					pa = pte_pfn(pte[i]) << PAGE_SHIFT;
-					xen_free_ro_pages(pa, PAGE_SIZE);
-				}
-				xen_cleanmfnmap_free_pgtbl(pte, unpin);
-			}
-			va += PMD_SIZE;
-			if (pmd_index(va))
-				continue;
-			set_pud(pud, __pud(0));
-			xen_cleanmfnmap_free_pgtbl(pmd, unpin);
-		}
-
-	} while (pud_index(va) || pmd_index(va));
-	xen_cleanmfnmap_free_pgtbl(pud_page, unpin);
+	vaddr &= PMD_MASK;
+	pgd = pgd_offset_k(vaddr);
+	p4d = p4d_offset(pgd, 0);
+	for (i = 0; i < PTRS_PER_P4D; i++) {
+		if (p4d_none(p4d[i]))
+			continue;
+		xen_cleanmfnmap_p4d(p4d + i, unpin);
+	}
+	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+		set_pgd(pgd, __pgd(0));
+		xen_cleanmfnmap_free_pgtbl(p4d, unpin);
+	}
 }
 
 static void __init xen_pagetable_p2m_free(void)
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 73809bb951b4..3fe2b3292915 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -5,6 +5,7 @@
 
 enum pt_level {
 	PT_PGD,
+	PT_P4D,
 	PT_PUD,
 	PT_PMD,
 	PT_PTE
-- 
2.11.0

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 6/6] x86: Convert the rest of the code to support p4d_t
  2017-03-17 18:55 [PATCH 0/6] x86: 5-level paging enabling for v4.12, Part 2 Kirill A. Shutemov
                   ` (4 preceding siblings ...)
  2017-03-17 18:55 ` [PATCH 5/6] x86/xen: Change __xen_pgd_walk() and xen_cleanmfnmap() to support p4d Kirill A. Shutemov
@ 2017-03-17 18:55 ` Kirill A. Shutemov
  5 siblings, 0 replies; 14+ messages in thread
From: Kirill A. Shutemov @ 2017-03-17 18:55 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton, x86, Thomas Gleixner, Ingo Molnar,
	Arnd Bergmann, H. Peter Anvin
  Cc: Andi Kleen, Dave Hansen, Andy Lutomirski, Michal Hocko,
	linux-arch, linux-mm, linux-kernel, Kirill A. Shutemov

This patch converts x86 to use proper folding of new page table level
with <asm-generic/pgtable-nop4d.h>.

That's a bit of kitchen sink, but I don't see how to split it further
without hurting bisectability.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/include/asm/paravirt.h       |  33 +++++-
 arch/x86/include/asm/paravirt_types.h |  12 ++-
 arch/x86/include/asm/pgalloc.h        |  35 ++++++-
 arch/x86/include/asm/pgtable.h        |  59 ++++++++++-
 arch/x86/include/asm/pgtable_64.h     |  12 ++-
 arch/x86/include/asm/pgtable_types.h  |  10 +-
 arch/x86/include/asm/xen/page.h       |   8 +-
 arch/x86/kernel/paravirt.c            |  10 +-
 arch/x86/mm/init_64.c                 | 183 +++++++++++++++++++++++++++-------
 arch/x86/xen/mmu.c                    | 152 ++++++++++++++++------------
 include/trace/events/xen.h            |  28 +++---
 11 files changed, 401 insertions(+), 141 deletions(-)

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 0489884fdc44..158d877ce9e9 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -536,7 +536,7 @@ static inline void set_pud(pud_t *pudp, pud_t pud)
 		PVOP_VCALL2(pv_mmu_ops.set_pud, pudp,
 			    val);
 }
-#if CONFIG_PGTABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS >= 4
 static inline pud_t __pud(pudval_t val)
 {
 	pudval_t ret;
@@ -565,6 +565,32 @@ static inline pudval_t pud_val(pud_t pud)
 	return ret;
 }
 
+static inline void pud_clear(pud_t *pudp)
+{
+	set_pud(pudp, __pud(0));
+}
+
+static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
+{
+	p4dval_t val = native_p4d_val(p4d);
+
+	if (sizeof(p4dval_t) > sizeof(long))
+		PVOP_VCALL3(pv_mmu_ops.set_p4d, p4dp,
+			    val, (u64)val >> 32);
+	else
+		PVOP_VCALL2(pv_mmu_ops.set_p4d, p4dp,
+			    val);
+}
+
+static inline void p4d_clear(p4d_t *p4dp)
+{
+	set_p4d(p4dp, __p4d(0));
+}
+
+#if CONFIG_PGTABLE_LEVELS >= 5
+
+#error FIXME
+
 static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
 	pgdval_t val = native_pgd_val(pgd);
@@ -582,10 +608,7 @@ static inline void pgd_clear(pgd_t *pgdp)
 	set_pgd(pgdp, __pgd(0));
 }
 
-static inline void pud_clear(pud_t *pudp)
-{
-	set_pud(pudp, __pud(0));
-}
+#endif  /* CONFIG_PGTABLE_LEVELS == 5 */
 
 #endif	/* CONFIG_PGTABLE_LEVELS == 4 */
 
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index b060f962d581..93c49cf09b63 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -279,12 +279,18 @@ struct pv_mmu_ops {
 	struct paravirt_callee_save pmd_val;
 	struct paravirt_callee_save make_pmd;
 
-#if CONFIG_PGTABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS >= 4
 	struct paravirt_callee_save pud_val;
 	struct paravirt_callee_save make_pud;
 
-	void (*set_pgd)(pgd_t *pudp, pgd_t pgdval);
-#endif	/* CONFIG_PGTABLE_LEVELS == 4 */
+	void (*set_p4d)(p4d_t *p4dp, p4d_t p4dval);
+
+#if CONFIG_PGTABLE_LEVELS >= 5
+#error FIXME
+#endif	/* CONFIG_PGTABLE_LEVELS >= 5 */
+
+#endif	/* CONFIG_PGTABLE_LEVELS >= 4 */
+
 #endif	/* CONFIG_PGTABLE_LEVELS >= 3 */
 
 	struct pv_lazy_ops lazy_mode;
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index b6d425999f99..2f585054c63c 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -121,10 +121,10 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 #endif	/* CONFIG_X86_PAE */
 
 #if CONFIG_PGTABLE_LEVELS > 3
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
 {
 	paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
-	set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
+	set_p4d(p4d, __p4d(_PAGE_TABLE | __pa(pud)));
 }
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
@@ -150,6 +150,37 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
 	___pud_free_tlb(tlb, pud);
 }
 
+#if CONFIG_PGTABLE_LEVELS > 4
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
+{
+	paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT);
+	set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
+}
+
+static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	gfp_t gfp = GFP_KERNEL_ACCOUNT;
+
+	if (mm == &init_mm)
+		gfp &= ~__GFP_ACCOUNT;
+	return (p4d_t *)get_zeroed_page(gfp);
+}
+
+static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+	BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
+	free_page((unsigned long)p4d);
+}
+
+extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d);
+
+static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
+				  unsigned long address)
+{
+	___p4d_free_tlb(tlb, p4d);
+}
+
+#endif	/* CONFIG_PGTABLE_LEVELS > 4 */
 #endif	/* CONFIG_PGTABLE_LEVELS > 3 */
 #endif	/* CONFIG_PGTABLE_LEVELS > 2 */
 
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index d5854bd86f0a..adcebc271764 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -51,11 +51,19 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
 
 #define set_pmd(pmdp, pmd)		native_set_pmd(pmdp, pmd)
 
-#ifndef __PAGETABLE_PUD_FOLDED
+#ifndef __PAGETABLE_P4D_FOLDED
 #define set_pgd(pgdp, pgd)		native_set_pgd(pgdp, pgd)
 #define pgd_clear(pgd)			native_pgd_clear(pgd)
 #endif
 
+#ifndef set_p4d
+# define set_p4d(p4dp, p4d)		native_set_p4d(p4dp, p4d)
+#endif
+
+#ifndef __PAGETABLE_PUD_FOLDED
+#define p4d_clear(p4d)			native_p4d_clear(p4d)
+#endif
+
 #ifndef set_pud
 # define set_pud(pudp, pud)		native_set_pud(pudp, pud)
 #endif
@@ -72,6 +80,11 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
 #define pgd_val(x)	native_pgd_val(x)
 #define __pgd(x)	native_make_pgd(x)
 
+#ifndef __PAGETABLE_P4D_FOLDED
+#define p4d_val(x)	native_p4d_val(x)
+#define __p4d(x)	native_make_p4d(x)
+#endif
+
 #ifndef __PAGETABLE_PUD_FOLDED
 #define pud_val(x)	native_pud_val(x)
 #define __pud(x)	native_make_pud(x)
@@ -547,6 +560,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 #define pte_pgprot(x) __pgprot(pte_flags(x))
 #define pmd_pgprot(x) __pgprot(pmd_flags(x))
 #define pud_pgprot(x) __pgprot(pud_flags(x))
+#define p4d_pgprot(x) __pgprot(p4d_flags(x))
 
 #define canon_pgprot(p) __pgprot(massage_pgprot(p))
 
@@ -784,12 +798,47 @@ static inline unsigned long pud_index(unsigned long address)
 	return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
 }
 
+#if CONFIG_PGTABLE_LEVELS > 3
+static inline int p4d_none(p4d_t p4d)
+{
+	return (native_p4d_val(p4d) & ~(_PAGE_KNL_ERRATUM_MASK)) == 0;
+}
+
+static inline int p4d_present(p4d_t p4d)
+{
+	return p4d_flags(p4d) & _PAGE_PRESENT;
+}
+
+static inline unsigned long p4d_page_vaddr(p4d_t p4d)
+{
+	return (unsigned long)__va(p4d_val(p4d) & p4d_pfn_mask(p4d));
+}
+
+/*
+ * Currently stuck as a macro due to indirect forward reference to
+ * linux/mmzone.h's __section_mem_map_addr() definition:
+ */
+#define p4d_page(p4d)		\
+	pfn_to_page((p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT)
+
+/* Find an entry in the third-level page table.. */
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+{
+	return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
+}
+
+static inline int p4d_bad(p4d_t p4d)
+{
+	return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
+}
+#endif  /* CONFIG_PGTABLE_LEVELS > 3 */
+
 static inline unsigned long p4d_index(unsigned long address)
 {
 	return (address >> P4D_SHIFT) & (PTRS_PER_P4D - 1);
 }
 
-#if CONFIG_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 4
 static inline int pgd_present(pgd_t pgd)
 {
 	return pgd_flags(pgd) & _PAGE_PRESENT;
@@ -807,9 +856,9 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
 #define pgd_page(pgd)		pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)
 
 /* to find an entry in a page-table-directory. */
-static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
+static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
 {
-	return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address);
+	return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address);
 }
 
 static inline int pgd_bad(pgd_t pgd)
@@ -827,7 +876,7 @@ static inline int pgd_none(pgd_t pgd)
 	 */
 	return !native_pgd_val(pgd);
 }
-#endif	/* CONFIG_PGTABLE_LEVELS > 3 */
+#endif	/* CONFIG_PGTABLE_LEVELS > 4 */
 
 #endif	/* __ASSEMBLY__ */
 
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 73c7ccc38912..79396bfdc791 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -41,9 +41,9 @@ extern void paging_init(void);
 
 struct mm_struct;
 
+void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte);
 void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);
 
-
 static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
 				    pte_t *ptep)
 {
@@ -121,6 +121,16 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp)
 #endif
 }
 
+static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
+{
+	*p4dp = p4d;
+}
+
+static inline void native_p4d_clear(p4d_t *p4d)
+{
+	native_set_p4d(p4d, (p4d_t) { .pgd = native_make_pgd(0)});
+}
+
 static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
 	*pgdp = pgd;
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index df08535f774a..4930afe9df0a 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -277,11 +277,11 @@ static inline pgdval_t pgd_flags(pgd_t pgd)
 #error FIXME
 
 #else
-#include <asm-generic/5level-fixup.h>
+#include <asm-generic/pgtable-nop4d.h>
 
 static inline p4dval_t native_p4d_val(p4d_t p4d)
 {
-	return native_pgd_val(p4d);
+	return native_pgd_val(p4d.pgd);
 }
 #endif
 
@@ -298,12 +298,11 @@ static inline pudval_t native_pud_val(pud_t pud)
 	return pud.pud;
 }
 #else
-#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
 
 static inline pudval_t native_pud_val(pud_t pud)
 {
-	return native_pgd_val(pud.pgd);
+	return native_pgd_val(pud.p4d.pgd);
 }
 #endif
 
@@ -320,12 +319,11 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
 	return pmd.pmd;
 }
 #else
-#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 static inline pmdval_t native_pmd_val(pmd_t pmd)
 {
-	return native_pgd_val(pmd.pud.pgd);
+	return native_pgd_val(pmd.pud.p4d.pgd);
 }
 #endif
 
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 64c5e745ebad..8a5a02b1dfba 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -280,13 +280,17 @@ static inline pte_t __pte_ma(pteval_t x)
 
 #define pmd_val_ma(v) ((v).pmd)
 #ifdef __PAGETABLE_PUD_FOLDED
-#define pud_val_ma(v) ((v).pgd.pgd)
+#define pud_val_ma(v) ((v).p4d.pgd.pgd)
 #else
 #define pud_val_ma(v) ((v).pud)
 #endif
 #define __pmd_ma(x)	((pmd_t) { (x) } )
 
-#define pgd_val_ma(x)	((x).pgd)
+#ifdef __PAGETABLE_P4D_FOLDED
+#define p4d_val_ma(x)	((x).pgd.pgd)
+#else
+#define p4d_val_ma(x)	((x).p4d)
+#endif
 
 void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid);
 
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 4797e87b0fb6..110daf22f5c7 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -430,12 +430,16 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
 	.pmd_val = PTE_IDENT,
 	.make_pmd = PTE_IDENT,
 
-#if CONFIG_PGTABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS >= 4
 	.pud_val = PTE_IDENT,
 	.make_pud = PTE_IDENT,
 
-	.set_pgd = native_set_pgd,
-#endif
+	.set_p4d = native_set_p4d,
+
+#if CONFIG_PGTABLE_LEVELS >= 5
+#error FIXME
+#endif /* CONFIG_PGTABLE_LEVELS >= 4 */
+#endif /* CONFIG_PGTABLE_LEVELS >= 4 */
 #endif /* CONFIG_PGTABLE_LEVELS >= 3 */
 
 	.pte_val = PTE_IDENT,
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index f6da869810a8..a242139df8fe 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -97,28 +97,38 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 	unsigned long address;
 
 	for (address = start; address <= end; address += PGDIR_SIZE) {
-		const pgd_t *pgd_ref = pgd_offset_k(address);
+		pgd_t *pgd_ref = pgd_offset_k(address);
+		const p4d_t *p4d_ref;
 		struct page *page;
 
-		if (pgd_none(*pgd_ref))
+		/*
+		 * With folded p4d, pgd_none() is always false, we need to
+		 * handle synchonization on p4d level.
+		 */
+		BUILD_BUG_ON(pgd_none(*pgd_ref));
+		p4d_ref = p4d_offset(pgd_ref, address);
+
+		if (p4d_none(*p4d_ref))
 			continue;
 
 		spin_lock(&pgd_lock);
 		list_for_each_entry(page, &pgd_list, lru) {
 			pgd_t *pgd;
+			p4d_t *p4d;
 			spinlock_t *pgt_lock;
 
 			pgd = (pgd_t *)page_address(page) + pgd_index(address);
+			p4d = p4d_offset(pgd, address);
 			/* the pgt_lock only for Xen */
 			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
 			spin_lock(pgt_lock);
 
-			if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
-				BUG_ON(pgd_page_vaddr(*pgd)
-				       != pgd_page_vaddr(*pgd_ref));
+			if (!p4d_none(*p4d_ref) && !p4d_none(*p4d))
+				BUG_ON(p4d_page_vaddr(*p4d)
+				       != p4d_page_vaddr(*p4d_ref));
 
-			if (pgd_none(*pgd))
-				set_pgd(pgd, *pgd_ref);
+			if (p4d_none(*p4d))
+				set_p4d(p4d, *p4d_ref);
 
 			spin_unlock(pgt_lock);
 		}
@@ -149,16 +159,28 @@ static __ref void *spp_getpage(void)
 	return ptr;
 }
 
-static pud_t *fill_pud(pgd_t *pgd, unsigned long vaddr)
+static p4d_t *fill_p4d(pgd_t *pgd, unsigned long vaddr)
 {
 	if (pgd_none(*pgd)) {
-		pud_t *pud = (pud_t *)spp_getpage();
-		pgd_populate(&init_mm, pgd, pud);
-		if (pud != pud_offset(pgd, 0))
+		p4d_t *p4d = (p4d_t *)spp_getpage();
+		pgd_populate(&init_mm, pgd, p4d);
+		if (p4d != p4d_offset(pgd, 0))
 			printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n",
-			       pud, pud_offset(pgd, 0));
+			       p4d, p4d_offset(pgd, 0));
+	}
+	return p4d_offset(pgd, vaddr);
+}
+
+static pud_t *fill_pud(p4d_t *p4d, unsigned long vaddr)
+{
+	if (p4d_none(*p4d)) {
+		pud_t *pud = (pud_t *)spp_getpage();
+		p4d_populate(&init_mm, p4d, pud);
+		if (pud != pud_offset(p4d, 0))
+			printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
+			       pud, pud_offset(p4d, 0));
 	}
-	return pud_offset(pgd, vaddr);
+	return pud_offset(p4d, vaddr);
 }
 
 static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr)
@@ -167,7 +189,7 @@ static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr)
 		pmd_t *pmd = (pmd_t *) spp_getpage();
 		pud_populate(&init_mm, pud, pmd);
 		if (pmd != pmd_offset(pud, 0))
-			printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
+			printk(KERN_ERR "PAGETABLE BUG #02! %p <-> %p\n",
 			       pmd, pmd_offset(pud, 0));
 	}
 	return pmd_offset(pud, vaddr);
@@ -179,20 +201,15 @@ static pte_t *fill_pte(pmd_t *pmd, unsigned long vaddr)
 		pte_t *pte = (pte_t *) spp_getpage();
 		pmd_populate_kernel(&init_mm, pmd, pte);
 		if (pte != pte_offset_kernel(pmd, 0))
-			printk(KERN_ERR "PAGETABLE BUG #02!\n");
+			printk(KERN_ERR "PAGETABLE BUG #03!\n");
 	}
 	return pte_offset_kernel(pmd, vaddr);
 }
 
-void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
+static void __set_pte_vaddr(pud_t *pud, unsigned long vaddr, pte_t new_pte)
 {
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-
-	pud = pud_page + pud_index(vaddr);
-	pmd = fill_pmd(pud, vaddr);
-	pte = fill_pte(pmd, vaddr);
+	pmd_t *pmd = fill_pmd(pud, vaddr);
+	pte_t *pte = fill_pte(pmd, vaddr);
 
 	set_pte(pte, new_pte);
 
@@ -203,10 +220,25 @@ void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
 	__flush_tlb_one(vaddr);
 }
 
+void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte)
+{
+	p4d_t *p4d = p4d_page + p4d_index(vaddr);
+	pud_t *pud = fill_pud(p4d, vaddr);
+
+	__set_pte_vaddr(pud, vaddr, new_pte);
+}
+
+void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
+{
+	pud_t *pud = pud_page + pud_index(vaddr);
+
+	__set_pte_vaddr(pud, vaddr, new_pte);
+}
+
 void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
 {
 	pgd_t *pgd;
-	pud_t *pud_page;
+	p4d_t *p4d_page;
 
 	pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, native_pte_val(pteval));
 
@@ -216,17 +248,20 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
 			"PGD FIXMAP MISSING, it should be setup in head.S!\n");
 		return;
 	}
-	pud_page = (pud_t*)pgd_page_vaddr(*pgd);
-	set_pte_vaddr_pud(pud_page, vaddr, pteval);
+
+	p4d_page = p4d_offset(pgd, 0);
+	set_pte_vaddr_p4d(p4d_page, vaddr, pteval);
 }
 
 pmd_t * __init populate_extra_pmd(unsigned long vaddr)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 
 	pgd = pgd_offset_k(vaddr);
-	pud = fill_pud(pgd, vaddr);
+	p4d = fill_p4d(pgd, vaddr);
+	pud = fill_pud(p4d, vaddr);
 	return fill_pmd(pud, vaddr);
 }
 
@@ -245,6 +280,7 @@ static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
 					enum page_cache_mode cache)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pgprot_t prot;
@@ -255,11 +291,17 @@ static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
 	for (; size; phys += PMD_SIZE, size -= PMD_SIZE) {
 		pgd = pgd_offset_k((unsigned long)__va(phys));
 		if (pgd_none(*pgd)) {
+			p4d = (p4d_t *) spp_getpage();
+			set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE |
+						_PAGE_USER));
+		}
+		p4d = p4d_offset(pgd, (unsigned long)__va(phys));
+		if (p4d_none(*p4d)) {
 			pud = (pud_t *) spp_getpage();
-			set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE |
+			set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE |
 						_PAGE_USER));
 		}
-		pud = pud_offset(pgd, (unsigned long)__va(phys));
+		pud = pud_offset(p4d, (unsigned long)__va(phys));
 		if (pud_none(*pud)) {
 			pmd = (pmd_t *) spp_getpage();
 			set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE |
@@ -563,12 +605,15 @@ kernel_physical_mapping_init(unsigned long paddr_start,
 
 	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
 		pgd_t *pgd = pgd_offset_k(vaddr);
+		p4d_t *p4d;
 		pud_t *pud;
 
 		vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE;
 
-		if (pgd_val(*pgd)) {
-			pud = (pud_t *)pgd_page_vaddr(*pgd);
+		BUILD_BUG_ON(pgd_none(*pgd));
+		p4d = p4d_offset(pgd, vaddr);
+		if (p4d_val(*p4d)) {
+			pud = (pud_t *)p4d_page_vaddr(*p4d);
 			paddr_last = phys_pud_init(pud, __pa(vaddr),
 						   __pa(vaddr_end),
 						   page_size_mask);
@@ -580,7 +625,7 @@ kernel_physical_mapping_init(unsigned long paddr_start,
 					   page_size_mask);
 
 		spin_lock(&init_mm.page_table_lock);
-		pgd_populate(&init_mm, pgd, pud);
+		p4d_populate(&init_mm, p4d, pud);
 		spin_unlock(&init_mm.page_table_lock);
 		pgd_changed = true;
 	}
@@ -726,6 +771,24 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
 	spin_unlock(&init_mm.page_table_lock);
 }
 
+static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
+{
+	pud_t *pud;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PUD; i++) {
+		pud = pud_start + i;
+		if (!pud_none(*pud))
+			return;
+	}
+
+	/* free a pud talbe */
+	free_pagetable(p4d_page(*p4d), 0);
+	spin_lock(&init_mm.page_table_lock);
+	p4d_clear(p4d);
+	spin_unlock(&init_mm.page_table_lock);
+}
+
 static void __meminit
 remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
 		 bool direct)
@@ -908,6 +971,32 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
 		update_page_count(PG_LEVEL_1G, -pages);
 }
 
+static void __meminit
+remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
+		 bool direct)
+{
+	unsigned long next, pages = 0;
+	pud_t *pud_base;
+	p4d_t *p4d;
+
+	p4d = p4d_start + p4d_index(addr);
+	for (; addr < end; addr = next, p4d++) {
+		next = p4d_addr_end(addr, end);
+
+		if (!p4d_present(*p4d))
+			continue;
+
+		BUILD_BUG_ON(p4d_large(*p4d));
+
+		pud_base = (pud_t *)p4d_page_vaddr(*p4d);
+		remove_pud_table(pud_base, addr, next, direct);
+		free_pud_table(pud_base, p4d);
+	}
+
+	if (direct)
+		update_page_count(PG_LEVEL_512G, -pages);
+}
+
 /* start and end are both virtual address. */
 static void __meminit
 remove_pagetable(unsigned long start, unsigned long end, bool direct)
@@ -915,7 +1004,7 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
 	unsigned long next;
 	unsigned long addr;
 	pgd_t *pgd;
-	pud_t *pud;
+	p4d_t *p4d;
 
 	for (addr = start; addr < end; addr = next) {
 		next = pgd_addr_end(addr, end);
@@ -924,8 +1013,8 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
 		if (!pgd_present(*pgd))
 			continue;
 
-		pud = (pud_t *)pgd_page_vaddr(*pgd);
-		remove_pud_table(pud, addr, next, direct);
+		p4d = (p4d_t *)pgd_page_vaddr(*pgd);
+		remove_p4d_table(p4d, addr, next, direct);
 	}
 
 	flush_tlb_all();
@@ -1090,6 +1179,7 @@ int kern_addr_valid(unsigned long addr)
 {
 	unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
@@ -1101,7 +1191,11 @@ int kern_addr_valid(unsigned long addr)
 	if (pgd_none(*pgd))
 		return 0;
 
-	pud = pud_offset(pgd, addr);
+	p4d = p4d_offset(pgd, addr);
+	if (p4d_none(*p4d))
+		return 0;
+
+	pud = pud_offset(p4d, addr);
 	if (pud_none(*pud))
 		return 0;
 
@@ -1158,6 +1252,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
 	unsigned long addr;
 	unsigned long next;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 
@@ -1168,7 +1263,11 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
 		if (!pgd)
 			return -ENOMEM;
 
-		pud = vmemmap_pud_populate(pgd, addr, node);
+		p4d = vmemmap_p4d_populate(pgd, addr, node);
+		if (!p4d)
+			return -ENOMEM;
+
+		pud = vmemmap_pud_populate(p4d, addr, node);
 		if (!pud)
 			return -ENOMEM;
 
@@ -1236,6 +1335,7 @@ void register_page_bootmem_memmap(unsigned long section_nr,
 	unsigned long end = (unsigned long)(start_page + size);
 	unsigned long next;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	unsigned int nr_pages;
@@ -1251,7 +1351,14 @@ void register_page_bootmem_memmap(unsigned long section_nr,
 		}
 		get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
 
-		pud = pud_offset(pgd, addr);
+		p4d = p4d_offset(pgd, addr);
+		if (p4d_none(*p4d)) {
+			next = (addr + PAGE_SIZE) & PAGE_MASK;
+			continue;
+		}
+		get_page_bootmem(section_nr, p4d_page(*p4d), MIX_SECTION_INFO);
+
+		pud = pud_offset(p4d, addr);
 		if (pud_none(*pud)) {
 			next = (addr + PAGE_SIZE) & PAGE_MASK;
 			continue;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index b3c4f13e48a4..f226038a39ca 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -535,40 +535,41 @@ static pgd_t *xen_get_user_pgd(pgd_t *pgd)
 	return user_ptr;
 }
 
-static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+static void __xen_set_p4d_hyper(p4d_t *ptr, p4d_t val)
 {
 	struct mmu_update u;
 
 	u.ptr = virt_to_machine(ptr).maddr;
-	u.val = pgd_val_ma(val);
+	u.val = p4d_val_ma(val);
 	xen_extend_mmu_update(&u);
 }
 
 /*
- * Raw hypercall-based set_pgd, intended for in early boot before
+ * Raw hypercall-based set_p4d, intended for in early boot before
  * there's a page structure.  This implies:
  *  1. The only existing pagetable is the kernel's
  *  2. It is always pinned
  *  3. It has no user pagetable attached to it
  */
-static void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+static void __init xen_set_p4d_hyper(p4d_t *ptr, p4d_t val)
 {
 	preempt_disable();
 
 	xen_mc_batch();
 
-	__xen_set_pgd_hyper(ptr, val);
+	__xen_set_p4d_hyper(ptr, val);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 
 	preempt_enable();
 }
 
-static void xen_set_pgd(pgd_t *ptr, pgd_t val)
+static void xen_set_p4d(p4d_t *ptr, p4d_t val)
 {
-	pgd_t *user_ptr = xen_get_user_pgd(ptr);
+	pgd_t *user_ptr = xen_get_user_pgd((pgd_t *)ptr);
+	pgd_t pgd_val;
 
-	trace_xen_mmu_set_pgd(ptr, user_ptr, val);
+	trace_xen_mmu_set_p4d(ptr, (p4d_t *)user_ptr, val);
 
 	/* If page is not pinned, we can just update the entry
 	   directly */
@@ -576,7 +577,8 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val)
 		*ptr = val;
 		if (user_ptr) {
 			WARN_ON(xen_page_pinned(user_ptr));
-			*user_ptr = val;
+			pgd_val.pgd = p4d_val_ma(val);
+			*user_ptr = pgd_val;
 		}
 		return;
 	}
@@ -585,9 +587,9 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val)
 	   user updates together. */
 	xen_mc_batch();
 
-	__xen_set_pgd_hyper(ptr, val);
+	__xen_set_p4d_hyper(ptr, val);
 	if (user_ptr)
-		__xen_set_pgd_hyper(user_ptr, val);
+		__xen_set_p4d_hyper((p4d_t *)user_ptr, val);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
@@ -1591,7 +1593,6 @@ static int xen_pgd_alloc(struct mm_struct *mm)
 		BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
 	}
 #endif
-
 	return ret;
 }
 
@@ -1783,7 +1784,7 @@ static void xen_release_pmd(unsigned long pfn)
 	xen_release_ptpage(pfn, PT_PMD);
 }
 
-#if CONFIG_PGTABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS >= 4
 static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
 {
 	xen_alloc_ptpage(mm, pfn, PT_PUD);
@@ -2124,21 +2125,27 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
  */
 void __init xen_relocate_p2m(void)
 {
-	phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys;
+	phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys, p4d_phys;
 	unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end;
-	int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud;
+	int n_pte, n_pt, n_pmd, n_pud, n_p4d, idx_pte, idx_pt, idx_pmd, idx_pud, idx_p4d;
 	pte_t *pt;
 	pmd_t *pmd;
 	pud_t *pud;
+	p4d_t *p4d = NULL;
 	pgd_t *pgd;
 	unsigned long *new_p2m;
+	int save_pud;
 
 	size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
 	n_pte = roundup(size, PAGE_SIZE) >> PAGE_SHIFT;
 	n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT;
 	n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT;
-	n_pud = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
-	n_frames = n_pte + n_pt + n_pmd + n_pud;
+	n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT;
+	if (PTRS_PER_P4D > 1)
+		n_p4d = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
+	else
+		n_p4d = 0;
+	n_frames = n_pte + n_pt + n_pmd + n_pud + n_p4d;
 
 	new_area = xen_find_free_area(PFN_PHYS(n_frames));
 	if (!new_area) {
@@ -2154,55 +2161,76 @@ void __init xen_relocate_p2m(void)
 	 * To avoid any possible virtual address collision, just use
 	 * 2 * PUD_SIZE for the new area.
 	 */
-	pud_phys = new_area;
+	p4d_phys = new_area;
+	pud_phys = p4d_phys + PFN_PHYS(n_p4d);
 	pmd_phys = pud_phys + PFN_PHYS(n_pud);
 	pt_phys = pmd_phys + PFN_PHYS(n_pmd);
 	p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
 
 	pgd = __va(read_cr3());
 	new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
-	for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
-		pud = early_memremap(pud_phys, PAGE_SIZE);
-		clear_page(pud);
-		for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
-		     idx_pmd++) {
-			pmd = early_memremap(pmd_phys, PAGE_SIZE);
-			clear_page(pmd);
-			for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
-			     idx_pt++) {
-				pt = early_memremap(pt_phys, PAGE_SIZE);
-				clear_page(pt);
-				for (idx_pte = 0;
-				     idx_pte < min(n_pte, PTRS_PER_PTE);
-				     idx_pte++) {
-					set_pte(pt + idx_pte,
-						pfn_pte(p2m_pfn, PAGE_KERNEL));
-					p2m_pfn++;
+	idx_p4d = 0;
+	save_pud = n_pud;
+	do {
+		if (n_p4d > 0) {
+			p4d = early_memremap(p4d_phys, PAGE_SIZE);
+			clear_page(p4d);
+			n_pud = min(save_pud, PTRS_PER_P4D);
+		}
+		for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
+			pud = early_memremap(pud_phys, PAGE_SIZE);
+			clear_page(pud);
+			for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
+				 idx_pmd++) {
+				pmd = early_memremap(pmd_phys, PAGE_SIZE);
+				clear_page(pmd);
+				for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
+					 idx_pt++) {
+					pt = early_memremap(pt_phys, PAGE_SIZE);
+					clear_page(pt);
+					for (idx_pte = 0;
+						 idx_pte < min(n_pte, PTRS_PER_PTE);
+						 idx_pte++) {
+						set_pte(pt + idx_pte,
+								pfn_pte(p2m_pfn, PAGE_KERNEL));
+						p2m_pfn++;
+					}
+					n_pte -= PTRS_PER_PTE;
+					early_memunmap(pt, PAGE_SIZE);
+					make_lowmem_page_readonly(__va(pt_phys));
+					pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
+							PFN_DOWN(pt_phys));
+					set_pmd(pmd + idx_pt,
+							__pmd(_PAGE_TABLE | pt_phys));
+					pt_phys += PAGE_SIZE;
 				}
-				n_pte -= PTRS_PER_PTE;
-				early_memunmap(pt, PAGE_SIZE);
-				make_lowmem_page_readonly(__va(pt_phys));
-				pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
-						  PFN_DOWN(pt_phys));
-				set_pmd(pmd + idx_pt,
-					__pmd(_PAGE_TABLE | pt_phys));
-				pt_phys += PAGE_SIZE;
+				n_pt -= PTRS_PER_PMD;
+				early_memunmap(pmd, PAGE_SIZE);
+				make_lowmem_page_readonly(__va(pmd_phys));
+				pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
+						PFN_DOWN(pmd_phys));
+				set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
+				pmd_phys += PAGE_SIZE;
 			}
-			n_pt -= PTRS_PER_PMD;
-			early_memunmap(pmd, PAGE_SIZE);
-			make_lowmem_page_readonly(__va(pmd_phys));
-			pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
-					  PFN_DOWN(pmd_phys));
-			set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
-			pmd_phys += PAGE_SIZE;
+			n_pmd -= PTRS_PER_PUD;
+			early_memunmap(pud, PAGE_SIZE);
+			make_lowmem_page_readonly(__va(pud_phys));
+			pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
+			if (n_p4d > 0)
+				set_p4d(p4d + idx_pud, __p4d(_PAGE_TABLE | pud_phys));
+			else
+				set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
+			pud_phys += PAGE_SIZE;
 		}
-		n_pmd -= PTRS_PER_PUD;
-		early_memunmap(pud, PAGE_SIZE);
-		make_lowmem_page_readonly(__va(pud_phys));
-		pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
-		set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
-		pud_phys += PAGE_SIZE;
-	}
+		if (n_p4d > 0) {
+			save_pud -= PTRS_PER_P4D;
+			early_memunmap(p4d, PAGE_SIZE);
+			make_lowmem_page_readonly(__va(p4d_phys));
+			pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(p4d_phys));
+			set_pgd(pgd + 2 + idx_p4d, __pgd(_PAGE_TABLE | p4d_phys));
+			p4d_phys += PAGE_SIZE;
+		}
+	} while (++idx_p4d < n_p4d);
 
 	/* Now copy the old p2m info to the new area. */
 	memcpy(new_p2m, xen_p2m_addr, size);
@@ -2432,8 +2460,8 @@ static void __init xen_post_allocator_init(void)
 	pv_mmu_ops.set_pte = xen_set_pte;
 	pv_mmu_ops.set_pmd = xen_set_pmd;
 	pv_mmu_ops.set_pud = xen_set_pud;
-#if CONFIG_PGTABLE_LEVELS == 4
-	pv_mmu_ops.set_pgd = xen_set_pgd;
+#if CONFIG_PGTABLE_LEVELS >= 4
+	pv_mmu_ops.set_p4d = xen_set_p4d;
 #endif
 
 	/* This will work as long as patching hasn't happened yet
@@ -2442,7 +2470,7 @@ static void __init xen_post_allocator_init(void)
 	pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
 	pv_mmu_ops.release_pte = xen_release_pte;
 	pv_mmu_ops.release_pmd = xen_release_pmd;
-#if CONFIG_PGTABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS >= 4
 	pv_mmu_ops.alloc_pud = xen_alloc_pud;
 	pv_mmu_ops.release_pud = xen_release_pud;
 #endif
@@ -2508,10 +2536,10 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 	.make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
 	.pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
 
-#if CONFIG_PGTABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS >= 4
 	.pud_val = PV_CALLEE_SAVE(xen_pud_val),
 	.make_pud = PV_CALLEE_SAVE(xen_make_pud),
-	.set_pgd = xen_set_pgd_hyper,
+	.set_p4d = xen_set_p4d_hyper,
 
 	.alloc_pud = xen_alloc_pmd_init,
 	.release_pud = xen_release_pmd_init,
diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h
index bce990f5a35d..31acce9019a6 100644
--- a/include/trace/events/xen.h
+++ b/include/trace/events/xen.h
@@ -241,21 +241,21 @@ TRACE_EVENT(xen_mmu_set_pud,
 		      (int)sizeof(pudval_t) * 2, (unsigned long long)__entry->pudval)
 	);
 
-TRACE_EVENT(xen_mmu_set_pgd,
-	    TP_PROTO(pgd_t *pgdp, pgd_t *user_pgdp, pgd_t pgdval),
-	    TP_ARGS(pgdp, user_pgdp, pgdval),
+TRACE_EVENT(xen_mmu_set_p4d,
+	    TP_PROTO(p4d_t *p4dp, p4d_t *user_p4dp, p4d_t p4dval),
+	    TP_ARGS(p4dp, user_p4dp, p4dval),
 	    TP_STRUCT__entry(
-		    __field(pgd_t *, pgdp)
-		    __field(pgd_t *, user_pgdp)
-		    __field(pgdval_t, pgdval)
-		    ),
-	    TP_fast_assign(__entry->pgdp = pgdp;
-			   __entry->user_pgdp = user_pgdp;
-			   __entry->pgdval = pgdval.pgd),
-	    TP_printk("pgdp %p user_pgdp %p pgdval %0*llx (raw %0*llx)",
-		      __entry->pgdp, __entry->user_pgdp,
-		      (int)sizeof(pgdval_t) * 2, (unsigned long long)pgd_val(native_make_pgd(__entry->pgdval)),
-		      (int)sizeof(pgdval_t) * 2, (unsigned long long)__entry->pgdval)
+		    __field(p4d_t *, p4dp)
+		    __field(p4d_t *, user_p4dp)
+		    __field(p4dval_t, p4dval)
+		    ),
+	    TP_fast_assign(__entry->p4dp = p4dp;
+			   __entry->user_p4dp = user_p4dp;
+			   __entry->p4dval = p4d_val(p4dval)),
+	    TP_printk("p4dp %p user_p4dp %p p4dval %0*llx (raw %0*llx)",
+		      __entry->p4dp, __entry->user_p4dp,
+		      (int)sizeof(p4dval_t) * 2, (unsigned long long)pgd_val(native_make_pgd(__entry->p4dval)),
+		      (int)sizeof(p4dval_t) * 2, (unsigned long long)__entry->p4dval)
 	);
 
 TRACE_EVENT(xen_mmu_pud_clear,
-- 
2.11.0

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/6] x86/mm/pat: Add 5-level paging support
  2017-03-17 18:55 ` [PATCH 3/6] x86/mm/pat: " Kirill A. Shutemov
@ 2017-03-17 19:49   ` Thomas Gleixner
  0 siblings, 0 replies; 14+ messages in thread
From: Thomas Gleixner @ 2017-03-17 19:49 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Linus Torvalds, Andrew Morton, x86, Ingo Molnar, Arnd Bergmann,
	H. Peter Anvin, Andi Kleen, Dave Hansen, Andy Lutomirski,
	Michal Hocko, linux-arch, linux-mm, linux-kernel

On Fri, 17 Mar 2017, Kirill A. Shutemov wrote:

> Straight-forward extension of existing code to support additional page
> table level.

Nicely done!

Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 4/6] x86/kasan: Prepare clear_pgds() to switch to <asm-generic/pgtable-nop4d.h>
  2017-03-17 18:55 ` [PATCH 4/6] x86/kasan: Prepare clear_pgds() to switch to <asm-generic/pgtable-nop4d.h> Kirill A. Shutemov
@ 2017-03-20 16:21   ` Andrey Ryabinin
  2017-03-22  7:31     ` Ingo Molnar
  0 siblings, 1 reply; 14+ messages in thread
From: Andrey Ryabinin @ 2017-03-20 16:21 UTC (permalink / raw)
  To: Kirill A. Shutemov, Linus Torvalds, Andrew Morton, x86,
	Thomas Gleixner, Ingo Molnar, Arnd Bergmann, H. Peter Anvin
  Cc: Andi Kleen, Dave Hansen, Andy Lutomirski, Michal Hocko,
	linux-arch, linux-mm, linux-kernel, Dmitry Vyukov



On 03/17/2017 09:55 PM, Kirill A. Shutemov wrote:
> With folded p4d, pgd_clear() is nop. Change clear_pgds() to use
> p4d_clear() instead.
> 

You could probably just use set_pgd(pgd_offset_k(start), __pgd(0)); instead of pgd_clear()
as we already do in arm64.
It's basically pgd_clear() except it's not a nop wih p4d folded.


> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> Cc: Dmitry Vyukov <dvyukov@google.com>
> ---
>  arch/x86/mm/kasan_init_64.c | 15 +++++++++++++--
>  1 file changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
> index 0a56059a95c7..b775ffd7989d 100644
> --- a/arch/x86/mm/kasan_init_64.c
> +++ b/arch/x86/mm/kasan_init_64.c
> @@ -35,8 +35,19 @@ static int __init map_range(struct range *range)
>  static void __init clear_pgds(unsigned long start,
>  			unsigned long end)
>  {
> -	for (; start < end; start += PGDIR_SIZE)
> -		pgd_clear(pgd_offset_k(start));
> +	pgd_t *pgd;
> +
> +	for (; start < end; start += PGDIR_SIZE) {
> +		pgd = pgd_offset_k(start);
> +		/*
> +		 * With folded p4d, pgd_clear() is nop, use p4d_clear()
> +		 * instead.
> +		 */
> +		if (CONFIG_PGTABLE_LEVELS < 5)
> +			p4d_clear(p4d_offset(pgd, start));
> +		else
> +			pgd_clear(pgd);
> +	}
>  }
>  
>  static void __init kasan_map_early_shadow(pgd_t *pgd)
> 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 4/6] x86/kasan: Prepare clear_pgds() to switch to <asm-generic/pgtable-nop4d.h>
  2017-03-20 16:21   ` Andrey Ryabinin
@ 2017-03-22  7:31     ` Ingo Molnar
  2017-03-24  9:07       ` Kirill A. Shutemov
  0 siblings, 1 reply; 14+ messages in thread
From: Ingo Molnar @ 2017-03-22  7:31 UTC (permalink / raw)
  To: Andrey Ryabinin
  Cc: Kirill A. Shutemov, Linus Torvalds, Andrew Morton, x86,
	Thomas Gleixner, Ingo Molnar, Arnd Bergmann, H. Peter Anvin,
	Andi Kleen, Dave Hansen, Andy Lutomirski, Michal Hocko,
	linux-arch, linux-mm, linux-kernel, Dmitry Vyukov


* Andrey Ryabinin <aryabinin@virtuozzo.com> wrote:

> 
> 
> On 03/17/2017 09:55 PM, Kirill A. Shutemov wrote:
> > With folded p4d, pgd_clear() is nop. Change clear_pgds() to use
> > p4d_clear() instead.
> > 
> 
> You could probably just use set_pgd(pgd_offset_k(start), __pgd(0)); instead of pgd_clear()
> as we already do in arm64.
> It's basically pgd_clear() except it's not a nop wih p4d folded.

Makes sense. Kirill, if you agree, mind spinning a v2 patch?

Thanks,

	Ingo

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 4/6] x86/kasan: Prepare clear_pgds() to switch to <asm-generic/pgtable-nop4d.h>
  2017-03-22  7:31     ` Ingo Molnar
@ 2017-03-24  9:07       ` Kirill A. Shutemov
  2017-03-24 10:58         ` Ingo Molnar
  0 siblings, 1 reply; 14+ messages in thread
From: Kirill A. Shutemov @ 2017-03-24  9:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrey Ryabinin, Kirill A. Shutemov, Linus Torvalds,
	Andrew Morton, x86, Thomas Gleixner, Ingo Molnar, Arnd Bergmann,
	H. Peter Anvin, Andi Kleen, Dave Hansen, Andy Lutomirski,
	Michal Hocko, linux-arch, linux-mm, linux-kernel, Dmitry Vyukov

On Wed, Mar 22, 2017 at 08:31:36AM +0100, Ingo Molnar wrote:
> * Andrey Ryabinin <aryabinin@virtuozzo.com> wrote:
> > On 03/17/2017 09:55 PM, Kirill A. Shutemov wrote:
> > > With folded p4d, pgd_clear() is nop. Change clear_pgds() to use
> > > p4d_clear() instead.
> > > 
> > 
> > You could probably just use set_pgd(pgd_offset_k(start), __pgd(0)); instead of pgd_clear()
> > as we already do in arm64.
> > It's basically pgd_clear() except it's not a nop wih p4d folded.
> 
> Makes sense. Kirill, if you agree, mind spinning a v2 patch?

I can re-spin, if you want. But honestly, I don't think such constructs
help readability.

-- 
 Kirill A. Shutemov

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 4/6] x86/kasan: Prepare clear_pgds() to switch to <asm-generic/pgtable-nop4d.h>
  2017-03-24  9:07       ` Kirill A. Shutemov
@ 2017-03-24 10:58         ` Ingo Molnar
  0 siblings, 0 replies; 14+ messages in thread
From: Ingo Molnar @ 2017-03-24 10:58 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Andrey Ryabinin, Kirill A. Shutemov, Linus Torvalds,
	Andrew Morton, x86, Thomas Gleixner, Ingo Molnar, Arnd Bergmann,
	H. Peter Anvin, Andi Kleen, Dave Hansen, Andy Lutomirski,
	Michal Hocko, linux-arch, linux-mm, linux-kernel, Dmitry Vyukov


* Kirill A. Shutemov <kirill@shutemov.name> wrote:

> On Wed, Mar 22, 2017 at 08:31:36AM +0100, Ingo Molnar wrote:
> > * Andrey Ryabinin <aryabinin@virtuozzo.com> wrote:
> > > On 03/17/2017 09:55 PM, Kirill A. Shutemov wrote:
> > > > With folded p4d, pgd_clear() is nop. Change clear_pgds() to use
> > > > p4d_clear() instead.
> > > > 
> > > 
> > > You could probably just use set_pgd(pgd_offset_k(start), __pgd(0)); instead of pgd_clear()
> > > as we already do in arm64.
> > > It's basically pgd_clear() except it's not a nop wih p4d folded.
> > 
> > Makes sense. Kirill, if you agree, mind spinning a v2 patch?
> 
> I can re-spin, if you want. But honestly, I don't think such constructs
> help readability.

Good point - I'll keep it as-is unless convinced otherwise.

Thanks,

	Ingo

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 5/6] x86/xen: Change __xen_pgd_walk() and xen_cleanmfnmap() to support p4d
  2017-03-17 18:55 ` [PATCH 5/6] x86/xen: Change __xen_pgd_walk() and xen_cleanmfnmap() to support p4d Kirill A. Shutemov
@ 2017-03-27  6:34   ` Ingo Molnar
  2017-03-27 13:13     ` Kirill A. Shutemov
  0 siblings, 1 reply; 14+ messages in thread
From: Ingo Molnar @ 2017-03-27  6:34 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Linus Torvalds, Andrew Morton, x86, Thomas Gleixner, Ingo Molnar,
	Arnd Bergmann, H. Peter Anvin, Andi Kleen, Dave Hansen,
	Andy Lutomirski, Michal Hocko, linux-arch, linux-mm, linux-kernel,
	Xiong Zhang

y
* Kirill A. Shutemov <kirill.shutemov@linux.intel.com> wrote:

> Split these helpers into few per-level functions and add support of
> additional page table level.
> 
> Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.com>
> [kirill.shutemov@linux.intel.com: split off into separate patch]
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>

So who's the primary author of this patch, you or Xiong Zhang? If the latter then 
a "From: " line is missing. For now I've added the missing "From" line.

Thanks,

	Ingo

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 5/6] x86/xen: Change __xen_pgd_walk() and xen_cleanmfnmap() to support p4d
  2017-03-27  6:34   ` Ingo Molnar
@ 2017-03-27 13:13     ` Kirill A. Shutemov
  0 siblings, 0 replies; 14+ messages in thread
From: Kirill A. Shutemov @ 2017-03-27 13:13 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Kirill A. Shutemov, Linus Torvalds, Andrew Morton, x86,
	Thomas Gleixner, Ingo Molnar, Arnd Bergmann, H. Peter Anvin,
	Andi Kleen, Dave Hansen, Andy Lutomirski, Michal Hocko,
	linux-arch, linux-mm, linux-kernel, Xiong Zhang

On Mon, Mar 27, 2017 at 08:34:23AM +0200, Ingo Molnar wrote:
> y
> * Kirill A. Shutemov <kirill.shutemov@linux.intel.com> wrote:
> 
> > Split these helpers into few per-level functions and add support of
> > additional page table level.
> > 
> > Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.com>
> > [kirill.shutemov@linux.intel.com: split off into separate patch]
> > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> 
> So who's the primary author of this patch, you or Xiong Zhang? If the latter then 
> a "From: " line is missing. For now I've added the missing "From" line.

Xiong is author of the changes. I've writetten commit message.

So, you're right, "From:" is missing.

Thanks.

-- 
 Kirill A. Shutemov

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2017-03-27 13:13 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-03-17 18:55 [PATCH 0/6] x86: 5-level paging enabling for v4.12, Part 2 Kirill A. Shutemov
2017-03-17 18:55 ` [PATCH 1/6] x86/kexec: Add 5-level paging support Kirill A. Shutemov
2017-03-17 18:55 ` [PATCH 2/6] x86/efi: " Kirill A. Shutemov
2017-03-17 18:55 ` [PATCH 3/6] x86/mm/pat: " Kirill A. Shutemov
2017-03-17 19:49   ` Thomas Gleixner
2017-03-17 18:55 ` [PATCH 4/6] x86/kasan: Prepare clear_pgds() to switch to <asm-generic/pgtable-nop4d.h> Kirill A. Shutemov
2017-03-20 16:21   ` Andrey Ryabinin
2017-03-22  7:31     ` Ingo Molnar
2017-03-24  9:07       ` Kirill A. Shutemov
2017-03-24 10:58         ` Ingo Molnar
2017-03-17 18:55 ` [PATCH 5/6] x86/xen: Change __xen_pgd_walk() and xen_cleanmfnmap() to support p4d Kirill A. Shutemov
2017-03-27  6:34   ` Ingo Molnar
2017-03-27 13:13     ` Kirill A. Shutemov
2017-03-17 18:55 ` [PATCH 6/6] x86: Convert the rest of the code to support p4d_t Kirill A. Shutemov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).