linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 4/4] x86/boot/compressed/64: Handle 5-level paging boot if kernel is above 4G
  2017-10-20 19:59 [PATCH 0/4] Boot-time switching between 4- and 5-level paging for 4.15, Part 2 Kirill A. Shutemov
@ 2017-10-20 19:59 ` Kirill A. Shutemov
  0 siblings, 0 replies; 7+ messages in thread
From: Kirill A. Shutemov @ 2017-10-20 19:59 UTC (permalink / raw)
  To: Ingo Molnar, Linus Torvalds, x86, Thomas Gleixner, H. Peter Anvin
  Cc: Andy Lutomirski, Cyrill Gorcunov, Borislav Petkov, Andi Kleen,
	linux-mm, linux-kernel, Kirill A. Shutemov

This patch addresses a shortcoming in the current boot process on machines
that support 5-level paging.

If bootloader enables 64-bit mode with 4-level paging, we need to
switch over to 5-level paging. The switching requires disabling paging.
It works fine if kernel itself is loaded below 4G.

If the bootloader puts the kernel above 4G (not sure if anybody does this),
we would lose control as soon as paging is disabled, as the code becomes
unreachable.

This patch implements trampoline in lower memory to handle this
situation.

We only need the memory for a very short time, until the main kernel image
sets up its own page tables.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/boot/compressed/head_64.S | 72 ++++++++++++++++++++++++--------------
 1 file changed, 45 insertions(+), 27 deletions(-)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 4d1555b39de0..e8331f5a77f4 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -32,6 +32,7 @@
 #include <asm/processor-flags.h>
 #include <asm/asm-offsets.h>
 #include <asm/bootparam.h>
+#include "pagetable.h"
 
 /*
  * Locally defined symbols should be marked hidden:
@@ -288,6 +289,19 @@ ENTRY(startup_64)
 	leaq	boot_stack_end(%rbx), %rsp
 
 #ifdef CONFIG_X86_5LEVEL
+/*
+ * We need a trampoline in lower memory to switch from 4- to 5-level paging for
+ * cases when bootloader put kernel above 4G, but didn't enable 5-level paging
+ * for us.
+ *
+ * We also have to have top page table in lower memory as we don't have a way
+ * to load 64-bit value into CR3 from 32-bit mode. We only need 8-bytes there
+ * as we only use the very first entry of the page table, but we allocate whole
+ * page anyway. We cannot have the code in the same page because there's a hazard
+ * that a CPU would read page table speculatively and get confused seeing
+ * garbage.
+ */
+
 	/*
 	 * Check if we need to enable 5-level paging.
 	 * RSI holds real mode data and need to be preserved across
@@ -309,8 +323,8 @@ ENTRY(startup_64)
 	 * long mode would trigger #GP. So we need to switch off long mode
 	 * first.
 	 *
-	 * NOTE: This is not going to work if bootloader put us above 4G
-	 * limit.
+	 * We use trampoline in lower memory to handle situation when
+	 * bootloader put the kernel image above 4G.
 	 *
 	 * The first step is go into compatibility mode.
 	 */
@@ -327,26 +341,20 @@ ENTRY(startup_64)
 	popq	%rsi
 	movq	%rax, %rcx
 
-	/* Clear additional page table */
-	leaq	lvl5_pgtable(%rbx), %rdi
-	xorq	%rax, %rax
-	movq	$(PAGE_SIZE/8), %rcx
-	rep	stosq
-
 	/*
-	 * Setup current CR3 as the first and only entry in a new top level
-	 * page table.
+	 * Load address of lvl5 into RDI.
+	 * It will be used as the return address from the trampoline.
 	 */
-	movq	%cr3, %rdi
-	leaq	0x7 (%rdi), %rax
-	movq	%rax, lvl5_pgtable(%rbx)
+	leaq	lvl5(%rip), %rdi
 
 	/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
 	pushq	$__KERNEL32_CS
-	leaq	compatible_mode(%rip), %rax
+	leaq	LVL5_TRAMPOLINE_CODE_OFF(%rcx), %rax
 	pushq	%rax
 	lretq
 lvl5:
+	/* Restore stack, 32-bit trampoline uses own stack */
+	leaq	boot_stack_end(%rbx), %rsp
 #endif
 
 	/* Zero EFLAGS */
@@ -484,22 +492,30 @@ relocated:
  */
 	jmp	*%rax
 
-	.code32
 #ifdef CONFIG_X86_5LEVEL
+	.code32
+/*
+ * This is 32-bit trampoline that will be copied over to low memory.
+ *
+ * RDI contains return address (might be above 4G).
+ * ECX contains the base address of trampoline memory.
+ */
 ENTRY(lvl5_trampoline_src)
-compatible_mode:
 	/* Setup data and stack segments */
 	movl	$__KERNEL_DS, %eax
 	movl	%eax, %ds
 	movl	%eax, %ss
 
+	/* Setup new stack at the end of trampoline memory */
+	leal	LVL5_TRAMPOLINE_STACK_END (%ecx), %esp
+
 	/* Disable paging */
 	movl	%cr0, %eax
 	btrl	$X86_CR0_PG_BIT, %eax
 	movl	%eax, %cr0
 
 	/* Point CR3 to 5-level paging */
-	leal	lvl5_pgtable(%ebx), %eax
+	leal	LVL5_TRAMPOLINE_PGTABLE_OFF (%ecx), %eax
 	movl	%eax, %cr3
 
 	/* Enable PAE and LA57 mode */
@@ -507,23 +523,29 @@ compatible_mode:
 	orl	$(X86_CR4_PAE | X86_CR4_LA57), %eax
 	movl	%eax, %cr4
 
-	/* Calculate address we are running at */
-	call	1f
-1:	popl	%edi
-	subl	$1b, %edi
+	/* Calculate address of lvl5_enabled once we are in trampoline */
+	leal	lvl5_enabled - lvl5_trampoline_src + LVL5_TRAMPOLINE_CODE_OFF (%ecx), %eax
 
 	/* Prepare stack for far return to Long Mode */
 	pushl	$__KERNEL_CS
-	leal	lvl5(%edi), %eax
-	push	%eax
+	pushl	%eax
 
 	/* Enable paging back */
 	movl	$(X86_CR0_PG | X86_CR0_PE), %eax
 	movl	%eax, %cr0
 
 	lret
+
+	.code64
+lvl5_enabled:
+	/* Return from trampoline */
+	jmp	*%rdi
+
+	/* Bound size of trampoline code */
+	.org	lvl5_trampoline_src + LVL5_TRAMPOLINE_CODE_SIZE
 #endif
 
+	.code32
 no_longmode:
 	/* This isn't an x86-64 CPU so hang */
 1:
@@ -581,7 +603,3 @@ boot_stack_end:
 	.balign 4096
 pgtable:
 	.fill BOOT_PGT_SIZE, 1, 0
-#ifdef CONFIG_X86_5LEVEL
-lvl5_pgtable:
-	.fill PAGE_SIZE, 1, 0
-#endif
-- 
2.14.2

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 4/4] x86/boot/compressed/64: Handle 5-level paging boot if kernel is above 4G
  2017-11-01 11:54 [PATCH 0/4] x86: 5-level related changes into decompression code Kirill A. Shutemov
@ 2017-11-01 11:55 ` Kirill A. Shutemov
  0 siblings, 0 replies; 7+ messages in thread
From: Kirill A. Shutemov @ 2017-11-01 11:55 UTC (permalink / raw)
  To: Ingo Molnar, Linus Torvalds, x86, Thomas Gleixner, H. Peter Anvin
  Cc: Andy Lutomirski, Cyrill Gorcunov, Borislav Petkov, Andi Kleen,
	linux-mm, linux-kernel, Kirill A. Shutemov

This patch addresses a shortcoming in the current boot process on machines
that support 5-level paging.

If bootloader enables 64-bit mode with 4-level paging, we need to
switch over to 5-level paging. The switching requires disabling paging.
It works fine if kernel itself is loaded below 4G.

If the bootloader puts the kernel above 4G (not sure if anybody does this),
we would lose control as soon as paging is disabled, as the code becomes
unreachable.

This patch implements trampoline in lower memory to handle this
situation.

We only need the memory for a very short time, until the main kernel image
sets up its own page tables.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/boot/compressed/head_64.S | 72 ++++++++++++++++++++++++--------------
 1 file changed, 45 insertions(+), 27 deletions(-)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 4d1555b39de0..e8331f5a77f4 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -32,6 +32,7 @@
 #include <asm/processor-flags.h>
 #include <asm/asm-offsets.h>
 #include <asm/bootparam.h>
+#include "pagetable.h"
 
 /*
  * Locally defined symbols should be marked hidden:
@@ -288,6 +289,19 @@ ENTRY(startup_64)
 	leaq	boot_stack_end(%rbx), %rsp
 
 #ifdef CONFIG_X86_5LEVEL
+/*
+ * We need a trampoline in lower memory to switch from 4- to 5-level paging for
+ * cases when bootloader put kernel above 4G, but didn't enable 5-level paging
+ * for us.
+ *
+ * We also have to have top page table in lower memory as we don't have a way
+ * to load 64-bit value into CR3 from 32-bit mode. We only need 8-bytes there
+ * as we only use the very first entry of the page table, but we allocate whole
+ * page anyway. We cannot have the code in the same page because there's a hazard
+ * that a CPU would read page table speculatively and get confused seeing
+ * garbage.
+ */
+
 	/*
 	 * Check if we need to enable 5-level paging.
 	 * RSI holds real mode data and need to be preserved across
@@ -309,8 +323,8 @@ ENTRY(startup_64)
 	 * long mode would trigger #GP. So we need to switch off long mode
 	 * first.
 	 *
-	 * NOTE: This is not going to work if bootloader put us above 4G
-	 * limit.
+	 * We use trampoline in lower memory to handle situation when
+	 * bootloader put the kernel image above 4G.
 	 *
 	 * The first step is go into compatibility mode.
 	 */
@@ -327,26 +341,20 @@ ENTRY(startup_64)
 	popq	%rsi
 	movq	%rax, %rcx
 
-	/* Clear additional page table */
-	leaq	lvl5_pgtable(%rbx), %rdi
-	xorq	%rax, %rax
-	movq	$(PAGE_SIZE/8), %rcx
-	rep	stosq
-
 	/*
-	 * Setup current CR3 as the first and only entry in a new top level
-	 * page table.
+	 * Load address of lvl5 into RDI.
+	 * It will be used as the return address from the trampoline.
 	 */
-	movq	%cr3, %rdi
-	leaq	0x7 (%rdi), %rax
-	movq	%rax, lvl5_pgtable(%rbx)
+	leaq	lvl5(%rip), %rdi
 
 	/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
 	pushq	$__KERNEL32_CS
-	leaq	compatible_mode(%rip), %rax
+	leaq	LVL5_TRAMPOLINE_CODE_OFF(%rcx), %rax
 	pushq	%rax
 	lretq
 lvl5:
+	/* Restore stack, 32-bit trampoline uses own stack */
+	leaq	boot_stack_end(%rbx), %rsp
 #endif
 
 	/* Zero EFLAGS */
@@ -484,22 +492,30 @@ relocated:
  */
 	jmp	*%rax
 
-	.code32
 #ifdef CONFIG_X86_5LEVEL
+	.code32
+/*
+ * This is 32-bit trampoline that will be copied over to low memory.
+ *
+ * RDI contains return address (might be above 4G).
+ * ECX contains the base address of trampoline memory.
+ */
 ENTRY(lvl5_trampoline_src)
-compatible_mode:
 	/* Setup data and stack segments */
 	movl	$__KERNEL_DS, %eax
 	movl	%eax, %ds
 	movl	%eax, %ss
 
+	/* Setup new stack at the end of trampoline memory */
+	leal	LVL5_TRAMPOLINE_STACK_END (%ecx), %esp
+
 	/* Disable paging */
 	movl	%cr0, %eax
 	btrl	$X86_CR0_PG_BIT, %eax
 	movl	%eax, %cr0
 
 	/* Point CR3 to 5-level paging */
-	leal	lvl5_pgtable(%ebx), %eax
+	leal	LVL5_TRAMPOLINE_PGTABLE_OFF (%ecx), %eax
 	movl	%eax, %cr3
 
 	/* Enable PAE and LA57 mode */
@@ -507,23 +523,29 @@ compatible_mode:
 	orl	$(X86_CR4_PAE | X86_CR4_LA57), %eax
 	movl	%eax, %cr4
 
-	/* Calculate address we are running at */
-	call	1f
-1:	popl	%edi
-	subl	$1b, %edi
+	/* Calculate address of lvl5_enabled once we are in trampoline */
+	leal	lvl5_enabled - lvl5_trampoline_src + LVL5_TRAMPOLINE_CODE_OFF (%ecx), %eax
 
 	/* Prepare stack for far return to Long Mode */
 	pushl	$__KERNEL_CS
-	leal	lvl5(%edi), %eax
-	push	%eax
+	pushl	%eax
 
 	/* Enable paging back */
 	movl	$(X86_CR0_PG | X86_CR0_PE), %eax
 	movl	%eax, %cr0
 
 	lret
+
+	.code64
+lvl5_enabled:
+	/* Return from trampoline */
+	jmp	*%rdi
+
+	/* Bound size of trampoline code */
+	.org	lvl5_trampoline_src + LVL5_TRAMPOLINE_CODE_SIZE
 #endif
 
+	.code32
 no_longmode:
 	/* This isn't an x86-64 CPU so hang */
 1:
@@ -581,7 +603,3 @@ boot_stack_end:
 	.balign 4096
 pgtable:
 	.fill BOOT_PGT_SIZE, 1, 0
-#ifdef CONFIG_X86_5LEVEL
-lvl5_pgtable:
-	.fill PAGE_SIZE, 1, 0
-#endif
-- 
2.14.2

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 0/4] x86/boot/compressed/64: Switch between paging modes using trampoline
@ 2018-03-12 10:02 Kirill A. Shutemov
  2018-03-12 10:02 ` [PATCH 1/4] x86/boot/compressed/64: Make sure we have 32-bit code segment Kirill A. Shutemov
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: Kirill A. Shutemov @ 2018-03-12 10:02 UTC (permalink / raw)
  To: Ingo Molnar, x86, Thomas Gleixner, H. Peter Anvin
  Cc: Linus Torvalds, Andy Lutomirski, Cyrill Gorcunov, Borislav Petkov,
	Andi Kleen, Matthew Wilcox, linux-mm, linux-kernel,
	Kirill A. Shutemov

This patchset changes kernel decompression code to use trampoline to
switch between paging modes.

The patchset is replacement for previously reverted patch "Handle 5-level
paging boot if kernel is above 4G".

Please review and consider applying.

Kirill A. Shutemov (4):
  x86/boot/compressed/64: Make sure we have 32-bit code segment
  x86/boot/compressed/64: Use stack from trampoline memory
  x86/boot/compressed/64: Use page table in trampoline memory
  x86/boot/compressed/64: Handle 5-level paging boot if kernel is above 4G

 arch/x86/boot/compressed/head_64.S | 128 ++++++++++++++++++++++++++-----------
 1 file changed, 90 insertions(+), 38 deletions(-)

-- 
2.16.1

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/4] x86/boot/compressed/64: Make sure we have 32-bit code segment
  2018-03-12 10:02 [PATCH 0/4] x86/boot/compressed/64: Switch between paging modes using trampoline Kirill A. Shutemov
@ 2018-03-12 10:02 ` Kirill A. Shutemov
  2018-03-12 10:02 ` [PATCH 2/4] x86/boot/compressed/64: Use stack from trampoline memory Kirill A. Shutemov
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 7+ messages in thread
From: Kirill A. Shutemov @ 2018-03-12 10:02 UTC (permalink / raw)
  To: Ingo Molnar, x86, Thomas Gleixner, H. Peter Anvin
  Cc: Linus Torvalds, Andy Lutomirski, Cyrill Gorcunov, Borislav Petkov,
	Andi Kleen, Matthew Wilcox, linux-mm, linux-kernel,
	Kirill A. Shutemov

When kernel starts in 64-bit mode we inherit GDT from a bootloader.
It may cause a problem if the GDT doesn't have 32-bit code segment
where we expect it to be.

Load our own GDT with known segments.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/boot/compressed/head_64.S | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index c813cb004056..f0c3a2f7e528 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -313,6 +313,11 @@ ENTRY(startup_64)
 	 * first.
 	 */
 
+	/* Make sure we have GDT with 32-bit code segment */
+	leaq	gdt(%rip), %rax
+	movq	%rax, gdt64+2(%rip)
+	lgdt	gdt64(%rip)
+
 	/*
 	 * paging_prepare() sets up the trampoline and checks if we need to
 	 * enable 5-level paging.
@@ -547,6 +552,11 @@ no_longmode:
 #include "../../kernel/verify_cpu.S"
 
 	.data
+gdt64:
+	.word	gdt_end - gdt
+	.long	0
+	.word	0
+	.quad   0
 gdt:
 	.word	gdt_end - gdt
 	.long	gdt
-- 
2.16.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/4] x86/boot/compressed/64: Use stack from trampoline memory
  2018-03-12 10:02 [PATCH 0/4] x86/boot/compressed/64: Switch between paging modes using trampoline Kirill A. Shutemov
  2018-03-12 10:02 ` [PATCH 1/4] x86/boot/compressed/64: Make sure we have 32-bit code segment Kirill A. Shutemov
@ 2018-03-12 10:02 ` Kirill A. Shutemov
  2018-03-12 10:02 ` [PATCH 3/4] x86/boot/compressed/64: Use page table in " Kirill A. Shutemov
  2018-03-12 10:02 ` [PATCH 4/4] x86/boot/compressed/64: Handle 5-level paging boot if kernel is above 4G Kirill A. Shutemov
  3 siblings, 0 replies; 7+ messages in thread
From: Kirill A. Shutemov @ 2018-03-12 10:02 UTC (permalink / raw)
  To: Ingo Molnar, x86, Thomas Gleixner, H. Peter Anvin
  Cc: Linus Torvalds, Andy Lutomirski, Cyrill Gorcunov, Borislav Petkov,
	Andi Kleen, Matthew Wilcox, linux-mm, linux-kernel,
	Kirill A. Shutemov

As the first step on using trampoline memory, let's make 32-bit code use
stack there.

A separate stack is required to return from the trampoline, and we cannot
use the stack from 64-bit mode as it may be above 4G.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/boot/compressed/head_64.S | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index f0c3a2f7e528..0014459d9bcb 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -33,6 +33,7 @@
 #include <asm/processor-flags.h>
 #include <asm/asm-offsets.h>
 #include <asm/bootparam.h>
+#include "pgtable.h"
 
 /*
  * Locally defined symbols should be marked hidden:
@@ -359,6 +360,8 @@ ENTRY(startup_64)
 	pushq	%rax
 	lretq
 lvl5:
+	/* Restore the stack, the 32-bit trampoline uses its own stack */
+	leaq	boot_stack_end(%rbx), %rsp
 
 	/*
 	 * cleanup_trampoline() would restore trampoline memory.
@@ -513,6 +516,9 @@ compatible_mode:
 	movl	%eax, %ds
 	movl	%eax, %ss
 
+	/* Setup new stack */
+	leal	TRAMPOLINE_32BIT_STACK_END(%ecx), %esp
+
 	/* Disable paging */
 	movl	%cr0, %eax
 	btrl	$X86_CR0_PG_BIT, %eax
-- 
2.16.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/4] x86/boot/compressed/64: Use page table in trampoline memory
  2018-03-12 10:02 [PATCH 0/4] x86/boot/compressed/64: Switch between paging modes using trampoline Kirill A. Shutemov
  2018-03-12 10:02 ` [PATCH 1/4] x86/boot/compressed/64: Make sure we have 32-bit code segment Kirill A. Shutemov
  2018-03-12 10:02 ` [PATCH 2/4] x86/boot/compressed/64: Use stack from trampoline memory Kirill A. Shutemov
@ 2018-03-12 10:02 ` Kirill A. Shutemov
  2018-03-12 10:02 ` [PATCH 4/4] x86/boot/compressed/64: Handle 5-level paging boot if kernel is above 4G Kirill A. Shutemov
  3 siblings, 0 replies; 7+ messages in thread
From: Kirill A. Shutemov @ 2018-03-12 10:02 UTC (permalink / raw)
  To: Ingo Molnar, x86, Thomas Gleixner, H. Peter Anvin
  Cc: Linus Torvalds, Andy Lutomirski, Cyrill Gorcunov, Borislav Petkov,
	Andi Kleen, Matthew Wilcox, linux-mm, linux-kernel,
	Kirill A. Shutemov

If a bootloader enables 64-bit mode with 4-level paging, we might need to
switch over to 5-level paging. The switching requires disabling
paging. It works fine if the kernel itself is loaded below 4G.

But if the bootloader puts the kernel above 4G (e.g. in the kexec() case),
we would lose control as soon as paging is disabled, because the code
becomes unreachable to the CPU.

To handle the situation, we need a trampoline in lower memory that would
take care of switching on 5-level paging.

Apart from the trampoline code itself we also need a place to store
top-level page table in lower memory as we don't have a way to load
64-bit values into CR3 in 32-bit mode. We only really need 8 bytes there
as we only use the very first entry of the page table. But we allocate a
whole page anyway.

This patch switches 32-bit code to use page table in trampoline memory.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/boot/compressed/head_64.S | 47 +++++++++++++++++++-------------------
 1 file changed, 23 insertions(+), 24 deletions(-)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 0014459d9bcb..836ed319e995 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -336,23 +336,6 @@ ENTRY(startup_64)
 	/* Save the trampoline address in RCX */
 	movq	%rax, %rcx
 
-	/* Check if we need to enable 5-level paging */
-	cmpq	$0, %rdx
-	jz	lvl5
-
-	/* Clear additional page table */
-	leaq	lvl5_pgtable(%rbx), %rdi
-	xorq	%rax, %rax
-	movq	$(PAGE_SIZE/8), %rcx
-	rep	stosq
-
-	/*
-	 * Setup current CR3 as the first and only entry in a new top level
-	 * page table.
-	 */
-	movq	%cr3, %rdi
-	leaq	0x7 (%rdi), %rax
-	movq	%rax, lvl5_pgtable(%rbx)
 
 	/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
 	pushq	$__KERNEL32_CS
@@ -524,13 +507,31 @@ compatible_mode:
 	btrl	$X86_CR0_PG_BIT, %eax
 	movl	%eax, %cr0
 
-	/* Point CR3 to 5-level paging */
-	leal	lvl5_pgtable(%ebx), %eax
-	movl	%eax, %cr3
+	/* Check what paging mode we want to be in after the trampoline */
+	cmpl	$0, %edx
+	jz	1f
 
-	/* Enable PAE and LA57 mode */
+	/* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */
 	movl	%cr4, %eax
-	orl	$(X86_CR4_PAE | X86_CR4_LA57), %eax
+	testl	$X86_CR4_LA57, %eax
+	jnz	3f
+	jmp	2f
+1:
+	/* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */
+	movl	%cr4, %eax
+	testl	$X86_CR4_LA57, %eax
+	jz	3f
+2:
+	/* Point CR3 to the trampoline's new top level page table */
+	leal	TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
+	movl	%eax, %cr3
+3:
+	/* Enable PAE and LA57 (if required) paging modes */
+	movl	$X86_CR4_PAE, %eax
+	cmpl	$0, %edx
+	jz	1f
+	orl	$X86_CR4_LA57, %eax
+1:
 	movl	%eax, %cr4
 
 	/* Calculate address we are running at */
@@ -611,5 +612,3 @@ boot_stack_end:
 	.balign 4096
 pgtable:
 	.fill BOOT_PGT_SIZE, 1, 0
-lvl5_pgtable:
-	.fill PAGE_SIZE, 1, 0
-- 
2.16.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 4/4] x86/boot/compressed/64: Handle 5-level paging boot if kernel is above 4G
  2018-03-12 10:02 [PATCH 0/4] x86/boot/compressed/64: Switch between paging modes using trampoline Kirill A. Shutemov
                   ` (2 preceding siblings ...)
  2018-03-12 10:02 ` [PATCH 3/4] x86/boot/compressed/64: Use page table in " Kirill A. Shutemov
@ 2018-03-12 10:02 ` Kirill A. Shutemov
  3 siblings, 0 replies; 7+ messages in thread
From: Kirill A. Shutemov @ 2018-03-12 10:02 UTC (permalink / raw)
  To: Ingo Molnar, x86, Thomas Gleixner, H. Peter Anvin
  Cc: Linus Torvalds, Andy Lutomirski, Cyrill Gorcunov, Borislav Petkov,
	Andi Kleen, Matthew Wilcox, linux-mm, linux-kernel,
	Kirill A. Shutemov

This patch addresses a shortcoming in the current boot process on machines
that support 5-level paging.

If a bootloader enables 64-bit mode with 4-level paging, we might need to
switch over to 5-level paging. The switching requires disabling
paging. It works fine if the kernel itself is loaded below 4G.

But if the bootloader put the kernel above 4G (not sure if anybody does
this), we would lose control as soon as paging is disabled, because the
code becomes unreachable to the CPU.

This patch implements a trampoline in lower memory to handle this
situation.

We only need the memory for a very short time, until the main kernel
image sets up its own page tables.

We go through the trampoline even if we don't have to: if we're already
in 5-level paging mode or if we don't need to switch to it. This way the
trampoline gets tested on every boot.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/boot/compressed/head_64.S | 69 +++++++++++++++++++++++++++++---------
 1 file changed, 53 insertions(+), 16 deletions(-)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 836ed319e995..33d7e72f3943 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -307,11 +307,27 @@ ENTRY(startup_64)
 
 	/*
 	 * At this point we are in long mode with 4-level paging enabled,
-	 * but we want to enable 5-level paging.
+	 * but we might want to enable 5-level paging or vice versa.
 	 *
-	 * The problem is that we cannot do it directly. Setting LA57 in
-	 * long mode would trigger #GP. So we need to switch off long mode
-	 * first.
+	 * The problem is that we cannot do it directly. Setting or clearing
+	 * CR4.LA57 in long mode would trigger #GP. So we need to switch off
+	 * long mode and paging first.
+	 *
+	 * We also need a trampoline in lower memory to switch over from
+	 * 4- to 5-level paging for cases when the bootloader puts the kernel
+	 * above 4G, but didn't enable 5-level paging for us.
+	 *
+	 * The same trampoline can be used to switch from 5- to 4-level paging
+	 * mode, like when starting 4-level paging kernel via kexec() when
+	 * original kernel worked in 5-level paging mode.
+	 *
+	 * For the trampoline, we need the top page table to reside in lower
+	 * memory as we don't have a way to load 64-bit values into CR3 in
+	 * 32-bit mode.
+	 *
+	 * We go through the trampoline even if we don't have to: if we're
+	 * already in a desired paging mode. This way the trampoline code gets
+	 * tested on every boot.
 	 */
 
 	/* Make sure we have GDT with 32-bit code segment */
@@ -336,13 +352,18 @@ ENTRY(startup_64)
 	/* Save the trampoline address in RCX */
 	movq	%rax, %rcx
 
+	/*
+	 * Load the address of trampoline_return() into RDI.
+	 * It will be used by the trampoline to return to the main code.
+	 */
+	leaq	trampoline_return(%rip), %rdi
 
 	/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
 	pushq	$__KERNEL32_CS
-	leaq	compatible_mode(%rip), %rax
+	leaq	TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
 	pushq	%rax
 	lretq
-lvl5:
+trampoline_return:
 	/* Restore the stack, the 32-bit trampoline uses its own stack */
 	leaq	boot_stack_end(%rbx), %rsp
 
@@ -492,8 +513,14 @@ relocated:
 	jmp	*%rax
 
 	.code32
+/*
+ * This is the 32-bit trampoline that will be copied over to low memory.
+ *
+ * RDI contains the return address (might be above 4G).
+ * ECX contains the base address of the trampoline memory.
+ * Non zero RDX on return means we need to enable 5-level paging.
+ */
 ENTRY(trampoline_32bit_src)
-compatible_mode:
 	/* Set up data and stack segments */
 	movl	$__KERNEL_DS, %eax
 	movl	%eax, %ds
@@ -534,24 +561,34 @@ compatible_mode:
 1:
 	movl	%eax, %cr4
 
-	/* Calculate address we are running at */
-	call	1f
-1:	popl	%edi
-	subl	$1b, %edi
+	/* Calculate address of paging_enabled() once we are executing in the trampoline */
+	leal	paging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
 
-	/* Prepare stack for far return to Long Mode */
+	/* Prepare the stack for far return to Long Mode */
 	pushl	$__KERNEL_CS
-	leal	lvl5(%edi), %eax
-	push	%eax
+	pushl	%eax
 
-	/* Enable paging back */
+	/* Enable paging again */
 	movl	$(X86_CR0_PG | X86_CR0_PE), %eax
 	movl	%eax, %cr0
 
 	lret
 
+	.code64
+paging_enabled:
+	/* Return from the trampoline */
+	jmp	*%rdi
+
+	/*
+         * The trampoline code has a size limit.
+         * Make sure we fail to compile if the trampoline code grows
+         * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes.
+	 */
+	.org	trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
+
+	.code32
 no_longmode:
-	/* This isn't an x86-64 CPU so hang */
+	/* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
 1:
 	hlt
 	jmp     1b
-- 
2.16.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2018-03-12 10:03 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-03-12 10:02 [PATCH 0/4] x86/boot/compressed/64: Switch between paging modes using trampoline Kirill A. Shutemov
2018-03-12 10:02 ` [PATCH 1/4] x86/boot/compressed/64: Make sure we have 32-bit code segment Kirill A. Shutemov
2018-03-12 10:02 ` [PATCH 2/4] x86/boot/compressed/64: Use stack from trampoline memory Kirill A. Shutemov
2018-03-12 10:02 ` [PATCH 3/4] x86/boot/compressed/64: Use page table in " Kirill A. Shutemov
2018-03-12 10:02 ` [PATCH 4/4] x86/boot/compressed/64: Handle 5-level paging boot if kernel is above 4G Kirill A. Shutemov
  -- strict thread matches above, loose matches on Subject: below --
2017-11-01 11:54 [PATCH 0/4] x86: 5-level related changes into decompression code Kirill A. Shutemov
2017-11-01 11:55 ` [PATCH 4/4] x86/boot/compressed/64: Handle 5-level paging boot if kernel is above 4G Kirill A. Shutemov
2017-10-20 19:59 [PATCH 0/4] Boot-time switching between 4- and 5-level paging for 4.15, Part 2 Kirill A. Shutemov
2017-10-20 19:59 ` [PATCH 4/4] x86/boot/compressed/64: Handle 5-level paging boot if kernel is above 4G Kirill A. Shutemov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).