* [PATCH 1/6] arm64: kvm: add a cpu tear-down function
From: James Morse @ 2015-10-12 13:17 UTC (permalink / raw)
To: linux-arm-kernel
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
The CPU must be put back into its initial state, at least in the
following cases, in order to shut down the system and/or re-initialize
CPUs later on:
1) kexec/kdump
2) cpu hotplug (offline)
3) removing kvm as a module
4) resume from hibernate (pgd+stack moved)
To address those issues in later patches, this patch adds a tear-down
function, kvm_reset_cpu(), that disables the MMU and restores the vector
table to the initial stub at EL2.
Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
[use kvm_call_hyp(), simplified mmu-off code]
Signed-off-by: James Morse <james.morse@arm.com>
---
This is based on v4 from
http://lists.infradead.org/pipermail/kexec/2015-May/013709.html.
This patch is superseded by a v5 [0], but v5's changes to the cpu hotplug
hook are causing a problem.
[0] https://lists.linaro.org/pipermail/linaro-kernel/2015-May/021575.html
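For reviewers skimming the series, the teardown path added here boils down
to the following (condensed from the arm.c hunk below; on arm64
__cpu_reset_hyp_mode() is just a kvm_call_hyp() into __kvm_hyp_reset, on
32-bit arm it is currently an empty stub):

void kvm_reset_cpu(void)
{
	phys_addr_t boot_pgd_ptr = kvm_mmu_get_boot_httbr();
	phys_addr_t phys_idmap_start = kvm_get_idmap_start();

	/* Nothing to do if KVM was never initialised on this CPU... */
	if (boot_pgd_ptr == virt_to_phys(NULL) ||
	    phys_idmap_start == virt_to_phys(NULL))
		return;

	/* ...or if the vectors are already the hyp-stub defaults. */
	if (__hyp_get_vectors() == hyp_default_vectors)
		return;

	/* HVC into __kvm_hyp_reset (via its trampoline alias): disable the
	 * MMU at EL2 and reinstall __hyp_stub_vectors. */
	__cpu_reset_hyp_mode(boot_pgd_ptr, phys_idmap_start,
			     kvm_virt_to_trampoline(__kvm_hyp_reset));
}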
arch/arm/include/asm/kvm_asm.h | 1 +
arch/arm/include/asm/kvm_host.h | 7 +++++++
arch/arm/include/asm/kvm_mmu.h | 7 +++++++
arch/arm/kvm/arm.c | 18 ++++++++++++++++++
arch/arm/kvm/init.S | 5 +++++
arch/arm/kvm/mmu.c | 7 +++++--
arch/arm64/include/asm/kvm_asm.h | 1 +
arch/arm64/include/asm/kvm_host.h | 8 ++++++++
arch/arm64/include/asm/kvm_mmu.h | 7 +++++++
arch/arm64/kvm/hyp-init.S | 37 +++++++++++++++++++++++++++++++++++++
10 files changed, 96 insertions(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 194c91b610ff..6ecd59127f3f 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -85,6 +85,7 @@ struct kvm_vcpu;
extern char __kvm_hyp_init[];
extern char __kvm_hyp_init_end[];
+extern char __kvm_hyp_reset[];
extern char __kvm_hyp_exit[];
extern char __kvm_hyp_exit_end[];
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index c4072d9f32c7..f27d45f9e346 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -44,6 +44,7 @@
u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
+void kvm_reset_cpu(void);
void kvm_reset_coprocs(struct kvm_vcpu *vcpu);
struct kvm_arch {
@@ -211,6 +212,12 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr);
}
+static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr,
+ phys_addr_t phys_idmap_start,
+ unsigned long reset_func)
+{
+}
+
static inline int kvm_arch_dev_ioctl_check_extension(long ext)
{
return 0;
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 405aa1883307..64201f4f2de8 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -66,6 +66,8 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
phys_addr_t kvm_mmu_get_httbr(void);
phys_addr_t kvm_mmu_get_boot_httbr(void);
phys_addr_t kvm_get_idmap_vector(void);
+phys_addr_t kvm_get_idmap_start(void);
+extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
int kvm_mmu_init(void);
void kvm_clear_hyp_idmap(void);
@@ -269,6 +271,11 @@ static inline void __kvm_flush_dcache_pud(pud_t pud)
void kvm_set_way_flush(struct kvm_vcpu *vcpu);
void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
+#define kvm_virt_to_trampoline(x) \
+ (TRAMPOLINE_VA \
+ + ((unsigned long)(x) \
+ - ((unsigned long)__hyp_idmap_text_start & PAGE_MASK)))
+
static inline bool __kvm_cpu_uses_extended_idmap(void)
{
return false;
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index dc017adfddc8..f145c4453893 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -937,6 +937,24 @@ static void cpu_init_hyp_mode(void *dummy)
kvm_arm_init_debug();
}
+void kvm_reset_cpu(void)
+{
+ phys_addr_t boot_pgd_ptr = kvm_mmu_get_boot_httbr();
+ phys_addr_t phys_idmap_start = kvm_get_idmap_start();
+
+ /* Is KVM initialised? */
+ if (boot_pgd_ptr == virt_to_phys(NULL) ||
+ phys_idmap_start == virt_to_phys(NULL))
+ return;
+
+ /* Do we need to return the vectors to hyp_default_vectors? */
+ if (__hyp_get_vectors() == hyp_default_vectors)
+ return;
+
+ __cpu_reset_hyp_mode(boot_pgd_ptr, phys_idmap_start,
+ kvm_virt_to_trampoline(__kvm_hyp_reset));
+}
+
static int hyp_init_cpu_notify(struct notifier_block *self,
unsigned long action, void *cpu)
{
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index 3988e72d16ff..23bdeac287da 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -151,6 +151,11 @@ target: @ We're now in the trampoline code, switch page tables
eret
+ .globl __kvm_hyp_reset
+__kvm_hyp_reset:
+ /* not yet implemented */
+ ret lr
+
.ltorg
.globl __kvm_hyp_init_end
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 6984342da13d..88e7d29d8da8 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -31,8 +31,6 @@
#include "trace.h"
-extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
-
static pgd_t *boot_hyp_pgd;
static pgd_t *hyp_pgd;
static pgd_t *merged_hyp_pgd;
@@ -1644,6 +1642,11 @@ phys_addr_t kvm_get_idmap_vector(void)
return hyp_idmap_vector;
}
+phys_addr_t kvm_get_idmap_start(void)
+{
+ return hyp_idmap_start;
+}
+
int kvm_mmu_init(void)
{
int err;
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 5e377101f919..fae48c9584c3 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -108,6 +108,7 @@ struct kvm_vcpu;
extern char __kvm_hyp_init[];
extern char __kvm_hyp_init_end[];
+extern char __kvm_hyp_reset[];
extern char __kvm_hyp_vector[];
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index ed039688c221..91157de8a30a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -44,6 +44,7 @@
int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
+void kvm_reset_cpu(void);
int kvm_arch_dev_ioctl_check_extension(long ext);
struct kvm_arch {
@@ -244,6 +245,13 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
hyp_stack_ptr, vector_ptr);
}
+static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr,
+ phys_addr_t phys_idmap_start,
+ unsigned long reset_func)
+{
+ kvm_call_hyp((void *)reset_func, boot_pgd_ptr, phys_idmap_start);
+}
+
static inline void kvm_arch_hardware_disable(void) {}
static inline void kvm_arch_hardware_unsetup(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 61505676d085..31c52e3bc518 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -98,6 +98,8 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
phys_addr_t kvm_mmu_get_httbr(void);
phys_addr_t kvm_mmu_get_boot_httbr(void);
phys_addr_t kvm_get_idmap_vector(void);
+phys_addr_t kvm_get_idmap_start(void);
+extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
int kvm_mmu_init(void);
void kvm_clear_hyp_idmap(void);
@@ -271,6 +273,11 @@ static inline void __kvm_flush_dcache_pud(pud_t pud)
void kvm_set_way_flush(struct kvm_vcpu *vcpu);
void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
+#define kvm_virt_to_trampoline(x) \
+ (TRAMPOLINE_VA \
+ + ((unsigned long)(x) \
+ - ((unsigned long)__hyp_idmap_text_start & PAGE_MASK)))
+
static inline bool __kvm_cpu_uses_extended_idmap(void)
{
return __cpu_uses_extended_idmap();
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 178ba2248a98..009a9ffdfca3 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -140,6 +140,43 @@ merged:
eret
ENDPROC(__kvm_hyp_init)
+ /*
+ * x0: HYP boot pgd
+ * x1: HYP phys_idmap_start
+ */
+ENTRY(__kvm_hyp_reset)
+ /*
+ * Restore el1's lr so we can eret from here. The stack is inaccessible
+ * after we turn the mmu off. This value was pushed in el1_sync().
+ */
+ pop lr, xzr
+
+ /* We're in trampoline code in VA, switch back to boot page tables */
+ msr ttbr0_el2, x0
+ isb
+
+ /* Invalidate the old TLBs */
+ tlbi alle2
+ dsb sy
+
+ /* Branch into PA space */
+ adr x0, 1f
+ bfi x1, x0, #0, #PAGE_SHIFT
+ br x1
+
+ /* We're now in idmap, disable MMU */
+1: mrs x0, sctlr_el2
+ bic x0, x0, #SCTLR_EL2_M
+ msr sctlr_el2, x0
+ isb
+
+ /* Install stub vectors */
+ adr_l x2, __hyp_stub_vectors
+ msr vbar_el2, x2
+
+ eret
+ENDPROC(__kvm_hyp_reset)
+
.ltorg
.popsection
--
2.1.4
* [PATCH 2/6] arm64: Fold proc-macros.S into assembler.h
From: James Morse @ 2015-10-12 13:17 UTC (permalink / raw)
To: linux-arm-kernel
From: Geoff Levand <geoff@infradead.org>
To allow the assembler macros defined in arch/arm64/mm/proc-macros.S to
be used outside the mm code, move the contents of proc-macros.S to
asm/assembler.h. Also, delete proc-macros.S, and fix up all references
to it.
Signed-off-by: Geoff Levand <geoff@infradead.org>
Signed-off-by: James Morse <james.morse@arm.com>
---
This is v8 from
http://lists.infradead.org/pipermail/kexec/2015-March/013432.html
arch/arm64/include/asm/assembler.h | 48 +++++++++++++++++++++++++++-
arch/arm64/kernel/head.S | 1 -
arch/arm64/kvm/hyp-init.S | 1 -
arch/arm64/mm/cache.S | 2 --
arch/arm64/mm/proc-macros.S | 64 --------------------------------------
arch/arm64/mm/proc.S | 3 --
6 files changed, 47 insertions(+), 72 deletions(-)
delete mode 100644 arch/arm64/mm/proc-macros.S
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index b51f2cc22ca9..91cb311d33de 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -1,5 +1,5 @@
/*
- * Based on arch/arm/include/asm/assembler.h
+ * Based on arch/arm/include/asm/assembler.h, arch/arm/mm/proc-macros.S
*
* Copyright (C) 1996-2000 Russell King
* Copyright (C) 2012 ARM Ltd.
@@ -23,6 +23,8 @@
#ifndef __ASM_ASSEMBLER_H
#define __ASM_ASSEMBLER_H
+#include <asm/asm-offsets.h>
+#include <asm/pgtable-hwdef.h>
#include <asm/ptrace.h>
#include <asm/thread_info.h>
@@ -193,4 +195,48 @@ lr .req x30 // link register
str \src, [\tmp, :lo12:\sym]
.endm
+/*
+ * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
+ */
+ .macro vma_vm_mm, rd, rn
+ ldr \rd, [\rn, #VMA_VM_MM]
+ .endm
+
+/*
+ * mmid - get context id from mm pointer (mm->context.id)
+ */
+ .macro mmid, rd, rn
+ ldr \rd, [\rn, #MM_CONTEXT_ID]
+ .endm
+
+/*
+ * dcache_line_size - get the minimum D-cache line size from the CTR register.
+ */
+ .macro dcache_line_size, reg, tmp
+ mrs \tmp, ctr_el0 // read CTR
+ ubfm \tmp, \tmp, #16, #19 // cache line size encoding
+ mov \reg, #4 // bytes per word
+ lsl \reg, \reg, \tmp // actual cache line size
+ .endm
+
+/*
+ * icache_line_size - get the minimum I-cache line size from the CTR register.
+ */
+ .macro icache_line_size, reg, tmp
+ mrs \tmp, ctr_el0 // read CTR
+ and \tmp, \tmp, #0xf // cache line size encoding
+ mov \reg, #4 // bytes per word
+ lsl \reg, \reg, \tmp // actual cache line size
+ .endm
+
+/*
+ * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
+ */
+ .macro tcr_set_idmap_t0sz, valreg, tmpreg
+#ifndef CONFIG_ARM64_VA_BITS_48
+ ldr_l \tmpreg, idmap_t0sz
+ bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
+#endif
+ .endm
+
#endif /* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 90d09eddd5b2..9ad8b1f15b19 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -31,7 +31,6 @@
#include <asm/cputype.h>
#include <asm/memory.h>
#include <asm/thread_info.h>
-#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/virt.h>
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 009a9ffdfca3..72c093eb885b 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -20,7 +20,6 @@
#include <asm/assembler.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
-#include <asm/pgtable-hwdef.h>
.text
.pushsection .hyp.idmap.text, "ax"
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index eb48d5df4a0f..9e13cb53c927 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -24,8 +24,6 @@
#include <asm/cpufeature.h>
#include <asm/alternative.h>
-#include "proc-macros.S"
-
/*
* flush_icache_range(start,end)
*
diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
deleted file mode 100644
index 4c4d93c4bf65..000000000000
--- a/arch/arm64/mm/proc-macros.S
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Based on arch/arm/mm/proc-macros.S
- *
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-
-/*
- * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
- */
- .macro vma_vm_mm, rd, rn
- ldr \rd, [\rn, #VMA_VM_MM]
- .endm
-
-/*
- * mmid - get context id from mm pointer (mm->context.id)
- */
- .macro mmid, rd, rn
- ldr \rd, [\rn, #MM_CONTEXT_ID]
- .endm
-
-/*
- * dcache_line_size - get the minimum D-cache line size from the CTR register.
- */
- .macro dcache_line_size, reg, tmp
- mrs \tmp, ctr_el0 // read CTR
- ubfm \tmp, \tmp, #16, #19 // cache line size encoding
- mov \reg, #4 // bytes per word
- lsl \reg, \reg, \tmp // actual cache line size
- .endm
-
-/*
- * icache_line_size - get the minimum I-cache line size from the CTR register.
- */
- .macro icache_line_size, reg, tmp
- mrs \tmp, ctr_el0 // read CTR
- and \tmp, \tmp, #0xf // cache line size encoding
- mov \reg, #4 // bytes per word
- lsl \reg, \reg, \tmp // actual cache line size
- .endm
-
-/*
- * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
- */
- .macro tcr_set_idmap_t0sz, valreg, tmpreg
-#ifndef CONFIG_ARM64_VA_BITS_48
- ldr_l \tmpreg, idmap_t0sz
- bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
-#endif
- .endm
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index e4ee7bd8830a..456c1c5f8ecd 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -23,11 +23,8 @@
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/hwcap.h>
-#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
-#include "proc-macros.S"
-
#ifdef CONFIG_ARM64_64K_PAGES
#define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K
#else
--
2.1.4
* [PATCH 3/6] arm64: kernel: Rework finisher callback out of __cpu_suspend_enter().
From: James Morse @ 2015-10-12 13:17 UTC (permalink / raw)
To: linux-arm-kernel
Hibernate could make use of the cpu_suspend() code to save/restore cpu
state; however, it needs to be able to return '0' from the 'finisher'.
Rework cpu_suspend() so that the finisher is called from C code,
independently from the save/restore of cpu state. Space to save the context
is allocated in the caller's stack frame and passed into
__cpu_suspend_enter().
Hibernate's use of this API will look like a copy of the cpu_suspend()
function.
Signed-off-by: James Morse <james.morse@arm.com>
---
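The resulting shape of cpu_suspend() is roughly the following (condensed
from the suspend.c hunk below; debug-state save/restore and the pstate
handling are omitted):

int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
{
	struct mm_struct *mm = current->active_mm;
	struct sleep_stack_data state;	/* context lives in this stack frame */
	int ret = 0;

	if (__cpu_suspend_enter(&state)) {
		/* First, non-zero return: state is saved, run the finisher. */
		ret = fn(arg);
		if (!ret)
			ret = -EOPNOTSUPP;	/* the finisher must not return 0 */
	} else {
		/* Zero 'return': we came back here via cpu_resume(). */
		__cpu_suspend_exit(mm);
	}

	return ret;
}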
arch/arm64/include/asm/suspend.h | 8 ++++
arch/arm64/kernel/asm-offsets.c | 2 +
arch/arm64/kernel/sleep.S | 86 +++++++++++++---------------------------
arch/arm64/kernel/suspend.c | 81 ++++++++++++++++++++++---------------
4 files changed, 86 insertions(+), 91 deletions(-)
diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index 59a5b0f1e81c..a9de0d3f543f 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -2,6 +2,7 @@
#define __ASM_SUSPEND_H
#define NR_CTX_REGS 11
+#define NR_CALLEE_SAVED_REGS 12
/*
* struct cpu_suspend_ctx must be 16-byte aligned since it is allocated on
@@ -21,6 +22,13 @@ struct sleep_save_sp {
phys_addr_t save_ptr_stash_phys;
};
+struct sleep_stack_data {
+ struct cpu_suspend_ctx system_regs;
+ unsigned long callee_saved_regs[NR_CALLEE_SAVED_REGS];
+};
+
extern int cpu_suspend(unsigned long arg, int (*fn)(unsigned long));
extern void cpu_resume(void);
+int __cpu_suspend_enter(struct sleep_stack_data *state);
+void __cpu_suspend_exit(struct mm_struct *mm);
#endif
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 8d89cf8dae55..5daa4e692932 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -160,6 +160,8 @@ int main(void)
DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp));
DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys));
DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash));
+ DEFINE(SLEEP_STACK_DATA_SYSTEM_REGS, offsetof(struct sleep_stack_data, system_regs));
+ DEFINE(SLEEP_STACK_DATA_CALLEE_REGS, offsetof(struct sleep_stack_data, callee_saved_regs));
#endif
return 0;
}
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index f586f7c875e2..6182388e32a5 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -50,36 +50,24 @@
.endm
/*
* Save CPU state for a suspend and execute the suspend finisher.
- * On success it will return 0 through cpu_resume - ie through a CPU
- * soft/hard reboot from the reset vector.
- * On failure it returns the suspend finisher return value or force
- * -EOPNOTSUPP if the finisher erroneously returns 0 (the suspend finisher
- * is not allowed to return, if it does this must be considered failure).
- * It saves callee registers, and allocates space on the kernel stack
- * to save the CPU specific registers + some other data for resume.
+ * This function returns a non-zero value. Resuming through cpu_resume()
+ * will cause 0 to appear to be returned by this function.
*
- * x0 = suspend finisher argument
- * x1 = suspend finisher function pointer
+ * x0 = struct sleep_stack_data area
*/
ENTRY(__cpu_suspend_enter)
- stp x29, lr, [sp, #-96]!
- stp x19, x20, [sp,#16]
- stp x21, x22, [sp,#32]
- stp x23, x24, [sp,#48]
- stp x25, x26, [sp,#64]
- stp x27, x28, [sp,#80]
- /*
- * Stash suspend finisher and its argument in x20 and x19
- */
- mov x19, x0
- mov x20, x1
+ stp x29, lr, [x0, #SLEEP_STACK_DATA_CALLEE_REGS]
+ stp x19, x20, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+16]
+ stp x21, x22, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+32]
+ stp x23, x24, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+48]
+ stp x25, x26, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+64]
+ stp x27, x28, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+80]
+
+ /* save the sp in cpu_suspend_ctx */
mov x2, sp
- sub sp, sp, #CPU_SUSPEND_SZ // allocate cpu_suspend_ctx
- mov x0, sp
- /*
- * x0 now points to struct cpu_suspend_ctx allocated on the stack
- */
- str x2, [x0, #CPU_CTX_SP]
+ str x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP]
+
+ /* find the mpidr_hash */
ldr x1, =sleep_save_sp
ldr x1, [x1, #SLEEP_SAVE_SP_VIRT]
mrs x7, mpidr_el1
@@ -93,34 +81,11 @@ ENTRY(__cpu_suspend_enter)
ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)]
compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10
add x1, x1, x8, lsl #3
+
+ push x29, lr
bl __cpu_suspend_save
- /*
- * Grab suspend finisher in x20 and its argument in x19
- */
- mov x0, x19
- mov x1, x20
- /*
- * We are ready for power down, fire off the suspend finisher
- * in x1, with argument in x0
- */
- blr x1
- /*
- * Never gets here, unless suspend finisher fails.
- * Successful cpu_suspend should return from cpu_resume, returning
- * through this code path is considered an error
- * If the return value is set to 0 force x0 = -EOPNOTSUPP
- * to make sure a proper error condition is propagated
- */
- cmp x0, #0
- mov x3, #-EOPNOTSUPP
- csel x0, x3, x0, eq
- add sp, sp, #CPU_SUSPEND_SZ // rewind stack pointer
- ldp x19, x20, [sp, #16]
- ldp x21, x22, [sp, #32]
- ldp x23, x24, [sp, #48]
- ldp x25, x26, [sp, #64]
- ldp x27, x28, [sp, #80]
- ldp x29, lr, [sp], #96
+ pop x29, lr
+ mov x0, #1
ret
ENDPROC(__cpu_suspend_enter)
.ltorg
@@ -146,12 +111,6 @@ ENDPROC(cpu_resume_mmu)
.popsection
cpu_resume_after_mmu:
mov x0, #0 // return zero on success
- ldp x19, x20, [sp, #16]
- ldp x21, x22, [sp, #32]
- ldp x23, x24, [sp, #48]
- ldp x25, x26, [sp, #64]
- ldp x27, x28, [sp, #80]
- ldp x29, lr, [sp], #96
ret
ENDPROC(cpu_resume_after_mmu)
@@ -168,6 +127,8 @@ ENTRY(cpu_resume)
/* x7 contains hash index, let's use it to grab context pointer */
ldr_l x0, sleep_save_sp + SLEEP_SAVE_SP_PHYS
ldr x0, [x0, x7, lsl #3]
+ add x29, x0, #SLEEP_STACK_DATA_CALLEE_REGS
+ add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS
/* load sp from context */
ldr x2, [x0, #CPU_CTX_SP]
/* load physical address of identity map page table in x1 */
@@ -178,5 +139,12 @@ ENTRY(cpu_resume)
* pointer and x1 to contain physical address of 1:1 page tables
*/
bl cpu_do_resume // PC relative jump, MMU off
+ /* Can't access these by physical address once the MMU is on */
+ ldp x19, x20, [x29, #16]
+ ldp x21, x22, [x29, #32]
+ ldp x23, x24, [x29, #48]
+ ldp x25, x26, [x29, #64]
+ ldp x27, x28, [x29, #80]
+ ldp x29, lr, [x29]
b cpu_resume_mmu // Resume MMU, never returns
ENDPROC(cpu_resume)
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 8297d502217e..2c1a1fd0b4bb 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -9,22 +9,22 @@
#include <asm/suspend.h>
#include <asm/tlbflush.h>
-extern int __cpu_suspend_enter(unsigned long arg, int (*fn)(unsigned long));
+
/*
* This is called by __cpu_suspend_enter() to save the state, and do whatever
* flushing is required to ensure that when the CPU goes to sleep we have
* the necessary data available when the caches are not searched.
*
- * ptr: CPU context virtual address
+ * ptr: sleep_stack_data containing cpu state virtual address.
* save_ptr: address of the location where the context physical address
* must be saved
*/
-void notrace __cpu_suspend_save(struct cpu_suspend_ctx *ptr,
+void notrace __cpu_suspend_save(struct sleep_stack_data *ptr,
phys_addr_t *save_ptr)
{
*save_ptr = virt_to_phys(ptr);
- cpu_do_suspend(ptr);
+ cpu_do_suspend(&ptr->system_regs);
/*
* Only flush the context that must be retrieved with the MMU
* off. VA primitives ensure the flush is applied to all
@@ -50,6 +50,37 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
hw_breakpoint_restore = hw_bp_restore;
}
+void notrace __cpu_suspend_exit(struct mm_struct *mm)
+{
+ /*
+ * We are resuming from reset with TTBR0_EL1 set to the
+ * idmap to enable the MMU; restore the active_mm mappings in
+ * TTBR0_EL1 unless the active_mm == &init_mm, in which case
+ * the thread entered cpu_suspend with TTBR0_EL1 set to
+ * reserved TTBR0 page tables and should be restored as such.
+ */
+ if (mm == &init_mm)
+ cpu_set_reserved_ttbr0();
+ else
+ cpu_switch_mm(mm->pgd, mm);
+
+ flush_tlb_all();
+
+ /*
+ * Restore per-cpu offset before any kernel
+ * subsystem relying on it has a chance to run.
+ */
+ set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+
+ /*
+ * Restore HW breakpoint registers to sane values
+ * before debug exceptions are possibly reenabled
+ * through local_dbg_restore.
+ */
+ if (hw_breakpoint_restore)
+ hw_breakpoint_restore(NULL);
+}
+
/*
* cpu_suspend
*
@@ -60,8 +91,9 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
{
struct mm_struct *mm = current->active_mm;
- int ret;
+ int ret = 0;
unsigned long flags;
+ struct sleep_stack_data state;
/*
* From this point debug exceptions are disabled to prevent
@@ -76,36 +108,21 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
* page tables, so that the thread address space is properly
* set-up on function return.
*/
- ret = __cpu_suspend_enter(arg, fn);
- if (ret == 0) {
- /*
- * We are resuming from reset with TTBR0_EL1 set to the
- * idmap to enable the MMU; restore the active_mm mappings in
- * TTBR0_EL1 unless the active_mm == &init_mm, in which case
- * the thread entered cpu_suspend with TTBR0_EL1 set to
- * reserved TTBR0 page tables and should be restored as such.
- */
- if (mm == &init_mm)
- cpu_set_reserved_ttbr0();
- else
- cpu_switch_mm(mm->pgd, mm);
-
- flush_tlb_all();
-
- /*
- * Restore per-cpu offset before any kernel
- * subsystem relying on it has a chance to run.
- */
- set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+ if (__cpu_suspend_enter(&state)) {
+ /* Call the suspend finisher */
+ ret = fn(arg);
/*
- * Restore HW breakpoint registers to sane values
- * before debug exceptions are possibly reenabled
- * through local_dbg_restore.
+ * Never gets here, unless suspend finisher fails.
+ * Successful cpu_suspend should return from cpu_resume,
+ * returning through this code path is considered an error
+ * If the return value is set to 0 force ret = -EOPNOTSUPP
+ * to make sure a proper error condition is propagated
*/
- if (hw_breakpoint_restore)
- hw_breakpoint_restore(NULL);
- }
+ if (!ret)
+ ret = -EOPNOTSUPP;
+ } else
+ __cpu_suspend_exit(mm);
/*
* Restore pstate flags. OS lock and mdscr have been already
--
2.1.4
* [PATCH 3/6] arm64: kernel: Rework finisher callback out of __cpu_suspend_enter().
From: Lorenzo Pieralisi @ 2015-10-20 11:30 UTC (permalink / raw)
To: linux-arm-kernel
Hi James,
On Mon, Oct 12, 2015 at 02:17:35PM +0100, James Morse wrote:
> Hibernate could make use of the cpu_suspend() code to save/restore cpu
> state, however it needs to be able to return '0' from the 'finisher'.
>
> Rework cpu_suspend() so that the finisher is called from C code,
> independently from the save/restore of cpu state. Space to save the context
> in is allocated in the caller's stack frame, and passed into
> __cpu_suspend_enter().
>
> Hibernate's use of this API will look like a copy of the cpu_suspend()
> function.
>
> Signed-off-by: James Morse <james.morse@arm.com>
> ---
> arch/arm64/include/asm/suspend.h | 8 ++++
> arch/arm64/kernel/asm-offsets.c | 2 +
> arch/arm64/kernel/sleep.S | 86 +++++++++++++---------------------------
> arch/arm64/kernel/suspend.c | 81 ++++++++++++++++++++++---------------
> 4 files changed, 86 insertions(+), 91 deletions(-)
Two minor requests below to update some comments, otherwise:
Acked-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
> index 59a5b0f1e81c..a9de0d3f543f 100644
> --- a/arch/arm64/include/asm/suspend.h
> +++ b/arch/arm64/include/asm/suspend.h
> @@ -2,6 +2,7 @@
> #define __ASM_SUSPEND_H
>
> #define NR_CTX_REGS 11
> +#define NR_CALLEE_SAVED_REGS 12
>
> /*
> * struct cpu_suspend_ctx must be 16-byte aligned since it is allocated on
> @@ -21,6 +22,13 @@ struct sleep_save_sp {
> phys_addr_t save_ptr_stash_phys;
> };
>
> +struct sleep_stack_data {
> + struct cpu_suspend_ctx system_regs;
> + unsigned long callee_saved_regs[NR_CALLEE_SAVED_REGS];
Please add a comment referring to the __cpu_suspend_enter expected
registers layout and how this struct and __cpu_suspend_enter are related.
> +};
> +
> extern int cpu_suspend(unsigned long arg, int (*fn)(unsigned long));
> extern void cpu_resume(void);
> +int __cpu_suspend_enter(struct sleep_stack_data *state);
> +void __cpu_suspend_exit(struct mm_struct *mm);
> #endif
> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> index 8d89cf8dae55..5daa4e692932 100644
> --- a/arch/arm64/kernel/asm-offsets.c
> +++ b/arch/arm64/kernel/asm-offsets.c
> @@ -160,6 +160,8 @@ int main(void)
> DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp));
> DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys));
> DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash));
> + DEFINE(SLEEP_STACK_DATA_SYSTEM_REGS, offsetof(struct sleep_stack_data, system_regs));
> + DEFINE(SLEEP_STACK_DATA_CALLEE_REGS, offsetof(struct sleep_stack_data, callee_saved_regs));
> #endif
> return 0;
> }
> diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
> index f586f7c875e2..6182388e32a5 100644
> --- a/arch/arm64/kernel/sleep.S
> +++ b/arch/arm64/kernel/sleep.S
> @@ -50,36 +50,24 @@
> .endm
> /*
> * Save CPU state for a suspend and execute the suspend finisher.
This function does not call the finisher anymore, please update the
comment above.
> - * On success it will return 0 through cpu_resume - ie through a CPU
> - * soft/hard reboot from the reset vector.
> - * On failure it returns the suspend finisher return value or force
> - * -EOPNOTSUPP if the finisher erroneously returns 0 (the suspend finisher
> - * is not allowed to return, if it does this must be considered failure).
> - * It saves callee registers, and allocates space on the kernel stack
> - * to save the CPU specific registers + some other data for resume.
> + * This function returns a non-zero value. Resuming through cpu_resume()
> + * will cause 0 to appear to be returned by this function.
Nit: please replace the description with an updated one to explain
what __cpu_suspend_enter is meant to achieve, in particular the
reasoning behind the return value (and code path) logic.
Thanks,
Lorenzo
> *
> - * x0 = suspend finisher argument
> - * x1 = suspend finisher function pointer
> + * x0 = struct sleep_stack_data area
> */
> ENTRY(__cpu_suspend_enter)
> - stp x29, lr, [sp, #-96]!
> - stp x19, x20, [sp,#16]
> - stp x21, x22, [sp,#32]
> - stp x23, x24, [sp,#48]
> - stp x25, x26, [sp,#64]
> - stp x27, x28, [sp,#80]
> - /*
> - * Stash suspend finisher and its argument in x20 and x19
> - */
> - mov x19, x0
> - mov x20, x1
> + stp x29, lr, [x0, #SLEEP_STACK_DATA_CALLEE_REGS]
> + stp x19, x20, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+16]
> + stp x21, x22, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+32]
> + stp x23, x24, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+48]
> + stp x25, x26, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+64]
> + stp x27, x28, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+80]
> +
> + /* save the sp in cpu_suspend_ctx */
> mov x2, sp
> - sub sp, sp, #CPU_SUSPEND_SZ // allocate cpu_suspend_ctx
> - mov x0, sp
> - /*
> - * x0 now points to struct cpu_suspend_ctx allocated on the stack
> - */
> - str x2, [x0, #CPU_CTX_SP]
> + str x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP]
> +
> + /* find the mpidr_hash */
> ldr x1, =sleep_save_sp
> ldr x1, [x1, #SLEEP_SAVE_SP_VIRT]
> mrs x7, mpidr_el1
> @@ -93,34 +81,11 @@ ENTRY(__cpu_suspend_enter)
> ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)]
> compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10
> add x1, x1, x8, lsl #3
> +
> + push x29, lr
> bl __cpu_suspend_save
> - /*
> - * Grab suspend finisher in x20 and its argument in x19
> - */
> - mov x0, x19
> - mov x1, x20
> - /*
> - * We are ready for power down, fire off the suspend finisher
> - * in x1, with argument in x0
> - */
> - blr x1
> - /*
> - * Never gets here, unless suspend finisher fails.
> - * Successful cpu_suspend should return from cpu_resume, returning
> - * through this code path is considered an error
> - * If the return value is set to 0 force x0 = -EOPNOTSUPP
> - * to make sure a proper error condition is propagated
> - */
> - cmp x0, #0
> - mov x3, #-EOPNOTSUPP
> - csel x0, x3, x0, eq
> - add sp, sp, #CPU_SUSPEND_SZ // rewind stack pointer
> - ldp x19, x20, [sp, #16]
> - ldp x21, x22, [sp, #32]
> - ldp x23, x24, [sp, #48]
> - ldp x25, x26, [sp, #64]
> - ldp x27, x28, [sp, #80]
> - ldp x29, lr, [sp], #96
> + pop x29, lr
> + mov x0, #1
> ret
> ENDPROC(__cpu_suspend_enter)
> .ltorg
> @@ -146,12 +111,6 @@ ENDPROC(cpu_resume_mmu)
> .popsection
> cpu_resume_after_mmu:
> mov x0, #0 // return zero on success
> - ldp x19, x20, [sp, #16]
> - ldp x21, x22, [sp, #32]
> - ldp x23, x24, [sp, #48]
> - ldp x25, x26, [sp, #64]
> - ldp x27, x28, [sp, #80]
> - ldp x29, lr, [sp], #96
> ret
> ENDPROC(cpu_resume_after_mmu)
>
> @@ -168,6 +127,8 @@ ENTRY(cpu_resume)
> /* x7 contains hash index, let's use it to grab context pointer */
> ldr_l x0, sleep_save_sp + SLEEP_SAVE_SP_PHYS
> ldr x0, [x0, x7, lsl #3]
> + add x29, x0, #SLEEP_STACK_DATA_CALLEE_REGS
> + add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS
> /* load sp from context */
> ldr x2, [x0, #CPU_CTX_SP]
> /* load physical address of identity map page table in x1 */
> @@ -178,5 +139,12 @@ ENTRY(cpu_resume)
> * pointer and x1 to contain physical address of 1:1 page tables
> */
> bl cpu_do_resume // PC relative jump, MMU off
> + /* Can't access these by physical address once the MMU is on */
> + ldp x19, x20, [x29, #16]
> + ldp x21, x22, [x29, #32]
> + ldp x23, x24, [x29, #48]
> + ldp x25, x26, [x29, #64]
> + ldp x27, x28, [x29, #80]
> + ldp x29, lr, [x29]
> b cpu_resume_mmu // Resume MMU, never returns
> ENDPROC(cpu_resume)
> diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
> index 8297d502217e..2c1a1fd0b4bb 100644
> --- a/arch/arm64/kernel/suspend.c
> +++ b/arch/arm64/kernel/suspend.c
> @@ -9,22 +9,22 @@
> #include <asm/suspend.h>
> #include <asm/tlbflush.h>
>
> -extern int __cpu_suspend_enter(unsigned long arg, int (*fn)(unsigned long));
> +
> /*
> * This is called by __cpu_suspend_enter() to save the state, and do whatever
> * flushing is required to ensure that when the CPU goes to sleep we have
> * the necessary data available when the caches are not searched.
> *
> - * ptr: CPU context virtual address
> + * ptr: sleep_stack_data containing cpu state virtual address.
> * save_ptr: address of the location where the context physical address
> * must be saved
> */
> -void notrace __cpu_suspend_save(struct cpu_suspend_ctx *ptr,
> +void notrace __cpu_suspend_save(struct sleep_stack_data *ptr,
> phys_addr_t *save_ptr)
> {
> *save_ptr = virt_to_phys(ptr);
>
> - cpu_do_suspend(ptr);
> + cpu_do_suspend(&ptr->system_regs);
> /*
> * Only flush the context that must be retrieved with the MMU
> * off. VA primitives ensure the flush is applied to all
> @@ -50,6 +50,37 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
> hw_breakpoint_restore = hw_bp_restore;
> }
>
> +void notrace __cpu_suspend_exit(struct mm_struct *mm)
> +{
> + /*
> + * We are resuming from reset with TTBR0_EL1 set to the
> + * idmap to enable the MMU; restore the active_mm mappings in
> + * TTBR0_EL1 unless the active_mm == &init_mm, in which case
> + * the thread entered cpu_suspend with TTBR0_EL1 set to
> + * reserved TTBR0 page tables and should be restored as such.
> + */
> + if (mm == &init_mm)
> + cpu_set_reserved_ttbr0();
> + else
> + cpu_switch_mm(mm->pgd, mm);
> +
> + flush_tlb_all();
> +
> + /*
> + * Restore per-cpu offset before any kernel
> + * subsystem relying on it has a chance to run.
> + */
> + set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
> +
> + /*
> + * Restore HW breakpoint registers to sane values
> + * before debug exceptions are possibly reenabled
> + * through local_dbg_restore.
> + */
> + if (hw_breakpoint_restore)
> + hw_breakpoint_restore(NULL);
> +}
> +
> /*
> * cpu_suspend
> *
> @@ -60,8 +91,9 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
> int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
> {
> struct mm_struct *mm = current->active_mm;
> - int ret;
> + int ret = 0;
> unsigned long flags;
> + struct sleep_stack_data state;
>
> /*
> * From this point debug exceptions are disabled to prevent
> @@ -76,36 +108,21 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
> * page tables, so that the thread address space is properly
> * set-up on function return.
> */
> - ret = __cpu_suspend_enter(arg, fn);
> - if (ret == 0) {
> - /*
> - * We are resuming from reset with TTBR0_EL1 set to the
> - * idmap to enable the MMU; restore the active_mm mappings in
> - * TTBR0_EL1 unless the active_mm == &init_mm, in which case
> - * the thread entered cpu_suspend with TTBR0_EL1 set to
> - * reserved TTBR0 page tables and should be restored as such.
> - */
> - if (mm == &init_mm)
> - cpu_set_reserved_ttbr0();
> - else
> - cpu_switch_mm(mm->pgd, mm);
> -
> - flush_tlb_all();
> -
> - /*
> - * Restore per-cpu offset before any kernel
> - * subsystem relying on it has a chance to run.
> - */
> - set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
> + if (__cpu_suspend_enter(&state)) {
> + /* Call the suspend finisher */
> + ret = fn(arg);
>
> /*
> - * Restore HW breakpoint registers to sane values
> - * before debug exceptions are possibly reenabled
> - * through local_dbg_restore.
> + * Never gets here, unless suspend finisher fails.
> + * Successful cpu_suspend should return from cpu_resume,
> + * returning through this code path is considered an error
> + * If the return value is set to 0 force ret = -EOPNOTSUPP
> + * to make sure a proper error condition is propagated
> */
> - if (hw_breakpoint_restore)
> - hw_breakpoint_restore(NULL);
> - }
> + if (!ret)
> + ret = -EOPNOTSUPP;
> + } else
> + __cpu_suspend_exit(mm);
>
> /*
> * Restore pstate flags. OS lock and mdscr have been already
> --
> 2.1.4
>
* [PATCH 4/6] arm64: Change cpu_resume() to enable mmu early then access sleep_sp by va
From: James Morse @ 2015-10-12 13:17 UTC (permalink / raw)
To: linux-arm-kernel
By enabling the MMU early in cpu_resume(), the sleep_save_sp and stack can
be accessed by VA, which avoids the need to convert addresses and clean to
PoC on the suspend path.
MMU setup is shared with the boot path, meaning the swapper_pg_dir is
restored directly: ttbr1_el1 is no longer saved/restored.
struct sleep_save_sp is removed and replaced with a single array of
pointers.
cpu_do_{suspend,resume} could be further reduced to not restore: cpacr_el1,
mdscr_el1, tcr_el1, vbar_el1 and sctlr_el1, all of which are set by
__cpu_setup(). However, these values all contain res0 bits that may be used
to enable future features.
Signed-off-by: James Morse <james.morse@arm.com>
---
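The data-structure side of the change is small: struct sleep_save_sp becomes
a bare array of pointers, indexed by MPIDR hash, and needs no cache
maintenance because cpu_resume() now reads it by VA with the MMU on
(condensed from the suspend.c hunk below):

/* One slot per MPIDR hash bucket: the VA of the sleeping CPU's
 * struct sleep_stack_data, stored by __cpu_suspend_enter(). */
unsigned long *sleep_save_stash;

static int __init cpu_suspend_init(void)
{
	sleep_save_stash = kcalloc(mpidr_hash_size(), sizeof(*sleep_save_stash),
				   GFP_KERNEL);
	if (WARN_ON(!sleep_save_stash))
		return -ENOMEM;

	return 0;	/* no __flush_dcache_area(): resume reads this with the MMU on */
}
early_initcall(cpu_suspend_init);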
arch/arm64/include/asm/suspend.h | 7 +----
arch/arm64/kernel/asm-offsets.c | 3 ---
arch/arm64/kernel/head.S | 2 +-
arch/arm64/kernel/setup.c | 1 -
arch/arm64/kernel/sleep.S | 57 ++++++++++++++--------------------------
arch/arm64/kernel/suspend.c | 37 +++-----------------------
arch/arm64/mm/proc.S | 27 +++++--------------
7 files changed, 33 insertions(+), 101 deletions(-)
diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index a9de0d3f543f..36f35ba41fa2 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -1,7 +1,7 @@
#ifndef __ASM_SUSPEND_H
#define __ASM_SUSPEND_H
-#define NR_CTX_REGS 11
+#define NR_CTX_REGS 10
#define NR_CALLEE_SAVED_REGS 12
/*
@@ -17,11 +17,6 @@ struct cpu_suspend_ctx {
u64 sp;
} __aligned(16);
-struct sleep_save_sp {
- phys_addr_t *save_ptr_stash;
- phys_addr_t save_ptr_stash_phys;
-};
-
struct sleep_stack_data {
struct cpu_suspend_ctx system_regs;
unsigned long callee_saved_regs[NR_CALLEE_SAVED_REGS];
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 5daa4e692932..3cb1383d3deb 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -157,9 +157,6 @@ int main(void)
DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp));
DEFINE(MPIDR_HASH_MASK, offsetof(struct mpidr_hash, mask));
DEFINE(MPIDR_HASH_SHIFTS, offsetof(struct mpidr_hash, shift_aff));
- DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp));
- DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys));
- DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash));
DEFINE(SLEEP_STACK_DATA_SYSTEM_REGS, offsetof(struct sleep_stack_data, system_regs));
DEFINE(SLEEP_STACK_DATA_CALLEE_REGS, offsetof(struct sleep_stack_data, callee_saved_regs));
#endif
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 9ad8b1f15b19..cf4e0bdf6533 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -630,7 +630,7 @@ ENDPROC(__secondary_switched)
* other registers depend on the function called upon completion
*/
.section ".idmap.text", "ax"
-__enable_mmu:
+ENTRY(__enable_mmu)
ldr x5, =vectors
msr vbar_el1, x5
msr ttbr0_el1, x25 // load TTBR0
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 232247945b1c..5a338235ba1a 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -192,7 +192,6 @@ static void __init smp_build_mpidr_hash(void)
*/
if (mpidr_hash_size() > 4 * num_possible_cpus())
pr_warn("Large number of MPIDR hash buckets detected\n");
- __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash));
}
static void __init setup_processor(void)
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index 6182388e32a5..da4405062d83 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -68,8 +68,8 @@ ENTRY(__cpu_suspend_enter)
str x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP]
/* find the mpidr_hash */
- ldr x1, =sleep_save_sp
- ldr x1, [x1, #SLEEP_SAVE_SP_VIRT]
+ ldr x1, =sleep_save_stash
+ ldr x1, [x1]
mrs x7, mpidr_el1
ldr x9, =mpidr_hash
ldr x10, [x9, #MPIDR_HASH_MASK]
@@ -82,40 +82,26 @@ ENTRY(__cpu_suspend_enter)
compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10
add x1, x1, x8, lsl #3
+ str x0, [x1]
+ add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS
push x29, lr
- bl __cpu_suspend_save
+ bl cpu_do_suspend
pop x29, lr
mov x0, #1
ret
ENDPROC(__cpu_suspend_enter)
.ltorg
-/*
- * x0 must contain the sctlr value retrieved from restored context
- */
- .pushsection ".idmap.text", "ax"
-ENTRY(cpu_resume_mmu)
- ldr x3, =cpu_resume_after_mmu
- msr sctlr_el1, x0 // restore sctlr_el1
- isb
- /*
- * Invalidate the local I-cache so that any instructions fetched
- * speculatively from the PoC are discarded, since they may have
- * been dynamically patched at the PoU.
- */
- ic iallu
- dsb nsh
- isb
- br x3 // global jump to virtual address
-ENDPROC(cpu_resume_mmu)
- .popsection
-cpu_resume_after_mmu:
- mov x0, #0 // return zero on success
- ret
-ENDPROC(cpu_resume_after_mmu)
-
ENTRY(cpu_resume)
bl el2_setup // if in EL2 drop to EL1 cleanly
+ /* enable the MMU early - so we can access sleep_save_stash by va */
+ adr_l lr, __enable_mmu /* __cpu_setup will return here */
+ ldr x27, =_cpu_resume /* __enable_mmu will branch here */
+ adrp x25, idmap_pg_dir
+ adrp x26, swapper_pg_dir
+ b __cpu_setup
+
+ENTRY(_cpu_resume)
mrs x1, mpidr_el1
adrp x8, mpidr_hash
add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address
@@ -125,26 +111,23 @@ ENTRY(cpu_resume)
ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)]
compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2
/* x7 contains hash index, let's use it to grab context pointer */
- ldr_l x0, sleep_save_sp + SLEEP_SAVE_SP_PHYS
+ ldr_l x0, sleep_save_stash
ldr x0, [x0, x7, lsl #3]
add x29, x0, #SLEEP_STACK_DATA_CALLEE_REGS
add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS
/* load sp from context */
ldr x2, [x0, #CPU_CTX_SP]
- /* load physical address of identity map page table in x1 */
- adrp x1, idmap_pg_dir
mov sp, x2
- /*
- * cpu_do_resume expects x0 to contain context physical address
- * pointer and x1 to contain physical address of 1:1 page tables
- */
- bl cpu_do_resume // PC relative jump, MMU off
- /* Can't access these by physical address once the MMU is on */
+ bl cpu_do_resume
+ msr sctlr_el1, x0
+ isb
+
ldp x19, x20, [x29, #16]
ldp x21, x22, [x29, #32]
ldp x23, x24, [x29, #48]
ldp x25, x26, [x29, #64]
ldp x27, x28, [x29, #80]
ldp x29, lr, [x29]
- b cpu_resume_mmu // Resume MMU, never returns
+ mov x0, #0
+ ret
ENDPROC(cpu_resume)
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 2c1a1fd0b4bb..0e761cf34202 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -11,30 +11,6 @@
/*
- * This is called by __cpu_suspend_enter() to save the state, and do whatever
- * flushing is required to ensure that when the CPU goes to sleep we have
- * the necessary data available when the caches are not searched.
- *
- * ptr: sleep_stack_data containing cpu state virtual address.
- * save_ptr: address of the location where the context physical address
- * must be saved
- */
-void notrace __cpu_suspend_save(struct sleep_stack_data *ptr,
- phys_addr_t *save_ptr)
-{
- *save_ptr = virt_to_phys(ptr);
-
- cpu_do_suspend(&ptr->system_regs);
- /*
- * Only flush the context that must be retrieved with the MMU
- * off. VA primitives ensure the flush is applied to all
- * cache levels so context is pushed to DRAM.
- */
- __flush_dcache_area(ptr, sizeof(*ptr));
- __flush_dcache_area(save_ptr, sizeof(*save_ptr));
-}
-
-/*
* This hook is provided so that cpu_suspend code can restore HW
* breakpoints as early as possible in the resume path, before reenabling
* debug exceptions. Code cannot be run from a CPU PM notifier since by the
@@ -134,22 +110,17 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
return ret;
}
-struct sleep_save_sp sleep_save_sp;
+unsigned long *sleep_save_stash;
static int __init cpu_suspend_init(void)
{
- void *ctx_ptr;
-
/* ctx_ptr is an array of physical addresses */
- ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(phys_addr_t), GFP_KERNEL);
+ sleep_save_stash = kcalloc(mpidr_hash_size(), sizeof(*sleep_save_stash),
+ GFP_KERNEL);
- if (WARN_ON(!ctx_ptr))
+ if (WARN_ON(!sleep_save_stash))
return -ENOMEM;
- sleep_save_sp.save_ptr_stash = ctx_ptr;
- sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr);
- __flush_dcache_area(&sleep_save_sp, sizeof(struct sleep_save_sp));
-
return 0;
}
early_initcall(cpu_suspend_init);
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 456c1c5f8ecd..b3afb6123c81 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -59,20 +59,17 @@ ENTRY(cpu_do_suspend)
mrs x2, tpidr_el0
mrs x3, tpidrro_el0
mrs x4, contextidr_el1
- mrs x5, mair_el1
mrs x6, cpacr_el1
- mrs x7, ttbr1_el1
mrs x8, tcr_el1
mrs x9, vbar_el1
mrs x10, mdscr_el1
mrs x11, oslsr_el1
mrs x12, sctlr_el1
stp x2, x3, [x0]
- stp x4, x5, [x0, #16]
- stp x6, x7, [x0, #32]
- stp x8, x9, [x0, #48]
- stp x10, x11, [x0, #64]
- str x12, [x0, #80]
+ stp x4, xzr, [x0, #16]
+ stp x6, x8, [x0, #32]
+ stp x9, x10, [x0, #48]
+ stp x11, x12, [x0, #64]
ret
ENDPROC(cpu_do_suspend)
@@ -80,29 +77,20 @@ ENDPROC(cpu_do_suspend)
* cpu_do_resume - restore CPU register context
*
* x0: Physical address of context pointer
- * x1: ttbr0_el1 to be restored
*
* Returns:
* sctlr_el1 value in x0
*/
ENTRY(cpu_do_resume)
- /*
- * Invalidate local tlb entries before turning on MMU
- */
- tlbi vmalle1
ldp x2, x3, [x0]
ldp x4, x5, [x0, #16]
- ldp x6, x7, [x0, #32]
- ldp x8, x9, [x0, #48]
- ldp x10, x11, [x0, #64]
- ldr x12, [x0, #80]
+ ldp x6, x8, [x0, #32]
+ ldp x9, x10, [x0, #48]
+ ldp x11, x12, [x0, #64]
msr tpidr_el0, x2
msr tpidrro_el0, x3
msr contextidr_el1, x4
- msr mair_el1, x5
msr cpacr_el1, x6
- msr ttbr0_el1, x1
- msr ttbr1_el1, x7
tcr_set_idmap_t0sz x8, x7
msr tcr_el1, x8
msr vbar_el1, x9
@@ -113,7 +101,6 @@ ENTRY(cpu_do_resume)
ubfx x11, x11, #1, #1
msr oslar_el1, x11
mov x0, x12
- dsb nsh // Make sure local tlb invalidation completed
isb
ret
ENDPROC(cpu_do_resume)
--
2.1.4
* [PATCH 5/6] arm64: kernel: Include _AC definition in page.h
From: James Morse @ 2015-10-12 13:17 UTC (permalink / raw)
To: linux-arm-kernel
page.h uses '_AC' in the definition of PAGE_SIZE, but doesn't include
linux/const.h where this is defined. This produces build warnings when only
asm/page.h is included by asm code.
Signed-off-by: James Morse <james.morse@arm.com>
---
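For context, the existing definitions in page.h look roughly like this
(quoted from memory, not part of the hunk below), which is where the
dependency on <linux/const.h> comes from:

/* arch/arm64/include/asm/page.h, existing code (approximate) */
#define PAGE_SHIFT	12	/* 16 with CONFIG_ARM64_64K_PAGES */
#define PAGE_SIZE	(_AC(1, UL) << PAGE_SHIFT)	/* _AC() lives in <linux/const.h> */
#define PAGE_MASK	(~(PAGE_SIZE - 1))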
arch/arm64/include/asm/page.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 7d9c7e4a424b..74f67d049a63 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -19,6 +19,8 @@
#ifndef __ASM_PAGE_H
#define __ASM_PAGE_H
+#include <linux/const.h>
+
/* PAGE_SHIFT determines the page size */
#ifdef CONFIG_ARM64_64K_PAGES
#define PAGE_SHIFT 16
--
2.1.4
* [PATCH 6/6] arm64: kernel: Add support for hibernate/suspend-to-disk.
From: James Morse @ 2015-10-12 13:17 UTC (permalink / raw)
To: linux-arm-kernel
Add support for hibernate/suspend-to-disk.
Suspend borrows code from cpu_suspend() to write cpu state onto the stack,
before calling swsusp_save() to save the memory image.
Restore creates a set of temporary page tables, covering only the linear
map, and copies the restore code to a 'safe' page, then
uses the copy to restore the memory image. It calls into cpu_resume(),
and then follows the normal cpu_suspend() path back into the suspend code.
The suspend C code also includes some post-hibernate cache cleanup.
The implementation assumes that exactly the same kernel is booted on the
same hardware, and that the kernel is loaded at the same physical address.
Signed-off-by: James Morse <james.morse@arm.com>
---
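At a high level the restore side looks like the pseudocode below. Only
swsusp_arch_suspend_exit(), the section markers and swapper_pg_dir come from
this patch; the two helpers are illustrative placeholders for the page-table
and safe-page handling done in hibernate.c:

/* illustrative placeholders, not from the patch: */
extern phys_addr_t build_tmp_linear_map(void);
extern void *copy_to_safe_page(void *src, size_t len);

int swsusp_arch_resume(void)
{
	size_t exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start;
	phys_addr_t tmp_pg_dir;
	void (*exit_fn)(phys_addr_t, phys_addr_t);

	/* Temporary page tables covering only the linear map, so the image
	 * can be restored without unmapping the code doing the restore. */
	tmp_pg_dir = build_tmp_linear_map();

	/* Copy .hibernate_exit.text to a 'safe' page the restored image
	 * will not overwrite, and clean it for execution. */
	exit_fn = (void *)copy_to_safe_page(__hibernate_exit_text_start,
					    exit_size);

	/* The copy walks restore_pblist, rewrites memory, cleans the kernel
	 * text to PoC and finally branches into _cpu_resume(). */
	exit_fn(tmp_pg_dir, virt_to_phys(swapper_pg_dir));

	return 0;	/* never reached */
}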
arch/arm64/Kconfig | 3 +
arch/arm64/include/asm/suspend.h | 5 +
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 4 +
arch/arm64/kernel/hibernate-asm.S | 133 ++++++++++++
arch/arm64/kernel/hibernate.c | 441 ++++++++++++++++++++++++++++++++++++++
arch/arm64/kernel/sleep.S | 1 +
arch/arm64/kernel/vmlinux.lds.S | 15 ++
8 files changed, 603 insertions(+)
create mode 100644 arch/arm64/kernel/hibernate-asm.S
create mode 100644 arch/arm64/kernel/hibernate.c
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 07d1811aa03f..d081dbc35335 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -707,6 +707,9 @@ menu "Power management options"
source "kernel/power/Kconfig"
+config ARCH_HIBERNATION_POSSIBLE
+ def_bool y
+
config ARCH_SUSPEND_POSSIBLE
def_bool y
diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index 36f35ba41fa2..d7405ca4e6c8 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -22,6 +22,11 @@ struct sleep_stack_data {
unsigned long callee_saved_regs[NR_CALLEE_SAVED_REGS];
};
+extern int swsusp_arch_suspend(void);
+extern int swsusp_arch_resume(void);
+int swsusp_arch_suspend_enter(struct cpu_suspend_ctx *ptr);
+void __noreturn swsusp_arch_suspend_exit(phys_addr_t tmp_pg_dir,
+ phys_addr_t swapper_pg_dir);
extern int cpu_suspend(unsigned long arg, int (*fn)(unsigned long));
extern void cpu_resume(void);
int __cpu_suspend_enter(struct sleep_stack_data *state);
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 22dc9bc781be..b9151ae4a7ae 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -36,6 +36,7 @@ arm64-obj-$(CONFIG_EFI) += efi.o efi-stub.o efi-entry.o
arm64-obj-$(CONFIG_PCI) += pci.o
arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o
arm64-obj-$(CONFIG_ACPI) += acpi.o
+arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
obj-y += $(arm64-obj-y) vdso/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 3cb1383d3deb..b5d9495a94a1 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -22,6 +22,7 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/kvm_host.h>
+#include <linux/suspend.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/smp_plat.h>
@@ -160,5 +161,8 @@ int main(void)
DEFINE(SLEEP_STACK_DATA_SYSTEM_REGS, offsetof(struct sleep_stack_data, system_regs));
DEFINE(SLEEP_STACK_DATA_CALLEE_REGS, offsetof(struct sleep_stack_data, callee_saved_regs));
#endif
+ DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address));
+ DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address));
+ DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next));
return 0;
}
diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S
new file mode 100644
index 000000000000..267510138d78
--- /dev/null
+++ b/arch/arm64/kernel/hibernate-asm.S
@@ -0,0 +1,133 @@
+#include <linux/linkage.h>
+#include <linux/errno.h>
+
+#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
+#include <asm/cputype.h>
+#include <asm/memory.h>
+#include <asm/page.h>
+
+#define KERNEL_START _text
+#define KERNEL_END _end
+
+/*
+ * void __clean_dcache_pou(unsigned long kaddr, unsigned long size)
+ *
+ * Clean the data held in kaddr to the PoU, for later execution.
+ * Based on flush_icache_range().
+ * N.B This function does not invalidate the icache, or provide a barrier,
+ * use flush_icache_range() if that is what you wanted.
+ *
+ * x0: kaddr
+ * x1: size
+ */
+ENTRY(__clean_dcache_pou)
+ dcache_line_size x2, x3
+ add x1, x0, x1
+ sub x3, x2, #1
+ bic x0, x0, x3
+1: dc cvau, x0 // clean D line / unified line
+ add x0, x0, x2
+ cmp x0, x1
+ b.lo 1b
+ ret
+ENDPROC(__clean_dcache_pou)
+
+
+/*
+ * Corrupt memory.
+ *
+ * Loads temporary page tables then restores the memory image.
+ * Finally branches to cpu_resume() to restore the state saved by
+ * swsusp_arch_suspend().
+ *
+ * Because this code has to be copied to a safe_page, it can't call out to
+ * other functions by pc-relative address. Also remember that it may be
+ * mid-way through over-writing other functions. For this reason it contains
+ * a copy of copy_page() and code from flush_icache_range().
+ *
+ * All of memory gets written to, including code. We need to clean the kernel
+ * text to the PoC before secondary cores can be booted. The modules range and
+ * userspace are somewhat tricky, and are done after we return into
+ * swsusp_arch_suspend().
+ *
+ * x0: physical address of temporary page tables.
+ * x1: physical address of swapper page tables.
+ */
+.pushsection ".hibernate_exit.text", "ax"
+ENTRY(swsusp_arch_suspend_exit)
+ /* Temporary page tables are a copy, so no need for a trampoline here */
+ msr ttbr1_el1, x0
+ isb
+ tlbi vmalle1is
+ ic ialluis
+ isb
+
+ mov x20, x1
+
+ /* walk the restore_pblist and use copy_page() to over-write memory */
+ ldr x19, =restore_pblist
+ ldr x19, [x19]
+
+2: ldr x0, [x19, #HIBERN_PBE_ORIG]
+ ldr x1, [x19, #HIBERN_PBE_ADDR]
+
+ /* arch/arm64/lib/copy_page.S:copy_page() */
+ prfm pldl1strm, [x1, #64]
+3: ldp x2, x3, [x1]
+ ldp x4, x5, [x1, #16]
+ ldp x6, x7, [x1, #32]
+ ldp x8, x9, [x1, #48]
+ add x1, x1, #64
+ prfm pldl1strm, [x1, #64]
+ stnp x2, x3, [x0]
+ stnp x4, x5, [x0, #16]
+ stnp x6, x7, [x0, #32]
+ stnp x8, x9, [x0, #48]
+ add x0, x0, #64
+ tst x1, #(PAGE_SIZE - 1)
+ b.ne 3b
+
+ ldr x19, [x19, #HIBERN_PBE_NEXT]
+ cbnz x19, 2b
+
+ dsb ish // memory restore must finish before cleaning
+
+ ldr x0, =KERNEL_START
+ ldr x1, =KERNEL_END
+ /* Clean the kernel text to PoC - based on flush_icache_range() */
+ dcache_line_size x2, x3
+ sub x3, x2, #1
+ bic x4, x0, x3
+4: dc cvac, x4
+ add x4, x4, x2
+ cmp x4, x1
+ b.lo 4b
+
+ dsb ish
+
+ /*
+ * branch into the restored kernel - so that when we restore the page
+ * tables, code continues to be executable.
+ */
+ ldr x1, =__hibernate_exit
+ br x1
+
+ .ltorg
+ENDPROC(swsusp_arch_suspend_exit)
+.popsection
+
+/*
+ * Reset the page tables, and wake up in cpu_resume().
+ * Temporary page tables were a copy, so again, no trampoline here.
+ *
+ * x20: physical address of swapper_pg_dir
+ */
+ENTRY(__hibernate_exit)
+ msr ttbr1_el1, x20
+ isb
+ tlbi vmalle1is
+ ic ialluis
+ isb
+ b _cpu_resume
+ENDPROC(__hibernate_exit)
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
new file mode 100644
index 000000000000..5e0683752dbf
--- /dev/null
+++ b/arch/arm64/kernel/hibernate.c
@@ -0,0 +1,441 @@
+/*
+ * Hibernate support specific for ARM64
+ *
+ * Derived from work on ARM hibernation support by:
+ *
+ * Ubuntu project, hibernation support for mach-dove
+ * Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu)
+ * Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.)
+ * https://lkml.org/lkml/2010/6/18/4
+ * https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html
+ * https://patchwork.kernel.org/patch/96442/
+ *
+ * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+#define pr_fmt(x) "hibernate: " x
+#include <linux/kvm_host.h>
+#include <linux/mm.h>
+#include <linux/pm.h>
+#include <linux/sched.h>
+#include <linux/suspend.h>
+#include <linux/version.h>
+
+#include <asm/barrier.h>
+#include <asm/cacheflush.h>
+#include <asm/irqflags.h>
+#include <asm/memory.h>
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/sections.h>
+#include <asm/suspend.h>
+
+/*
+ * for_each_vma() - iterate through each vma in use by an mm.
+ * @mm: struct mm_struct * to read.
+ * @vma: struct vm_area_struct *, the current vma.
+ *
+ * Iterates through an mm's vma map. You should hold mm->mmap_sem for reading.
+ */
+#define for_each_vma(mm, vma) \
+ for (vma = mm->mmap; vma != NULL; vma = vma->vm_next)
+
+/* These are necessary to build without ifdefery */
+#ifndef pmd_index
+#define pmd_index(x) 0
+#endif
+#ifndef pud_index
+#define pud_index(x) 0
+#endif
+
+/*
+ * Clean the provided range to the PoU - used on the modules+user space ranges.
+ */
+void __clean_dcache_pou(unsigned long kaddr, unsigned long size);
+
+/*
+ * Start/end of the hibernate exit code, this must be copied to a 'safe'
+ * location in memory, and executed from there.
+ */
+extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
+
+int pfn_is_nosave(unsigned long pfn)
+{
+ unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin);
+ unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1);
+
+ return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn);
+}
+
+void notrace save_processor_state(void)
+{
+ WARN_ON(num_online_cpus() != 1);
+ local_fiq_disable();
+}
+
+void notrace restore_processor_state(void)
+{
+ local_fiq_enable();
+}
+
+/*
+ * Heavily based on the version in arch/x86.
+ * TODO: move this out of arch/.
+ */
+pte_t *lookup_address(pgd_t *pgd, unsigned long address, size_t *length)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ *length = PGDIR_SIZE;
+ if (pgd_none(*pgd))
+ return NULL;
+
+ *length = PUD_SIZE;
+ pud = pud_offset(pgd, address);
+ if (pud_none(*pud))
+ return NULL;
+
+ if (pud_sect(*pud) || !pud_present(*pud))
+ return (pte_t *)pud;
+
+ *length = PMD_SIZE;
+ pmd = pmd_offset(pud, address);
+ if (pmd_none(*pmd))
+ return NULL;
+
+ if (pmd_sect(*pmd) || !pmd_present(*pmd))
+ return (pte_t *)pmd;
+
+ *length = PAGE_SIZE;
+ pte = pte_offset_kernel(pmd, address);
+ if (pte_none(*pte))
+ return NULL;
+ return pte;
+}
+
+/*
+ * Walk the provided mm's page tables, from start_addr to end_addr. Translate
+ * each page to its alias in the linear map, and clean that to the PoU.
+ * This is safe to call on user-space mm's, as all the access is to page tables
+ * and kernel linear-map addresses.
+ *
+ * Uses __clean_dcache_pou(), which does not provide any barriers or icache
+ * maintenance. Ensure start_addr is page aligned.
+ */
+static void clean_mapped_range(struct mm_struct *mm, unsigned long start_addr,
+ unsigned long end_addr)
+{
+ pte_t *pte;
+ size_t length;
+ unsigned long map_addr;
+ unsigned long linear_addr;
+
+ for (map_addr = start_addr; map_addr < end_addr; map_addr += length) {
+ pte = lookup_address(pgd_offset(mm, map_addr), map_addr,
+ &length);
+ /* length is valid, even if pte is NULL */
+ if (!pte || !pte_valid(*pte))
+ continue;
+
+ linear_addr = (unsigned long)pfn_to_kaddr(pte_pfn(*pte));
+ __clean_dcache_pou(linear_addr, length);
+ }
+}
+
+int swsusp_arch_suspend(void)
+{
+ int ret = 0;
+ unsigned long flags;
+ struct task_struct *p;
+ struct vm_area_struct *vma;
+ struct sleep_stack_data state;
+ struct mm_struct *mm = current->active_mm;
+
+ local_dbg_save(flags);
+
+ if (__cpu_suspend_enter(&state))
+ ret = swsusp_save();
+ else {
+ __cpu_suspend_exit(mm);
+
+ pr_info("Performing cache maintenance.\n");
+
+ /*
+ * We clean the 'tricky' cache ranges here. Modules and user
+ * space executable code may have been written to via its
+ * alias in the kernel linear mapping.
+ *
+ * To clean these ranges, we walk the page tables to find the
+ * physical pages, and then their position in the linear map.
+ *
+ * The restore_pblist used during restore only contains pages
+ * that were in use - other pages containing executable code
+ * may have been written by core hibernate code.
+ */
+ clean_mapped_range(&init_mm, MODULES_VADDR, MODULES_END);
+
+ /*
+ * Any user space executable code that isn't going to be
+ * reloaded from disk (e.g. jit code) is now potentially
+ * in the data cache, and needs cleaning.
+ *
+ * TODO: Some pages are mapped to user-space many times.
+ * Implement a 'cleaned' bitmap so we only clean each
+ * page once.
+ */
+ read_lock(&tasklist_lock);
+ for_each_process(p) {
+ if (!p->mm || p->mm == &init_mm)
+ continue;
+
+ down_read(&p->mm->mmap_sem);
+ for_each_vma(p->mm, vma) {
+ if (!(vma->vm_flags & VM_EXEC))
+ continue;
+
+ clean_mapped_range(p->mm, vma->vm_start,
+ vma->vm_end);
+ }
+ up_read(&p->mm->mmap_sem);
+ }
+ read_unlock(&tasklist_lock);
+
+ /* page tables may still be cached - how does this affect DMA? */
+
+ /* all cache cleaning should have finished */
+ dsb(ish);
+ __flush_icache_all();
+ }
+
+ local_dbg_restore(flags);
+
+ return ret;
+}
+
+static int copy_pte(pmd_t *dst, pmd_t *src, unsigned long start_addr)
+{
+ int i;
+ pte_t *old_pte = pte_offset_kernel(src, start_addr);
+ pte_t *new_pte = pte_offset_kernel(dst, start_addr);
+
+ for (i = pte_index(start_addr); i < PTRS_PER_PTE;
+ i++, old_pte++, new_pte++) {
+ if (pte_val(*old_pte))
+ set_pte(new_pte,
+ __pte(pte_val(*old_pte) & ~PTE_RDONLY));
+ }
+
+ return 0;
+}
+
+static int copy_pmd(pud_t *dst, pud_t *src, unsigned long start_addr)
+{
+ int i;
+ int rc = 0;
+ pte_t *new_pte;
+ pmd_t *old_pmd = pmd_offset(src, start_addr);
+ pmd_t *new_pmd = pmd_offset(dst, start_addr);
+
+ for (i = pmd_index(start_addr); i < PTRS_PER_PMD;
+ i++, start_addr += PMD_SIZE, old_pmd++, new_pmd++) {
+ if (!pmd_val(*old_pmd))
+ continue;
+
+ if (pmd_table(*(old_pmd))) {
+ new_pte = (pte_t *)get_safe_page(GFP_ATOMIC);
+ if (!new_pte) {
+ rc = -ENOMEM;
+ break;
+ }
+
+ set_pmd(new_pmd, __pmd(virt_to_phys(new_pte)
+ | PMD_TYPE_TABLE));
+
+ rc = copy_pte(new_pmd, old_pmd, start_addr);
+ if (rc)
+ break;
+ } else
+ set_pmd(new_pmd,
+ __pmd(pmd_val(*old_pmd) & ~PMD_SECT_RDONLY));
+ }
+
+ return rc;
+}
+
+static int copy_pud(pgd_t *dst, pgd_t *src, unsigned long start_addr)
+{
+ int i;
+ int rc = 0;
+ pmd_t *new_pmd;
+ pud_t *old_pud = pud_offset(src, start_addr);
+ pud_t *new_pud = pud_offset(dst, start_addr);
+
+ for (i = pud_index(start_addr); i < PTRS_PER_PUD;
+ i++, start_addr += PUD_SIZE, old_pud++, new_pud++) {
+ if (!pud_val(*old_pud))
+ continue;
+
+ if (pud_table(*(old_pud))) {
+ if (PTRS_PER_PMD != 1) {
+ new_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
+ if (!new_pmd) {
+ rc = -ENOMEM;
+ break;
+ }
+
+ set_pud(new_pud, __pud(virt_to_phys(new_pmd)
+ | PUD_TYPE_TABLE));
+ }
+
+ rc = copy_pmd(new_pud, old_pud, start_addr);
+ if (rc)
+ break;
+ } else
+ set_pud(new_pud,
+ __pud(pud_val(*old_pud) & ~PMD_SECT_RDONLY));
+ }
+
+ return rc;
+}
+
+static int copy_linear_map(pgd_t *new_pgd)
+{
+ int i;
+ int rc = 0;
+ pud_t *new_pud;
+ unsigned long start_addr = PAGE_OFFSET;
+ pgd_t *old_pgd = pgd_offset_k(start_addr);
+
+ new_pgd += pgd_index(start_addr);
+
+ for (i = pgd_index(start_addr); i < PTRS_PER_PGD;
+ i++, start_addr += PGDIR_SIZE, old_pgd++, new_pgd++) {
+ if (!pgd_val(*old_pgd))
+ continue;
+
+ if (PTRS_PER_PUD != 1) {
+ new_pud = (pud_t *)get_safe_page(GFP_ATOMIC);
+ if (!new_pud) {
+ rc = -ENOMEM;
+ break;
+ }
+
+ set_pgd(new_pgd, __pgd(virt_to_phys(new_pud)
+ | PUD_TYPE_TABLE));
+ }
+
+ rc = copy_pud(new_pgd, old_pgd, start_addr);
+ if (rc)
+ break;
+ }
+
+ return rc;
+}
+
+/*
+ * Set up, then resume from the hibernate image using swsusp_arch_suspend_exit().
+ *
+ * Memory allocated by get_safe_page() will be dealt with by the hibernate code;
+ * we don't need to free it here.
+ *
+ * Allocate a safe zero page to use as ttbr0, as all existing page tables, and
+ * even the empty_zero_page will be overwritten.
+ */
+int swsusp_arch_resume(void)
+{
+ int rc = 0;
+ pgd_t *pgd;
+ size_t length;
+ size_t exit_size;
+ pgd_t *tmp_pg_dir;
+ pte_t *exit_page_pte;
+ pte_t exit_page_pte_orig;
+ unsigned long exit_page;
+ void *safe_zero_page_mem;
+ void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t);
+
+ /* Copy swsusp_arch_suspend_exit() to a safe page. */
+ exit_page = get_safe_page(GFP_ATOMIC);
+ if (!exit_page) {
+ pr_err("Failed to allocate memory for hibernate_exit code.");
+ rc = -ENOMEM;
+ goto out;
+ }
+ exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start;
+ memcpy((void *)exit_page, __hibernate_exit_text_start, exit_size);
+ flush_icache_range(exit_page, exit_page + exit_size);
+ if (IS_ENABLED(CONFIG_DEBUG_RODATA)) {
+ /*
+ * set_memory_x() is only for the module ranges. We only have
+ * the linear-map mapped - so need to make the copied page
+ * executable now, and when we run with the copied page tables.
+ * The process of restoring the hibernate kernel will undo
+ * this change.
+ */
+ pgd = pgd_offset(&init_mm, exit_page);
+ exit_page_pte = lookup_address(pgd, exit_page, &length);
+ if (exit_page_pte) {
+ exit_page_pte_orig = *exit_page_pte;
+ set_pte_at(&init_mm, exit_page, exit_page_pte,
+ __pte(pte_val(*exit_page_pte) & ~PTE_PXN));
+ flush_tlb_kernel_range(exit_page, exit_page + PAGE_SIZE);
+ }
+ else {
+ pr_err("Failed to find page table entry for hibernate_exit code!");
+ rc = -EFAULT;
+ goto out;
+ }
+ }
+ hibernate_exit = (void *)exit_page;
+
+ /*
+ * Even the zero page may get overwritten during restore.
+ * get_safe_page() only returns zero'd pages.
+ */
+ safe_zero_page_mem = (void *)get_safe_page(GFP_ATOMIC);
+ if (!safe_zero_page_mem) {
+ pr_err("Failed to allocate memory for zero page.");
+ rc = -ENOMEM;
+ goto pte_undo;
+ }
+ empty_zero_page = virt_to_page(safe_zero_page_mem);
+ cpu_set_reserved_ttbr0();
+
+ /*
+ * Restoring the memory image will overwrite the ttbr1 page tables.
+ * Create a second copy, of just the linear map, and use this when
+ * restoring.
+ */
+ tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
+ if (!tmp_pg_dir) {
+ pr_err("Failed to allocate memory for temporary page tables.");
+ rc = -ENOMEM;
+ goto pte_undo;
+ }
+ rc = copy_linear_map(tmp_pg_dir);
+ if (rc)
+ goto pte_undo;
+
+ /*
+ * EL2 may get upset if we overwrite its page-tables/stack.
+ * kvm_reset_cpu() returns EL2 to the hyp stub. This isn't needed
+ * on normal suspend/resume as PSCI prevents us from ruining EL2.
+ */
+ if (IS_ENABLED(CONFIG_KVM_ARM_HOST))
+ kvm_reset_cpu();
+
+ hibernate_exit(virt_to_phys(tmp_pg_dir), virt_to_phys(swapper_pg_dir));
+
+pte_undo:
+ if (IS_ENABLED(CONFIG_DEBUG_RODATA)) {
+ set_pte_at(&init_mm, exit_page, exit_page_pte,
+ exit_page_pte_orig);
+ flush_tlb_kernel_range(exit_page, exit_page + PAGE_SIZE);
+ }
+out:
+ return rc;
+}
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index da4405062d83..f58008d6dadf 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -2,6 +2,7 @@
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/assembler.h>
+#include <asm/memory.h>
.text
/*
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 98073332e2d0..3d8284d91f4c 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -44,6 +44,16 @@ jiffies = jiffies_64;
*(.idmap.text) \
VMLINUX_SYMBOL(__idmap_text_end) = .;
+#ifdef CONFIG_HIBERNATION
+#define HIBERNATE_TEXT \
+ . = ALIGN(SZ_4K); \
+ VMLINUX_SYMBOL(__hibernate_exit_text_start) = .;\
+ *(.hibernate_exit.text) \
+ VMLINUX_SYMBOL(__hibernate_exit_text_end) = .;
+#else
+#define HIBERNATE_TEXT
+#endif
+
/*
* The size of the PE/COFF section that covers the kernel image, which
* runs from stext to _edata, must be a round multiple of the PE/COFF
@@ -102,6 +112,7 @@ SECTIONS
LOCK_TEXT
HYPERVISOR_TEXT
IDMAP_TEXT
+ HIBERNATE_TEXT
*(.fixup)
*(.gnu.warning)
. = ALIGN(16);
@@ -181,6 +192,10 @@ ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
"HYP init code too big or misaligned")
ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
"ID map text too big or misaligned")
+#ifdef CONFIG_HIBERNATION
+ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1))
+ <= SZ_4K, "Hibernate exit text too big or misaligned")
+#endif
/*
* If padding is applied before .head.text, virt<->phys conversions will fail.
--
2.1.4
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 6/6] arm64: kernel: Add support for hibernate/suspend-to-disk.
2015-10-12 13:17 ` [PATCH 6/6] arm64: kernel: Add support for hibernate/suspend-to-disk James Morse
@ 2015-10-22 10:38 ` Lorenzo Pieralisi
0 siblings, 0 replies; 9+ messages in thread
From: Lorenzo Pieralisi @ 2015-10-22 10:38 UTC (permalink / raw)
To: linux-arm-kernel
Hi Pavel, Rafael,
On Mon, Oct 12, 2015 at 02:17:38PM +0100, James Morse wrote:
[...]
> +/*
> + * Walk the provided mm's page tables, from start_addr to end_addr. Translate
> + * each page to its alias in the linear map, and clean that to the PoU.
> + * This is safe to call on user-space mm's, as all the access is to page tables
> + * and kernel linear-map addresses.
> + *
> + * Uses __clean_dcache_pou(), which does not provide any barriers or icache
> + * maintenance. Ensure start_addr is page aligned.
> + */
> +static void clean_mapped_range(struct mm_struct *mm, unsigned long start_addr,
> + unsigned long end_addr)
> +{
> + pte_t *pte;
> + size_t length;
> + unsigned long map_addr;
> + unsigned long linear_addr;
> +
> + for (map_addr = start_addr; map_addr < end_addr; map_addr += length) {
> + pte = lookup_address(pgd_offset(mm, map_addr), map_addr,
> + &length);
> + /* length is valid, even if pte is NULL */
> + if (!pte || !pte_valid(*pte))
> + continue;
> +
> + linear_addr = (unsigned long)pfn_to_kaddr(pte_pfn(*pte));
> + __clean_dcache_pou(linear_addr, linear_addr+length);
> + }
> +}
> +
> +int swsusp_arch_suspend(void)
> +{
> + int ret = 0;
> + unsigned long flags;
> + struct task_struct *p;
> + struct vm_area_struct *vma;
> + struct sleep_stack_data state;
> + struct mm_struct *mm = current->active_mm;
> +
> + local_dbg_save(flags);
> +
> + if (__cpu_suspend_enter(&state))
> + ret = swsusp_save();
> + else {
> + __cpu_suspend_exit(mm);
> +
> + pr_info("Performing cache maintenance.\n");
> +
> + /*
> + * We clean the 'tricky' cache ranges here. Modules and user
> + * space executable code may have been written to via its
> + * alias in the kernel linear mapping.
> + *
> + * To clean these ranges, we walk the page tables to find the
> + * physical pages, and then their position in the linear map.
> + *
> + * The restore_pblist used during restore only contains pages
> + * that were in use - other pages containing executable code
> + * may have been written by core hibernate code.
> + */
> + clean_mapped_range(&init_mm, MODULES_VADDR, MODULES_END);
> +
> + /*
> + * Any user space executable code that isn't going to be
> + * reloaded from disk (e.g. jit code) is now potentially
> + * in the data cache, and needs cleaning.
> + *
> + * TODO: Some pages are mapped to user-space many times.
> + * Implement a 'cleaned' bitmap so we only clean each
> + * page once.
> + */
> + read_lock(&tasklist_lock);
> + for_each_process(p) {
> + if (!p->mm || p->mm == &init_mm)
> + continue;
> +
> + down_read(&p->mm->mmap_sem);
> + for_each_vma(p->mm, vma) {
> + if (!(vma->vm_flags & VM_EXEC))
> + continue;
> +
> + clean_mapped_range(p->mm, vma->vm_start,
> + vma->vm_end);
> + }
> + up_read(&p->mm->mmap_sem);
> + }
> + read_unlock(&tasklist_lock);
In the code above, after resuming from hibernate we are forced to walk the
list of processes to make sure that the I-cache(s) and D-cache(s) are in sync
for executable pages that were copied by hibernate core code (that is, pages
that are not part of the restore_pblist and were already "restored" by the
time swsusp_arch_resume() is called).
We were wondering if you would accept moving that code into hibernate core
code; we see two options for that:
1) Add a cache flushing hook to do_copy_page() in snapshot.c so that we can
make sure the D-cache and I-cache are in sync whenever the page in question
contains executable code (I do not think there is an easy way to detect that,
though, unless we walk the processes' page tables). The hook could be empty
on architectures like x86 where this is not an issue; a rough sketch follows
after this list.
2) Add code like the loop above to hibernate core code, either to create a
bitmap of pages that need I-cache/D-cache synchronization on resume from
hibernate (the bitmap can be created at snapshot preparation time), or to do
what James does here: walk the list of processes and call a cache sync hook
to make sure the I-cache and D-caches are in sync.
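As a strawman for option 1, something along the following lines might work.
This is only a minimal sketch: arch_hibernate_post_copy() is a hypothetical
hook name, the call site shown in the comment is illustrative, and nothing
here has been compiled or tested.

/* kernel/power/snapshot.c: default no-op, overridable per architecture. */
#include <linux/mm.h>

void __weak arch_hibernate_post_copy(void *dst)
{
}

/*
 * do_copy_page() (or its callers) would then invoke the hook once per
 * page, after the destination page has been written:
 *
 *        copy_page(dst, src);
 *        arch_hibernate_post_copy(dst);
 */

/* arch/arm64/kernel/hibernate.c: clean to PoU and invalidate the I-cache. */
#include <asm/cacheflush.h>
#include <asm/page.h>

void arch_hibernate_post_copy(void *dst)
{
        unsigned long start = (unsigned long)dst;

        /* Cleans the D-cache to the PoU and invalidates the I-cache. */
        flush_icache_range(start, start + PAGE_SIZE);
}

The obvious downside is that every restored page gets I-cache maintenance,
whether or not it will ever be executed, which is why the bitmap in option 2
may end up being cheaper.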
I do not think that the code above belongs in architecture-specific code,
since this is an arch-agnostic issue and it is already dealt with elsewhere
in the kernel by the cache flushing API (e.g. flush_dcache_page()) for
different purposes; I guess hibernate core code does not need this at
present only because on x86 it is a non-issue.
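For comparison, this is roughly the pattern the existing cache flushing API
already supports elsewhere in the kernel when the kernel writes into a page
that user space may have mapped (possibly executable). A generic illustration
only, not code taken from the hibernate path:

#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/string.h>

/*
 * After writing into a page that may be mapped into user space, let the
 * architecture resolve any D-cache aliasing / I-D coherency; on x86 this
 * is a no-op, while on arm64 it feeds into the PG_dcache_clean logic used
 * by __sync_icache_dcache() when the page is later mapped executable.
 */
static void fill_user_visible_page(struct page *page, const void *data,
                                   size_t len)
{
        void *kaddr = kmap_atomic(page);

        memcpy(kaddr, data, len);
        kunmap_atomic(kaddr);
        flush_dcache_page(page);
}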
If you are not opposed to moving this code into hibernate core code, we can
put a patch together to deal with it there; please let us know.
Thanks,
Lorenzo
> +
> + /* page tables may still be cached -how does this affect dma? */
> +
> + /* all cache cleaning should have finished */
> + dsb(ish);
> + __flush_icache_all();
> + }
> +
> + local_dbg_restore(flags);
> +
> + return ret;
> +}
> +
> +static int copy_pte(pmd_t *dst, pmd_t *src, unsigned long start_addr)
> +{
> + int i;
> + pte_t *old_pte = pte_offset_kernel(src, start_addr);
> + pte_t *new_pte = pte_offset_kernel(dst, start_addr);
> +
> + for (i = pte_index(start_addr); i < PTRS_PER_PTE;
> + i++, old_pte++, new_pte++) {
> + if (pte_val(*old_pte))
> + set_pte(new_pte,
> + __pte(pte_val(*old_pte) & ~PTE_RDONLY));
> + }
> +
> + return 0;
> +}
> +
> +static int copy_pmd(pud_t *dst, pud_t *src, unsigned long start_addr)
> +{
> + int i;
> + int rc = 0;
> + pte_t *new_pte;
> + pmd_t *old_pmd = pmd_offset(src, start_addr);
> + pmd_t *new_pmd = pmd_offset(dst, start_addr);
> +
> + for (i = pmd_index(start_addr); i < PTRS_PER_PMD;
> + i++, start_addr += PMD_SIZE, old_pmd++, new_pmd++) {
> + if (!pmd_val(*old_pmd))
> + continue;
> +
> + if (pmd_table(*(old_pmd))) {
> + new_pte = (pte_t *)get_safe_page(GFP_ATOMIC);
> + if (!new_pte) {
> + rc = -ENOMEM;
> + break;
> + }
> +
> + set_pmd(new_pmd, __pmd(virt_to_phys(new_pte)
> + | PMD_TYPE_TABLE));
> +
> + rc = copy_pte(new_pmd, old_pmd, start_addr);
> + if (rc)
> + break;
> + } else
> + set_pmd(new_pmd,
> + __pmd(pmd_val(*old_pmd) & ~PMD_SECT_RDONLY));
> + }
> +
> + return rc;
> +}
> +
> +static int copy_pud(pgd_t *dst, pgd_t *src, unsigned long start_addr)
> +{
> + int i;
> + int rc = 0;
> + pmd_t *new_pmd;
> + pud_t *old_pud = pud_offset(src, start_addr);
> + pud_t *new_pud = pud_offset(dst, start_addr);
> +
> + for (i = pud_index(start_addr); i < PTRS_PER_PUD;
> + i++, start_addr += PUD_SIZE, old_pud++, new_pud++) {
> + if (!pud_val(*old_pud))
> + continue;
> +
> + if (pud_table(*(old_pud))) {
> + if (PTRS_PER_PMD != 1) {
> + new_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
> + if (!new_pmd) {
> + rc = -ENOMEM;
> + break;
> + }
> +
> + set_pud(new_pud, __pud(virt_to_phys(new_pmd)
> + | PUD_TYPE_TABLE));
> + }
> +
> + rc = copy_pmd(new_pud, old_pud, start_addr);
> + if (rc)
> + break;
> + } else
> + set_pud(new_pud,
> + __pud(pud_val(*old_pud) & ~PMD_SECT_RDONLY));
> + }
> +
> + return rc;
> +}
> +
> +static int copy_linear_map(pgd_t *new_pgd)
> +{
> + int i;
> + int rc = 0;
> + pud_t *new_pud;
> + unsigned long start_addr = PAGE_OFFSET;
> + pgd_t *old_pgd = pgd_offset_k(start_addr);
> +
> + new_pgd += pgd_index(start_addr);
> +
> + for (i = pgd_index(start_addr); i < PTRS_PER_PGD;
> + i++, start_addr += PGDIR_SIZE, old_pgd++, new_pgd++) {
> + if (!pgd_val(*old_pgd))
> + continue;
> +
> + if (PTRS_PER_PUD != 1) {
> + new_pud = (pud_t *)get_safe_page(GFP_ATOMIC);
> + if (!new_pud) {
> + rc = -ENOMEM;
> + break;
> + }
> +
> + set_pgd(new_pgd, __pgd(virt_to_phys(new_pud)
> + | PUD_TYPE_TABLE));
> + }
> +
> + rc = copy_pud(new_pgd, old_pgd, start_addr);
> + if (rc)
> + break;
> + }
> +
> + return rc;
> +}
> +
> +/*
> + * Setup then Resume from the hibernate image using swsusp_arch_suspend_exit().
> + *
> + * Memory allocated by get_safe_page() will be dealt with by the hibernate code,
> + * we don't need to free it here.
> + *
> + * Allocate a safe zero page to use as ttbr0, as all existing page tables, and
> + * even the empty_zero_page will be overwritten.
> + */
> +int swsusp_arch_resume(void)
> +{
> + int rc = 0;
> + pgd_t *pgd;
> + size_t length;
> + size_t exit_size;
> + pgd_t *tmp_pg_dir;
> + pte_t *exit_page_pte;
> + pte_t exit_page_pte_orig;
> + unsigned long exit_page;
> + void *safe_zero_page_mem;
> + void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t);
> +
> + /* Copy swsusp_arch_suspend_exit() to a safe page. */
> + exit_page = get_safe_page(GFP_ATOMIC);
> + if (!exit_page) {
> + pr_err("Failed to allocate memory for hibernate_exit code.");
> + rc = -ENOMEM;
> + goto out;
> + }
> + exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start;
> + memcpy((void *)exit_page, __hibernate_exit_text_start, exit_size);
> + flush_icache_range(exit_page, exit_page + exit_size);
> + if (IS_ENABLED(CONFIG_DEBUG_RODATA)) {
> + /*
> + * set_memory_x() is only for the module ranges. We only have
> + * the linear-map mapped - so need to make the copied page
> + * executable now, and when we run with the copied page tables.
> + * The process of restoring the hibernate kernel will undo
> + * this change.
> + */
> + pgd = pgd_offset(&init_mm, exit_page);
> + exit_page_pte = lookup_address(pgd, exit_page, &length);
> + if (exit_page_pte) {
> + exit_page_pte_orig = pte_val(*exit_page_pte);
> + set_pte_at(&init_mm, exit_page, exit_page_pte,
> + __pte(pte_val(*exit_page_pte) & ~PTE_PXN));
> + flush_tlb_kernel_range(exit_page, exit_page + PAGE_SIZE);
> + }
> + else {
> + pr_err("Failed to find page table entry for hibernate_exit code!");
> + rc = -EFAULT;
> + goto out;
> + }
> + }
> + hibernate_exit = (void *)exit_page;
> +
> + /*
> + * Even the zero page may get overwritten during restore.
> + * get_safe_page() only returns zero'd pages.
> + */
> + safe_zero_page_mem = (void *)get_safe_page(GFP_ATOMIC);
> + if (!safe_zero_page_mem) {
> + pr_err("Failed to allocate memory for zero page.");
> + rc = -ENOMEM;
> + goto pte_undo;
> + }
> + empty_zero_page = virt_to_page(safe_zero_page_mem);
> + cpu_set_reserved_ttbr0();
> +
> + /*
> + * Restoring the memory image will overwrite the ttbr1 page tables.
> + * Create a second copy, of just the linear map, and use this when
> + * restoring.
> + */
> + tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
> + if (!tmp_pg_dir) {
> + pr_err("Failed to allocate memory for temporary page tables.");
> + rc = -ENOMEM;
> + goto pte_undo;
> + }
> + rc = copy_linear_map(tmp_pg_dir);
> + if (rc)
> + goto pte_undo;
> +
> + /*
> + * EL2 may get upset if we overwrite its page-tables/stack.
> + * kvm_reset_cpu() returns EL2 to the hyp stub. This isn't needed
> + * on normal suspend/resume as PSCI prevents us from ruining EL2.
> + */
> + if (IS_ENABLED(CONFIG_KVM_ARM_HOST))
> + kvm_reset_cpu();
> +
> + hibernate_exit(virt_to_phys(tmp_pg_dir), virt_to_phys(swapper_pg_dir));
> +
> +pte_undo:
> + if (IS_ENABLED(CONFIG_DEBUG_RODATA)) {
> + set_pte_at(&init_mm, exit_page, exit_page_pte,
> + exit_page_pte_orig);
> + flush_tlb_kernel_range(exit_page, exit_page + PAGE_SIZE);
> + }
> +out:
> + return rc;
> +}
> diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
> index da4405062d83..f58008d6dadf 100644
> --- a/arch/arm64/kernel/sleep.S
> +++ b/arch/arm64/kernel/sleep.S
> @@ -2,6 +2,7 @@
> #include <linux/linkage.h>
> #include <asm/asm-offsets.h>
> #include <asm/assembler.h>
> +#include <asm/memory.h>
>
> .text
> /*
> diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> index 98073332e2d0..3d8284d91f4c 100644
> --- a/arch/arm64/kernel/vmlinux.lds.S
> +++ b/arch/arm64/kernel/vmlinux.lds.S
> @@ -44,6 +44,16 @@ jiffies = jiffies_64;
> *(.idmap.text) \
> VMLINUX_SYMBOL(__idmap_text_end) = .;
>
> +#ifdef CONFIG_HIBERNATION
> +#define HIBERNATE_TEXT \
> + . = ALIGN(SZ_4K); \
> + VMLINUX_SYMBOL(__hibernate_exit_text_start) = .;\
> + *(.hibernate_exit.text) \
> + VMLINUX_SYMBOL(__hibernate_exit_text_end) = .;
> +#else
> +#define HIBERNATE_TEXT
> +#endif
> +
> /*
> * The size of the PE/COFF section that covers the kernel image, which
> * runs from stext to _edata, must be a round multiple of the PE/COFF
> @@ -102,6 +112,7 @@ SECTIONS
> LOCK_TEXT
> HYPERVISOR_TEXT
> IDMAP_TEXT
> + HIBERNATE_TEXT
> *(.fixup)
> *(.gnu.warning)
> . = ALIGN(16);
> @@ -181,6 +192,10 @@ ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
> "HYP init code too big or misaligned")
> ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
> "ID map text too big or misaligned")
> +#ifdef CONFIG_HIBERNATION
> +ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1))
> + <= SZ_4K, "Hibernate exit text too big or misaligned")
> +#endif
>
> /*
> * If padding is applied before .head.text, virt<->phys conversions will fail.
> --
> 2.1.4
>
^ permalink raw reply [flat|nested] 9+ messages in thread