* [PATCH] c/r: define s390-specific checkpoint-restart code (v4)
@ 2009-02-23 21:39 Dan Smith
[not found] ` <1235425143-9949-1-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
0 siblings, 1 reply; 6+ messages in thread
From: Dan Smith @ 2009-02-23 21:39 UTC (permalink / raw)
To: containers-qjLDD68F18O7TbgM5vRIOg
Implement the s390 arch-specific checkpoint/restart helpers. This
is on top of Oren Laadan's c/r code.
With these, I am able to checkpoint and restart simple programs as per
Oren's patch intro. While on x86 I never had to freeze a single task
to checkpoint it, on s390 I do need to. That is a prereq for consistent
snapshots (esp with multiple processes) anyway so I don't see that as
a problem.
Changelog:
Feb 23:
. Macro-ize the un/packing of trace flags
. Fix the crash when externally-linked
. Break out the restart functions into restart.c
. Remove unneeded s390_enable_sie() call
Jan 30:
. Switched types in cr_hdr_cpu to __u64 etc.
(Per Oren suggestion)
. Replaced direct inclusion of structs in
cr_hdr_cpu with the struct members.
(Per Oren suggestion)
. Also ended up adding a bunch of new things
into restart (mm_segment, ksp, etc) in vain
attempt to get code using fpu to not segfault
after restart.
Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
Signed-off-by: Dan Smith <danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
arch/s390/include/asm/checkpoint_hdr.h | 99 +++++++++++++++++++++++++++
arch/s390/include/asm/unistd.h | 4 +-
arch/s390/kernel/compat_wrapper.S | 12 +++
arch/s390/kernel/syscalls.S | 2 +
arch/s390/mm/Makefile | 1 +
arch/s390/mm/checkpoint.c | 106 +++++++++++++++++++++++++++++
arch/s390/mm/restart.c | 117 ++++++++++++++++++++++++++++++++
checkpoint/Kconfig | 2 +-
8 files changed, 341 insertions(+), 2 deletions(-)
create mode 100644 arch/s390/include/asm/checkpoint_hdr.h
create mode 100644 arch/s390/mm/checkpoint.c
create mode 100644 arch/s390/mm/restart.c
diff --git a/arch/s390/include/asm/checkpoint_hdr.h b/arch/s390/include/asm/checkpoint_hdr.h
new file mode 100644
index 0000000..6f19cfe
--- /dev/null
+++ b/arch/s390/include/asm/checkpoint_hdr.h
@@ -0,0 +1,99 @@
+#ifndef __ASM_S390_CKPT_HDR_H
+#define __ASM_S390_CKPT_HDR_H
+/*
+ * Checkpoint/restart - architecture specific headers s/390
+ *
+ * Copyright IBM Corp. 2009
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of the Linux
+ * distribution for more details.
+ */
+
+#include <linux/types.h>
+
+/*
+ * To maintain compatibility between 32-bit and 64-bit architecture flavors,
+ * keep data 64-bit aligned: use padding for structure members, and use
+ * __attribute__((aligned (8))) for the entire structure.
+ *
+ */
+
+#ifdef __KERNEL__
+#include <asm/processor.h>
+#else
+#include <sys/user.h>
+#endif
+
+#ifdef __s390x__
+
+#define CR_390_PACK_TRACE_FLAGS(hdr, thread) \
+ do { \
+ hdr->em_instr = 0; \
+ if (thread->per_info.single_step) \
+ hdr->em_instr |= 1; \
+ if (thread->per_info.instruction_fetch) \
+ hdr->em_instr |= 2; \
+} while (0)
+
+#define CR_390_UNPACK_TRACE_FLAGS(hdr, thread) \
+ do { \
+ if (hdr->em_instr & 1) \
+ thread->per_info.single_step = 1; \
+ if (hdr->em_instr & 2) \
+ thread->per_info.instruction_fetch = 1; \
+ } while (0)
+
+/*
+ * Notes
+ * NUM_GPRS defined in <asm/ptrace.h> to be 16
+ * NUM_FPRS defined in <asm/ptrace.h> to be 16
+ * NUM_APRS defined in <asm/ptrace.h> to be 16
+ */
+struct cr_hdr_cpu {
+ __u64 args[1];
+ __u64 gprs[NUM_GPRS];
+ __u64 orig_gpr2;
+ __u16 svcnr;
+ __u16 ilc;
+ __u32 acrs[NUM_ACRS];
+ __u64 ieee_instruction_pointer;
+
+ /* psw_t */
+ __u64 psw_t_mask;
+ __u64 psw_t_addr;
+
+ /* s390_fp_regs_t */
+ __u32 fpc;
+ struct {
+ float f;
+ double d;
+ __u64 ui;
+ __u32 fp_hi;
+ __u32 fp_lo;
+ } fprs[NUM_FPRS];
+
+ /* per_struct */
+ __u64 per_control_regs[3];
+ __u32 em_instr;
+ __u64 starting_addr;
+ __u64 ending_addr;
+ __u16 perc_atmid;
+ __u64 address;
+ __u8 access_id;
+};
+
+struct cr_hdr_mm_context {
+ unsigned long vdso_base;
+ int noexec;
+ int has_pgste;
+ int alloc_pgste;
+ unsigned long asce_bits;
+ unsigned long asce_limit;
+};
+
+struct cr_hdr_head_arch {
+};
+#endif /* __s390x__ */
+
+#endif /* __ASM_S390_CKPT_HDR__H */
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index c8ad350..ffe64a0 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -265,7 +265,9 @@
#define __NR_pipe2 325
#define __NR_dup3 326
#define __NR_epoll_create1 327
-#define NR_syscalls 328
+#define __NR_checkpoint 328
+#define __NR_restart 329
+#define NR_syscalls 330
/*
* There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index fc2c971..9546a81 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1767,3 +1767,15 @@ sys_dup3_wrapper:
sys_epoll_create1_wrapper:
lgfr %r2,%r2 # int
jg sys_epoll_create1 # branch to system call
+
+ .globl sys_checkpoint_wrapper
+sys_checkpoint_wrapper:
+ lgfr %r2,%r2 # pid_t
+ lgfr %r3,%r3 # int
+ llgfr %r4,%r4 # unsigned long
+
+ .globl sys_restart_wrapper
+sys_restart_wrapper:
+ lgfr %r2,%r2 # int
+ lgfr %r3,%r3 # int
+ llgfr %r4,%r4 # unsigned long
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 2d61787..54316c8 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -336,3 +336,5 @@ SYSCALL(sys_inotify_init1,sys_inotify_init1,sys_inotify_init1_wrapper)
SYSCALL(sys_pipe2,sys_pipe2,sys_pipe2_wrapper) /* 325 */
SYSCALL(sys_dup3,sys_dup3,sys_dup3_wrapper)
SYSCALL(sys_epoll_create1,sys_epoll_create1,sys_epoll_create1_wrapper)
+SYSCALL(sys_checkpoint,sys_checkpoint,sys_checkpoint_wrapper)
+SYSCALL(sys_restart,sys_restart,sys_restart_wrapper)
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 2a74581..040fbb7 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -6,3 +6,4 @@ obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o
obj-$(CONFIG_CMM) += cmm.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_PAGE_STATES) += page-states.o
+obj-$(CONFIG_CHECKPOINT_RESTART) += checkpoint.o restart.o
diff --git a/arch/s390/mm/checkpoint.c b/arch/s390/mm/checkpoint.c
new file mode 100644
index 0000000..a588dc9
--- /dev/null
+++ b/arch/s390/mm/checkpoint.c
@@ -0,0 +1,106 @@
+/*
+ * Checkpoint/restart - architecture specific support for s390
+ *
+ * Copyright IBM Corp. 2009
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of the Linux
+ * distribution for more details.
+ */
+
+#include <linux/checkpoint.h>
+#include <linux/checkpoint_hdr.h>
+#include <linux/kernel.h>
+#include <asm/system.h>
+#include <asm/pgtable.h>
+
+int cr_write_thread(struct cr_ctx *ctx, struct task_struct *t)
+{
+ return 0;
+}
+
+static void cr_save_cpu_regs(struct cr_hdr_cpu *hh, struct task_struct *t)
+{
+ struct thread_struct *thread = &t->thread;
+ struct pt_regs *regs = task_pt_regs(t);
+
+ hh->fpc = thread->fp_regs.fpc;
+ memcpy(&hh->fprs, &thread->fp_regs.fprs, NUM_FPRS*sizeof(freg_t));
+ memcpy(hh->acrs, &thread->acrs[0], NUM_ACRS * sizeof(unsigned int));
+ hh->psw_t_mask = regs->psw.mask;
+ hh->psw_t_addr = regs->psw.addr;
+
+ hh->args[0] = regs->args[0];
+ hh->svcnr = regs->svcnr;
+ hh->ilc = regs->ilc;
+ memcpy(hh->gprs, regs->gprs, NUM_GPRS*sizeof(unsigned long));
+ hh->orig_gpr2 = regs->orig_gpr2;
+
+ hh->ieee_instruction_pointer = thread->ieee_instruction_pointer;
+
+ /* per_info */
+ memcpy(&hh->per_control_regs, &thread->per_info.control_regs.words,
+ 3 * sizeof(unsigned long));
+ CR_390_PACK_TRACE_FLAGS(hh, thread);
+ hh->starting_addr = thread->per_info.starting_addr;
+ hh->ending_addr = thread->per_info.ending_addr;
+ hh->perc_atmid = thread->per_info.lowcore.words.perc_atmid;
+ hh->address = thread->per_info.lowcore.words.address;
+ hh->access_id = thread->per_info.lowcore.words.access_id;
+}
+
+/* dump the cpu state and registers of a given task */
+int cr_write_cpu(struct cr_ctx *ctx, struct task_struct *t)
+{
+ struct cr_hdr h;
+ struct cr_hdr_cpu *hh = cr_hbuf_get(ctx, sizeof(*hh));
+ int ret;
+
+ h.type = CR_HDR_CPU;
+ h.len = sizeof(*hh);
+ h.parent = task_pid_vnr(t);
+
+ cr_save_cpu_regs(hh, t);
+
+ ret = cr_write_obj(ctx, &h, hh);
+ cr_hbuf_put(ctx, sizeof(*hh));
+ WARN_ON_ONCE(ret < 0);
+
+ return ret;
+}
+
+int cr_write_head_arch(struct cr_ctx *ctx)
+{
+ return 0;
+}
+
+/* Nothing to do for mm context state */
+int cr_write_mm_context(struct cr_ctx *ctx, struct mm_struct *mm, int parent)
+{
+ struct cr_hdr h;
+ struct cr_hdr_mm_context *hh = cr_hbuf_get(ctx, sizeof(*hh));
+ int ret;
+
+ h.type = CR_HDR_MM_CONTEXT;
+ h.len = sizeof(*hh);
+ h.parent = parent;
+
+#if 0
+ /* Oren's v13 is on an older kernel which has no vdso_base */
+ /* on newer kernel, we'll have to enable this */
+ hh->vdso_base = mm->context.vdso_base;
+ pr_debug(KERN_NOTICE "checkpointing vdso_base %lx\n", hh->vdso_base);
+#else
+ hh->vdso_base = 0;
+#endif
+ hh->noexec = mm->context.noexec;
+ hh->has_pgste = mm->context.has_pgste;
+ hh->alloc_pgste = mm->context.alloc_pgste;
+ hh->asce_bits = mm->context.asce_bits;
+ hh->asce_limit = mm->context.asce_limit;
+
+ ret = cr_write_obj(ctx, &h, hh);
+ cr_hbuf_put(ctx, sizeof(*hh));
+
+ return ret;
+}
diff --git a/arch/s390/mm/restart.c b/arch/s390/mm/restart.c
new file mode 100644
index 0000000..32b1b15
--- /dev/null
+++ b/arch/s390/mm/restart.c
@@ -0,0 +1,117 @@
+/*
+ * Checkpoint/restart - architecture specific support for s390
+ *
+ * Copyright IBM Corp. 2009
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of the Linux
+ * distribution for more details.
+ */
+
+#include <linux/checkpoint.h>
+#include <linux/checkpoint_hdr.h>
+#include <linux/kernel.h>
+#include <asm/system.h>
+#include <asm/pgtable.h>
+
+int cr_read_thread(struct cr_ctx *ctx)
+{
+ return 0;
+}
+
+int cr_read_cpu(struct cr_ctx *ctx)
+{
+ struct cr_hdr_cpu *hh = cr_hbuf_get(ctx, sizeof(*hh));
+ struct thread_struct *thread = ¤t->thread;
+ struct pt_regs *regs = task_pt_regs(current);
+ int parent, ret;
+
+ parent = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_CPU);
+ if (parent < 0) {
+ ret = parent;
+ goto out;
+ }
+ ret = 0;
+
+ regs->psw.addr &= ~PSW_ADDR_INSN;
+ regs->psw.addr |= hh->psw_t_addr & PSW_ADDR_INSN;
+
+ regs->args[0] = hh->args[0];
+
+ /* ilc is syscall restart addr. it looks like we must
+ restore it since we restore psw.addr */
+ regs->ilc = hh->ilc;
+ memcpy(regs->gprs, hh->gprs, NUM_GPRS*sizeof(unsigned long));
+ regs->orig_gpr2 = hh->orig_gpr2;
+ /* svcnr is the current system call. but it's on ptregs so
+ restore it */
+ regs->svcnr = hh->svcnr;
+
+ memcpy(thread->acrs, hh->acrs, NUM_ACRS * sizeof(unsigned int));
+ restore_access_regs(thread->acrs);
+
+ /* ieee_instruction_pointer points to a faulting address
+ on FPE. restore it (for ptrace) */
+ thread->ieee_instruction_pointer = hh->ieee_instruction_pointer;
+
+ /* s390_fp_regs_t */
+ thread->fp_regs.fpc = hh->fpc;
+ memcpy(&thread->fp_regs.fprs, &hh->fprs, NUM_FPRS*sizeof(freg_t));
+
+ /* per_struct - restore it */
+ memcpy(&thread->per_info.control_regs.words, &hh->per_control_regs,
+ 3 * sizeof(unsigned long));
+ CR_390_UNPACK_TRACE_FLAGS(hh, thread);
+ thread->per_info.starting_addr = hh->starting_addr;
+ thread->per_info.ending_addr = hh->ending_addr;
+ thread->per_info.lowcore.words.perc_atmid = hh->perc_atmid;
+ thread->per_info.lowcore.words.address = hh->address;
+ thread->per_info.lowcore.words.access_id = hh->access_id;
+
+out:
+ cr_hbuf_put(ctx, sizeof(*hh));
+ return ret;
+}
+
+int cr_read_head_arch(struct cr_ctx *ctx)
+{
+ return 0;
+}
+
+
+int cr_read_mm_context(struct cr_ctx *ctx, struct mm_struct *mm, int rparent)
+{
+ struct cr_hdr_mm_context *hh = cr_hbuf_get(ctx, sizeof(*hh));
+ int parent, ret = -EINVAL;
+
+ parent = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_MM_CONTEXT);
+ if (parent < 0) {
+ ret = parent;
+ goto out;
+ }
+ if (parent != rparent)
+ goto out;
+
+#if 0
+ /* enable this when s390 defines vdso-base */
+ mm->context.vdso_base = hh->vdso_base;
+ pr_debug("read vdso_base %lx\n", hh->vdso_base);
+#endif
+ pr_debug("i had nx %d haspgste %d apgste %d bits %lx limit %lx\n",
+ mm->context.noexec, mm->context.has_pgste,
+ mm->context.alloc_pgste, mm->context.asce_bits,
+ mm->context.asce_limit);
+ mm->context.noexec = hh->noexec;
+ mm->context.has_pgste = hh->has_pgste;
+ mm->context.alloc_pgste = hh->alloc_pgste;
+ mm->context.asce_limit = hh->asce_limit;
+ mm->context.asce_bits = hh->asce_bits;
+ pr_debug("restoring nx %d haspgste %d apgste %d bits %lx limit %lx\n",
+ hh->noexec, hh->has_pgste, hh->alloc_pgste,
+ hh->asce_bits, hh->asce_limit);
+ ret = 0;
+
+ out:
+ cr_hbuf_put(ctx, sizeof(*hh));
+ return ret;
+}
diff --git a/checkpoint/Kconfig b/checkpoint/Kconfig
index ffaa635..cb1d29d 100644
--- a/checkpoint/Kconfig
+++ b/checkpoint/Kconfig
@@ -1,7 +1,7 @@
config CHECKPOINT_RESTART
prompt "Enable checkpoint/restart (EXPERIMENTAL)"
def_bool n
- depends on X86_32 && EXPERIMENTAL
+ depends on (X86_32 || (S390 && 64BIT)) && EXPERIMENTAL
help
Application checkpoint/restart is the ability to save the
state of a running application so that it can later resume
--
1.6.1
^ permalink raw reply related [flat|nested] 6+ messages in thread[parent not found: <1235425143-9949-1-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>]
* Re: [PATCH] c/r: define s390-specific checkpoint-restart code (v4) [not found] ` <1235425143-9949-1-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> @ 2009-02-23 22:31 ` Dave Hansen 2009-02-23 23:04 ` Dan Smith 2009-02-24 15:23 ` Serge E. Hallyn 0 siblings, 2 replies; 6+ messages in thread From: Dave Hansen @ 2009-02-23 22:31 UTC (permalink / raw) To: Dan Smith; +Cc: containers-qjLDD68F18O7TbgM5vRIOg On Mon, 2009-02-23 at 16:39 -0500, Dan Smith wrote: > +#define CR_390_PACK_TRACE_FLAGS(hdr, thread) \ > + do { \ > + hdr->em_instr = 0; \ > + if (thread->per_info.single_step) \ > + hdr->em_instr |= 1; \ > + if (thread->per_info.instruction_fetch) \ > + hdr->em_instr |= 2; \ > +} while (0) > + > +#define CR_390_UNPACK_TRACE_FLAGS(hdr, thread) \ > + do { \ > + if (hdr->em_instr & 1) \ > + thread->per_info.single_step = 1; \ > + if (hdr->em_instr & 2) \ > + thread->per_info.instruction_fetch = 1; \ > + } while (0) Why are these macros? Why do we need to pack them, anyway? > +/* > + * Notes > + * NUM_GPRS defined in <asm/ptrace.h> to be 16 > + * NUM_FPRS defined in <asm/ptrace.h> to be 16 > + * NUM_APRS defined in <asm/ptrace.h> to be 16 > + */ > +struct cr_hdr_cpu { > + __u64 args[1]; > + __u64 gprs[NUM_GPRS]; > + __u64 orig_gpr2; > + __u16 svcnr; > + __u16 ilc; > + __u32 acrs[NUM_ACRS]; > + __u64 ieee_instruction_pointer; > + > + /* psw_t */ > + __u64 psw_t_mask; > + __u64 psw_t_addr; > + > + /* s390_fp_regs_t */ > + __u32 fpc; > + struct { > + float f; > + double d; > + __u64 ui; > + __u32 fp_hi; > + __u32 fp_lo; > + } fprs[NUM_FPRS]; I don't see a lot of floating point code go by, so I'm a bit stupid on this. But, this doesn't make a lot of sense to me. Are there really parallel sets of float and double registers? Or, do we want some kind of union here? > + /* per_struct */ > + __u64 per_control_regs[3]; > + __u32 em_instr; > + __u64 starting_addr; > + __u64 ending_addr; > + __u16 perc_atmid; > + __u64 address; > + __u8 access_id; > +}; > + > +struct cr_hdr_mm_context { > + unsigned long vdso_base; > + int noexec; > + int has_pgste; > + int alloc_pgste; > + unsigned long asce_bits; > + unsigned long asce_limit; > +}; > + > +struct cr_hdr_head_arch { > +}; > +#endif /* __s390x__ */ > + > +#endif /* __ASM_S390_CKPT_HDR__H */ > diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h > index c8ad350..ffe64a0 100644 > --- a/arch/s390/include/asm/unistd.h > +++ b/arch/s390/include/asm/unistd.h > @@ -265,7 +265,9 @@ > #define __NR_pipe2 325 > #define __NR_dup3 326 > #define __NR_epoll_create1 327 > -#define NR_syscalls 328 > +#define __NR_checkpoint 328 > +#define __NR_restart 329 > +#define NR_syscalls 330 > > /* > * There are some system calls that are not present on 64 bit, some > diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S > index fc2c971..9546a81 100644 > --- a/arch/s390/kernel/compat_wrapper.S > +++ b/arch/s390/kernel/compat_wrapper.S > @@ -1767,3 +1767,15 @@ sys_dup3_wrapper: > sys_epoll_create1_wrapper: > lgfr %r2,%r2 # int > jg sys_epoll_create1 # branch to system call > + > + .globl sys_checkpoint_wrapper > +sys_checkpoint_wrapper: > + lgfr %r2,%r2 # pid_t > + lgfr %r3,%r3 # int > + llgfr %r4,%r4 # unsigned long > + > + .globl sys_restart_wrapper > +sys_restart_wrapper: > + lgfr %r2,%r2 # int > + lgfr %r3,%r3 # int > + llgfr %r4,%r4 # unsigned long So, all s390 syscalls need these wrappers? They don't do anything in particular to help c/r, right? > +static void cr_save_cpu_regs(struct cr_hdr_cpu *hh, struct task_struct *t) > +{ > + struct thread_struct *thread = &t->thread; > + struct pt_regs *regs = task_pt_regs(t); > + > + hh->fpc = thread->fp_regs.fpc; > + memcpy(&hh->fprs, &thread->fp_regs.fprs, NUM_FPRS*sizeof(freg_t)); > + memcpy(hh->acrs, &thread->acrs[0], NUM_ACRS * sizeof(unsigned int)); One minor issue with all of these memcpy()s is that they're not stupid-proof. To figure out whether the 'sizeof(unsigned int)' is correct, I need to go back and look to ensure that hh->acrs is, in fact, an 'unsigned int'. But if you do: memcpy(hh->acrs, &thread->acrs[0], sizeof(hh->acrs)); it will always be right unless you turn hh->acrs into a plain pointer. But, even if you change the type to, say, a u64 or something, it will continue to work. > + hh->psw_t_mask = regs->psw.mask; > + hh->psw_t_addr = regs->psw.addr; > + > + hh->args[0] = regs->args[0]; Why is this an array? > + hh->svcnr = regs->svcnr; > + hh->ilc = regs->ilc; > + memcpy(hh->gprs, regs->gprs, NUM_GPRS*sizeof(unsigned long)); > + hh->orig_gpr2 = regs->orig_gpr2; > + > + hh->ieee_instruction_pointer = thread->ieee_instruction_pointer; > + > + /* per_info */ > + memcpy(&hh->per_control_regs, &thread->per_info.control_regs.words, > + 3 * sizeof(unsigned long)); This '3' is a magic number and is used in a few places. Does it make sense to define it as a variable. > + CR_390_PACK_TRACE_FLAGS(hh, thread); > + hh->starting_addr = thread->per_info.starting_addr; > + hh->ending_addr = thread->per_info.ending_addr; > + hh->perc_atmid = thread->per_info.lowcore.words.perc_atmid; > + hh->address = thread->per_info.lowcore.words.address; > + hh->access_id = thread->per_info.lowcore.words.access_id; > +} > + > +/* dump the cpu state and registers of a given task */ > +int cr_write_cpu(struct cr_ctx *ctx, struct task_struct *t) > +{ > + struct cr_hdr h; > + struct cr_hdr_cpu *hh = cr_hbuf_get(ctx, sizeof(*hh)); > + int ret; > + > + h.type = CR_HDR_CPU; > + h.len = sizeof(*hh); > + h.parent = task_pid_vnr(t); I'm starting to think we need a macro for this or something. We repeat these three lines of code way too often. > + cr_save_cpu_regs(hh, t); > + > + ret = cr_write_obj(ctx, &h, hh); > + cr_hbuf_put(ctx, sizeof(*hh)); > + WARN_ON_ONCE(ret < 0); > + > + return ret; > +} The WARN_ON() probably shouldn't be there when this is submitted. > +int cr_write_head_arch(struct cr_ctx *ctx) > +{ > + return 0; > +} > + > +/* Nothing to do for mm context state */ > +int cr_write_mm_context(struct cr_ctx *ctx, struct mm_struct *mm, int parent) > +{ > + struct cr_hdr h; > + struct cr_hdr_mm_context *hh = cr_hbuf_get(ctx, sizeof(*hh)); > + int ret; > + > + h.type = CR_HDR_MM_CONTEXT; > + h.len = sizeof(*hh); > + h.parent = parent; > + > +#if 0 > + /* Oren's v13 is on an older kernel which has no vdso_base */ > + /* on newer kernel, we'll have to enable this */ > + hh->vdso_base = mm->context.vdso_base; > + pr_debug(KERN_NOTICE "checkpointing vdso_base %lx\n", hh->vdso_base); > +#else > + hh->vdso_base = 0; > +#endif > + hh->noexec = mm->context.noexec; > + hh->has_pgste = mm->context.has_pgste; > + hh->alloc_pgste = mm->context.alloc_pgste; > + hh->asce_bits = mm->context.asce_bits; > + hh->asce_limit = mm->context.asce_limit; > + > + ret = cr_write_obj(ctx, &h, hh); > + cr_hbuf_put(ctx, sizeof(*hh)); > + > + return ret; > +} > diff --git a/arch/s390/mm/restart.c b/arch/s390/mm/restart.c > new file mode 100644 > index 0000000..32b1b15 > --- /dev/null > +++ b/arch/s390/mm/restart.c > @@ -0,0 +1,117 @@ > +/* > + * Checkpoint/restart - architecture specific support for s390 > + * > + * Copyright IBM Corp. 2009 > + * > + * This file is subject to the terms and conditions of the GNU General Public > + * License. See the file COPYING in the main directory of the Linux > + * distribution for more details. > + */ > + > +#include <linux/checkpoint.h> > +#include <linux/checkpoint_hdr.h> > +#include <linux/kernel.h> > +#include <asm/system.h> > +#include <asm/pgtable.h> > + > +int cr_read_thread(struct cr_ctx *ctx) > +{ > + return 0; > +} > + > +int cr_read_cpu(struct cr_ctx *ctx) > +{ > + struct cr_hdr_cpu *hh = cr_hbuf_get(ctx, sizeof(*hh)); > + struct thread_struct *thread = ¤t->thread; > + struct pt_regs *regs = task_pt_regs(current); > + int parent, ret; > + > + parent = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_CPU); > + if (parent < 0) { > + ret = parent; > + goto out; > + } > + ret = 0; > + > + regs->psw.addr &= ~PSW_ADDR_INSN; > + regs->psw.addr |= hh->psw_t_addr & PSW_ADDR_INSN; > + > + regs->args[0] = hh->args[0]; > + > + /* ilc is syscall restart addr. it looks like we must > + restore it since we restore psw.addr */ Don't forget CodingStyle on these, please. > + regs->ilc = hh->ilc; > + memcpy(regs->gprs, hh->gprs, NUM_GPRS*sizeof(unsigned long)); > + regs->orig_gpr2 = hh->orig_gpr2; > + /* svcnr is the current system call. but it's on ptregs so > + restore it */ > + regs->svcnr = hh->svcnr; > + > + memcpy(thread->acrs, hh->acrs, NUM_ACRS * sizeof(unsigned int)); > + restore_access_regs(thread->acrs); > + > + /* ieee_instruction_pointer points to a faulting address > + on FPE. restore it (for ptrace) */ > + thread->ieee_instruction_pointer = hh->ieee_instruction_pointer; > + > + /* s390_fp_regs_t */ > + thread->fp_regs.fpc = hh->fpc; > + memcpy(&thread->fp_regs.fprs, &hh->fprs, NUM_FPRS*sizeof(freg_t)); > + > + /* per_struct - restore it */ > + memcpy(&thread->per_info.control_regs.words, &hh->per_control_regs, > + 3 * sizeof(unsigned long)); > + CR_390_UNPACK_TRACE_FLAGS(hh, thread); > + thread->per_info.starting_addr = hh->starting_addr; > + thread->per_info.ending_addr = hh->ending_addr; > + thread->per_info.lowcore.words.perc_atmid = hh->perc_atmid; > + thread->per_info.lowcore.words.address = hh->address; > + thread->per_info.lowcore.words.access_id = hh->access_id; > + > +out: > + cr_hbuf_put(ctx, sizeof(*hh)); > + return ret; > +} > + I'm struggling to figure out how we're going to keep up with keeping checkpoint and restart synchronized. This is all pretty mindless copying in both directions. Is it possible and worthwhile for us to just define it once, but use it for both checkpoint and restart with some macro trickery? #define CKPT 1 #define RST 2 #define CR_COPY(op, a, b) do { WARN_ON(sizeof(a) != sizeof(b)); if (op == CKPT) memcpy(&a, &b, sizeof(b)); else memcpy(&b, &a, sizeof(b)); } while (0); void s390_cr_regs(int op, struct task_struct *thread, *hh) { CR_COPY(op, thread->per_info.lowcore.words.perc_atmid, hh->perc_atmid); CR_COPY(op, thread->per_info.lowcore.words.address, hh->address); CR_COPY(op, thread->per_info.lowcore.words.access_id, hh->access_id); ... } int cr_read_cpu(struct cr_ctx *ctx) { s390_cr_regs(RST, thread, hh); } int cr_save_cpu(struct cr_ctx *ctx) { s390_cr_regs(CKPT, thread, hh); } That works for both regular variables and for arrays. Is the decreased line count worth the weirdo non-standard abstraction? -- Dave ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] c/r: define s390-specific checkpoint-restart code (v4) 2009-02-23 22:31 ` Dave Hansen @ 2009-02-23 23:04 ` Dan Smith [not found] ` <87fxi492lr.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org> 2009-02-24 15:23 ` Serge E. Hallyn 1 sibling, 1 reply; 6+ messages in thread From: Dan Smith @ 2009-02-23 23:04 UTC (permalink / raw) To: Dave Hansen; +Cc: containers-qjLDD68F18O7TbgM5vRIOg > +#define CR_390_PACK_TRACE_FLAGS(hdr, thread) \ >> + do { \ >> + hdr->em_instr = 0; \ >> + if (thread->per_info.single_step) \ >> + hdr->em_instr |= 1; \ >> + if (thread->per_info.instruction_fetch) \ >> + hdr->em_instr |= 2; \ >> +} while (0) >> + >> +#define CR_390_UNPACK_TRACE_FLAGS(hdr, thread) \ >> + do { \ >> + if (hdr->em_instr & 1) \ >> + thread->per_info.single_step = 1; \ >> + if (hdr->em_instr & 2) \ >> + thread->per_info.instruction_fetch = 1; \ >> + } while (0) DH> Why are these macros? Well, almost the exact code above was spread between the checkpoint and restart paths. I think that this makes it a little clearer what 1 and 2 are since you can see them together. DH> Why do we need to pack them, anyway? I assume Serge was trying not to waste a word per flag, and expecting more flags to be needed to justify the savings. >> +/* >> + * Notes >> + * NUM_GPRS defined in <asm/ptrace.h> to be 16 >> + * NUM_FPRS defined in <asm/ptrace.h> to be 16 >> + * NUM_APRS defined in <asm/ptrace.h> to be 16 >> + */ >> +struct cr_hdr_cpu { >> + __u64 args[1]; >> + __u64 gprs[NUM_GPRS]; >> + __u64 orig_gpr2; >> + __u16 svcnr; >> + __u16 ilc; >> + __u32 acrs[NUM_ACRS]; >> + __u64 ieee_instruction_pointer; >> + >> + /* psw_t */ >> + __u64 psw_t_mask; >> + __u64 psw_t_addr; >> + >> + /* s390_fp_regs_t */ >> + __u32 fpc; >> + struct { >> + float f; >> + double d; >> + __u64 ui; >> + __u32 fp_hi; >> + __u32 fp_lo; >> + } fprs[NUM_FPRS]; DH> I don't see a lot of floating point code go by, so I'm a bit DH> stupid on this. But, this doesn't make a lot of sense to me. Are DH> there really parallel sets of float and double registers? Or, do DH> we want some kind of union here? Yeah, this looks like an improper copy of the freg_t union in s390/include/asm/ptrace.h to me. Serge, was there a specific reason for doing this? If not I'll make it mirror the actual definition. >> diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S >> index fc2c971..9546a81 100644 >> --- a/arch/s390/kernel/compat_wrapper.S >> +++ b/arch/s390/kernel/compat_wrapper.S >> @@ -1767,3 +1767,15 @@ sys_dup3_wrapper: >> sys_epoll_create1_wrapper: >> lgfr %r2,%r2 # int >> jg sys_epoll_create1 # branch to system call >> + >> + .globl sys_checkpoint_wrapper >> +sys_checkpoint_wrapper: >> + lgfr %r2,%r2 # pid_t >> + lgfr %r3,%r3 # int >> + llgfr %r4,%r4 # unsigned long >> + >> + .globl sys_restart_wrapper >> +sys_restart_wrapper: >> + lgfr %r2,%r2 # int >> + lgfr %r3,%r3 # int >> + llgfr %r4,%r4 # unsigned long DH> So, all s390 syscalls need these wrappers? They don't do anything DH> in particular to help c/r, right? Looking at the existing file, I'd say everything is wrapped that way. You'd have to ask someone who knows about s390 for the reason why, I guess :) DH> One minor issue with all of these memcpy()s is that they're not DH> stupid-proof. To figure out whether the 'sizeof(unsigned int)' is DH> correct, I need to go back and look to ensure that hh->acrs is, in DH> fact, an 'unsigned int'. But if you do: DH> memcpy(hh->acrs, &thread->acrs[0], sizeof(hh->acrs)); This doesn't copy all of the access registers though, just the first one. Perhaps you were thrown off by the use of &thread->acrs[0]? I'd prefer to just use thread->acrs there for clarity, so I'll make that change as well. >> + hh->psw_t_mask = regs->psw.mask; >> + hh->psw_t_addr = regs->psw.addr; >> + >> + hh->args[0] = regs->args[0]; DH> Why is this an array? Because it's an array of 1 in regs->args. I'll add a comment :) >> + hh->svcnr = regs->svcnr; >> + hh->ilc = regs->ilc; >> + memcpy(hh->gprs, regs->gprs, NUM_GPRS*sizeof(unsigned long)); >> + hh->orig_gpr2 = regs->orig_gpr2; >> + >> + hh->ieee_instruction_pointer = thread->ieee_instruction_pointer; >> + >> + /* per_info */ >> + memcpy(&hh->per_control_regs, &thread->per_info.control_regs.words, >> + 3 * sizeof(unsigned long)); DH> This '3' is a magic number and is used in a few places. Does it DH> make sense to define it as a variable. You know, I said the same thing when I reviewed this set initially. However the actual s390 code where its defined uses the magic 3, so if we define a constant we could still be out of sync with the actual definition. >> + cr_save_cpu_regs(hh, t); >> + >> + ret = cr_write_obj(ctx, &h, hh); >> + cr_hbuf_put(ctx, sizeof(*hh)); >> + WARN_ON_ONCE(ret < 0); >> + >> + return ret; >> +} DH> The WARN_ON() probably shouldn't be there when this is submitted. Okay. >> + /* ilc is syscall restart addr. it looks like we must >> + restore it since we restore psw.addr */ DH> Don't forget CodingStyle on these, please. Okay. DH> I'm struggling to figure out how we're going to keep up with DH> keeping checkpoint and restart synchronized. This is all pretty DH> mindless copying in both directions. Is it possible and DH> worthwhile for us to just define it once, but use it for both DH> checkpoint and restart with some macro trickery? DH> #define CKPT 1 DH> #define RST 2 DH> #define CR_COPY(op, a, b) DH> do { DH> WARN_ON(sizeof(a) != sizeof(b)); DH> if (op == CKPT) DH> memcpy(&a, &b, sizeof(b)); DH> else DH> memcpy(&b, &a, sizeof(b)); DH> } while (0); DH> void s390_cr_regs(int op, struct task_struct *thread, *hh) DH> { DH> CR_COPY(op, thread->per_info.lowcore.words.perc_atmid, hh->perc_atmid); DH> CR_COPY(op, thread->per_info.lowcore.words.address, hh->address); DH> CR_COPY(op, thread->per_info.lowcore.words.access_id, hh->access_id); DH> ... DH> } DH> int cr_read_cpu(struct cr_ctx *ctx) DH> { DH> s390_cr_regs(RST, thread, hh); DH> } DH> int cr_save_cpu(struct cr_ctx *ctx) DH> { DH> s390_cr_regs(CKPT, thread, hh); DH> } DH> That works for both regular variables and for arrays. Is the DH> decreased line count worth the weirdo non-standard abstraction? It looks cleaner to me, so I'm happy to change it if people think that would be better. -- Dan Smith IBM Linux Technology Center email: danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org ^ permalink raw reply [flat|nested] 6+ messages in thread
[parent not found: <87fxi492lr.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>]
* Re: [PATCH] c/r: define s390-specific checkpoint-restart code (v4) [not found] ` <87fxi492lr.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org> @ 2009-02-24 0:27 ` Dave Hansen 2009-02-24 15:24 ` Serge E. Hallyn 1 sibling, 0 replies; 6+ messages in thread From: Dave Hansen @ 2009-02-24 0:27 UTC (permalink / raw) To: Dan Smith; +Cc: containers-qjLDD68F18O7TbgM5vRIOg On Mon, 2009-02-23 at 15:04 -0800, Dan Smith wrote: > > +#define CR_390_PACK_TRACE_FLAGS(hdr, thread) \ > >> + do { \ > >> + hdr->em_instr = 0; \ > >> + if (thread->per_info.single_step) \ > >> + hdr->em_instr |= 1; \ > >> + if (thread->per_info.instruction_fetch) \ > >> + hdr->em_instr |= 2; \ > >> +} while (0) > >> + > >> +#define CR_390_UNPACK_TRACE_FLAGS(hdr, thread) \ > >> + do { \ > >> + if (hdr->em_instr & 1) \ > >> + thread->per_info.single_step = 1; \ > >> + if (hdr->em_instr & 2) \ > >> + thread->per_info.instruction_fetch = 1; \ > >> + } while (0) > > DH> Why are these macros? > > Well, almost the exact code above was spread between the checkpoint > and restart paths. I think that this makes it a little clearer what 1 > and 2 are since you can see them together. I'm just saying to use a 'static inline' instead. Can they be functions instead of macros. > DH> Why do we need to pack them, anyway? > > I assume Serge was trying not to waste a word per flag, and expecting > more flags to be needed to justify the savings. I actually think consistency with all the other fields and uniformity in looking at the code is probably worth 14 bits of waste. > >> + hh->svcnr = regs->svcnr; > >> + hh->ilc = regs->ilc; > >> + memcpy(hh->gprs, regs->gprs, NUM_GPRS*sizeof(unsigned long)); > >> + hh->orig_gpr2 = regs->orig_gpr2; > >> + > >> + hh->ieee_instruction_pointer = thread->ieee_instruction_pointer; > >> + > >> + /* per_info */ > >> + memcpy(&hh->per_control_regs, &thread->per_info.control_regs.words, > >> + 3 * sizeof(unsigned long)); > > DH> This '3' is a magic number and is used in a few places. Does it > DH> make sense to define it as a variable. > > You know, I said the same thing when I reviewed this set initially. > However the actual s390 code where its defined uses the magic 3, so if > we define a constant we could still be out of sync with the actual > definition. Then go fix the dang s390 code! It is already bad style, so let's not propagate it further. The fact that you (Serge) could do this patch without touching *any* current s390 code is really a sign that it is done wrong, not right. ;) > > DH> I'm struggling to figure out how we're going to keep up with > DH> keeping checkpoint and restart synchronized. This is all pretty > DH> mindless copying in both directions. Is it possible and > DH> worthwhile for us to just define it once, but use it for both > DH> checkpoint and restart with some macro trickery? > > DH> #define CKPT 1 > DH> #define RST 2 > > DH> #define CR_COPY(op, a, b) > DH> do { > DH> WARN_ON(sizeof(a) != sizeof(b)); > DH> if (op == CKPT) > DH> memcpy(&a, &b, sizeof(b)); > DH> else > DH> memcpy(&b, &a, sizeof(b)); > DH> } while (0); > > DH> void s390_cr_regs(int op, struct task_struct *thread, *hh) > DH> { > DH> CR_COPY(op, thread->per_info.lowcore.words.perc_atmid, hh->perc_atmid); > DH> CR_COPY(op, thread->per_info.lowcore.words.address, hh->address); > DH> CR_COPY(op, thread->per_info.lowcore.words.access_id, hh->access_id); > DH> ... > DH> } > > DH> int cr_read_cpu(struct cr_ctx *ctx) > DH> { > DH> s390_cr_regs(RST, thread, hh); > DH> } > > DH> int cr_save_cpu(struct cr_ctx *ctx) > DH> { > DH> s390_cr_regs(CKPT, thread, hh); > DH> } > > DH> That works for both regular variables and for arrays. Is the > DH> decreased line count worth the weirdo non-standard abstraction? > > It looks cleaner to me, so I'm happy to change it if people think that > would be better. Yeah, I'm just curious what everyone thinks as a whole. Keep it in the back of your mind. -- Dave ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] c/r: define s390-specific checkpoint-restart code (v4) [not found] ` <87fxi492lr.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org> 2009-02-24 0:27 ` Dave Hansen @ 2009-02-24 15:24 ` Serge E. Hallyn 1 sibling, 0 replies; 6+ messages in thread From: Serge E. Hallyn @ 2009-02-24 15:24 UTC (permalink / raw) To: Dan Smith; +Cc: containers-qjLDD68F18O7TbgM5vRIOg, Dave Hansen Quoting Dan Smith (danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org): > Yeah, this looks like an improper copy of the freg_t union in > s390/include/asm/ptrace.h to me. Serge, was there a specific reason > for doing this? Yeah I messed up :) -serge ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] c/r: define s390-specific checkpoint-restart code (v4) 2009-02-23 22:31 ` Dave Hansen 2009-02-23 23:04 ` Dan Smith @ 2009-02-24 15:23 ` Serge E. Hallyn 1 sibling, 0 replies; 6+ messages in thread From: Serge E. Hallyn @ 2009-02-24 15:23 UTC (permalink / raw) To: Dave Hansen; +Cc: containers-qjLDD68F18O7TbgM5vRIOg, Dan Smith Quoting Dave Hansen (dave-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org): > > + /* s390_fp_regs_t */ > > + __u32 fpc; > > + struct { > > + float f; > > + double d; > > + __u64 ui; > > + __u32 fp_hi; > > + __u32 fp_lo; > > + } fprs[NUM_FPRS]; > > I don't see a lot of floating point code go by, so I'm a bit stupid on > this. But, this doesn't make a lot of sense to me. Are there really > parallel sets of float and double registers? Or, do we want some kind > of union here? Heh, yeah, freg_t in arch/s390/include/asm/ptrace.h is a union. Which as you note ought to have been obvious based on the union members. My bad. > I'm struggling to figure out how we're going to keep up with keeping > checkpoint and restart synchronized. This is all pretty mindless > copying in both directions. Is it possible and worthwhile for us to > just define it once, but use it for both checkpoint and restart with > some macro trickery? > > #define CKPT 1 > #define RST 2 > > #define CR_COPY(op, a, b) > do { > WARN_ON(sizeof(a) != sizeof(b)); > if (op == CKPT) > memcpy(&a, &b, sizeof(b)); > else > memcpy(&b, &a, sizeof(b)); > } while (0); > > void s390_cr_regs(int op, struct task_struct *thread, *hh) > { > CR_COPY(op, thread->per_info.lowcore.words.perc_atmid, hh->perc_atmid); > CR_COPY(op, thread->per_info.lowcore.words.address, hh->address); > CR_COPY(op, thread->per_info.lowcore.words.access_id, hh->access_id); > ... > } > > int cr_read_cpu(struct cr_ctx *ctx) > { > s390_cr_regs(RST, thread, hh); > } > > int cr_save_cpu(struct cr_ctx *ctx) > { > s390_cr_regs(CKPT, thread, hh); > } > > That works for both regular variables and for arrays. Is the decreased > line count worth the weirdo non-standard abstraction? Well it's not necessarily nicer to read, but you're right it will almost doubtless prevent at least one bug resulting from update snafu between checkpoint and restart code. Great idea! Should we try and do that as much as possible for the x86 code too? -serge ^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2009-02-24 15:24 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-02-23 21:39 [PATCH] c/r: define s390-specific checkpoint-restart code (v4) Dan Smith
[not found] ` <1235425143-9949-1-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-02-23 22:31 ` Dave Hansen
2009-02-23 23:04 ` Dan Smith
[not found] ` <87fxi492lr.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
2009-02-24 0:27 ` Dave Hansen
2009-02-24 15:24 ` Serge E. Hallyn
2009-02-24 15:23 ` Serge E. Hallyn
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.