All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dave Hansen <dave@linux.vnet.ibm.com>
To: arnd@arndb.de
Cc: orenl@cs.columbia.edu, jeremy@goop.org,
	containers@lists.linux-foundation.org,
	linux-kernel@vger.kernel.org,
	Dave Hansen <dave@linux.vnet.ibm.com>
Subject: [RFC v2][PATCH 3/9] checkpoint/restart: x86 support
Date: Wed, 20 Aug 2008 12:26:00 -0700	[thread overview]
Message-ID: <20080820192600.B0F4DD62@nimitz> (raw)
In-Reply-To: <20080820192557.98788FAB@nimitz>


The original version of Oren's patch contained a good hunk
of #ifdefs.  I've extracted all of those and created a bit
of an API for new architectures to follow.

Leaving Oren's sign-off because this is all still his code,
even though he hasn't seen it mangled like this before.

Signed-off-by: Oren Laadan <orenl@cs.columbia.edu>
---

 oren-cr.git-dave/checkpoint/Makefile     |    1 
 oren-cr.git-dave/checkpoint/checkpoint.c |    7 
 oren-cr.git-dave/checkpoint/ckpt.h       |    3 
 oren-cr.git-dave/checkpoint/ckpt_arch.h  |    6 
 oren-cr.git-dave/checkpoint/restart.c    |    7 
 oren-cr.git-dave/checkpoint/x86.c        |  270 +++++++++++++++++++++++++++++++
 oren-cr.git-dave/include/asm-x86/ckpt.h  |   46 +++++
 7 files changed, 340 insertions(+)

diff -puN checkpoint/checkpoint.c~0004-checkpoint-restart-x86-support checkpoint/checkpoint.c
--- oren-cr.git/checkpoint/checkpoint.c~0004-checkpoint-restart-x86-support	2008-08-20 12:12:49.000000000 -0700
+++ oren-cr.git-dave/checkpoint/checkpoint.c	2008-08-20 12:12:49.000000000 -0700
@@ -20,6 +20,7 @@
 
 #include "ckpt.h"
 #include "ckpt_hdr.h"
+#include "ckpt_arch.h"
 
 /**
  * cr_get_fname - return pathname of a given file
@@ -184,6 +185,12 @@ static int cr_write_task(struct cr_ctx *
 
 	ret = cr_write_task_struct(ctx, t);
 	pr_debug("ret (task_struct) %d\n", ret);
+	if (!ret)
+		ret = cr_write_thread(ctx, t);
+	pr_debug("ret (thread) %d\n", ret);
+	if (!ret)
+		ret = cr_write_cpu(ctx, t);
+	pr_debug("ret (cpu) %d\n", ret);
 
 	return ret;
 }
diff -puN /dev/null checkpoint/ckpt_arch.h
--- /dev/null	2008-04-22 10:49:52.000000000 -0700
+++ oren-cr.git-dave/checkpoint/ckpt_arch.h	2008-08-20 12:12:49.000000000 -0700
@@ -0,0 +1,6 @@
+#include "ckpt.h"
+
+int cr_write_thread(struct cr_ctx *ctx, struct task_struct *t);
+int cr_write_cpu(struct cr_ctx *ctx, struct task_struct *t);
+int cr_read_thread(struct cr_ctx *ctx);
+int cr_read_cpu(struct cr_ctx *ctx);
diff -puN checkpoint/ckpt.h~0004-checkpoint-restart-x86-support checkpoint/ckpt.h
--- oren-cr.git/checkpoint/ckpt.h~0004-checkpoint-restart-x86-support	2008-08-20 12:12:49.000000000 -0700
+++ oren-cr.git-dave/checkpoint/ckpt.h	2008-08-20 12:12:49.000000000 -0700
@@ -54,6 +54,9 @@ extern int cr_kwrite(struct cr_ctx *ctx,
 extern int cr_uread(struct cr_ctx *ctx, void *buf, int count);
 extern int cr_kread(struct cr_ctx *ctx, void *buf, int count);
 
+extern void *cr_hbuf_get(struct cr_ctx *ctx, int size);
+extern void cr_hbuf_put(struct cr_ctx *ctx, int n);
+
 struct cr_hdr;
 
 extern int cr_write_obj(struct cr_ctx *ctx, struct cr_hdr *h, void *buf);
diff -puN checkpoint/Makefile~0004-checkpoint-restart-x86-support checkpoint/Makefile
--- oren-cr.git/checkpoint/Makefile~0004-checkpoint-restart-x86-support	2008-08-20 12:12:49.000000000 -0700
+++ oren-cr.git-dave/checkpoint/Makefile	2008-08-20 12:12:49.000000000 -0700
@@ -1 +1,2 @@
 obj-y += sys.o checkpoint.o restart.o
+obj-$(CONFIG_X86) += x86.o
diff -puN checkpoint/restart.c~0004-checkpoint-restart-x86-support checkpoint/restart.c
--- oren-cr.git/checkpoint/restart.c~0004-checkpoint-restart-x86-support	2008-08-20 12:12:49.000000000 -0700
+++ oren-cr.git-dave/checkpoint/restart.c	2008-08-20 12:12:49.000000000 -0700
@@ -22,6 +22,7 @@
 
 #include "ckpt.h"
 #include "ckpt_hdr.h"
+#include "ckpt_arch.h"
 
 /**
  * cr_hbuf_get - reserve space on the hbuf
@@ -172,6 +173,12 @@ static int cr_read_task(struct cr_ctx *c
 
 	ret = cr_read_task_struct(ctx);
 	pr_debug("ret (task_struct) %d\n", ret);
+	if (!ret)
+		ret = cr_read_thread(ctx);
+	pr_debug("ret (thread) %d\n", ret);
+	if (!ret)
+		ret = cr_read_cpu(ctx);
+	pr_debug("ret (cpu) %d\n", ret);
 
 	return ret;
 }
diff -puN /dev/null checkpoint/x86.c
--- /dev/null	2008-04-22 10:49:52.000000000 -0700
+++ oren-cr.git-dave/checkpoint/x86.c	2008-08-20 12:12:49.000000000 -0700
@@ -0,0 +1,270 @@
+#include <asm/ckpt.h>
+#include <asm/desc.h>
+#include <asm/i387.h>
+
+#include "ckpt.h"
+#include "ckpt_hdr.h"
+
+/*
+ * cr_write_thread - dump the thread_struct (TLS descriptors) of task @t.
+ * Returns cr_write_obj()'s error if negative, else cr_kwrite()'s result.
+ */
+int cr_write_thread(struct cr_ctx *ctx, struct task_struct *t)
+{
+	struct thread_struct *thread = &t->thread;
+	struct cr_hdr_thread *hh = ctx->tbuf;
+	struct cr_hdr h;
+	int used = 0;
+	int i, ret;
+
+	/* count the TLS slots that hold a non-empty descriptor */
+	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) {
+		const struct desc_struct *d = &thread->tls_array[i];
+
+		if (d->a || d->b)
+			used++;
+	}
+
+	h.type = CR_HDR_THREAD;
+	h.len = sizeof(*hh);
+	h.id = ctx->pid;
+
+	/* record the array geometry next to the count of used slots */
+	hh->gdt_entry_tls_entries = GDT_ENTRY_TLS_ENTRIES;
+	hh->sizeof_tls_array = sizeof(thread->tls_array);
+	hh->ntls = used;
+
+	ret = cr_write_obj(ctx, &h, hh);
+	if (ret < 0)
+		return ret;
+
+	/* the whole array is dumped, empty slots included */
+	ret = cr_kwrite(ctx, thread->tls_array, sizeof(thread->tls_array));
+	pr_debug("ntls %d\n", used);
+
+	/* IGNORE RESTART BLOCKS FOR NOW ... */
+	return ret;
+}
+
+/*
+ * cr_write_cpu - dump the cpu state and registers of a given task
+ * Fills a CR_HDR_CPU record in ctx->tbuf; returns cr_write_obj()'s result.
+ */
+int cr_write_cpu(struct cr_ctx *ctx, struct task_struct *t)
+{
+	struct thread_info *thread_info = task_thread_info(t);
+	struct thread_struct *thread = &t->thread;
+	struct pt_regs *regs = task_pt_regs(t);
+	struct cr_hdr_cpu *hh = ctx->tbuf;
+	struct cr_hdr h;
+
+	h.type = CR_HDR_CPU;
+	h.len = sizeof(*hh);
+	h.id = ctx->pid;
+
+	/* tbuf is reused scratch: zero the record so that padding, and
+	 * has_fxsr/xstate when !used_math, never carry stale bytes out */
+	memset(hh, 0, sizeof(*hh));
+
+	hh->bx = regs->bx;
+	hh->cx = regs->cx;
+	hh->dx = regs->dx;
+	hh->si = regs->si;
+	hh->di = regs->di;
+	hh->bp = regs->bp;
+	hh->ax = regs->ax;
+	hh->ds = regs->ds;
+	hh->es = regs->es;
+	hh->orig_ax = regs->orig_ax;
+	hh->ip = regs->ip;
+	hh->cs = regs->cs;
+	hh->flags = regs->flags;
+	hh->sp = regs->sp;
+	hh->ss = regs->ss;
+
+	/* for checkpoint in process context (from within a container)
+	   the GS and FS registers should be saved from the hardware;
+	   otherwise they are already saved on the thread structure */
+	if (t == current) {
+		savesegment(gs, hh->gs);
+		savesegment(fs, hh->fs);
+	} else {
+		hh->gs = thread->gs;
+		hh->fs = thread->fs;
+	}
+
+	/*
+	 * for checkpoint in process context (from within a container),
+	 * the actual syscall is taking place at this very moment; so
+	 * we (optimistically) substitute the future return value (0) of
+	 * this syscall into ax, so that upon restart it will succeed
+	 * (or it will endlessly retry checkpoint...)
+	 */
+	if (t == current) {
+		/* NOTE(review): orig_ax is a __u64 here, so this cannot fire */
+		BUG_ON(hh->orig_ax < 0);
+		hh->ax = 0;
+	}
+
+	preempt_disable();
+
+	/* i387 + MMU + SSE logic */
+	hh->used_math = tsk_used_math(t) ? 1 : 0;
+	if (hh->used_math) {
+		/* normally, no need to unlazy_fpu(), since TS_USEDFPU flag
+		 * has been cleared when task was context-switched out...
+		 * except if we are in process context, in which case we do */
+		if (thread_info->status & TS_USEDFPU)
+			unlazy_fpu(current);
+
+		hh->has_fxsr = cpu_has_fxsr;
+		memcpy(&hh->xstate, &thread->xstate, sizeof(thread->xstate));
+	}
+
+	/* debug regs: for checkpoint in process context (from within a
+	 * container), get the actual registers; otherwise the saved values */
+	if (t == current) {
+		get_debugreg(hh->debugreg0, 0);
+		get_debugreg(hh->debugreg1, 1);
+		get_debugreg(hh->debugreg2, 2);
+		get_debugreg(hh->debugreg3, 3);
+		get_debugreg(hh->debugreg6, 6);
+		get_debugreg(hh->debugreg7, 7);
+	} else {
+		hh->debugreg0 = thread->debugreg0;
+		hh->debugreg1 = thread->debugreg1;
+		hh->debugreg2 = thread->debugreg2;
+		hh->debugreg3 = thread->debugreg3;
+		hh->debugreg6 = thread->debugreg6;
+		hh->debugreg7 = thread->debugreg7;
+	}
+
+	/* TIF_DEBUG is a bit number; _TIF_DEBUG is the matching flags mask */
+	hh->uses_debug = !!(thread_info->flags & _TIF_DEBUG);
+
+	preempt_enable();
+
+	pr_debug("math %d debug %d\n", hh->used_math, hh->uses_debug);
+	return cr_write_obj(ctx, &h, hh);
+}
+
+/*
+ * cr_read_thread - read the thread_struct (TLS array) into the current task
+ * Returns 0, -EINVAL on layout mismatch, or a failed read's negative result.
+ * NOTE(review): hh is never released with cr_hbuf_put() - confirm intended.
+ */
+int cr_read_thread(struct cr_ctx *ctx)
+{
+	struct cr_hdr_thread *hh = cr_hbuf_get(ctx, sizeof(*hh));
+	struct thread_struct *thread = &current->thread;
+	struct desc_struct *buf = ctx->tbuf;
+	int size = sizeof(*buf) * GDT_ENTRY_TLS_ENTRIES;
+	int cpu, ret;
+
+	ret = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_THREAD);
+	if (ret < 0)
+		return ret;
+
+	pr_debug("ntls %d\n", hh->ntls);
+
+	if (hh->gdt_entry_tls_entries != GDT_ENTRY_TLS_ENTRIES ||
+	    hh->sizeof_tls_array != sizeof(thread->tls_array) ||
+	    hh->ntls < 0 || hh->ntls > GDT_ENTRY_TLS_ENTRIES)
+		return -EINVAL;
+
+	/*
+	 * cr_write_thread() dumps the whole tls_array even when ntls == 0,
+	 * so always consume it or the next record is read at a wrong offset
+	 */
+	BUG_ON(size > CR_TBUF_TOTAL);
+	ret = cr_kread(ctx, buf, size);
+	if (ret < 0)
+		return ret;
+	if (hh->ntls == 0)
+		return 0;
+
+	/* restore TLS by hand: why convert to struct user_desc if
+	 * sys_set_thread_entry() will convert it back ?
+	 * FIX: add sanity checks (eg. that values make sense, that
+	 * we don't overwrite old values, etc) */
+	cpu = get_cpu();
+	memcpy(thread->tls_array, buf, size);
+	load_TLS(thread, cpu);
+	put_cpu();
+	return 0;
+}
+
+/*
+ * cr_read_cpu - read the cpu state and registers for the current task
+ * Returns 0, -EINVAL on an FPU format mismatch, or a failed read's result.
+ */
+int cr_read_cpu(struct cr_ctx *ctx)
+{
+	struct cr_hdr_cpu *hh = cr_hbuf_get(ctx, sizeof(*hh));
+	struct task_struct *t = current;
+	struct thread_struct *thread = &t->thread;
+	struct pt_regs *regs = task_pt_regs(t);
+	int ret;
+
+	ret = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_CPU);
+	if (ret < 0)
+		return ret;
+
+	/* FIX: sanity check for sensitive registers (eg. eflags) */
+
+	regs->bx = hh->bx;
+	regs->cx = hh->cx;
+	regs->dx = hh->dx;
+	regs->si = hh->si;
+	regs->di = hh->di;
+	regs->bp = hh->bp;
+	regs->ax = hh->ax;
+	regs->ds = hh->ds;
+	regs->es = hh->es;
+	regs->orig_ax = hh->orig_ax;
+	regs->ip = hh->ip;
+	regs->cs = hh->cs;
+	regs->flags = hh->flags;
+	regs->sp = hh->sp;
+	regs->ss = hh->ss;
+
+	thread->gs = hh->gs;
+	thread->fs = hh->fs;
+	loadsegment(gs, hh->gs);
+	loadsegment(fs, hh->fs);
+
+	pr_debug("math %d debug %d\n", hh->used_math, hh->uses_debug);
+
+	/* FIX: this should work ... (someone double check !) */
+
+	preempt_disable();
+
+	/* i387 + MMU + SSE */
+	__clear_fpu(t);		/* in case we used FPU in user mode */
+	if (!hh->used_math)
+		clear_used_math();
+	else {
+		if (hh->has_fxsr != cpu_has_fxsr) {
+			force_sig(SIGFPE, t);
+			/* must not return with preemption still disabled */
+			preempt_enable();
+			return -EINVAL;
+		}
+		memcpy(&thread->xstate, &hh->xstate, sizeof(thread->xstate));
+		set_used_math();
+	}
+
+	/* debug regs (FIX: loaded into hardware unchecked, like the regs above) */
+	if (hh->uses_debug) {
+		set_debugreg(hh->debugreg0, 0);
+		set_debugreg(hh->debugreg1, 1);
+		set_debugreg(hh->debugreg2, 2);
+		set_debugreg(hh->debugreg3, 3);
+		set_debugreg(hh->debugreg6, 6);
+		set_debugreg(hh->debugreg7, 7);
+	}
+
+	preempt_enable();
+
+	return 0;
+}
diff -puN /dev/null include/asm-x86/ckpt.h
--- /dev/null	2008-04-22 10:49:52.000000000 -0700
+++ oren-cr.git-dave/include/asm-x86/ckpt.h	2008-08-20 12:12:49.000000000 -0700
@@ -0,0 +1,46 @@
+#ifndef __ASM_X86_CKPT_H
+#define __ASM_X86_CKPT_H
+
+#include <asm/processor.h>
+
+struct cr_hdr_thread {	/* record payload filled in by cr_write_thread() */
+	/* NEED: restart blocks */
+	__s16 gdt_entry_tls_entries;	/* GDT_ENTRY_TLS_ENTRIES at checkpoint */
+	__s16 sizeof_tls_array;		/* sizeof(thread->tls_array) at checkpoint */
+	__s16 ntls;	/* number of non-empty TLS entries in the array dumped after this */
+};
+
+struct cr_hdr_cpu {	/* record payload filled in by cr_write_cpu() */
+	__u64 bx;	/* bx .. ss below are copied from the task's pt_regs */
+	__u64 cx;
+	__u64 dx;
+	__u64 si;
+	__u64 di;
+	__u64 bp;
+	__u64 ax;	/* forced to 0 when the task checkpoints itself */
+	__u64 ds;
+	__u64 es;
+	__u64 orig_ax;
+	__u64 ip;
+	__u64 cs;
+	__u64 flags;
+	__u64 sp;
+	__u64 ss;
+	__u64 fs;	/* fs/gs: read from hardware if task is current, */
+	__u64 gs;	/* otherwise taken from its thread_struct */
+
+	__u64 debugreg0;	/* debug regs: hardware values if task is */
+	__u64 debugreg1;	/* current, else thread_struct copies */
+	__u64 debugreg2;
+	__u64 debugreg3;
+	__u64 debugreg6;
+	__u64 debugreg7;
+
+	__u8 uses_debug;	/* debug regs are restored only if nonzero */
+
+	__u8 used_math;		/* tsk_used_math() at checkpoint */
+	__u8 has_fxsr;		/* cpu_has_fxsr at checkpoint; set only if used_math */
+	union thread_xstate xstate;	/* i387; copied only if used_math */
+};
+
+#endif /* __ASM_X86_CKPT_H */
_

  parent reply	other threads:[~2008-08-20 19:26 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-08-20 19:25 [RFC v2][PATCH 0/9] kernel-based checkpoint-restart Dave Hansen
2008-08-20 19:25 ` [RFC v2][PATCH 1/9] checkpoint-restart: general infrastructure Dave Hansen
2008-08-20 19:25   ` Dave Hansen
2008-08-29  3:34   ` Matt Helsley
2008-08-29  3:34   ` Matt Helsley
2008-08-20 19:25 ` [RFC v2][PATCH 2/9] Remove CAP_SYS_ADMIN for checkpoint/restart Dave Hansen
2008-08-20 19:25 ` Dave Hansen
2008-08-20 19:26 ` Dave Hansen [this message]
2008-08-20 19:26 ` [RFC v2][PATCH 3/9] checkpoint/restart: x86 support Dave Hansen
2008-08-20 19:26 ` [RFC v2][PATCH 4/9] checkpoint/restart: memory management Dave Hansen
2008-08-20 19:26 ` Dave Hansen
2008-08-20 19:26 ` [RFC v2][PATCH 5/9] Create trivial sys_checkpoint and sys_restore system calls Dave Hansen
2008-08-20 19:26   ` Dave Hansen
2008-08-20 19:26 ` [RFC v2][PATCH 6/9] Simplify filename handling for now Dave Hansen
2008-08-20 19:26 ` Dave Hansen
2008-08-20 19:26 ` [RFC v2][PATCH 7/9] remove temporary buffer structures Dave Hansen
2008-08-20 19:26   ` Dave Hansen
2008-08-20 19:26 ` [RFC v2][PATCH 8/9] Remove some BUG_ON()s that need some proper error handling instead Dave Hansen
2008-08-20 20:29   ` Dave Hansen
2008-08-20 20:29   ` Dave Hansen
2008-08-29  4:18   ` Matt Helsley
2008-08-29  4:18   ` Matt Helsley
2008-08-20 19:26 ` Dave Hansen
2008-08-20 19:26 ` [RFC v2][PATCH 9/9] remove ->cksum field Dave Hansen
2008-08-20 19:26 ` Dave Hansen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080820192600.B0F4DD62@nimitz \
    --to=dave@linux.vnet.ibm.com \
    --cc=arnd@arndb.de \
    --cc=containers@lists.linux-foundation.org \
    --cc=jeremy@goop.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=orenl@cs.columbia.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.