All of lore.kernel.org
 help / color / mirror / Atom feed
* External checkpoint patch
@ 2008-10-21 18:16 Dave Hansen
  2008-10-21 22:49 ` Oren Laadan
  2008-10-21 23:07 ` Oren Laadan
  0 siblings, 2 replies; 5+ messages in thread
From: Dave Hansen @ 2008-10-21 18:16 UTC (permalink / raw)
  To: Oren Laadan; +Cc: containers, Cedric Le Goater

Oren,

Could you take a look over Cedric's external checkpoint patch?

http://git.kernel.org/?p=linux/kernel/git/daveh/linux-2.6-cr.git;a=commit;h=28ffabbc17d3641eee2a7eb66f714c266c400263

It seems pretty small to me.

--

From a2f88cbc023e2e9be5eb554fe64078a3d7d2ade6 Mon Sep 17 00:00:00 2001
From: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
Date: Tue, 30 Sep 2008 10:45:13 +0200
Subject: [PATCH] enable external checkpoint

Modify self checkpoint syscall to allow another process to initiate
checkpoint

Signed-off-by: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
---
 arch/x86/mm/checkpoint.c   |    2 +-
 checkpoint/checkpoint.c    |    2 +-
 checkpoint/sys.c           |   28 +++++++++++++++++++++-------
 include/linux/checkpoint.h |    1 +
 4 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/arch/x86/mm/checkpoint.c b/arch/x86/mm/checkpoint.c
index d6c5263..202a579 100644
--- a/arch/x86/mm/checkpoint.c
+++ b/arch/x86/mm/checkpoint.c
@@ -164,7 +164,7 @@ void cr_write_cpu_fpu(struct cr_hdr_cpu *hh, struct task_struct *t)
 		 * except if we are in process context, in which case we do
 		 */
 		if (thread_info->status & TS_USEDFPU)
-			unlazy_fpu(current);
+			unlazy_fpu(t);
 
 		hh->has_fxsr = cpu_has_fxsr;
 		memcpy(&hh->xstate, &thread->xstate, sizeof(thread->xstate));
diff --git a/checkpoint/checkpoint.c b/checkpoint/checkpoint.c
index 87420dc..c4e8242 100644
--- a/checkpoint/checkpoint.c
+++ b/checkpoint/checkpoint.c
@@ -227,7 +227,7 @@ int do_checkpoint(struct cr_ctx *ctx)
 	ret = cr_write_head(ctx);
 	if (ret < 0)
 		goto out;
-	ret = cr_write_task(ctx, current);
+	ret = cr_write_task(ctx, ctx->task);
 	if (ret < 0)
 		goto out;
 	ret = cr_write_tail(ctx);
diff --git a/checkpoint/sys.c b/checkpoint/sys.c
index 4fcd3e0..ec028c6 100644
--- a/checkpoint/sys.c
+++ b/checkpoint/sys.c
@@ -176,7 +176,8 @@ void cr_ctx_free(struct cr_ctx *ctx)
 	kfree(ctx);
 }
 
-struct cr_ctx *cr_ctx_alloc(pid_t pid, int fd, unsigned long flags)
+struct cr_ctx *cr_ctx_alloc(struct task_struct *t, pid_t pid, int fd,
+			    unsigned long flags)
 {
 	struct cr_ctx *ctx;
 
@@ -184,6 +185,7 @@ struct cr_ctx *cr_ctx_alloc(pid_t pid, int fd, unsigned long flags)
 	if (!ctx)
 		return ERR_PTR(-ENOMEM);
 
+	ctx->task = t;
 	ctx->file = fget(fd);
 	if (!ctx->file) {
 		cr_ctx_free(ctx);
@@ -205,7 +207,7 @@ struct cr_ctx *cr_ctx_alloc(pid_t pid, int fd, unsigned long flags)
 	 * assume checkpointer is in container's root vfs
 	 * FIXME: this works for now, but will change with real containers
 	 */
-	ctx->vfsroot = &current->fs->root;
+	ctx->vfsroot = &t->fs->root;
 	path_get(ctx->vfsroot);
 
 	INIT_LIST_HEAD(&ctx->pgarr_list);
@@ -231,20 +233,32 @@ asmlinkage long sys_checkpoint(pid_t pid, int fd, unsigned long flags)
 {
 	struct cr_ctx *ctx;
 	int ret;
+	struct task_struct *t;
 
 	/* no flags for now */
 	if (flags)
 		return -EINVAL;
 
-	ctx = cr_ctx_alloc(pid, fd, flags | CR_CTX_CKPT);
-	if (IS_ERR(ctx))
-		return PTR_ERR(ctx);
+	read_lock(&tasklist_lock);
+	t = find_task_by_vpid(pid);
+	if (t)
+		get_task_struct(t);
+	read_unlock(&tasklist_lock);
+
+	if (!t)
+		return -ESRCH;
 
+	ctx = cr_ctx_alloc(t, pid, fd, flags | CR_CTX_CKPT);
+	if (IS_ERR(ctx)) {
+		ret = PTR_ERR(ctx);
+		goto out;
+	}
+
 	ret = do_checkpoint(ctx);
 
 	if (!ret)
 		ret = ctx->crid;
-
+out:
 	cr_ctx_free(ctx);
 	return ret;
 }
@@ -267,7 +281,7 @@ asmlinkage long sys_restart(int crid, int fd, unsigned long flags)
 	if (flags)
 		return -EINVAL;
 
-	ctx = cr_ctx_alloc(crid, fd, flags | CR_CTX_RSTR);
+	ctx = cr_ctx_alloc(current, crid, fd, flags | CR_CTX_RSTR);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
index 6c1e87f..2983700 100644
--- a/include/linux/checkpoint.h
+++ b/include/linux/checkpoint.h
@@ -17,6 +17,7 @@
 
 struct cr_ctx {
 	pid_t pid;		/* container identifier */
+	struct task_struct *task;
 	int crid;		/* unique checkpoint id */
 
 	unsigned long flags;
-- 
1.5.5.1



-- Dave

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: External checkpoint patch
  2008-10-21 18:16 External checkpoint patch Dave Hansen
@ 2008-10-21 22:49 ` Oren Laadan
  2008-10-21 23:07 ` Oren Laadan
  1 sibling, 0 replies; 5+ messages in thread
From: Oren Laadan @ 2008-10-21 22:49 UTC (permalink / raw)
  To: Dave Hansen; +Cc: containers, Cedric Le Goater


Dave Hansen wrote:
> Oren,
> 
> Could you take a look over Cedric's external checkpoint patch?
> 
> http://git.kernel.org/?p=linux/kernel/git/daveh/linux-2.6-cr.git;a=commit;h=28ffabbc17d3641eee2a7eb66f714c266c400263
> 
> It seems pretty small to me.
> 
> --
> 
>>From a2f88cbc023e2e9be5eb554fe64078a3d7d2ade6 Mon Sep 17 00:00:00 2001
> From: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
> Date: Tue, 30 Sep 2008 10:45:13 +0200
> Subject: [PATCH] enable external checkpoint
> 
> Modify self checkpoint syscall to allow another process to initiate
> checkpoint
> 
> Signed-off-by: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
> ---
>  arch/x86/mm/checkpoint.c   |    2 +-
>  checkpoint/checkpoint.c    |    2 +-
>  checkpoint/sys.c           |   28 +++++++++++++++++++++-------
>  include/linux/checkpoint.h |    1 +
>  4 files changed, 24 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/x86/mm/checkpoint.c b/arch/x86/mm/checkpoint.c
> index d6c5263..202a579 100644
> --- a/arch/x86/mm/checkpoint.c
> +++ b/arch/x86/mm/checkpoint.c
> @@ -164,7 +164,7 @@ void cr_write_cpu_fpu(struct cr_hdr_cpu *hh, struct task_struct *t)
>  		 * except if we are in process context, in which case we do
>  		 */
>  		if (thread_info->status & TS_USEDFPU)
> -			unlazy_fpu(current);
> +			unlazy_fpu(t);

This is not needed (only needed in self checkpoint).

>  
>  		hh->has_fxsr = cpu_has_fxsr;
>  		memcpy(&hh->xstate, &thread->xstate, sizeof(thread->xstate));
> diff --git a/checkpoint/checkpoint.c b/checkpoint/checkpoint.c
> index 87420dc..c4e8242 100644
> --- a/checkpoint/checkpoint.c
> +++ b/checkpoint/checkpoint.c
> @@ -227,7 +227,7 @@ int do_checkpoint(struct cr_ctx *ctx)
>  	ret = cr_write_head(ctx);
>  	if (ret < 0)
>  		goto out;
> -	ret = cr_write_task(ctx, current);
> +	ret = cr_write_task(ctx, ctx->task);
>  	if (ret < 0)
>  		goto out;
>  	ret = cr_write_tail(ctx);
> diff --git a/checkpoint/sys.c b/checkpoint/sys.c
> index 4fcd3e0..ec028c6 100644
> --- a/checkpoint/sys.c
> +++ b/checkpoint/sys.c
> @@ -176,7 +176,8 @@ void cr_ctx_free(struct cr_ctx *ctx)
>  	kfree(ctx);
>  }
>  
> -struct cr_ctx *cr_ctx_alloc(pid_t pid, int fd, unsigned long flags)
> +struct cr_ctx *cr_ctx_alloc(struct task_struct *t, pid_t pid, int fd,
> +			    unsigned long flags)
>  {
>  	struct cr_ctx *ctx;
>  
> @@ -184,6 +185,7 @@ struct cr_ctx *cr_ctx_alloc(pid_t pid, int fd, unsigned long flags)
>  	if (!ctx)
>  		return ERR_PTR(-ENOMEM);
>  
> +	ctx->task = t;
>  	ctx->file = fget(fd);
>  	if (!ctx->file) {
>  		cr_ctx_free(ctx);
> @@ -205,7 +207,7 @@ struct cr_ctx *cr_ctx_alloc(pid_t pid, int fd, unsigned long flags)
>  	 * assume checkpointer is in container's root vfs
>  	 * FIXME: this works for now, but will change with real containers
>  	 */
> -	ctx->vfsroot = &current->fs->root;
> +	ctx->vfsroot = &t->fs->root;
>  	path_get(ctx->vfsroot);
>  
>  	INIT_LIST_HEAD(&ctx->pgarr_list);
> @@ -231,20 +233,32 @@ asmlinkage long sys_checkpoint(pid_t pid, int fd, unsigned long flags)
>  {
>  	struct cr_ctx *ctx;
>  	int ret;
> +	struct task_struct *t;
>  
>  	/* no flags for now */
>  	if (flags)
>  		return -EINVAL;
>  
> -	ctx = cr_ctx_alloc(pid, fd, flags | CR_CTX_CKPT);
> -	if (IS_ERR(ctx))
> -		return PTR_ERR(ctx);
> +	read_lock(&tasklist_lock);
> +	t = find_task_by_vpid(pid);
> +	if (t)
> +		get_task_struct(t);
> +	read_unlock(&tasklist_lock);

missing put_task_struct() ?
missing security check ?

> +
> +	if (!t)
> +		return -ESRCH;
>  
> +	ctx = cr_ctx_alloc(t, pid, fd, flags | CR_CTX_CKPT);
> +	if (IS_ERR(ctx)) {
> +		ret = PTR_ERR(ctx);
> +		goto out;
> +	}
> +
>  	ret = do_checkpoint(ctx);
>  
>  	if (!ret)
>  		ret = ctx->crid;
> -
> +out:
>  	cr_ctx_free(ctx);
>  	return ret;
>  }
> @@ -267,7 +281,7 @@ asmlinkage long sys_restart(int crid, int fd, unsigned long flags)
>  	if (flags)
>  		return -EINVAL;
>  
> -	ctx = cr_ctx_alloc(crid, fd, flags | CR_CTX_RSTR);
> +	ctx = cr_ctx_alloc(current, crid, fd, flags | CR_CTX_RSTR);
>  	if (IS_ERR(ctx))
>  		return PTR_ERR(ctx);
>  
> diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
> index 6c1e87f..2983700 100644
> --- a/include/linux/checkpoint.h
> +++ b/include/linux/checkpoint.h
> @@ -17,6 +17,7 @@
>  
>  struct cr_ctx {
>  	pid_t pid;		/* container identifier */
> +	struct task_struct *task;
>  	int crid;		/* unique checkpoint id */
>  
>  	unsigned long flags;

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: External checkpoint patch
  2008-10-21 18:16 External checkpoint patch Dave Hansen
  2008-10-21 22:49 ` Oren Laadan
@ 2008-10-21 23:07 ` Oren Laadan
       [not found]   ` <Pine.LNX.4.64.0810211857140.22085-CXF6herHY6ykSYb+qCZC/1i27PF6R63G9nwVQlTi/Pw@public.gmane.org>
  1 sibling, 1 reply; 5+ messages in thread
From: Oren Laadan @ 2008-10-21 23:07 UTC (permalink / raw)
  To: Dave Hansen; +Cc: containers, Cedric Le Goater


> Oren,
> 
> Could you take a look over Cedric's external checkpoint patch?
> 
> http://git.kernel.org/?p=linux/kernel/git/daveh/linux-2.6-cr.git;a=commit;h=28ffabbc17d3641eee2a7eb66f714c266c400263
> 
> It seems pretty small to me.
> 
> --

I committed a couple of patches on top of ckpt-v7 that do this.
(the first one is actually a simple cleanup that is unrelated).

see:
http://git.ncl.cs.columbia.edu/?p=linux-cr-dev.git;a=shortlog;h=refs/heads/ckpt-v7
(or git://git.ncl.cs.columbia.edu/pub/git/linux-cr-dev.git ckpt-v7)

*  "Minor simplification of cr_ctx_alloc(), cr_ctx_free()"
*  "External checkpoint of a task other than ourself"

To checkpoint another task, you need to freeze that other task, or to
SIGSTOP it (if using a kernel without freezer cgroup).

Restart remains the same except the the original PID is not preserved
(because we haven't solved yet the fork-with-specific-pid issue).

In reality, you would create a new names space, and have the task running
in there call 'sys_restart()'.

Below are test1.c, ckpt.c (to checkpoint), and rstr.c (to restart), as
well as the two patches.

I tested it this way:
	$ ./test.1 &
	[1] 3493

	$ kill -STOP 3493
	$ ./ckpt 3493 > ckpt.image
	
	$ mv /tmp/cr-test.out /tmp/cr-test.out.orig
	$ cp /tmp/cr-test.out.orig /tmp/cr-test.out

	$ kill -CONT 3493

	$ ./rstr < ckpt.image
now compare the output of the two output files

Oren.

------------------------------------------------------------------------

#define _GNU_SOURCE        /* or _BSD_SOURCE or _SVID_SOURCE */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <math.h>
#include <asm/unistd.h>

#define OUTFILE  "/tmp/cr-test.out"

int main(int argc, char *argv[])
{
	FILE *file;
	float a;
	int i;

	close(0);
	close(1);
	close(2);

	unlink(OUTFILE);
	file = fopen(OUTFILE, "w+");
	if (!file) {
		perror("open");
		exit(1);
	}
	if (dup2(0,2) < 0) {
		perror("dup2");
		exit(1);
	}

	a = sqrt(2.53 * (getpid() / 1.21));

	fprintf(file, "hello, world (%.2f)!\n", a);
	fflush(file);

	for (i = 0; i < 1000; i++) {
		sleep(1);
		/* make the fpu work ->  a = a + i/10  */
		a = sqrt(a*a + 2*a*(i/10.0) + i*i/100.0);
		fprintf(file, "count %d (%.2f)!\n", i, a);
		fflush(file);
	}
		
	fprintf(file, "world, hello (%.2f) !\n", a);
	fflush(file);

	return 0;
}

------------------------------------------------------------------------

#define _GNU_SOURCE        /* or _BSD_SOURCE or _SVID_SOURCE */

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <asm/unistd.h>
#include <sys/syscall.h>

int main(int argc, char *argv[])
{
	pid_t pid;
	int ret;

	if (argc != 2) {
		printf("usage: ckpt PID\n");
		exit(1);
	}

	pid = atoi(argv[1]);
	if (pid <= 0) {
		printf("invalid pid\n");
		exit(1);
	}

	ret = syscall(__NR_checkpoint, pid, STDOUT_FILENO, 0);

	if (ret < 0)
		perror("checkpoint");
	else
		printf("checkpoint id %d\n", ret);

	return (ret > 0 ? 0 : 1);
}

------------------------------------------------------------------------

#define _GNU_SOURCE        /* or _BSD_SOURCE or _SVID_SOURCE */

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <asm/unistd.h>
#include <sys/syscall.h>

int main(int argc, char *argv[])
{
	pid_t pid = getpid();
	int ret;

	ret = syscall(__NR_restart, pid, STDIN_FILENO, 0);
	if (ret < 0)
		perror("restart");

	printf("should not reach here !\n");

	return 0;
}

------------------------------------------------------------------------

From 9e76d100bd81afdebda049797d3109102761fb4e Mon Sep 17 00:00:00 2001
From: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
Date: Tue, 21 Oct 2008 16:04:36 -0400
Subject: [PATCH 1/2] Minor simplification of cr_ctx_alloc(), cr_ctx_free()

Decalre 'static' cr_ctx_alloc(), cr_ctx_free()
Common error path in cr_ctx_alloc()
Add comment for preempt_disable()/enable() in cr_write_cpu_fpu()
---
 arch/x86/mm/checkpoint.c |   10 ++++++----
 checkpoint/sys.c         |   35 ++++++++++++++++++-----------------
 2 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/arch/x86/mm/checkpoint.c b/arch/x86/mm/checkpoint.c
index 554dbff..9c2d949 100644
--- a/arch/x86/mm/checkpoint.c
+++ b/arch/x86/mm/checkpoint.c
@@ -154,7 +154,7 @@ void cr_write_cpu_fpu(struct cr_hdr_cpu *hh, struct task_struct *t)
 
 	/* i387 + MMU + SSE logic */
 
-	preempt_disable();
+	preempt_disable();	/* needed if t == current */
 
 	hh->used_math = tsk_used_math(t) ? 1 : 0;
 	if (hh->used_math) {
@@ -163,14 +163,16 @@ void cr_write_cpu_fpu(struct cr_hdr_cpu *hh, struct task_struct *t)
 		 * have been cleared when task was conexted-switched out...
 		 * except if we are in process context, in which case we do
 		 */
-		if (thread_info->status & TS_USEDFPU)
-			unlazy_fpu(current);
+		if (t == current) {
+			if (thread_info->status & TS_USEDFPU)
+				   unlazy_fpu(current);
+		}
 
 		hh->has_fxsr = cpu_has_fxsr;
 		memcpy(&hh->xstate, thread->xstate, sizeof(*thread->xstate));
 	}
 
-	preempt_enable();
+	preempt_enable();	/* needed if t == current */
 }
 
 #endif	/* CONFIG_X86_64 */
diff --git a/checkpoint/sys.c b/checkpoint/sys.c
index c1f2c8f..aa519ab 100644
--- a/checkpoint/sys.c
+++ b/checkpoint/sys.c
@@ -156,7 +156,7 @@ void cr_hbuf_put(struct cr_ctx *ctx, int n)
 /* unique checkpoint identifier (FIXME: should be per-container) */
 static atomic_t cr_ctx_count;
 
-void cr_ctx_free(struct cr_ctx *ctx)
+static void cr_ctx_free(struct cr_ctx *ctx)
 {
 	if (ctx->file)
 		fput(ctx->file);
@@ -172,30 +172,30 @@ void cr_ctx_free(struct cr_ctx *ctx)
 	kfree(ctx);
 }
 
-struct cr_ctx *cr_ctx_alloc(pid_t pid, int fd, unsigned long flags)
+static struct cr_ctx *cr_ctx_alloc(pid_t pid, int fd, unsigned long flags)
 {
 	struct cr_ctx *ctx;
+	int err;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return ERR_PTR(-ENOMEM);
 
+	ctx->flags = flags;
+	ctx->pid = pid;
+
+	err = -EBADF;
 	ctx->file = fget(fd);
-	if (!ctx->file) {
-		cr_ctx_free(ctx);
-		return ERR_PTR(-EBADF);
-	}
+	if (!ctx->file)
+		goto err;
 
+	err = -ENOMEM;
 	ctx->hbuf = kmalloc(CR_HBUF_TOTAL, GFP_KERNEL);
-	if (!ctx->hbuf) {
-		cr_ctx_free(ctx);
-		return ERR_PTR(-ENOMEM);
-	}
+	if (!ctx->hbuf)
+		goto err;
 
-	if (cr_objhash_alloc(ctx) < 0) {
-		cr_ctx_free(ctx);
-		return ERR_PTR(-ENOMEM);
-	}
+	if (cr_objhash_alloc(ctx) < 0)
+		goto err;
 
 	/*
 	 * assume checkpointer is in container's root vfs
@@ -206,12 +206,13 @@ struct cr_ctx *cr_ctx_alloc(pid_t pid, int fd, unsigned long flags)
 
 	INIT_LIST_HEAD(&ctx->pgarr_list);
 
-	ctx->pid = pid;
-	ctx->flags = flags;
-
 	ctx->crid = atomic_inc_return(&cr_ctx_count);
 
 	return ctx;
+
+ err:
+	cr_ctx_free(ctx);
+	return ERR_PTR(err);
 }
 
 /**
-- 
1.5.4.3

------------------------------------------------------------------------

From fedd1a8b05f40712d21c01e22437a2a3cdbae2e9 Mon Sep 17 00:00:00 2001
From: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
Date: Tue, 21 Oct 2008 16:26:10 -0400
Subject: [PATCH 2/2] External checkpoint of a task other than ourself

sys_checkpoint() now looks up the target pid (in our namespace) and
checkpoints that corresponding task. That task should be the root of
a container.

sys_restart() remains the same, as the restart is always done in the
context of the restarting task.
---
 checkpoint/checkpoint.c    |    4 ++-
 checkpoint/sys.c           |   65 ++++++++++++++++++++++++++++++++++++++++++-
 include/linux/checkpoint.h |    5 +++-
 3 files changed, 70 insertions(+), 4 deletions(-)

diff --git a/checkpoint/checkpoint.c b/checkpoint/checkpoint.c
index 87420dc..5ee6cbf 100644
--- a/checkpoint/checkpoint.c
+++ b/checkpoint/checkpoint.c
@@ -190,6 +190,8 @@ static int cr_write_task(struct cr_ctx *ctx, struct task_struct *t)
 {
 	int ret ;
 
+	/* TODO: verity that the task is frozen (unless self) */
+
 	if (t->state == TASK_DEAD) {
 		pr_warning("CR: task may not be in state TASK_DEAD\n");
 		return -EAGAIN;
@@ -227,7 +229,7 @@ int do_checkpoint(struct cr_ctx *ctx)
 	ret = cr_write_head(ctx);
 	if (ret < 0)
 		goto out;
-	ret = cr_write_task(ctx, current);
+	ret = cr_write_task(ctx, ctx->root_task);
 	if (ret < 0)
 		goto out;
 	ret = cr_write_tail(ctx);
diff --git a/checkpoint/sys.c b/checkpoint/sys.c
index aa519ab..ce98295 100644
--- a/checkpoint/sys.c
+++ b/checkpoint/sys.c
@@ -9,6 +9,8 @@
  */
 
 #include <linux/sched.h>
+#include <linux/nsproxy.h>
+#include <linux/ptrace.h>
 #include <linux/kernel.h>
 #include <linux/fs.h>
 #include <linux/file.h>
@@ -153,6 +155,60 @@ void cr_hbuf_put(struct cr_ctx *ctx, int n)
  * restart operation, and persists until the operation is completed.
  */
 
+static void cr_ctx_put_container(struct cr_ctx *ctx)
+{
+	if (ctx->root_nsproxy)
+		put_nsproxy(ctx->root_nsproxy);
+	if (ctx->root_task)
+		put_task_struct(ctx->root_task);
+	ctx->root_pid = 0;
+}
+
+static int cr_ctx_get_container(pid_t pid, struct cr_ctx *ctx)
+{
+	struct task_struct *task = NULL;
+	struct nsproxy *nsproxy = NULL;
+	int err = -ESRCH;
+
+	read_lock(&tasklist_lock);
+	task = find_task_by_vpid(pid);
+	if (task)
+		get_task_struct(task);
+	read_unlock(&tasklist_lock);
+
+	if (!task)
+		goto out;
+
+	if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
+		err = -EPERM;
+		goto out;
+	}
+	  
+	rcu_read_lock();
+	if (task_nsproxy(task)) {
+		nsproxy = task_nsproxy(task);
+		get_nsproxy(nsproxy);
+	}
+	rcu_read_unlock();
+
+	if (!nsproxy)
+		goto out;
+
+	/* TODO: verify that the task is the container's root */
+	/* TODO: verify that the container is frozen */
+
+	ctx->root_pid = pid;
+	ctx->root_task = task;
+	ctx->root_nsproxy = nsproxy;
+
+	return 0;
+
+ out:
+	if (task)
+		put_task_struct(task);
+	return err;
+}
+
 /* unique checkpoint identifier (FIXME: should be per-container) */
 static atomic_t cr_ctx_count;
 
@@ -169,6 +225,8 @@ static void cr_ctx_free(struct cr_ctx *ctx)
 	cr_pgarr_free(ctx);
 	cr_objhash_free(ctx);
 
+	cr_ctx_put_container(ctx);
+
 	kfree(ctx);
 }
 
@@ -182,13 +240,16 @@ static struct cr_ctx *cr_ctx_alloc(pid_t pid, int fd, unsigned long flags)
 		return ERR_PTR(-ENOMEM);
 
 	ctx->flags = flags;
-	ctx->pid = pid;
 
 	err = -EBADF;
 	ctx->file = fget(fd);
 	if (!ctx->file)
 		goto err;
 
+	err = cr_ctx_get_container(pid, ctx);
+	if (err < 0)
+		goto err;
+
 	err = -ENOMEM;
 	ctx->hbuf = kmalloc(CR_HBUF_TOTAL, GFP_KERNEL);
 	if (!ctx->hbuf)
@@ -201,7 +262,7 @@ static struct cr_ctx *cr_ctx_alloc(pid_t pid, int fd, unsigned long flags)
 	 * assume checkpointer is in container's root vfs
 	 * FIXME: this works for now, but will change with real containers
 	 */
-	ctx->vfsroot = &current->fs->root;
+	ctx->vfsroot = &ctx->root_task->fs->root;
 	path_get(ctx->vfsroot);
 
 	INIT_LIST_HEAD(&ctx->pgarr_list);
diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
index b102346..b53663b 100644
--- a/include/linux/checkpoint.h
+++ b/include/linux/checkpoint.h
@@ -16,9 +16,12 @@
 #define CR_VERSION  2
 
 struct cr_ctx {
-	pid_t pid;		/* container identifier */
 	int crid;		/* unique checkpoint id */
 
+	pid_t root_pid;		/* container identifier */
+	struct task_struct *root_task;	/* container root task */
+	struct nsproxy *root_nsproxy;	/* container root nsproxy */
+
 	unsigned long flags;
 	unsigned long oflags;	/* restart: old flags */
 
-- 
1.5.4.3

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: External checkpoint patch
       [not found]   ` <Pine.LNX.4.64.0810211857140.22085-CXF6herHY6ykSYb+qCZC/1i27PF6R63G9nwVQlTi/Pw@public.gmane.org>
@ 2008-10-23  7:51     ` Cedric Le Goater
  2008-10-23 13:48     ` Cedric Le Goater
  1 sibling, 0 replies; 5+ messages in thread
From: Cedric Le Goater @ 2008-10-23  7:51 UTC (permalink / raw)
  To: Oren Laadan; +Cc: containers, Dave Hansen

minor issue,


> -struct cr_ctx *cr_ctx_alloc(pid_t pid, int fd, unsigned long flags)
> +static struct cr_ctx *cr_ctx_alloc(pid_t pid, int fd, unsigned long flags)
>  {
>  	struct cr_ctx *ctx;
> +	int err;
> 
>  	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
>  	if (!ctx)
>  		return ERR_PTR(-ENOMEM);

with the err goto, you're calling cr_ctx_free() in a few places where
ctx is partially initialized. you should at least do :

	INIT_LIST_HEAD(&ctx->pgarr_list);

right after the kzalloc() and probably check the value of ctx->hbuf. 
There might be other issues.

> +	ctx->flags = flags;
> +	ctx->pid = pid;
> +
> +	err = -EBADF;
>  	ctx->file = fget(fd);
> -	if (!ctx->file) {
> -		cr_ctx_free(ctx);
> -		return ERR_PTR(-EBADF);
> -	}
> +	if (!ctx->file)
> +		goto err;
>
> 
> +	err = -ENOMEM;
>  	ctx->hbuf = kmalloc(CR_HBUF_TOTAL, GFP_KERNEL);
> -	if (!ctx->hbuf) {
> -		cr_ctx_free(ctx);
> -		return ERR_PTR(-ENOMEM);
> -	}
> +	if (!ctx->hbuf)
> +		goto err;
> 
> -	if (cr_objhash_alloc(ctx) < 0) {
> -		cr_ctx_free(ctx);
> -		return ERR_PTR(-ENOMEM);
> -	}
> +	if (cr_objhash_alloc(ctx) < 0)
> +		goto err;
> 
>  	/*
>  	 * assume checkpointer is in container's root vfs
> @@ -206,12 +206,13 @@ struct cr_ctx *cr_ctx_alloc(pid_t pid, int fd, unsigned long flags)
> 
>  	INIT_LIST_HEAD(&ctx->pgarr_list);
> 
> -	ctx->pid = pid;
> -	ctx->flags = flags;
> -
>  	ctx->crid = atomic_inc_return(&cr_ctx_count);
> 
>  	return ctx;
> +
> + err:
> +	cr_ctx_free(ctx);
> +	return ERR_PTR(err);
>  }
> 
>  /**

here's a patch for it.

C.

Signed-off-by: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
---
 checkpoint/sys.c |    6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

Index: 2.6.27-lxc/checkpoint/sys.c
===================================================================
--- 2.6.27-lxc.orig/checkpoint/sys.c
+++ 2.6.27-lxc/checkpoint/sys.c
@@ -217,7 +217,8 @@ static void cr_ctx_free(struct cr_ctx *c
 	if (ctx->file)
 		fput(ctx->file);
 
-	kfree(ctx->hbuf);
+	if (ctx->hbuf)
+		kfree(ctx->hbuf);
 
 	if (ctx->vfsroot)
 		path_put(ctx->vfsroot);
@@ -239,6 +240,7 @@ static struct cr_ctx *cr_ctx_alloc(pid_t
 	if (!ctx)
 		return ERR_PTR(-ENOMEM);
 
+	INIT_LIST_HEAD(&ctx->pgarr_list);
 	ctx->flags = flags;
 
 	err = -EBADF;
@@ -265,8 +267,6 @@ static struct cr_ctx *cr_ctx_alloc(pid_t
 	ctx->vfsroot = &ctx->root_task->fs->root;
 	path_get(ctx->vfsroot);
 
-	INIT_LIST_HEAD(&ctx->pgarr_list);
-
 	ctx->crid = atomic_inc_return(&cr_ctx_count);
 
 	return ctx;

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: External checkpoint patch
       [not found]   ` <Pine.LNX.4.64.0810211857140.22085-CXF6herHY6ykSYb+qCZC/1i27PF6R63G9nwVQlTi/Pw@public.gmane.org>
  2008-10-23  7:51     ` Cedric Le Goater
@ 2008-10-23 13:48     ` Cedric Le Goater
  1 sibling, 0 replies; 5+ messages in thread
From: Cedric Le Goater @ 2008-10-23 13:48 UTC (permalink / raw)
  To: Oren Laadan; +Cc: containers, Daniel Lezcano, Dave Hansen

[-- Attachment #1: Type: text/plain, Size: 2314 bytes --]

Oren Laadan wrote:
>> Oren,
>>
>> Could you take a look over Cedric's external checkpoint patch?
>>
>> http://git.kernel.org/?p=linux/kernel/git/daveh/linux-2.6-cr.git;a=commit;h=28ffabbc17d3641eee2a7eb66f714c266c400263
>>
>> It seems pretty small to me.
>>
>> --
> 
> I committed a couple of patches on top of ckpt-v7 that do this.
> (the first one is actually a simple cleanup that is unrelated).
> 
> see:
> http://git.ncl.cs.columbia.edu/?p=linux-cr-dev.git;a=shortlog;h=refs/heads/ckpt-v7
> (or git://git.ncl.cs.columbia.edu/pub/git/linux-cr-dev.git ckpt-v7)
> 
> *  "Minor simplification of cr_ctx_alloc(), cr_ctx_free()"
> *  "External checkpoint of a task other than ourself"
> 
> To checkpoint another task, you need to freeze that other task, or to
> SIGSTOP it (if using a kernel without freezer cgroup).
> 
> Restart remains the same except the the original PID is not preserved
> (because we haven't solved yet the fork-with-specific-pid issue).
> 
> In reality, you would create a new names space, and have the task running
> in there call 'sys_restart()'.
> 
> Below are test1.c, ckpt.c (to checkpoint), and rstr.c (to restart), as
> well as the two patches.
> 
> I tested it this way:
> 	$ ./test.1 &
> 	[1] 3493
> 
> 	$ kill -STOP 3493
> 	$ ./ckpt 3493 > ckpt.image
> 	
> 	$ mv /tmp/cr-test.out /tmp/cr-test.out.orig
> 	$ cp /tmp/cr-test.out.orig /tmp/cr-test.out
> 
> 	$ kill -CONT 3493
> 
> 	$ ./rstr < ckpt.image
> now compare the output of the two output files


Oren, 

I've merged your v7 patches on top of the 2.6.27-lxc3 patchset : 

	http://lxc.sourceforge.net/patches/2.6.27/2.6.27-lxc3/

the -lxc patchset includes the just merged freezer and sysfs patches
required for the netns. 

the lxc user tools using the patchset are here (CVS head for the 
moment) : 

	 http://sourceforge.net/projects/lxc/

simple applications calculating decimals of PI (source attached) are 
successfully checkpointed and restarted with the lxc-checkpoint and 
lxc-restart tools. something like the following :

	$ lxc-create -n foo 
	$ lxc-start -n foo ./pi1 40000 &
	$ lxc-checkpoint -n foo > /tmp/statefile
	$ lxc-restart -n foo < /tmp/statefile

stdin should be closed of course and all output redirected to a regular 
file.

This is work in progress of course but it's a major milestone !

Thanks,

C.

[-- Attachment #2: pi1.c --]
[-- Type: text/x-csrc, Size: 4462 bytes --]

/****************************************/
/* Compute pi to arbitrary precision    */
/* Author Roy Williams February 1994    */
/*   roy-NQQVC5OOd4osTUYHLfqkUA@public.gmane.org               */
/* Uses Machin's formula...             */
/* pi/4 = 4 arctan(1/5) - arctan(1/239) */
/****************************************/
/* compile with cc -O -o pi pi.c        */
/* run as "pi 1000" for 1000 digits     */
/****************************************/
/* The last few digits may be wrong.......... */

#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <unistd.h>

void arctan(int *result, int *w1, int *w2, int denom, int onestep);
void mult(int *result, int factor);
void sub(int *result, int *decrem);
void print(int *result);
void mdiv(int *result, int denom);
void set(int *result, int rhs);
void copy(int *result, int *from);
void add(int *result, int *increm);
int zero(int *result);

#define BASE    10000
int nblock;
int *tot;
int *t1;
int *t2;
int *t3;

int main(int argc, char **argv)
{
        int ndigit = 60;
        if(argc == 2)
                ndigit = atoi(argv[1]);
        else {
                fprintf(stderr, "Usage: %s ndigit\n", argv[0]);
                fprintf(stderr, "(Assuming 60 digits)\n");
        }
        if(ndigit < 20) ndigit = 20;
        nblock = ndigit/4;
        tot = (int *)malloc(nblock*sizeof(int));
        t1 = (int *)malloc(nblock*sizeof(int));
        t2 = (int *)malloc(nblock*sizeof(int));
        t3 = (int *)malloc(nblock*sizeof(int));
        if(!tot || !t1 || !t2 || !t3){
                fprintf(stderr, "Not enough memory\n");
                exit(1);
        }
	printf("pi1 - %d digits, %.1f kbytes\n", ndigit,
	       (4*nblock*sizeof(int))/1024.0);
	
        arctan(tot, t1, t2, 5, 1);
        mult(tot, 4);
        arctan(t3, t1, t2, 239, 2);
        sub(tot, t3);
        mult(tot, 4);
        print(tot);
	return 0;
}

void arctan(int *result, int *w1, int *w2, int denom, int onestep)
{
        int denom2 = denom*denom;
        int k = 1;

        set(result, 1);
        mdiv(result, denom);
        copy(w1, result);

        do{
                if(onestep)
                        mdiv(w1, denom2);
                else {
                        mdiv(w1, denom);
                        mdiv(w1, denom);
                }
                copy(w2, w1);
                mdiv(w2, 2*k+1);
                if(k%2)
                        sub(result, w2);
                else
                        add(result, w2);
                k++;
		// add tracing PG
		//printf(":%d\n",k);

        } while(!zero(w2));
}

void copy(int *result, int *from)
{
        int i;
        for(i=0; i<nblock; i++)
                result[i] = from[i];
}

int zero(int *result)
{
        int i;
        for(i=0; i<nblock; i++)
                if(result[i])
                        return 0;
        return 1;
}

void add(int *result, int *increm)
{
        int i;
        for(i=nblock-1; i>=0; i--){
                result[i] += increm[i];
                if(result[i] >= BASE){
                        result[i] -= BASE;
                        result[i-1]++;
                }
        }
}

void sub(int *result, int *decrem)
{
        int i;
        for(i=nblock-1; i>=0; i--){
                result[i] -= decrem[i];
                if(result[i] < 0){
                        result[i] += BASE;
                        result[i-1]--;
                }
        }
}

void mult(int *result, int factor)
{
        int i, carry = 0;
        for(i=nblock-1; i>=0; i--){
                result[i] *= factor;
                result[i] += carry;
                carry = result[i]/BASE;
                result[i] %= BASE;
        }
}

void mdiv(int *result, int denom)
{
        int i, carry = 0;

        for(i=0; i<nblock; i++){
                result[i] += carry*BASE;
                carry = result[i] % denom;
                result[i] /= denom;
        }
}

void set(int *result, int rhs)
{
        int i;
        for(i=0; i<nblock; i++)
                result[i] = 0;
        result[0] = rhs;
}

void print(int *result)
{
        int i, k;
        char s[10];
        printf("%1d.\n", result[0]);
        for(i=1; i<nblock; i++){
                sprintf(s, "%4d ", result[i]);
                for(k=0; k<5; k++)
                        if(s[k] == ' ') s[k] = '0';
                printf("%c%c%c%c", s[0], s[1], s[2], s[3]);
                if(i%15 == 0) printf("\n");
        }
        printf("\n");
}

[-- Attachment #3: Type: text/plain, Size: 206 bytes --]

_______________________________________________
Containers mailing list
Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
https://lists.linux-foundation.org/mailman/listinfo/containers

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2008-10-23 13:48 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-10-21 18:16 External checkpoint patch Dave Hansen
2008-10-21 22:49 ` Oren Laadan
2008-10-21 23:07 ` Oren Laadan
     [not found]   ` <Pine.LNX.4.64.0810211857140.22085-CXF6herHY6ykSYb+qCZC/1i27PF6R63G9nwVQlTi/Pw@public.gmane.org>
2008-10-23  7:51     ` Cedric Le Goater
2008-10-23 13:48     ` Cedric Le Goater

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.