All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC][PATCH 0/6][usercr]: Rename/reorg usercr code
@ 2010-04-14  0:49 Sukadev Bhattiprolu
       [not found] ` <20100414004927.GA13012-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Sukadev Bhattiprolu @ 2010-04-14  0:49 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Containers


Change the prefix of the USERCR APIs to 'cr_' and reorganize the code to avoid
duplication and reduce code size.

	[PATCH 1/6] Change API prefix to cr_
	[PATCH 2/6] Remove flags parameter to cr_checkpoint()
	[PATCH 3/6] Minor reorg of restart.c
	[PATCH 4/6] Move checkpoint() into restart.c
	[PATCH 5/6] Rename restart.c to cr_checkpoint.c
	[PATCH 6/6] Rename common.h to cr_log.h

With this change, USERCR for now "exports" just the following two files:

	cr_checkpoint.o
	cr_checkpoint.h

But this patchset does cause some churn; let me know if you think any of it
is unnecessary noise.  So far I have tested it only together with the patch
that removes most exit() calls from cr_restart(). If the reorg makes sense,
I will run all tests once more and resubmit.

TODO:
	cr_checkpoint.c has several forward declarations - most of these can
	be removed by reordering the code in the file. This would also
	reduce maintenance overhead when the prototypes of the functions
	change. But reordering the code would cause more noise in the history.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [RFC][PATCH 1/6][usercr] Change API prefix to cr_
       [not found] ` <20100414004927.GA13012-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2010-04-14  0:52   ` Sukadev Bhattiprolu
  2010-04-14  0:52   ` [RFC][PATCH 2/6][usercr] Remove flags parameter to cr_checkpoint() Sukadev Bhattiprolu
                     ` (5 subsequent siblings)
  6 siblings, 0 replies; 12+ messages in thread
From: Sukadev Bhattiprolu @ 2010-04-14  0:52 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Containers


From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
Date: Tue, 13 Apr 2010 12:26:03 -0700
Subject: [RFC][PATCH 1/6][usercr] Change API prefix to cr_

Change the "app_" prefix to "cr_" in function names and data structures.

Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 Makefile          |    2 +-
 app-checkpoint.h  |   45 ---------------------------------------------
 checkpoint-main.c |    8 ++++----
 checkpoint.c      |    6 +++---
 cr_checkpoint.h   |   45 +++++++++++++++++++++++++++++++++++++++++++++
 restart-main.c    |    8 ++++----
 restart.c         |    8 ++++----
 7 files changed, 61 insertions(+), 61 deletions(-)
 delete mode 100644 app-checkpoint.h
 create mode 100644 cr_checkpoint.h

diff --git a/Makefile b/Makefile
index 6c9ff93..afe144f 100644
--- a/Makefile
+++ b/Makefile
@@ -52,7 +52,7 @@ $(LIB_ECLONE):
 # restart needs to be thread-safe
 restart: CFLAGS += -D__REENTRANT -pthread
 
-$(CR_OBJS): common.h app-checkpoint.h
+$(CR_OBJS): common.h cr_checkpoint.h
 
 restart: restart.o restart-main.o
 
diff --git a/app-checkpoint.h b/app-checkpoint.h
deleted file mode 100644
index d2455f3..0000000
--- a/app-checkpoint.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#include <linux/checkpoint.h>
-#include <linux/checkpoint_hdr.h>
-
-struct app_checkpoint_args {
-	int outfd;
-	int logfd;
-	int uerrfd;
-	int container;
-	int verbose;
-};
-
-struct app_restart_args {
-	int self;
-	int pids;
-	int pidns;
-	int inspect;
-	char *root;
-	int wait;
-	int mntns;
-	int mnt_pty;
-	int show_status;
-	int copy_status;
-	char *freezer;
-	int keep_frozen;
-	int infd;
-	int klogfd;
-	int ulogfd;
-	int uerrfd;
-	long warn;
-	int debug;
-	int verbose;
-	long fail;
-	int keep_lsm;
-};
-
-/*
- * TODO: Implement an API to let callers choose if/how an interrupt be sent
- *  	 and remove global_send_sigint.
- */
-extern int global_send_sigint;
-
-extern int app_checkpoint(int pid, unsigned long flags,
-				struct app_checkpoint_args *args);
-
-extern int app_restart(struct app_restart_args *args);
diff --git a/checkpoint-main.c b/checkpoint-main.c
index bac65cc..48c54f6 100644
--- a/checkpoint-main.c
+++ b/checkpoint-main.c
@@ -9,7 +9,7 @@
 
 #include <linux/checkpoint.h>
 
-#include "app-checkpoint.h"
+#include "cr_checkpoint.h"
 #include "common.h"
 
 static int global_uerrfd = -1;
@@ -51,7 +51,7 @@ static int str2num(char *str)
 	return num;
 }
 
-static void parse_args(struct app_checkpoint_args *args, int argc, char *argv[])
+static void parse_args(struct cr_checkpoint_args *args, int argc, char *argv[])
 {
 	static struct option opts[] = {
 		{ "help",	no_argument,		NULL, 'h' },
@@ -146,7 +146,7 @@ static void parse_args(struct app_checkpoint_args *args, int argc, char *argv[])
 int main(int argc, char *argv[])
 {
 	int ret;
-	struct app_checkpoint_args args;
+	struct cr_checkpoint_args args;
 	unsigned long flags = 0;
 	pid_t pid;
 
@@ -168,6 +168,6 @@ int main(int argc, char *argv[])
 	if (!args.container)
 		flags |= CHECKPOINT_SUBTREE;
 
-	ret = app_checkpoint(pid, flags, &args);
+	ret = cr_checkpoint(pid, flags, &args);
 	return (ret > 0) ? 0 : 1;
 }
diff --git a/checkpoint.c b/checkpoint.c
index e0290c9..5472bfe 100644
--- a/checkpoint.c
+++ b/checkpoint.c
@@ -21,7 +21,7 @@
 
 #include <linux/checkpoint.h>
 
-#include "app-checkpoint.h"
+#include "cr_checkpoint.h"
 #include "common.h"
 
 static int global_uerrfd = -1;
@@ -31,8 +31,8 @@ inline static int checkpoint(pid_t pid, int fd, unsigned long flags, int logfd)
 	return syscall(__NR_checkpoint, pid, fd, flags, logfd);
 }
 
-int app_checkpoint(int pid, unsigned long flags,
-				struct app_checkpoint_args *args)
+int cr_checkpoint(int pid, unsigned long flags,
+				struct cr_checkpoint_args *args)
 {
 	int ret;
 
diff --git a/cr_checkpoint.h b/cr_checkpoint.h
new file mode 100644
index 0000000..81fa2c2
--- /dev/null
+++ b/cr_checkpoint.h
@@ -0,0 +1,45 @@
+#include <linux/checkpoint.h>
+#include <linux/checkpoint_hdr.h>
+
+struct cr_checkpoint_args {
+	int outfd;
+	int logfd;
+	int uerrfd;
+	int container;
+	int verbose;
+};
+
+struct cr_restart_args {
+	int self;
+	int pids;
+	int pidns;
+	int inspect;
+	char *root;
+	int wait;
+	int mntns;
+	int mnt_pty;
+	int show_status;
+	int copy_status;
+	char *freezer;
+	int keep_frozen;
+	int infd;
+	int klogfd;
+	int ulogfd;
+	int uerrfd;
+	long warn;
+	int debug;
+	int verbose;
+	long fail;
+	int keep_lsm;
+};
+
+/*
+ * TODO: Implement an API to let callers choose if/how an interrupt be sent
+ *  	 and remove global_send_sigint.
+ */
+extern int global_send_sigint;
+
+extern int cr_checkpoint(int pid, unsigned long flags,
+				struct cr_checkpoint_args *args);
+
+extern int cr_restart(struct cr_restart_args *args);
diff --git a/restart-main.c b/restart-main.c
index 656d49f..1f1a749 100644
--- a/restart-main.c
+++ b/restart-main.c
@@ -8,7 +8,7 @@
 #include <limits.h>
 #include <getopt.h>
 
-#include "app-checkpoint.h"
+#include "cr_checkpoint.h"
 #include "common.h"
 
 static int global_ulogfd;
@@ -105,7 +105,7 @@ static int str2sig(char *str)
 	return -1;
 }
 
-static void parse_args(struct app_restart_args *args, int argc, char *argv[])
+static void parse_args(struct cr_restart_args *args, int argc, char *argv[])
 {
 	static struct option opts[] = {
 		{ "help",	no_argument,		NULL, 'h' },
@@ -299,7 +299,7 @@ static void parse_args(struct app_restart_args *args, int argc, char *argv[])
 int main(int argc, char *argv[])
 {
 	int ret;
-	struct app_restart_args args;
+	struct cr_restart_args args;
 
 	/*
 	 * Initialize the log/error fds early so even parse_args() errors
@@ -312,7 +312,7 @@ int main(int argc, char *argv[])
 
 	parse_args(&args, argc, argv);
 
-	ret = app_restart(&args);
+	ret = cr_restart(&args);
 	if (ret > 0)
 		ret = 0;
 
diff --git a/restart.c b/restart.c
index 3224335..c98d377 100644
--- a/restart.c
+++ b/restart.c
@@ -40,7 +40,7 @@
 #include "eclone.h"
 #include "genstack.h"
 #include "compat.h"
-#include "app-checkpoint.h"
+#include "cr_checkpoint.h"
 #include "common.h"
 
 /*
@@ -162,7 +162,7 @@ struct ckpt_ctx {
 	char tree[BUFSIZE];
 	char vpids[BUFSIZE];
 	char buf[BUFSIZE];
-	struct app_restart_args *args;
+	struct cr_restart_args *args;
 
 	char *freezer;
 };
@@ -400,7 +400,7 @@ static int freezer_register(struct ckpt_ctx *ctx, pid_t pid)
  * Validate the specified arguments and initialize globals based on the
  * arguments. Return 0 on success.
  */
-int process_args(struct app_restart_args *args)
+int process_args(struct cr_restart_args *args)
 {
 	global_debug = args->debug;
 	global_verbose = args->verbose;
@@ -463,7 +463,7 @@ int process_args(struct app_restart_args *args)
 	return 0;
 }
 
-int app_restart(struct app_restart_args *args)
+int cr_restart(struct cr_restart_args *args)
 {
 	struct ckpt_ctx ctx;
 	int ret;
-- 
1.6.6.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [RFC][PATCH 2/6][usercr] Remove flags parameter to cr_checkpoint()
       [not found] ` <20100414004927.GA13012-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2010-04-14  0:52   ` [RFC][PATCH 1/6][usercr] Change API prefix to cr_ Sukadev Bhattiprolu
@ 2010-04-14  0:52   ` Sukadev Bhattiprolu
  2010-04-14  0:52   ` [RFC][PATCH 3/6][usercr] Minor reorg of restart.c Sukadev Bhattiprolu
                     ` (4 subsequent siblings)
  6 siblings, 0 replies; 12+ messages in thread
From: Sukadev Bhattiprolu @ 2010-04-14  0:52 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Containers


From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
Date: Tue, 13 Apr 2010 15:28:55 -0700
Subject: [RFC][PATCH 2/6][usercr] Remove flags parameter to cr_checkpoint()

For consistency with cr_restart(), remove the 'flags' parameter of
cr_checkpoint(). Since we already pass in the 'struct cr_checkpoint_args',
the flags parameter is redundant anyway.

Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 checkpoint-main.c |    6 +-----
 checkpoint.c      |    7 +++++--
 cr_checkpoint.h   |    4 +---
 3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/checkpoint-main.c b/checkpoint-main.c
index 48c54f6..b8f96a7 100644
--- a/checkpoint-main.c
+++ b/checkpoint-main.c
@@ -147,7 +147,6 @@ int main(int argc, char *argv[])
 {
 	int ret;
 	struct cr_checkpoint_args args;
-	unsigned long flags = 0;
 	pid_t pid;
 
 	global_uerrfd = fileno(stderr);
@@ -165,9 +164,6 @@ int main(int argc, char *argv[])
 		exit(1);
 	}
 
-	if (!args.container)
-		flags |= CHECKPOINT_SUBTREE;
-
-	ret = cr_checkpoint(pid, flags, &args);
+	ret = cr_checkpoint(pid, &args);
 	return (ret > 0) ? 0 : 1;
 }
diff --git a/checkpoint.c b/checkpoint.c
index 5472bfe..90d4d35 100644
--- a/checkpoint.c
+++ b/checkpoint.c
@@ -31,10 +31,10 @@ inline static int checkpoint(pid_t pid, int fd, unsigned long flags, int logfd)
 	return syscall(__NR_checkpoint, pid, fd, flags, logfd);
 }
 
-int cr_checkpoint(int pid, unsigned long flags,
-				struct cr_checkpoint_args *args)
+int cr_checkpoint(int pid, struct cr_checkpoint_args *args)
 {
 	int ret;
+	unsigned long flags;
 
 	global_uerrfd = args->uerrfd;
 
@@ -46,6 +46,9 @@ int cr_checkpoint(int pid, unsigned long flags,
 	if (args->logfd < 0)
 		args->logfd = CHECKPOINT_FD_NONE;
 
+	if (!args->container)
+		flags |= CHECKPOINT_SUBTREE;
+
 	ret = checkpoint(pid, args->outfd, flags, args->logfd);
 
 	if (ret < 0) {
diff --git a/cr_checkpoint.h b/cr_checkpoint.h
index 81fa2c2..37f9f2a 100644
--- a/cr_checkpoint.h
+++ b/cr_checkpoint.h
@@ -39,7 +39,5 @@ struct cr_restart_args {
  */
 extern int global_send_sigint;
 
-extern int cr_checkpoint(int pid, unsigned long flags,
-				struct cr_checkpoint_args *args);
-
+extern int cr_checkpoint(int pid, struct cr_checkpoint_args *args);
 extern int cr_restart(struct cr_restart_args *args);
-- 
1.6.6.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [RFC][PATCH 3/6][usercr] Minor reorg of restart.c
       [not found] ` <20100414004927.GA13012-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2010-04-14  0:52   ` [RFC][PATCH 1/6][usercr] Change API prefix to cr_ Sukadev Bhattiprolu
  2010-04-14  0:52   ` [RFC][PATCH 2/6][usercr] Remove flags parameter to cr_checkpoint() Sukadev Bhattiprolu
@ 2010-04-14  0:52   ` Sukadev Bhattiprolu
  2010-04-14  0:52   ` [RFC][PATCH 4/6][usercr] Move checkpoint() into restart.c Sukadev Bhattiprolu
                     ` (3 subsequent siblings)
  6 siblings, 0 replies; 12+ messages in thread
From: Sukadev Bhattiprolu @ 2010-04-14  0:52 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Containers


From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
Date: Tue, 13 Apr 2010 12:34:28 -0700
Subject: [RFC][PATCH 3/6][usercr] Minor reorg of restart.c

Just a minor reorg to group global variables, declarations, system-call
wrappers and helper functions together. A follow-on patch will combine
the checkpoint and restart functionality into one file and this reorg
would help keep related code together.

Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 restart.c |   43 +++++++++++++++++++++----------------------
 1 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/restart.c b/restart.c
index c98d377..11628e0 100644
--- a/restart.c
+++ b/restart.c
@@ -68,24 +68,6 @@
  * of the checkpoint image stream.
  */
 
-static struct signal_array signal_array[] = INIT_SIGNAL_ARRAY;
-
-static char *sig2str(int sig)
-{
-	int i = 0;
-
-	do {
-		if (signal_array[i].signum == sig)
-			return signal_array[i].sigstr;
-	} while (signal_array[++i].signum >= 0);
-	return "UNKNOWN SIGNAL";
-}
-
-inline static int restart(pid_t pid, int fd, unsigned long flags, int klogfd)
-{
-	return syscall(__NR_restart, pid, fd, flags, klogfd);
-}
-
 struct hashent {
 	long key;
 	void *data;
@@ -167,6 +149,11 @@ struct ckpt_ctx {
 	char *freezer;
 };
 
+struct pid_swap {
+	pid_t old;
+	pid_t new;
+};
+
 /*
  * TODO: Do we need to direct user-space restart messages to two different
  * 	 fds (like stdout and stderr) or can we just use one ?
@@ -179,6 +166,7 @@ static pid_t global_child_pid;
 static int global_child_status;
 static int global_child_collected;
 static int global_sent_sigint;
+static struct signal_array signal_array[] = INIT_SIGNAL_ARRAY;
 
 /*
  * TODO: Implement an API to let callers choose if/how an interrupt be sent
@@ -244,10 +232,10 @@ static inline pid_t _getpid(void)
 	return syscall(__NR_getpid);
 }
 
-struct pid_swap {
-	pid_t old;
-	pid_t new;
-};
+static inline int restart(pid_t pid, int fd, unsigned long flags, int klogfd)
+{
+	return syscall(__NR_restart, pid, fd, flags, klogfd);
+}
 
 static inline int ckpt_cond_warn(struct ckpt_ctx *ctx, long mask)
 {
@@ -276,6 +264,17 @@ static void report_exit_status(int status, char *str, int debug)
 		ckpt_err("%s\n", msg);
 }
 
+static char *sig2str(int sig)
+{
+	int i = 0;
+
+	do {
+		if (signal_array[i].signum == sig)
+			return signal_array[i].sigstr;
+	} while (signal_array[++i].signum >= 0);
+	return "UNKNOWN SIGNAL";
+}
+
 static void sigchld_handler(int sig)
 {
 	int collected = 0;
-- 
1.6.6.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [RFC][PATCH 4/6][usercr] Move checkpoint() into restart.c
       [not found] ` <20100414004927.GA13012-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
                     ` (2 preceding siblings ...)
  2010-04-14  0:52   ` [RFC][PATCH 3/6][usercr] Minor reorg of restart.c Sukadev Bhattiprolu
@ 2010-04-14  0:52   ` Sukadev Bhattiprolu
  2010-04-14  0:53   ` [RFC][PATCH 5/6][usercr] Rename restart.c to cr_checkpoint.c Sukadev Bhattiprolu
                     ` (2 subsequent siblings)
  6 siblings, 0 replies; 12+ messages in thread
From: Sukadev Bhattiprolu @ 2010-04-14  0:52 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Containers


From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
Date: Tue, 13 Apr 2010 14:35:32 -0700
Subject: [RFC][PATCH 4/6][usercr] Move checkpoint() into restart.c

The checkpoint() function is pretty small and the file checkpoint.c mostly
has includes/declarations that are common to restart.c. We could just
fold both checkpoint() and restart() into a single file.

To keep the diff simple, this patch just does the move and the next
patch renames 'restart.c' to 'cr_checkpoint.c'.

Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 Makefile     |   10 ++++++--
 checkpoint.c |   62 ----------------------------------------------------------
 restart.c    |   35 ++++++++++++++++++++++++++++++++
 3 files changed, 42 insertions(+), 65 deletions(-)
 delete mode 100644 checkpoint.c

diff --git a/Makefile b/Makefile
index afe144f..674b112 100644
--- a/Makefile
+++ b/Makefile
@@ -13,7 +13,7 @@ CKPT_HEADERS = include/linux/checkpoint.h \
 		include/linux/checkpoint_hdr.h \
 		include/asm/checkpoint_hdr.h
 
-CR_OBJS = checkpoint.o checkpoint-main.o restart.o restart-main.o
+CR_OBJS = restart.o checkpoint-main.o restart-main.o
 
 # detect architecture (for eclone)
 SUBARCH ?= $(patsubst i%86,x86_32,$(shell uname -m))
@@ -32,7 +32,7 @@ PREFIX ?= /usr/local
 BIN_INSTALL_DIR := $(PREFIX)/bin
 LIB_INSTALL_DIR := $(PREFIX)/lib
 
-ECLONE_PROGS = restart nsexec
+ECLONE_PROGS = checkpoint restart nsexec
 PROGS =	checkpoint ckptinfo $(ECLONE_PROGS)
 LIB_ECLONE = libeclone.a
 
@@ -55,8 +55,10 @@ restart: CFLAGS += -D__REENTRANT -pthread
 $(CR_OBJS): common.h cr_checkpoint.h
 
 restart: restart.o restart-main.o
+	$(CC) -o $@ $^
 
-checkpoint: checkpoint.o checkpoint-main.o
+checkpoint: restart.o checkpoint-main.o
+	$(CC) -o $@ $^
 
 # eclone() is architecture specific
 ifneq ($(SUBARCH),)
@@ -77,6 +79,8 @@ ASFLAGS += -m64
 $(LIB_ECLONE): clone_$(SUBARCH)_.o
 endif
 
+restart.o: $(LIB_ECLONE)
+
 # ckptinfo dependencies
 ckptinfo: ckptinfo_types.o
 
diff --git a/checkpoint.c b/checkpoint.c
deleted file mode 100644
index 90d4d35..0000000
--- a/checkpoint.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- *  checkpoint.c: checkpoint one or multiple processes
- *
- *  Copyright (C) 2008-2009 Oren Laadan
- *
- *  This file is subject to the terms and conditions of the GNU General Public
- *  License.  See the file COPYING in the main directory of the Linux
- *  distribution for more details.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <getopt.h>
-#include <unistd.h>
-#include <stdarg.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/syscall.h>
-
-#include <linux/checkpoint.h>
-
-#include "cr_checkpoint.h"
-#include "common.h"
-
-static int global_uerrfd = -1;
-
-inline static int checkpoint(pid_t pid, int fd, unsigned long flags, int logfd)
-{
-	return syscall(__NR_checkpoint, pid, fd, flags, logfd);
-}
-
-int cr_checkpoint(int pid, struct cr_checkpoint_args *args)
-{
-	int ret;
-	unsigned long flags;
-
-	global_uerrfd = args->uerrfd;
-
-	/* output file descriptor (default: stdout) */
-	if (args->outfd < 0)
-		args->outfd = STDOUT_FILENO;
-
-	/* output file descriptor (default: none) */
-	if (args->logfd < 0)
-		args->logfd = CHECKPOINT_FD_NONE;
-
-	if (!args->container)
-		flags |= CHECKPOINT_SUBTREE;
-
-	ret = checkpoint(pid, args->outfd, flags, args->logfd);
-
-	if (ret < 0) {
-		ckpt_perror("checkpoint");
-		ckpt_err("(you may use 'ckptinfo -e' for more info)\n"); 
-	} else if (args->verbose) {
-		ckpt_err("checkpoint id %d\n", ret);
-	}
-
-	return ret;
-}
diff --git a/restart.c b/restart.c
index 11628e0..e01c08e 100644
--- a/restart.c
+++ b/restart.c
@@ -232,6 +232,11 @@ static inline pid_t _getpid(void)
 	return syscall(__NR_getpid);
 }
 
+inline static int checkpoint(pid_t pid, int fd, unsigned long flags, int logfd)
+{
+	return syscall(__NR_checkpoint, pid, fd, flags, logfd);
+}
+
 static inline int restart(pid_t pid, int fd, unsigned long flags, int klogfd)
 {
 	return syscall(__NR_restart, pid, fd, flags, klogfd);
@@ -2582,3 +2587,33 @@ static void *hash_lookup(struct ckpt_ctx *ctx, long key)
 	}
 	return NULL;
 }
+
+int cr_checkpoint(int pid, struct cr_checkpoint_args *args)
+{
+	int ret;
+	unsigned long flags;
+
+	global_uerrfd = args->uerrfd;
+
+	/* output file descriptor (default: stdout) */
+	if (args->outfd < 0)
+		args->outfd = STDOUT_FILENO;
+
+	/* log file descriptor (default: none) */
+	if (args->logfd < 0)
+		args->logfd = CHECKPOINT_FD_NONE;
+
+	if (!args->container)
+		flags |= CHECKPOINT_SUBTREE;
+
+	ret = checkpoint(pid, args->outfd, flags, args->logfd);
+
+	if (ret < 0) {
+		ckpt_perror("checkpoint");
+		ckpt_err("(you may use 'ckptinfo -e' for more info)\n"); 
+	} else if (args->verbose) {
+		ckpt_err("checkpoint id %d\n", ret);
+	}
+
+	return ret;
+}
-- 
1.6.6.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [RFC][PATCH 5/6][usercr] Rename restart.c to cr_checkpoint.c
       [not found] ` <20100414004927.GA13012-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
                     ` (3 preceding siblings ...)
  2010-04-14  0:52   ` [RFC][PATCH 4/6][usercr] Move checkpoint() into restart.c Sukadev Bhattiprolu
@ 2010-04-14  0:53   ` Sukadev Bhattiprolu
  2010-04-14  0:53   ` [RFC][PATCH 6/6][usercr] Rename common.h to cr_log.h Sukadev Bhattiprolu
  2010-04-20 18:07   ` [RFC][PATCH 0/6][usercr]: Rename/reorg usercr code Sukadev Bhattiprolu
  6 siblings, 0 replies; 12+ messages in thread
From: Sukadev Bhattiprolu @ 2010-04-14  0:53 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Containers


From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
Date: Tue, 13 Apr 2010 17:01:40 -0700
Subject: [RFC][PATCH 5/6][usercr] Rename restart.c to cr_checkpoint.c

restart.c now contains both the checkpoint and restart functionality.
Kernel files that are common to both checkpoint and restart have
"checkpoint" in their name. To follow the convention, rename
'restart.c' to 'cr_checkpoint.c'.

Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 Makefile        |    8 +-
 cr_checkpoint.c | 2619 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 restart.c       | 2619 -------------------------------------------------------
 3 files changed, 2623 insertions(+), 2623 deletions(-)
 create mode 100644 cr_checkpoint.c
 delete mode 100644 restart.c

diff --git a/Makefile b/Makefile
index 674b112..205320a 100644
--- a/Makefile
+++ b/Makefile
@@ -13,7 +13,7 @@ CKPT_HEADERS = include/linux/checkpoint.h \
 		include/linux/checkpoint_hdr.h \
 		include/asm/checkpoint_hdr.h
 
-CR_OBJS = restart.o checkpoint-main.o restart-main.o
+CR_OBJS = cr_checkpoint.o checkpoint-main.o restart-main.o
 
 # detect architecture (for eclone)
 SUBARCH ?= $(patsubst i%86,x86_32,$(shell uname -m))
@@ -54,10 +54,10 @@ restart: CFLAGS += -D__REENTRANT -pthread
 
 $(CR_OBJS): common.h cr_checkpoint.h
 
-restart: restart.o restart-main.o
+restart: cr_checkpoint.o restart-main.o
 	$(CC) -o $@ $^
 
-checkpoint: restart.o checkpoint-main.o
+checkpoint: cr_checkpoint.o checkpoint-main.o
 	$(CC) -o $@ $^
 
 # eclone() is architecture specific
@@ -79,7 +79,7 @@ ASFLAGS += -m64
 $(LIB_ECLONE): clone_$(SUBARCH)_.o
 endif
 
-restart.o: $(LIB_ECLONE)
+cr_checkpoint.o: $(LIB_ECLONE)
 
 # ckptinfo dependencies
 ckptinfo: ckptinfo_types.o
diff --git a/cr_checkpoint.c b/cr_checkpoint.c
new file mode 100644
index 0000000..e01c08e
--- /dev/null
+++ b/cr_checkpoint.c
@@ -0,0 +1,2619 @@
+/*
+ *  restart.c: restart process(es) from a checkpoint
+ *
+ *  Copyright (C) 2008-2009 Oren Laadan
+ *
+ *  This file is subject to the terms and conditions of the GNU General Public
+ *  License.  See the file COPYING in the main directory of the Linux
+ *  distribution for more details.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <unistd.h>
+#include <sched.h>
+#include <pthread.h>
+#include <signal.h>
+#include <dirent.h>
+#include <time.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <asm/unistd.h>
+#include <sys/syscall.h>
+#include <sys/prctl.h>
+#include <sys/mount.h>
+#include <stdarg.h>
+
+#include <linux/sched.h>
+#include <linux/checkpoint.h>
+#include <linux/checkpoint_hdr.h>
+
+#include "eclone.h"
+#include "genstack.h"
+#include "compat.h"
+#include "cr_checkpoint.h"
+#include "common.h"
+
+/*
+ * By default, 'restart' creates a new pid namespace in which the
+ * restart takes place, using the original pids from the time of the
+ * checkpoint. This requires that CLONE_NEWPID and eclone() be enabled.
+ *
+ * Restart can also occur in the current namespace, however pids from
+ * the time of the checkpoint may be already in use then. Therefore,
+ * by default, 'restart' creates an equivalen tree without restoring
+ * the original pids, assuming that the application can tolerate this.
+ * For this, the 'ckpt_pids' array is transformed on-the-fly before it
+ * is fed to the kernel.
+ *
+ * By default, "--pids" implied "--pidns" and vice-versa. The user can
+ * use "--pids --no-pidns" for a restart in the currnet namespace -
+ * 'restart' will attempt to create the new tree with the original pids
+ * from the time of the checkpoint, if possible. This requires that
+ * eclone() be enabled.
+ *
+ * To re-create the tasks tree in user space, 'restart' reads the
+ * header and tree data from the checkpoint image tree. It makes up
+ * for the data that was consumed by using a helper process that
+ * provides the data back to the restart syscall, followed by the rest
+ * of the checkpoint image stream.
+ */
+
+struct hashent {
+	long key;
+	void *data;
+	struct hashent *next;
+};
+
+struct task;
+struct ckpt_ctx;
+
+struct task {
+	int flags;		/* state and (later) actions */
+
+	struct task *children;	/* pointers to first child, next and prev */
+	struct task *next_sib;	/*   sibling, and the creator of a process */
+	struct task *prev_sib;
+	struct task *creator;
+
+	struct task *phantom;	/* pointer to place-holdler task (if any) */
+
+	int vidx;		/* index into vpid array, -1 if none */
+	int piddepth;
+
+	pid_t pid;		/* process IDs, our bread-&-butter */
+	pid_t ppid;
+	pid_t tgid;
+	pid_t sid;
+	
+	pid_t rpid;		/* [restart without vpids] actual (real) pid */
+
+	struct ckpt_ctx *ctx;	/* points back to the c/r context */
+
+	pid_t real_parent;	/* pid of task's real parent */
+};
+
+/* zero_task represents creator of root_task (all pids 0) */
+struct task zero_task;
+
+#define TASK_ROOT	0x1	/* root task */
+#define TASK_GHOST	0x2	/* dead task (pid used as sid/pgid) */
+#define TASK_THREAD	0x4	/* thread (non leader) */
+#define TASK_SIBLING	0x8	/* creator's sibling (use CLONE_PARENT) */
+#define TASK_SESSION	0x10	/* inherits creator's original sid */
+#define TASK_NEWPID	0x20	/* starts a new pid namespace */
+#define TASK_DEAD	0x40	/* dead task (dummy) */
+#define TASK_NEWROOT	0x80	/* task must chroot() */
+#define TASK_NEWPTS	0x100	/* remount devpts */
+#define TASK_NEWNS	0x200	/* unshare namespace/file-system */
+
+struct ckpt_ctx {
+	pid_t root_pid;
+	int pipe_in;
+	int pipe_out;
+	int pids_nr;
+	int vpids_nr;
+
+	int pipe_child[2];	/* for children to report status */
+	int pipe_feed[2];	/* for feeder to provide input */
+	int pipe_coord[2];	/* for coord to report status (if needed) */
+
+	struct ckpt_pids *pids_arr;
+	struct ckpt_pids *copy_arr;
+	__s32 *vpids_arr;
+
+	struct task *tasks_arr;
+	int tasks_nr;
+	int tasks_max;
+	int tasks_pid;
+
+	struct hashent **hash_arr;
+	
+	char header[BUFSIZE];
+	char header_arch[BUFSIZE];
+	char container[BUFSIZE];
+	char tree[BUFSIZE];
+	char vpids[BUFSIZE];
+	char buf[BUFSIZE];
+	struct cr_restart_args *args;
+
+	char *freezer;
+};
+
+struct pid_swap {
+	pid_t old;
+	pid_t new;
+};
+
+/*
+ * TODO: Do we need to direct user-space restart messages to two different
+ * 	 fds (like stdout and stderr) or can we just use one ?
+ */
+static int global_ulogfd;
+static int global_uerrfd;
+static int global_debug;
+static int global_verbose;
+static pid_t global_child_pid;
+static int global_child_status;
+static int global_child_collected;
+static int global_sent_sigint;
+static struct signal_array signal_array[] = INIT_SIGNAL_ARRAY;
+
+/*
+ * TODO: Implement an API to let callers choose if/how an interrupt be sent
+ * 	 and remove global_send_sigint.
+ */
+int global_send_sigint = -1;
+
+static int ckpt_remount_proc(struct ckpt_ctx *ctx);
+static int ckpt_remount_devpts(struct ckpt_ctx *ctx);
+
+static int ckpt_build_tree(struct ckpt_ctx *ctx);
+static int ckpt_init_tree(struct ckpt_ctx *ctx);
+static int assign_vpids(struct ckpt_ctx *ctx);
+static int ckpt_set_creator(struct ckpt_ctx *ctx, struct task *task);
+static int ckpt_placeholder_task(struct ckpt_ctx *ctx, struct task *task);
+static int ckpt_propagate_session(struct ckpt_ctx *ctx, struct task *session);
+
+static int ckpt_coordinator_pidns(struct ckpt_ctx *ctx);
+static int ckpt_coordinator(struct ckpt_ctx *ctx);
+
+static int ckpt_make_tree(struct ckpt_ctx *ctx, struct task *task);
+static int ckpt_collect_child(struct ckpt_ctx *ctx);
+static pid_t ckpt_fork_child(struct ckpt_ctx *ctx, struct task *child);
+static int ckpt_adjust_pids(struct ckpt_ctx *ctx);
+
+static void ckpt_abort(struct ckpt_ctx *ctx, char *str);
+static int ckpt_do_feeder(void *data);
+static int ckpt_fork_feeder(struct ckpt_ctx *ctx);
+
+static int ckpt_write(int fd, void *buf, int count);
+static int ckpt_write_obj(struct ckpt_ctx *ctx, struct ckpt_hdr *h);
+
+static int ckpt_write_header(struct ckpt_ctx *ctx);
+static int ckpt_write_header_arch(struct ckpt_ctx *ctx);
+static int ckpt_write_container(struct ckpt_ctx *ctx);
+static int ckpt_write_tree(struct ckpt_ctx *ctx);
+static int ckpt_write_vpids(struct ckpt_ctx *ctx);
+
+static int _ckpt_read(int fd, void *buf, int count);
+static int ckpt_read(int fd, void *buf, int count);
+static int ckpt_read_obj(struct ckpt_ctx *ctx,
+			 struct ckpt_hdr *h, void *buf, int n);
+static int ckpt_read_obj_type(struct ckpt_ctx *ctx, void *b, int n, int type);
+
+static int ckpt_read_header(struct ckpt_ctx *ctx);
+static int ckpt_read_header_arch(struct ckpt_ctx *ctx);
+static int ckpt_read_container(struct ckpt_ctx *ctx);
+static int ckpt_read_tree(struct ckpt_ctx *ctx);
+static int ckpt_read_vpids(struct ckpt_ctx *ctx);
+
+static int hash_init(struct ckpt_ctx *ctx);
+static void hash_exit(struct ckpt_ctx *ctx);
+static int hash_insert(struct ckpt_ctx *ctx, long key, void *data);
+static void *hash_lookup(struct ckpt_ctx *ctx, long key);
+
+static inline pid_t _gettid(void)
+{
+	return syscall(__NR_gettid);
+}
+
+static inline pid_t _getpid(void)
+{
+	return syscall(__NR_getpid);
+}
+
+inline static int checkpoint(pid_t pid, int fd, unsigned long flags, int logfd)
+{
+	return syscall(__NR_checkpoint, pid, fd, flags, logfd);
+}
+
+static inline int restart(pid_t pid, int fd, unsigned long flags, int klogfd)
+{
+	return syscall(__NR_restart, pid, fd, flags, klogfd);
+}
+
+static inline int ckpt_cond_warn(struct ckpt_ctx *ctx, long mask)
+{
+	return (ctx->args->warn & mask);
+}
+		
+static inline int ckpt_cond_fail(struct ckpt_ctx *ctx, long mask)
+{
+	return (ctx->args->fail & mask);
+}
+
+static void report_exit_status(int status, char *str, int debug)
+{
+	char msg[64];
+
+	if (WIFEXITED(status))
+		sprintf(msg, "%s exited status %d", str, WEXITSTATUS(status));
+	else if (WIFSIGNALED(status))
+		sprintf(msg, "%s killed signal %d", str, WTERMSIG(status));
+	else
+		sprintf(msg, "%s dies somehow ... raw status %d", str, status);
+
+	if (debug)
+		ckpt_dbg("%s\n", msg);
+	else
+		ckpt_err("%s\n", msg);
+}
+
+static char *sig2str(int sig)
+{
+	int i = 0;
+
+	do {
+		if (signal_array[i].signum == sig)
+			return signal_array[i].sigstr;
+	} while (signal_array[++i].signum >= 0);
+	return "UNKNOWN SIGNAL";
+}
+
+/*
+ * SIGCHLD handler: reap, without blocking, every child that has already
+ * terminated. If the reaped child is the one recorded in
+ * global_child_pid, its wait status is saved in global_child_status and
+ * global_child_collected is raised so that ckpt_collect_child() can use
+ * it. A waitpid() failure other than EINTR or ECHILD ends the process.
+ *
+ * NOTE(review): the logging helpers and exit() used here may not be
+ * async-signal-safe, and errno is not saved/restored - confirm this is
+ * acceptable for the callers.
+ */
+static void sigchld_handler(int sig)
+{
+	int collected = 0;	/* set once at least one child was reaped */
+	int status;
+	pid_t pid;
+
+	while (1) {
+		pid = waitpid(-1, &status, WNOHANG | __WALL);
+		if (pid == 0) {
+			/* children exist, but none has changed state yet */
+			ckpt_dbg("SIGCHLD: child not ready\n");
+			break;
+		} else if (pid > 0) {
+			/* inform collection coordinator or root-task */
+			if (pid == global_child_pid) {
+				global_child_status = status;
+				global_child_collected = 1;
+				report_exit_status(status, "SIGCHLD: ", 1);
+			}
+			ckpt_dbg("SIGCHLD: collected child %d\n", pid);
+			collected = 1;
+		} else if (errno == EINTR) {
+			/* interrupted: simply retry the waitpid() */
+			ckpt_dbg("SIGCHLD: waitpid interrupted\n");
+		} else if (errno == ECHILD) {
+			/* no children left to wait for */
+			break;
+		} else {
+			ckpt_perror("WEIRD !! child collection failed");
+			exit(1);
+		}
+	}
+
+	if (!collected)
+		ckpt_dbg("SIGCHLD: already collected\n");
+}
+
+/*
+ * SIGINT handler: relay the signal configured in global_send_sigint to
+ * the child recorded in global_child_pid, both to its process group and
+ * to the child itself. A configured value of zero means "ignore".
+ */
+static void sigint_handler(int sig)
+{
+	pid_t target = global_child_pid;
+
+	/* the delivered signal number is replaced by the configured one */
+	sig = global_send_sigint;
+	if (sig == 0) {
+		ckpt_verbose("Interrupt attempt .. ignored.\n");
+		return;
+	}
+
+	ckpt_verbose("Interrupted: sent SIG%s to "
+		     "restarted tasks\n", sig2str(sig));
+
+	/* nobody to signal yet */
+	if (!target)
+		return;
+
+	ckpt_dbg("delegating SIG%s to child %d "
+		 "(coordinator/root task)\n",
+		 sig2str(sig), target);
+	kill(-target, sig);
+	kill(target, sig);
+}
+
+/*
+ * freezer_prepare - thaw the freezer cgroup and remember its tasks file
+ * @ctx: restart context; ctx->args->freezer is the freezer directory
+ *
+ * Writes "THAWED" to <freezer>/freezer.state and, on success, stores the
+ * path "<freezer>/tasks" in ctx->freezer (heap allocated; the caller
+ * releases it with free()).
+ *
+ * Returns 0 on success, -1 on failure (ctx->freezer is left untouched).
+ */
+static int freezer_prepare(struct ckpt_ctx *ctx)
+{
+	char *freezer;
+	int fd, n, ret;
+
+#define FREEZER_THAWED  "THAWED"
+
+	/* room for the directory plus the longest suffix appended below */
+	freezer = malloc(strlen(ctx->args->freezer) + 32);
+	if (!freezer) {
+		ckpt_perror("malloc freezer buf");
+		return -1;
+	}
+
+	ret = -1;
+	sprintf(freezer, "%s/freezer.state", ctx->args->freezer);
+
+	fd = open(freezer, O_WRONLY, 0);
+	if (fd < 0) {
+		ckpt_perror("freezer path");
+		goto out_free;
+	}
+
+	n = write(fd, FREEZER_THAWED, sizeof(FREEZER_THAWED));
+	if (n != sizeof(FREEZER_THAWED)) {
+		ckpt_perror("thawing freezer");
+		goto out_close;
+	}
+
+	sprintf(freezer, "%s/tasks", ctx->args->freezer);
+	ctx->freezer = freezer;
+	/*
+	 * Ownership moved to @ctx: do not let the shared exit path below
+	 * free the buffer, or ctx->freezer would dangle (and later be
+	 * freed a second time by the caller).
+	 */
+	freezer = NULL;
+	ret = 0;
+
+out_close:
+	close(fd);
+
+out_free:
+	free(freezer);
+	return ret;
+}
+
+/*
+ * Add @pid to the freezer by writing its decimal value to the file
+ * named by ctx->freezer. Returns 0 on success, -1 on failure.
+ */
+static int freezer_register(struct ckpt_ctx *ctx, pid_t pid)
+{
+	char buf[16];
+	int tasks_fd, len, rc;
+
+	tasks_fd = open(ctx->freezer, O_WRONLY, 0);
+	if (tasks_fd < 0) {
+		ckpt_perror("freezer path");
+		return -1;
+	}
+
+	len = sprintf(buf, "%d", pid);
+	if (write(tasks_fd, buf, len) == len) {
+		rc = 0;
+	} else {
+		/*
+		 * NOTE(review): the message holds a %d but no pid is
+		 * passed along - confirm how ckpt_perror() treats it.
+		 */
+		ckpt_perror("adding pid %d to freezer");
+		rc = -1;
+	}
+
+	close(tasks_fd);
+	return rc;
+}
+
+/*
+ * Validate the specified arguments and initialize globals based on the
+ * arguments. Return 0 on success, -1 on failure.
+ *
+ * Side effects visible below: a non-negative args->infd is dup2()'ed
+ * onto stdin (and then closed unless it already was stdin); a negative
+ * args->klogfd is replaced by CHECKPOINT_FD_NONE; args->mnt_pty forces
+ * args->mntns, and args->pidns forces args->pids.
+ */
+int process_args(struct cr_restart_args *args)
+{
+	global_debug = args->debug;
+	global_verbose = args->verbose;
+	global_ulogfd = args->ulogfd;
+	global_uerrfd = args->uerrfd;
+
+	/* input file descriptor (default: stdin) */
+	if (args->infd >= 0) {
+		if (dup2(args->infd, STDIN_FILENO) < 0) {
+			ckpt_perror("dup2 input file");
+			return -1;
+		}
+		if (args->infd != STDIN_FILENO)
+			close(args->infd);
+	}
+
+	/* output file descriptor (default: none) */
+	if (args->klogfd < 0)
+		args->klogfd = CHECKPOINT_FD_NONE;
+
+	/* remounting /dev/pts implies a private mounts namespace */
+	if (args->mnt_pty)
+		args->mntns = 1;
+
+#ifndef CLONE_NEWPID
+	if (args->pidns) {
+		ckpt_err("This version of restart was compiled without "
+		       "support for --pidns.\n");
+		return -1;
+	}
+#endif
+
+#ifndef CHECKPOINT_DEBUG
+	if (global_debug) {
+		ckpt_err("This version of restart was compiled without "
+		       "support for --debug.\n");
+		return -1;
+	}
+#endif
+
+	/* a new pid namespace implies restoring the original pids */
+	if (args->pidns)
+		args->pids = 1;
+
+#if 0   /* Deferred until __NR_eclone makes it to standard headers */
+#ifndef __NR_eclone
+	if (args->pids) {
+		ckpt_err("This version of restart was compiled without "
+		       "support for --pids.\n");
+		return -1;
+	}
+#endif
+#endif
+
+	/* --self restarts the calling task only: reject multiprocess options */
+	if (args->self &&
+	    (args->pids || args->pidns || args->show_status ||
+	     args->copy_status || args->freezer)) {
+		ckpt_err("Invalid mix of --self with multiprocess options\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * cr_restart - restart tasks from the checkpoint image on stdin
+ * @args: restart options; validated (and partly adjusted) by
+ *        process_args(), which also redirects args->infd onto stdin
+ *
+ * With args->self the calling task restarts itself through restart(2);
+ * control only comes back here if that call failed. Otherwise the image
+ * headers, task tree and vpids are read, the creator tree is built, the
+ * feeder is forked and one of the coordinator variants drives the
+ * restart.
+ *
+ * Returns global_child_pid on success and a negative value on failure.
+ * The freezer path held in the local context is released before
+ * returning.
+ */
+int cr_restart(struct cr_restart_args *args)
+{
+	struct ckpt_ctx ctx;
+	int ret;
+
+	memset(&ctx, 0, sizeof(ctx));
+	ctx.args = args;
+
+	ret = process_args(args);
+	if (ret < 0)
+		return ret;
+
+	/* freezer preparation */
+	if (args->freezer && freezer_prepare(&ctx) < 0)
+		return -1;
+
+	ret = -1;
+	/* self-restart ends here: */
+	if (args->self) {
+		/* private mounts namespace ? */
+		if (args->mntns && unshare(CLONE_NEWNS | CLONE_FS) < 0) {
+			ckpt_perror("unshare");
+			goto cleanup_freezer;
+		}
+		/* chroot ? */
+		if (args->root && chroot(args->root) < 0) {
+			ckpt_perror("chroot");
+			goto cleanup_freezer;
+		}
+		/* remount /dev/pts ? */
+		if (args->mnt_pty && ckpt_remount_devpts(&ctx) < 0)
+			goto cleanup_freezer;
+
+		restart(getpid(), STDIN_FILENO, RESTART_TASKSELF, args->klogfd);
+
+		/* reach here if restart(2) failed ! */
+		ckpt_perror("restart");
+		goto cleanup_freezer;
+	}
+
+	setpgrp();
+
+	ret = ckpt_read_header(&ctx);
+	if (ret < 0) {
+		ckpt_perror("read c/r header");
+		goto cleanup_freezer;
+	}
+
+	ret = ckpt_read_header_arch(&ctx);
+	if (ret < 0) {
+		ckpt_perror("read c/r header arch");
+		goto cleanup_freezer;
+	}
+
+	ret = ckpt_read_container(&ctx);
+	if (ret < 0) {
+		ckpt_perror("read c/r container section");
+		goto cleanup_freezer;
+	}
+
+	ret = ckpt_read_tree(&ctx);
+	if (ret < 0) {
+		ckpt_perror("read c/r tree");
+		goto cleanup_freezer;
+	}
+
+	ret = ckpt_read_vpids(&ctx);
+	if (ret < 0) {
+		/* distinct message, so this is not mistaken for a tree error */
+		ckpt_perror("read c/r vpids");
+		goto cleanup_freezer;
+	}
+
+	/* build creator-child-relationship tree */
+	ret = hash_init(&ctx);
+	if (ret < 0)
+		goto cleanup_freezer;
+
+	/* the hash is only needed while the tree is being built */
+	ret = ckpt_build_tree(&ctx);
+	hash_exit(&ctx);
+	if (ret < 0)
+		goto cleanup_freezer;
+
+	ret = assign_vpids(&ctx);
+	if (ret < 0)
+		goto cleanup_freezer;
+
+	ret = ckpt_fork_feeder(&ctx);
+	if (ret < 0)
+		goto cleanup_freezer;
+
+	/*
+	 * Have the first child in the restarted process tree
+	 * setup devpts, root-dir and /proc if necessary, ...
+	 */
+	if (ctx.args->mnt_pty)
+		ctx.tasks_arr[0].flags |= TASK_NEWPTS;
+	if (ctx.args->mntns)
+		ctx.tasks_arr[0].flags |= TASK_NEWNS;
+	if (ctx.args->root)
+		ctx.tasks_arr[0].flags |= TASK_NEWROOT;
+
+	if (ctx.args->pidns && ctx.tasks_arr[0].pid != 1) {
+		ckpt_dbg("new pidns without init\n");
+		if (global_send_sigint == -1)
+			global_send_sigint = SIGINT;
+		/*
+		 * ...unless we have an explicit coordinator, in which case
+		 * the coordinator should set up the filesystems and
+		 * not the first process in the application process tree.
+		 */
+		ctx.tasks_arr[0].flags &=
+			~(TASK_NEWPTS | TASK_NEWROOT | TASK_NEWNS);
+		ret = ckpt_coordinator_pidns(&ctx);
+	} else if (ctx.args->pidns) {
+		ckpt_dbg("new pidns with init\n");
+		ctx.tasks_arr[0].flags |= TASK_NEWPID | TASK_NEWNS;
+		if (global_send_sigint == -1)
+			global_send_sigint = SIGKILL;
+		ret = ckpt_coordinator(&ctx);
+	} else {
+		ckpt_dbg("subtree (existing pidns)\n");
+		if (global_send_sigint == -1)
+			global_send_sigint = SIGINT;
+		ret = ckpt_coordinator(&ctx);
+	}
+
+	/*
+	 * On success, return pid of root of the restart process tree.
+	 */
+	if (ret >= 0)
+		ret = global_child_pid;
+
+cleanup_freezer:
+	free(ctx.freezer);
+	return ret;
+}
+
+/*
+ * Decode the wait status of the collected child.
+ * @status:  wait status to interpret
+ * @mimic:   if set, reproduce the child's fate: re-raise its terminating
+ *           signal on ourselves, or return its exit status
+ * @verbose: if set, print the outcome through ckpt_verbose()
+ *
+ * Returns the child's exit status when @mimic is set and the child was
+ * not killed by a signal; returns 0 in every other case.
+ */
+static int ckpt_parse_status(int status, int mimic, int verbose)
+{
+	int sig = 0;
+	int ret = 0;
+
+	if (verbose && global_sent_sigint)
+		ckpt_verbose("Terminated\n");
+	if (WIFSIGNALED(status)) {
+		sig = WTERMSIG(status);
+		if (verbose && !global_sent_sigint)
+			ckpt_verbose("Killed %d\n", sig);
+		ckpt_dbg("task terminated with signal %d\n", sig);
+	} else if (WIFEXITED(status)) {
+		ret = WEXITSTATUS(status);
+		if (verbose)
+			ckpt_verbose("Exited %d\n", ret);
+		ckpt_dbg("task exited with status %d\n", ret);
+	}
+
+	if (mimic) {
+		if (sig) {
+			ckpt_dbg("mimic sig %d\n", sig);
+			signal(sig, SIG_DFL);  /* so kill() affects us */
+			kill(_getpid(), sig);
+			/* if the signal did not end us, fall through to return 0 */
+		} else {
+			ckpt_dbg("mimic ret %d\n", ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Wait for the child recorded in global_child_pid and interpret its
+ * wait status via ckpt_parse_status(), honouring args->copy_status
+ * (mimic) and args->show_status (verbose).
+ *
+ * Returns the value of ckpt_parse_status().
+ */
+static int ckpt_collect_child(struct ckpt_ctx *ctx)
+{
+	int mimic = ctx->args->copy_status;
+	int verbose = ctx->args->show_status;
+	int status;
+	pid_t pid;
+
+	/*
+	 * if sigchld_handler() collected the child by now, it set
+	 * @global_child_collected and then left the status inside
+	 * @global_child_status.
+	 */
+	while (!global_child_collected) {
+		pid = waitpid(global_child_pid, &status, __WALL);
+		if (pid == global_child_pid)
+			break;
+		/* otherwise retry: the loop ends once the handler reaped it */
+	}
+	/*
+	 * moreover, the child may have terminated right before the
+	 * call to waitpid()
+	 */
+	if (global_child_collected) {
+		status = global_child_status;
+	} else if (pid < 0) {
+		/*
+		 * NOTE(review): the loop above only exits with
+		 * pid == global_child_pid or with the collected flag set,
+		 * so this branch looks unreachable - confirm.
+		 */
+		ckpt_perror("WEIRD: collect child task");
+		return -1;
+	}
+
+	return ckpt_parse_status(status, mimic, verbose);
+}
+
+/*
+ * Replace /dev/pts with a fresh devpts instance. /dev/ptmx has to be a
+ * symbolic link; a failure to detach the old mount is fatal only if the
+ * CKPT_COND_MNTPTY "fail" condition is set.
+ * Returns 0 on success, -1 on failure.
+ */
+static int ckpt_remount_devpts(struct ckpt_ctx *ctx)
+{
+	struct stat st;
+	int rc;
+
+	/* a non-link /dev/ptmx would break things later on, so bail out */
+	if (lstat("/dev/ptmx", &st) < 0) {
+		ckpt_perror("stat /dev/ptmx");
+		return -1;
+	}
+	if (!S_ISLNK(st.st_mode)) {
+		ckpt_err("Error: /dev/ptmx must be a link to /dev/pts/ptmx\n");
+		return -1;
+	}
+
+	/* unlikely to fail, and even then we may prefer to carry on */
+	rc = umount2("/dev/pts", MNT_DETACH);
+	if (rc < 0 && ckpt_cond_fail(ctx, CKPT_COND_MNTPTY)) {
+		ckpt_perror("umount -l /dev/pts");
+		return -1;
+	}
+	if (rc < 0 && ckpt_cond_warn(ctx, CKPT_COND_MNTPTY))
+		ckpt_err("[warn] failed to un-mount old /dev/pts\n");
+
+	rc = mount("pts", "/dev/pts", "devpts", 0,
+		   "ptmxmode=666,newinstance");
+	if (rc < 0) {
+		ckpt_perror("mount -t devpts -o newinstance");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Close every file descriptor open in the calling task.
+ *
+ * The descriptors are taken from /proc/self/task/TID/fd when that
+ * directory can be opened; otherwise every number below the hard
+ * RLIMIT_NOFILE limit is closed. Always returns 0.
+ */
+static int ckpt_close_files(void)
+{
+	char fdpath[64], *endp;
+	struct dirent *dent;
+	DIR *dirp;
+	int fd;
+
+	/*
+	 * Close all the open files reported in /proc/self/task/TID/fd.
+	 * If that file is unavailable, do it the traditional way...
+	 */
+	/* bound the write by the real buffer size, not PATH_MAX */
+	snprintf(fdpath, sizeof(fdpath), "/proc/self/task/%d/fd", _gettid());
+	dirp = opendir(fdpath);
+	if (dirp) {
+		/*
+		 * The directory stream owns a descriptor too; it must stay
+		 * open while we iterate and is released by closedir().
+		 */
+		int self = dirfd(dirp);
+
+		while ((dent = readdir(dirp))) {
+			fd = strtol(dent->d_name, &endp, 10);
+			/* skip "." , ".." and anything not purely numeric */
+			if (dent->d_name == endp || *endp != '\0')
+				continue;
+			if (fd == self)
+				continue;
+			close(fd);
+		}
+		closedir(dirp);
+	} else {
+		struct rlimit rlim;
+		getrlimit(RLIMIT_NOFILE, &rlim);
+		for (fd = 0; fd < rlim.rlim_max; fd++)
+			close(fd);
+	}
+
+	return 0;
+}
+
+/*
+ * Keep reaping children until none are left (waitpid() fails with
+ * ECHILD). The status of the child recorded in global_child_pid is
+ * remembered the first time it is seen, and finally mimicked through
+ * ckpt_parse_status().
+ */
+static int ckpt_pretend_reaper(struct ckpt_ctx *ctx)
+{
+	int wstatus;
+	pid_t reaped;
+
+	for (;;) {
+		reaped = waitpid(-1, &wstatus, __WALL);
+		if (reaped < 0 && errno == ECHILD)
+			break;
+
+		/* only the not-yet-collected main child is of interest */
+		if (global_child_collected || reaped != global_child_pid)
+			continue;
+
+		global_child_collected = 1;
+		global_child_status = wstatus;
+	}
+
+	return ckpt_parse_status(global_child_status, 1, 0);
+}
+
+/*
+ * Check, without blocking, that child @pid is still around.
+ * @str labels the child in the diagnostics.
+ * Returns 0 if the child has not changed state, -1 otherwise.
+ */
+static int ckpt_probe_child(pid_t pid, char *str)
+{
+	int wstatus, rc;
+
+	/* a non-blocking waitpid() doubles as a liveness probe */
+	rc = waitpid(pid, &wstatus, WNOHANG);
+
+	if (rc == pid) {
+		/* the child is already gone: say how it went */
+		report_exit_status(wstatus, str, 0);
+		return -1;
+	}
+
+	if (rc == 0)
+		return 0;
+
+	if (rc < 0 && errno == ECHILD)
+		ckpt_err("WEIRD: %s exited without trace (%s)\n",
+			 str, strerror(errno));
+	else
+		ckpt_err("waitpid for %s (%s)", str, strerror(errno));
+
+	return -1;
+}
+
+/*
+ * Give the caller a fresh /proc instance: a task that starts a new
+ * pid-ns needs /proc remounted for it to reflect that pid-ns.
+ * A failure to detach the old mount is fatal only if the
+ * CKPT_COND_MNTPROC "fail" condition is set.
+ * Returns 0 on success, -1 on failure.
+ */
+static int ckpt_remount_proc(struct ckpt_ctx *ctx)
+{
+	int rc;
+
+	/* unlikely to fail, and even then we would rather carry on */
+	rc = umount2("/proc", MNT_DETACH);
+	if (rc < 0 && ckpt_cond_fail(ctx, CKPT_COND_MNTPROC)) {
+		ckpt_perror("umount -l /proc");
+		return -1;
+	}
+	if (rc < 0 && ckpt_cond_warn(ctx, CKPT_COND_MNTPROC))
+		ckpt_err("[warn] failed to un-mount old /proc\n");
+
+	rc = mount("proc", "/proc", "proc", 0, NULL);
+	if (rc < 0) {
+		ckpt_perror("mount -t proc");
+		return -1;
+	}
+
+	return 0;
+}
+
+#ifdef CLONE_NEWPID
+/*
+ * Entry point of the coordinator cloned by ckpt_coordinator_pidns().
+ * @arg: the restart context (struct ckpt_ctx *)
+ *
+ * Sets up the filesystem view (optional chroot, fresh /proc, optional
+ * /dev/pts), then runs ckpt_coordinator(). Setup failures terminate the
+ * coordinator with _exit(1); otherwise the return value of
+ * ckpt_coordinator() is returned.
+ */
+static int __ckpt_coordinator(void *arg)
+{
+	struct ckpt_ctx *ctx = (struct ckpt_ctx *) arg;
+
+	/* chroot ? */
+	if (ctx->args->root && chroot(ctx->args->root) < 0) {
+		ckpt_perror("chroot");
+		_exit(1);
+	}
+	/* tasks with new pid-ns need new /proc mount */
+	if (ckpt_remount_proc(ctx) < 0)
+		_exit(1);
+	/* remount /dev/pts ? */
+	if (ctx->args->mnt_pty && ckpt_remount_devpts(ctx) < 0)
+		_exit(1);
+
+	/* the read end of the status pipe is dropped unless --wait is used */
+	if (!ctx->args->wait)
+		close(ctx->pipe_coord[0]);
+
+	return ckpt_coordinator(ctx);
+}
+
+/*
+ * Read the coordinator's report from the status pipe, closing both pipe
+ * ends in the process. Any data read counts as success (0); a read
+ * error or an empty pipe leaves the initial -1 in place.
+ */
+static int ckpt_coordinator_status(struct ckpt_ctx *ctx)
+{
+	int reported = -1;
+	int nread;
+
+	close(ctx->pipe_coord[1]);
+
+	nread = read(ctx->pipe_coord[0], &reported, sizeof(reported));
+	if (nread > 0) {
+		reported = 0;
+	} else if (nread == 0) {
+		/* pipe closed without a report from the coordinator */
+		ckpt_dbg("Coordinator failed to report status.");
+	} else {
+		ckpt_perror("read coordinator status");
+	}
+
+	close(ctx->pipe_coord[0]);
+	return reported;
+}
+
+/*
+ * ckpt_coordinator_pidns - run the coordinator in a new pid namespace
+ * @ctx: restart context
+ *
+ * Clones __ckpt_coordinator() with CLONE_NEWPID | CLONE_NEWNS, installs
+ * the SIGCHLD/SIGINT handlers, reads the coordinator's report from the
+ * status pipe and, with --wait, collects the coordinator.
+ *
+ * Returns a negative value on failure; otherwise the result of
+ * ckpt_coordinator_status() or, with --wait, of ckpt_collect_child().
+ */
+static int ckpt_coordinator_pidns(struct ckpt_ctx *ctx)
+{
+	unsigned long flags;
+	pid_t coord_pid;
+	int copy, ret;
+	genstack stk;
+	void *sp;
+
+	ckpt_dbg("forking coordinator in new pidns\n");
+
+	/*
+	 * The coordinator reports restart success/failure via pipe.
+	 * (It cannot use return value, because in the default case
+	 * of --wait --copy-status it is already used to report the
+	 * root-task's return value).
+	 */
+	if (pipe(ctx->pipe_coord) < 0) {
+		ckpt_perror("pipe");
+		return -1;
+	}
+
+	ret = -1;
+	stk = genstack_alloc(PTHREAD_STACK_MIN);
+	if (!stk) {
+		ckpt_perror("coordinator genstack_alloc");
+		goto out_close;
+	}
+	sp = genstack_sp(stk);
+
+	/* the coordinator itself must always copy the root task's status */
+	copy = ctx->args->copy_status;
+	ctx->args->copy_status = 1;
+
+	/* in new pidns, we need these: */
+	flags = SIGCHLD | CLONE_NEWPID | CLONE_NEWNS;
+
+	coord_pid = clone(__ckpt_coordinator, sp, flags, ctx);
+	genstack_release(stk);
+	/*
+	 * The clone does not share our memory, so the child keeps its own
+	 * copy of the override. Undo it here right away, so that the
+	 * caller's args are restored on the error paths below as well.
+	 */
+	ctx->args->copy_status = copy;
+	if (coord_pid < 0) {
+		ckpt_perror("clone coordinator");
+		goto out_close;
+	}
+	global_child_pid = coord_pid;
+
+	/* catch SIGCHLD to detect errors in coordinator */
+	signal(SIGCHLD, sigchld_handler);
+	/* catch SIGINT to propagate ctrl-c to the coordinator */
+	signal(SIGINT, sigint_handler);
+
+	/*
+	 * The child (coordinator) may have already exited before the
+	 * signal handler was plugged; verify that it's still there.
+	 */
+	if (ckpt_probe_child(coord_pid, "coordinator") < 0)
+		goto out_close;
+
+	ret = ckpt_coordinator_status(ctx);
+
+	if (ret == 0 && ctx->args->wait)
+		ret = ckpt_collect_child(ctx);
+
+out_close:
+	/*
+	 * NOTE(review): when we get here after ckpt_coordinator_status()
+	 * both pipe ends were already closed there - confirm the repeated
+	 * close() calls are harmless.
+	 */
+	if (ret < 0) {
+		close(ctx->pipe_coord[0]);
+		close(ctx->pipe_coord[1]);
+	}
+	return ret;
+}
+#else /* CLONE_NEWPID */
+/*
+ * Stub used when CLONE_NEWPID is not defined: reports a logical error
+ * and fails.
+ */
+static int ckpt_coordinator_pidns(struct ckpt_ctx *ctx)
+{
+	ckpt_err("logical error: ckpt_coordinator_pidns unexpected\n");
+	return -1;
+}
+#endif /* CLONE_NEWPID */
+
+/*
+ * ckpt_coordinator - fork the root task and drive the restart
+ * @ctx: restart context
+ *
+ * Forks the task described by ctx->tasks_arr[0], installs the
+ * SIGCHLD/SIGINT handlers and calls restart(2) on the new root task.
+ * Afterwards it either reports to its parent and stays around as
+ * reaper (new pidns whose root task is not pid 1), or collects the root
+ * task when --wait is used.
+ *
+ * Returns -1 on failure; otherwise 0 or the value produced by
+ * ckpt_pretend_reaper() / ckpt_collect_child().
+ */
+static int ckpt_coordinator(struct ckpt_ctx *ctx)
+{
+	unsigned long flags = 0;
+	pid_t root_pid;
+	int ret;
+
+	root_pid = ckpt_fork_child(ctx, &ctx->tasks_arr[0]);
+	if (root_pid < 0)
+		return -1;
+	global_child_pid = root_pid;
+
+	/* catch SIGCHLD to detect errors during hierarchy creation */
+	signal(SIGCHLD, sigchld_handler);
+	/* catch SIGINT to propagate ctrl-c to the restarted tasks */
+	signal(SIGINT, sigint_handler);
+
+	/*
+	 * The child (root_task) may have already exited before the
+	 * signal handler was plugged; verify that it's still there.
+	 */
+	if (ckpt_probe_child(root_pid, "root task") < 0)
+		return -1;
+
+	if (ctx->args->keep_frozen)
+		flags |= RESTART_FROZEN;
+	if (ctx->args->keep_lsm)
+		flags |= RESTART_KEEP_LSM;
+
+	ret = restart(root_pid, STDIN_FILENO, flags, ctx->args->klogfd);
+
+	if (ret < 0) {
+		ckpt_perror("restart failed");
+		ckpt_verbose("Failed\n");
+		ckpt_dbg("restart failed ?\n");
+		return -1;
+	}
+
+	ckpt_verbose("Success\n");
+	ckpt_dbg("restart succeeded\n");
+
+	ret = 0;
+
+	/* same condition under which cr_restart() picks the pidns coordinator */
+	if (ctx->args->pidns && ctx->tasks_arr[0].pid != 1) {
+		/* Report success/failure to the parent */
+		if (write(ctx->pipe_coord[1], &ret, sizeof(ret)) < 0) {
+			ckpt_perror("failed to report status");
+			return -1;
+		}
+
+		/*
+		 * Close all open files to eliminate dependencies on
+		 * the outside of the container. Else, a subsequent
+		 * container-checkpoint will fail due to leaks. (Skip
+		 * when debugging to keep output from us visible).
+		 */
+		if (!global_debug) {
+			ckpt_close_files();
+			global_verbose = 0;
+		}
+
+		/*
+		 * If root task isn't container init, we must stay
+		 * around and be reaper until all tasks are gone.
+		 * Otherwise, container will die as soon as we exit.
+		 */
+		ret = ckpt_pretend_reaper(ctx);
+	} else if (ctx->args->wait) {
+		ret = ckpt_collect_child(ctx);
+	}
+
+	if (ret < 0)
+		ckpt_dbg("c/r failed ?\n");
+	else
+		ckpt_dbg("c/r succeeded\n");
+
+	return ret;
+}
+
+/* The root ("init") task is the first entry of the task table. */
+static inline struct task *ckpt_init_task(struct ckpt_ctx *ctx)
+{
+	struct task *root = ctx->tasks_arr;
+
+	return root;
+}
+
+/*
+ * ckpt_build_tree - build the task tree data structure which provides
+ * the "instructions" to re-create the task tree
+ *
+ * Allocates ctx->tasks_arr, fills it through ckpt_init_tree() and then
+ * assigns a creator to every task that does not have one yet.
+ * Returns 0 on success; on failure returns -1 with ctx->tasks_arr freed
+ * and reset to NULL.
+ */
+static int ckpt_build_tree(struct ckpt_ctx *ctx)
+{
+	struct task *task;
+	int i;
+
+	/*
+	 * Allow for additional tasks to be added on demand for
+	 * referenced pids of dead tasks (each task can introduce at
+	 * most two: session and process group IDs), as well as for
+	 * placeholder tasks (each session id may have at most one)
+	 */
+	ctx->tasks_max = ctx->pids_nr * 4;
+	ctx->tasks_arr = malloc(sizeof(*ctx->tasks_arr) * ctx->tasks_max);
+	if (!ctx->tasks_arr) {
+		ckpt_perror("malloc tasks array");
+		return -1;
+	}
+
+	/* initialize tree */
+	if (ckpt_init_tree(ctx) < 0) {
+		free(ctx->tasks_arr);
+		ctx->tasks_arr = NULL;
+		return -1;
+	}
+
+	/*
+	 * assign a creator to each task (ctx->tasks_nr may grow while we
+	 * iterate, as placeholder tasks get appended to the table)
+	 */
+	for (i = 0; i < ctx->tasks_nr; i++) {
+		task = &ctx->tasks_arr[i];
+		if (task->creator)
+			continue;
+		if (ckpt_set_creator(ctx, task) < 0) {
+			free(ctx->tasks_arr);
+			ctx->tasks_arr = NULL;
+			return -1;
+		}
+	}
+
+#ifdef CHECKPOINT_DEBUG
+	/* dump the resulting table, one line per task */
+	ckpt_dbg("====== TASKS\n");
+	for (i = 0; i < ctx->tasks_nr; i++) {
+		task = &ctx->tasks_arr[i];
+		ckpt_dbg("\t[%d] pid %d ppid %d sid %d creator %d",
+			 i, task->pid, task->ppid, task->sid,
+			 task->creator->pid);
+		if (task->next_sib)
+			ckpt_dbg_cont(" next %d", task->next_sib->pid);
+		if (task->prev_sib)
+			ckpt_dbg_cont(" prev %d", task->prev_sib->pid);
+		if (task->phantom)
+			ckpt_dbg_cont(" placeholder %d", task->phantom->pid);
+		ckpt_dbg_cont(" %c%c%c%c%c%c",
+		       (task->flags & TASK_THREAD) ? 'T' : ' ',
+		       (task->flags & TASK_SIBLING) ? 'P' : ' ',
+		       (task->flags & TASK_SESSION) ? 'S' : ' ',
+		       (task->flags & TASK_NEWPID) ? 'N' : ' ',
+		       (task->flags & TASK_GHOST) ? 'G' : ' ',
+		       (task->flags & TASK_DEAD) ? 'D' : ' ');
+		ckpt_dbg_cont("\n");
+	}
+	ckpt_dbg("............\n");
+#endif
+
+	return 0;
+}
+
+/*
+ * ckpt_setup_task - add a ghost task for a pid that has no task entry
+ * @ctx:  restart context
+ * @pid:  pid to account for; 0 (outside the namespace) is ignored
+ * @ppid: value stored as both parent pid and session id of the ghost
+ *
+ * Does nothing if @pid is already hashed. Otherwise appends a
+ * TASK_GHOST entry to the task table, hashes it and updates the highest
+ * pid seen (ctx->tasks_pid).
+ *
+ * Returns 0 on success (or when there was nothing to do), -1 if the
+ * task table is full or the hash insertion failed.
+ */
+static int ckpt_setup_task(struct ckpt_ctx *ctx, pid_t pid, pid_t ppid)
+{
+	struct task *task;
+
+	if (pid == 0)  /* ignore if outside namespace */
+		return 0;
+
+	if (hash_lookup(ctx, pid))  /* already handled */
+		return 0;
+
+	/* never write past the end of the table sized by ckpt_build_tree() */
+	if (ctx->tasks_nr >= ctx->tasks_max) {
+		ckpt_err("out of space in task table !\n");
+		return -1;
+	}
+
+	task = &ctx->tasks_arr[ctx->tasks_nr++];
+
+	task->flags = TASK_GHOST;
+
+	task->pid = pid;
+	task->ppid = ppid;
+	task->tgid = pid;
+	task->sid = ppid;
+
+	task->children = NULL;
+	task->next_sib = NULL;
+	task->prev_sib = NULL;
+	task->creator = NULL;
+	task->phantom = NULL;
+
+	task->rpid = -1;
+	task->ctx = ctx;
+
+	if (hash_insert(ctx, pid, task) < 0)
+		return -1;
+
+	/* remember the max pid seen */
+	if (task->pid > ctx->tasks_pid)
+		ctx->tasks_pid = task->pid;
+
+	return 0;
+}
+
+/*
+ * Sanity-check one pid value taken from the image.
+ * @which names the field ("pid", "sid", ...) and @i is the task index,
+ * both used for diagnostics only.
+ * Returns 1 if the value is acceptable, 0 if it is not.
+ */
+static int ckpt_valid_pid(struct ckpt_ctx *ctx, pid_t pid, char *which, int i)
+{
+	if (pid < 0) {
+		ckpt_err("Invalid %s %d (for task#%d)\n", which, pid, i);
+		return 0;
+	}
+
+	/* a zero value is only questionable when not using --pidns */
+	if (pid != 0 || ctx->args->pidns)
+		return 1;
+
+	if (ckpt_cond_fail(ctx, CKPT_COND_PIDZERO)) {
+		ckpt_err("[err] task # %d with %s zero"
+			 " (requires --pidns)\n", i + 1, which);
+		return 0;
+	}
+
+	if (ckpt_cond_warn(ctx, CKPT_COND_PIDZERO))
+		ckpt_err("[warn] task # %d with %s zero"
+			 " (consider --pidns)\n", i + 1, which);
+
+	return 1;
+}
+
+/*
+ * Pick a pid that is not yet present in the hash, searching upwards
+ * from ctx->tasks_pid and wrapping from INT_MAX back to 2.
+ * Returns the pid (also left in ctx->tasks_pid), or -1 once the whole
+ * space has been tried.
+ */
+static int ckpt_alloc_pid(struct ckpt_ctx *ctx)
+{
+	int tries = 0;
+
+	/*
+	 * allocate an unused pid for the placeholder
+	 * (this gets slow when the pid-space is nearly exhausted)
+	 */
+	for (;;) {
+		if (ctx->tasks_pid != INT_MAX)
+			ctx->tasks_pid++;
+		else
+			ctx->tasks_pid = 2;
+
+		if (tries++ == INT_MAX) {	/* went all the way around */
+			ckpt_err("pid namsepace exhausted");
+			return -1;
+		}
+
+		if (!hash_lookup(ctx, ctx->tasks_pid))
+			break;
+	}
+
+	return ctx->tasks_pid;
+}
+
+/*
+ * Create the ghost task that stands in for zero sid/pgid values: it
+ * gets a freshly allocated pid and the root task's vpid as its parent.
+ * Returns the new pid, or -1 on failure.
+ */
+static int ckpt_zero_pid(struct ckpt_ctx *ctx)
+{
+	pid_t ghost = ckpt_alloc_pid(ctx);
+
+	if (ghost < 0 ||
+	    ckpt_setup_task(ctx, ghost, ctx->pids_arr[0].vpid) < 0)
+		return -1;
+
+	return ghost;
+}
+
+/*
+ * ckpt_init_tree - populate the task table from the pids array
+ * @ctx: restart context (ctx->pids_arr / ctx->pids_nr are the input)
+ *
+ * Creates one task entry per checkpointed pid, adds ghost entries for
+ * sids, tgids and pgids that have no task of their own, substitutes a
+ * ghost pid for zero sid/pgid values when not using --pidns, and marks
+ * the first entry as the root task.
+ *
+ * Returns 0 on success, -1 on invalid input or allocation failure.
+ */
+static int ckpt_init_tree(struct ckpt_ctx *ctx)
+{
+	struct ckpt_pids *pids_arr = ctx->pids_arr;
+	int pids_nr = ctx->pids_nr;
+	struct task *task;
+	pid_t root_pid;
+	pid_t root_sid;
+	pid_t zero_pid = 0;
+	int i;
+
+	root_pid = pids_arr[0].vpid;
+	root_sid = pids_arr[0].vsid;
+
+	/*
+	 * The case where root_sid != root_pid is special. It must be
+	 * from a subtree checkpoint (in container, root_sid is either
+	 * same as root_pid or 0), and root_sid was inherited from an
+	 * ancestor of that subtree.
+	 *
+	 * If we restart with --pidns, make the root-task also inherit
+	 * sid from its ancestor (== coordinator), whatever 'restart'
+	 * task currently has.  For that, we force the root-task's sid
+	 * and all references to it from other tasks (via sid and
+	 * pgid), to 0. Later, the feeder will substitute the
+	 * coordinator's sid for them.
+	 *
+	 * (Note that this still works even if the coordinator's sid
+	 * is "used" by a restarting task: a new-pidns restart will
+	 * fail because the pid is in use, and in an old-pidns restart
+	 * the task will be assigned a new pid anyway).
+	 *
+	 * If we restart with --no-pidns, we'll add a ghost task below
+	 * whose pid will be used instead of these zeroed entries.
+	 */
+
+	/* forcing root_sid to -1, will make comparisons below fail */
+	if (root_sid == root_pid)
+		root_sid = -1;
+
+	/* populate with known tasks */
+	for (i = 0; i < pids_nr; i++) {
+		task = &ctx->tasks_arr[i];
+
+		task->flags = 0;
+
+		if (!ckpt_valid_pid(ctx, pids_arr[i].vpid, "pid", i))
+			return -1;
+		else if (!ckpt_valid_pid(ctx, pids_arr[i].vtgid, "tgid", i))
+			return -1;
+		else if (!ckpt_valid_pid(ctx, pids_arr[i].vsid, "sid", i))
+			return -1;
+		else if (!ckpt_valid_pid(ctx, pids_arr[i].vpgid, "pgid", i))
+			return -1;
+
+		/* zero out references to the inherited root session (see above) */
+		if (pids_arr[i].vsid == root_sid)
+			pids_arr[i].vsid = 0;
+		if (pids_arr[i].vpgid == root_sid)
+			pids_arr[i].vpgid = 0;
+
+		task->pid = pids_arr[i].vpid;
+		task->ppid = pids_arr[i].vppid;
+		task->tgid = pids_arr[i].vtgid;
+		task->sid = pids_arr[i].vsid;
+
+		task->children = NULL;
+		task->next_sib = NULL;
+		task->prev_sib = NULL;
+		task->creator = NULL;
+		task->phantom = NULL;
+
+		task->rpid = -1;
+		task->ctx = ctx;
+
+		if (hash_insert(ctx, task->pid, task) < 0)
+			return -1;
+	}
+
+	ctx->tasks_nr = pids_nr;
+
+	/* add pids unaccounted for (no tasks) */
+	for (i = 0; i < pids_nr; i++) {
+		pid_t sid;
+
+		sid = pids_arr[i].vsid;
+
+		/* Remember if we find any zero vsid/vpgid - see below */
+		if (pids_arr[i].vsid == 0 || pids_arr[i].vpgid == 0)
+			zero_pid = 1;
+		/*
+		 * An unaccounted-for sid belongs to a task that was a
+		 * session leader and died. We can safely set its parent
+		 * (and creator) to be the root task.
+		 */
+		if (ckpt_setup_task(ctx, sid, root_pid) < 0)
+			return -1;
+
+		/*
+		 * An sid == 0 means that the session was inherited from
+		 * an ancestor of root_task, and more specifically, via
+		 * root_task itself: make root_task our parent.
+		 */
+		if (sid == 0)
+			sid = root_pid;
+
+		/*
+		 * If a pid belongs to a dead thread group leader, we
+		 * need to add it with the same sid as current (and
+		 * other) threads.
+		 */
+		if (ckpt_setup_task(ctx, pids_arr[i].vtgid, sid) < 0)
+			return -1;
+
+		/*
+		 * If pgrp == sid, then the pgrp/sid will already have
+		 * been hashed by now (e.g. by the call above) and the
+		 * ckpt_setup_task() will return promptly.
+		 * If pgrp != sid, then the pgrp 'owner' must have the
+		 * same sid as us: all tasks with same pgrp must have
+		 * their sid matching.
+		 */
+		if (ckpt_setup_task(ctx, pids_arr[i].vpgid, sid) < 0)
+			return -1;
+	}
+
+	/*
+	 * Zero sid/pgid is disallowed in --no-pidns mode. If there
+	 * were any, we invent a new ghost-zero task and substitute
+	 * its pid for any such sid/pgid.
+	 */
+	if (zero_pid && !ctx->args->pidns) {
+		/* from here on zero_pid holds the ghost's pid, not a flag */
+		zero_pid = ckpt_zero_pid(ctx);
+		if (zero_pid < 0)
+			return -1;
+		for (i = 0; i < pids_nr; i++) {
+			if (pids_arr[i].vsid == 0) {
+				pids_arr[i].vsid = zero_pid;
+				pids_arr[i].vppid = zero_pid;
+			}
+			if (pids_arr[i].vpgid == 0) {
+				pids_arr[i].vpgid = zero_pid;
+				pids_arr[i].vppid = zero_pid;
+			}
+		}
+	}
+
+	/* mark root task(s), and set its "creator" to be zero_task */
+	ckpt_init_task(ctx)->flags |= TASK_ROOT;
+	ckpt_init_task(ctx)->creator = &zero_task;
+
+	ckpt_dbg("total tasks (including ghosts): %d\n", ctx->tasks_nr);
+	return 0;
+}
+
+/*
+ * Algorithm DumpForest
+ * "Transparent Checkpoint/Restart of Multiple Processes on Commodity
+ * Operating Systems" in USENIX 2007
+ * http://www.usenix.org/events/usenix07/tech/full_papers/laadan/laadan_html/paper.html
+ *
+ * The algorithm captures the state of the task forest. It considers
+ * all pid values, even of dead tasks (appearing in sid/pgid of other
+ * tasks). The input is a table with all pids; the output is a tree
+ * structure imposed on that table. The goal is to determine the
+ * creating parent (creator) of each task. At restart, the init task
+ * will recursively create the remaining tasks as instructed by the
+ * table.
+ *
+ * Each entry in the table consists of the following set of fields:
+ * flags, pid, tgid, sid, pgid, and pointers to a creator, next
+ * and previous sibling, and first child task. Note that the creator
+ * may not necessarily correspond to the parent. The possible flags
+ * are TASK_ROOT, TASK_GHOST, TASK_THREAD, TASK_SIBLING (that asks to
+ * inherit the parent via CLONE_PARENT), TASK_SESSION (that asks to
+ * inherit a session id), TASK_NEWPID (that asks to start a new pid
+ * namespace), and TASK_DEAD. The algorithm loops through all the
+ * entries in the table:
+ *
+ * If the entry is a thread and not the thread group leader, we set
+ * the creator to be the thread group leader and set TASK_THREAD.
+ *
+ * Otherwise, if the entry is a session leader, it must have called
+ * setsid(), and does not need to inherit its session. The creator is
+ * set to its real parent.
+ *
+ * Otherwise, if the entry is a dead task (no current task exists
+ * with the given pid), the only constraint is that it inherit the
+ * correct session id. The session leader is set as its creator.
+ *
+ * Otherwise, if the entry is an orphan task, it cannot inherit the
+ * correct session id from init. We add a placeholder task in the
+ * table whose function on restart is to inherit the session id from
+ * the session leader, create the task, then terminate so that the
+ * task will be orphaned. The placeholder is given an arbitrary pid
+ * not already in the table, and the sid identifying the session, and
+ * is marked TASK_DEAD.
+ *
+ * Otherwise, if the entry's sid is equal to its parent's, the only
+ * constraint is that it inherit the correct session id from its
+ * parent. This is simply done by setting its parent as its creator.
+ *
+ * Otherwise, the entry corresponds to a task which is not a session
+ * leader, does not share the session id with its parent, and hence
+ * whose session id must be inherited from an ancestor further up the
+ * tree forest. The task was forked by its parent before the parent
+ * changed its own sid. Its creator is set to be its parent, and it is
+ * marked TASK_SESSION. This flag is propagated up its ancestry until
+ * reaching an entry with that session id. This ensures that the sid
+ * correctly descends via inheritance to the current entry.
+ *
+ * If the traversal fails to find an entry with the same sid, it will
+ * stop at an entry of a leader of another session. This entry must
+ * have formerly been a descendant of the original session leader.
+ * Its creator will have already been set to init.  Because we now know
+ * that it needs to pass the original sid to its own descendants, we
+ * re-parent the entry to become a descendant of the original session
+ * leader.  This is done using a placeholder in a manner similar to
+ * how we handle orphans that are not session leaders.
+ */
+/*
+ * ckpt_set_creator - decide which task must create @task at restart
+ *
+ * Picks the creator, links @task at the head of that creator's list of
+ * children and, for TASK_SESSION tasks, propagates the session flag up
+ * the ancestry. May recurse to resolve the session leader's creator
+ * first. Must not be called for the root task.
+ *
+ * Returns 0 on success, -1 on inconsistent input or failure below.
+ */
+static int ckpt_set_creator(struct ckpt_ctx *ctx, struct task *task)
+{
+	struct task *session = hash_lookup(ctx, task->sid);
+	struct task *parent = hash_lookup(ctx, task->ppid);
+	struct task *creator;
+
+	if (task == ckpt_init_task(ctx)) {
+		ckpt_err("pid %d: logical error\n", ckpt_init_task(ctx)->pid);
+		return -1;
+	}
+
+	/* sid == 0 must have been inherited from outside the container */
+	if (task->sid == 0)
+		session = ckpt_init_task(ctx);
+
+	/*
+	 * NOTE(review): @session is dereferenced below without a NULL
+	 * check; this presumably relies on every sid having been hashed
+	 * while the table was populated - confirm.
+	 */
+	if (task->tgid != task->pid) {
+		/* thread: creator is thread-group-leader */
+		ckpt_dbg("pid %d: thread tgid %d\n", task->pid, task->tgid);
+		creator = hash_lookup(ctx, task->tgid);
+		if (!creator) {
+			/* oops... thread group leader MIA */
+			ckpt_err("pid %d: no leader %d\n", task->pid, task->tgid);
+			return -1;
+		}
+		task->flags |= TASK_THREAD;
+	} else if (task->ppid == 0 || !parent) {
+		/* only root_task can have ppid == 0, parent must always exist */
+		ckpt_err("pid %d: invalid ppid %d\n", task->pid, task->ppid);
+		return -1;
+	} else if (task->pid == task->sid) {
+		/* session leader: creator is parent */
+		ckpt_dbg("pid %d: session leader\n", task->pid);
+		creator = parent;
+	} else if (task->flags & TASK_DEAD) {
+		/* dead: creator is session leader */
+		ckpt_dbg("pid %d: task is dead\n", task->pid);
+		creator = session;
+	} else if (task->sid == parent->sid) {
+		/* (non-session-leader) inherit: creator is parent */
+		ckpt_dbg("pid %d: inherit sid %d\n", task->pid, task->sid);
+		creator = parent;
+	} else if (task->ppid == 1) {
+		/* (non-session-leader) orphan: creator is dummy */
+		ckpt_dbg("pid %d: orphan session %d\n", task->pid, task->sid);
+		/* one placeholder per session is enough: reuse it if present */
+		if (!session->phantom)
+			if (ckpt_placeholder_task(ctx, task) < 0)
+				return -1;
+		creator = session->phantom;
+	} else {
+		/* first make sure we know the session's creator */
+		if (!session->creator) {
+			/* (non-session-leader) recursive: session's creator */
+			ckpt_dbg("pid %d: recursive session creator %d\n",
+			       task->pid, task->sid);
+			if (ckpt_set_creator(ctx, session) < 0)
+				return -1;
+		}
+		/* then use it to decide what to do */
+		if (session->creator->pid == task->ppid) {
+			/* init must not be sibling creator (CLONE_PARENT) */
+			if (session == ckpt_init_task(ctx)) {
+				ckpt_err("pid %d: sibling session prohibited"
+				       " with init as creator\n", task->pid);
+				return -1;
+			}
+			/* (non-session-leader) sibling: creator is sibling */
+			ckpt_dbg("pid %d: sibling session %d\n",
+			       task->pid, task->sid);
+			creator = session;
+			task->flags |= TASK_SIBLING;
+		} else {
+			/* (non-session-leader) session: fork before setsid */
+			ckpt_dbg("pid %d: propagate session %d\n",
+			       task->pid, task->sid);
+			creator = parent;
+			task->flags |= TASK_SESSION;
+		}
+	}
+
+	/* push @task at the head of the creator's list of children */
+	if (creator->children) {
+		struct task *next = creator->children;
+
+		task->next_sib = next;
+		next->prev_sib = task;
+	}
+
+	ckpt_dbg("pid %d: creator set to %d\n", task->pid, creator->pid);
+	task->creator = creator;
+	creator->children = task;
+
+	if (task->flags & TASK_SESSION)
+		if (ckpt_propagate_session(ctx, task) < 0)
+			return -1;
+
+	return 0;
+}
+
+/*
+ * ckpt_placeholder_task - add a placeholder (TASK_DEAD) task for a session
+ *
+ * Appends a new entry with a freshly allocated pid and @task's sid,
+ * links it as a child (and ->phantom) of the session leader found for
+ * @task->sid, then unlinks @task from its current sibling list and makes
+ * the placeholder its creator.
+ *
+ * Returns 0 on success, -1 if the table is full or no pid is available.
+ */
+static int ckpt_placeholder_task(struct ckpt_ctx *ctx, struct task *task)
+{
+	struct task *session = hash_lookup(ctx, task->sid);
+	struct task *holder = &ctx->tasks_arr[ctx->tasks_nr++];
+	pid_t pid;
+
+	/* the slot was claimed above; verify only now that it really exists */
+	if (ctx->tasks_nr > ctx->tasks_max) {
+		/* shouldn't happen, because we prepared enough */
+		ckpt_err("out of space in task table !");
+		return -1;
+	}
+
+	pid = ckpt_alloc_pid(ctx);
+	if (pid < 0)
+		return -1;
+
+	holder->flags = TASK_DEAD;
+
+	holder->pid = pid;
+	holder->ppid = ckpt_init_task(ctx)->pid;
+	holder->tgid = pid;
+	holder->sid = task->sid;
+
+	holder->children = NULL;
+	holder->next_sib = NULL;
+	holder->prev_sib = NULL;
+	holder->creator = NULL;
+	holder->phantom = NULL;
+
+	holder->rpid = -1;
+	holder->ctx = ctx;
+
+	/* link the placeholder at the head of the session leader's children */
+	holder->creator = session;
+	if (session->children) {
+		holder->next_sib = session->children;
+		session->children->prev_sib = holder;
+	}
+	session->children = holder;
+	session->phantom = holder;
+
+	/*
+	 * reparent entry if necessary
+	 *
+	 * NOTE(review): the old creator's ->children is overwritten with
+	 * task->next_sib even when @task is not the head of that list,
+	 * and holder->children is not pointed at @task - confirm both are
+	 * intended.
+	 */
+	if (task->next_sib)
+		task->next_sib->prev_sib = task->prev_sib;
+	if (task->prev_sib)
+		task->prev_sib->next_sib = task->next_sib;
+	if (task->creator)
+		task->creator->children = task->next_sib;
+
+	task->creator = holder;
+	task->next_sib = NULL;
+	task->prev_sib = NULL;
+
+	return 0;
+}
+
+/*
+ * ckpt_propagate_session: walk up the chain of creators starting at
+ * @task, marking every visited task with TASK_SESSION.
+ *
+ * The walk ends once the task reached has the same sid as the task we
+ * started from, or is the init task, or is already marked
+ * TASK_SESSION, or has the session task (looked up by the starting
+ * sid) as its creator.
+ *
+ * Returns 0 on success, -1 if a placeholder could not be created or a
+ * creator could not be determined.
+ */
+static int ckpt_propagate_session(struct ckpt_ctx *ctx, struct task *task)
+{
+	struct task *session = hash_lookup(ctx, task->sid);
+	struct task *creator;
+	pid_t sid = task->sid;
+
+	do {
+		ckpt_dbg("pid %d: set session\n", task->pid);
+		task->flags |= TASK_SESSION;
+
+		/* sample the creator before a placeholder may be created */
+		creator = task->creator;
+		if (creator->pid == 1) {
+			if (ckpt_placeholder_task(ctx, task) < 0)
+				return -1;
+		}
+
+		/* continue from the creator sampled above */
+		ckpt_dbg("pid %d: moving up to %d\n", task->pid, creator->pid);
+		task = creator;
+
+		/* the next level may not have its own creator resolved yet */
+		if(!task->creator) {
+			if (ckpt_set_creator(ctx, task) < 0)
+				return -1;
+		}
+	} while (task->sid != sid &&
+		 task != ckpt_init_task(ctx) &&
+		 !(task->flags & TASK_SESSION) &&
+		 task->creator != session);
+
+	return 0;
+}
+
+/*
+ * Algorithm MakeForest
+ * "Transparent Checkpoint/Restart of Multiple Processes on Commodity
+ * Operating Systems" in USENIX 2007
+ * http://www.usenix.org/events/usenix07/tech/full_papers/laadan/laadan_html/paper.html
+ *
+ * The algorithm reconstructs the task hierarchy and relationships.
+ * It works in a recursive manner by following the instructions set
+ * forth by the task forest data structure. It begins with a single
+ * init task, that will fork the tasks that have init set as their
+ * creator. Each task then creates its own children.
+ *
+ * The algorithm loops through the list of children of a task three
+ * times, during which the children are forked or cleaned up.  Each
+ * child that is forked executes the same algorithm recursively until
+ * all tasks have been created.
+ *
+ * In the first pass the current task spawns children that are marked
+ * with TASK_SESSION and thereby need to be forked before the current
+ * session id is changed.  The task then changes its session id if
+ * needed. In the second pass the task forks the remainder of the
+ * children. In both passes, a child that is marked TASK_THREAD is
+ * created as a thread and a child that is marked TASK_SIBLING is
+ * created with parent inheritance. Finally, the task will invoke
+ * sys_restart() which does not return (if successful).
+ *
+ * Tasks marked TASK_DEAD or TASK_GHOST are both destined to terminate
+ * anyway; both use flag RESTART_GHOST for sys_restart(), which will
+ * result in a call to do_exit().
+ */
+static int ckpt_make_tree(struct ckpt_ctx *ctx, struct task *task)
+{
+	struct task *child;
+	struct pid_swap swap;
+	unsigned long flags = 0;
+	pid_t newpid;
+	int ret;
+
+	ckpt_dbg("pid %d: pid %d sid %d parent %d\n",
+	       task->pid, _gettid(), getsid(0), getppid());
+
+	/* 1st pass: fork children that inherit our old session-id */
+	for (child = task->children; child; child = child->next_sib) {
+		if (child->flags & TASK_SESSION) {
+			ckpt_dbg("pid %d: fork child %d with session\n",
+			       task->pid, child->pid);
+			newpid = ckpt_fork_child(ctx, child);
+			if (newpid < 0)
+				return -1;
+			child->rpid = newpid;
+		}
+	}
+
+	/*
+	 * change session id, if necessary (a setsid() failure is
+	 * tolerated only for the init task)
+	 */
+	if (task->pid == task->sid) {
+		ret = setsid();
+		if (ret < 0 && task != ckpt_init_task(ctx)) {
+			ckpt_perror("setsid");
+			return -1;
+		}
+	}
+
+	/* 2nd pass: fork children that inherit our new session-id */
+	for (child = task->children; child; child = child->next_sib) {
+		if (!(child->flags & TASK_SESSION)) {
+			ckpt_dbg("pid %d: fork child %d without session\n",
+			       task->pid, child->pid);
+			newpid = ckpt_fork_child(ctx, child);
+			if (newpid < 0)
+				return -1;
+			child->rpid = newpid;
+		}
+	}
+	
+	/*
+	 * In '--no-pidns' (and not '--pids') mode we restart with
+	 * pids different than originals, so report old/new pids via
+	 * pipe to the feeder.
+	 *
+	 * However, even with '--pids', coordinator needs to create
+	 * entire hierarchy before (in kernel) reading the image.
+	 * Always reporting pids ensures that the feeder only feeds data
+	 * (and the coordinator reads data) when the hierarchy is ready.
+	 */
+
+	/* communicate via pipe that all is well */
+	swap.old = task->pid;
+	swap.new = _gettid();
+	ret = write(ctx->pipe_out, &swap, sizeof(swap));
+	if (ret != sizeof(swap)) {
+		ckpt_perror("write swap");
+		return -1;
+	}
+	close(ctx->pipe_out);
+
+	/*
+	 * At this point restart may have already begun in the kernel.
+	 * We shouldn't be doing much until sys_restart() below. With
+	 * threads, sys_restart() ensures that all members of a thread
+	 * group are ready before restoring any of them.
+	 */
+
+	/*
+	 * Ghost tasks are not restarted and end up dead, but their
+	 * pids are referred to by other tasks' pgids (also sids, that
+	 * are already properly set by now). Therefore, they stick
+	 * around until those tasks actually restore their pgrp, and
+	 * then exit (more precisely, killed). The RESTART_GHOST flag
+	 * tells the kernel that they are not to be restored.
+	 */
+	if (task->flags & (TASK_GHOST | TASK_DEAD))
+		flags |= RESTART_GHOST;
+
+	/* on success this doesn't return */
+	ckpt_dbg("about to call sys_restart(), flags %#lx\n", flags);
+	ret = restart(0, STDIN_FILENO, flags, CHECKPOINT_FD_NONE);
+	if (ret < 0)
+		ckpt_perror("task restore failed");
+	return ret;
+}
+
+/*
+ * ckpt_fork_stub: entry point of a newly cloned task (passed to
+ * eclone() by ckpt_fork_child); @data is the task's 'struct task'.
+ *
+ * Performs the per-task setup selected by the task flags and restart
+ * arguments, then continues with ckpt_make_tree().  Returns -1 if any
+ * setup step fails, otherwise whatever ckpt_make_tree() returns.
+ */
+int ckpt_fork_stub(void *data)
+{
+	struct task *task = (struct task *) data;
+	struct ckpt_ctx *ctx = task->ctx;
+
+	/* chroot ? */
+	if ((task->flags & TASK_NEWROOT) && chroot(ctx->args->root) < 0)
+		return -1;
+	/* tasks with new pid-ns need new /proc mount */
+	if ((task->flags & TASK_NEWPID) && ckpt_remount_proc(ctx) < 0)
+		return -1;
+	/* remount /dev/pts ? */
+	if ((task->flags & TASK_NEWPTS) && ckpt_remount_devpts(ctx) < 0)
+		return -1;
+
+	/*
+	 * In restart into a new pid namespace (--pidns), coordinator
+	 * is the container init, hence if it terminated prematurely
+	 * then the task hierarchy will be cleaned up automagically.
+	 *
+	 * In restart into existing namespace (--no-pidns) we ensure
+	 * proper cleanup of the new hierarchy in case of coordinator
+	 * death by asking to be killed then. When restart succeeds,
+	 * it will have replaced this with the original value.
+	 *
+	 * This works because in the --no-pids case, the hierarchy of
+	 * tasks does not contain zombies (else, there must also be a
+	 * container init, whose pid (==1) is clearly already taken).
+	 *
+	 * Thus, if the parent of this task dies before this prctl()
+	 * call, it suffices to test getppid() == task->real_parent.
+	 */
+	if (!ctx->args->pidns) {
+		if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0) < 0) {
+			ckpt_perror("prctl");
+			return -1;
+		}
+		if (getppid() != task->real_parent) {
+			ckpt_err("[%d]: parent is MIA (%d != %d)\n",
+				 _getpid(), getppid(), task->real_parent);
+			return -1;
+		}
+	}
+
+	/* if user requested freeze at end - add ourself to cgroup */
+	if (ctx->args->freezer && freezer_register(ctx, _getpid())) {
+		ckpt_err("[%d]: failed add to freezer cgroup\n", _getpid());
+		return -1;
+	}
+
+	/* root has some extra work: record its actual pid in the context */
+	if (task->flags & TASK_ROOT) {
+		ctx->root_pid = _getpid();
+		ckpt_dbg("root task pid %d\n", _getpid());
+	}
+
+	return ckpt_make_tree(ctx, task);
+}
+
+/*
+ * ckpt_fork_child: create the task described by @child using eclone().
+ *
+ * Clone flags are derived from the child's task flags (thread, sibling,
+ * new pid/mount namespace).  With '--pids' the requested pid(s) are
+ * passed to eclone(); otherwise a single pid of 0 is passed.
+ *
+ * Returns the pid reported by eclone(), or -1 on failure.  The stack
+ * is released here unless a thread was successfully created on it; the
+ * pids array is always released.
+ */
+static pid_t ckpt_fork_child(struct ckpt_ctx *ctx, struct task *child)
+{
+	struct clone_args clone_args;
+	genstack stk;
+	unsigned long flags = SIGCHLD;
+	pid_t pid = 0;
+	pid_t *pids = &pid;
+	int i, j, depth;
+	int saved_errno;
+
+	ckpt_dbg("forking child vpid %d flags %#x\n", child->pid, child->flags);
+
+	stk = genstack_alloc(PTHREAD_STACK_MIN);
+	if (!stk) {
+		ckpt_perror("ckpt_fork_child genstack_alloc");
+		return -1;
+	}
+
+	if (child->flags & TASK_THREAD)
+		flags |= CLONE_THREAD | CLONE_SIGHAND | CLONE_VM;
+	else if (child->flags & TASK_SIBLING)
+		flags |= CLONE_PARENT;
+
+	memset(&clone_args, 0, sizeof(clone_args));
+	clone_args.nr_pids = 1;
+	/* select pid if --pids, otherwise it's 0 */
+	if (ctx->args->pids) {
+		depth = child->piddepth + 1;
+		clone_args.nr_pids = depth;
+
+		pids = malloc(sizeof(pid_t) * depth);
+		if (!pids) {
+			ckpt_perror("ckpt_fork_child pids malloc");
+			goto err;
+		}
+
+		memset(pids, 0, sizeof(pid_t) * depth);
+		pids[0] = child->pid;
+
+		for (i = child->piddepth - 1, j = 0; i >= 0; i--, j++)
+			pids[j + 1] = ctx->vpids_arr[child->vidx + j];
+
+#ifndef CLONE_NEWPID
+		if (child->piddepth > child->creator->piddepth) {
+			ckpt_err("nested pidns but CLONE_NEWPID undefined");
+			errno = EINVAL;
+			goto err;
+		} else if (child->flags & TASK_NEWPID) {
+			ckpt_err("TASK_NEWPID set but CLONE_NEWPID undefined");
+			errno = EINVAL;
+			goto err;
+		}
+#else /* CLONE_NEWPID */
+		if (child->piddepth > child->creator->piddepth) {
+			child->flags |= TASK_NEWPID;
+			flags |= CLONE_NEWPID;
+			clone_args.nr_pids--;
+		} else if (child->flags & TASK_NEWPID) {
+			/* The TASK_NEWPID could have been set for root task */
+			pids[0] = 0;
+			flags |= CLONE_NEWPID;
+		}
+		if (flags & CLONE_NEWPID && !ctx->args->pidns) {
+			ckpt_err("Must use --pidns for nested pidns container");
+			errno = EINVAL;
+			goto err;
+		}
+#if 0
+		if (flags & CLONE_NEWPID)
+			clone_args.nr_pids--;
+#endif
+#endif /* CLONE_NEWPID */
+	}
+
+	if (child->flags & TASK_NEWNS)
+		flags |= CLONE_NEWNS;
+
+	/* siblings and threads share our parent; others are our children */
+	if (child->flags & (TASK_SIBLING | TASK_THREAD))
+		child->real_parent = getppid();
+	else
+		child->real_parent = _getpid();
+
+	clone_args.child_stack = (unsigned long)genstack_base(stk);
+	clone_args.child_stack_size = genstack_size(stk);
+
+	ckpt_dbg("task %d forking with flags %lx numpids %d\n",
+		child->pid, flags, clone_args.nr_pids);
+	for (i = 0; i < clone_args.nr_pids; i++)
+		ckpt_dbg("task %d pid[%d]=%d\n", child->pid, i, pids[i]);
+
+	pid = eclone(ckpt_fork_stub, child, flags, &clone_args, pids);
+	if (pid < 0)
+		ckpt_perror("eclone");
+
+	/* a successfully created thread keeps running on this stack */
+	if (pid < 0 || !(child->flags & TASK_THREAD))
+		genstack_release(stk);
+
+	if (pids != &pid)
+		free(pids);
+
+	ckpt_dbg("forked child vpid %d (asked %d)\n", pid, child->pid);
+	return pid;
+
+err:
+	/* release everything acquired so far, keeping errno for the caller */
+	saved_errno = errno;
+	genstack_release(stk);
+	if (pids != &pid)
+		free(pids);
+	errno = saved_errno;
+	return -1;
+}
+
+/*
+ * ckpt_fork_feeder: create the feeder process and set a pipe to deliver
+ * the feeder's stdout to our stdin.
+ *
+ * Also setup another pipe through which new tasks will report their
+ * old- and new-pid (see ckpt_adjust_pids). This was originally used
+ * only for '--no-pids', but now also ensures that all restarting
+ * tasks are created by the time coordinator calls restart(2).
+ *
+ * Returns 0 on success, -1 on failure.  Pipes and the feeder stack are
+ * released again if the feeder could not be started.
+ */
+static int ckpt_fork_feeder(struct ckpt_ctx *ctx)
+{
+	genstack stk;
+	pid_t pid;
+
+	if (pipe(ctx->pipe_feed) < 0) {
+		ckpt_perror("pipe");
+		return -1;
+	}
+
+	if (pipe(ctx->pipe_child) < 0) {
+		ckpt_perror("pipe");
+		goto err_close_feed;
+	}
+
+	/*
+	 * Use clone() without SIGCHLD so that the when the feeder
+	 * terminates it does not notify the parent (coordinator), as
+	 * this may interfere with the restart.
+	 */
+
+	stk = genstack_alloc(PTHREAD_STACK_MIN);
+	if (!stk) {
+		ckpt_perror("ckpt_fork_feeder genstack_alloc");
+		goto err_close_child;
+	}
+
+	pid = clone(ckpt_do_feeder, genstack_sp(stk),
+		    CLONE_THREAD | CLONE_SIGHAND | CLONE_VM, ctx);
+	if (pid < 0) {
+		ckpt_perror("feeder thread");
+		genstack_release(stk);
+		goto err_close_child;
+	}
+
+	/* children pipe: we keep only the write end */
+	close(ctx->pipe_child[0]);
+	ctx->pipe_out = ctx->pipe_child[1];
+	/* feeder pipe: we keep only the read end, as our stdin */
+	close(ctx->pipe_feed[1]);
+	if (ctx->pipe_feed[0] != STDIN_FILENO) {
+		if (dup2(ctx->pipe_feed[0], STDIN_FILENO) < 0) {
+			ckpt_perror("dup2");
+			return -1;
+		}
+		close(ctx->pipe_feed[0]);
+	}
+
+	return 0;
+
+err_close_child:
+	close(ctx->pipe_child[0]);
+	close(ctx->pipe_child[1]);
+err_close_feed:
+	close(ctx->pipe_feed[0]);
+	close(ctx->pipe_feed[1]);
+	return -1;
+}
+
+/*
+ * ckpt_abort: report @str through ckpt_perror(), send SIGKILL to the
+ * process group -(ctx->root_pid) and terminate with exit status 1.
+ * Does not return.
+ *
+ * NOTE(review): if ctx->root_pid has not been set (is 0) the signal
+ * would go to the caller's own process group - confirm this is intended.
+ */
+static void ckpt_abort(struct ckpt_ctx *ctx, char *str)
+{
+	ckpt_perror(str);
+	kill(-(ctx->root_pid), SIGKILL);
+	exit(1);
+}
+
+/*
+ * copy the image from stdin to stdout verbatim, one buffer at a time,
+ * until end of input; any read or write error aborts the restart
+ */
+static void ckpt_read_write_blind(struct ckpt_ctx *ctx)
+{
+	int nbytes;
+
+	for (;;) {
+		nbytes = read(STDIN_FILENO, ctx->buf, BUFSIZE);
+		ckpt_dbg("c/r read input %d\n", nbytes);
+
+		if (nbytes < 0)
+			ckpt_abort(ctx, "read input");
+		if (nbytes == 0)
+			return;		/* end of image */
+
+		if (ckpt_write(STDOUT_FILENO, ctx->buf, nbytes) < 0)
+			ckpt_abort(ctx, "write output");
+	}
+}
+
+/*
+ * read/write image data while inspecting it
+ *
+ * The image is copied from stdin to stdout record by record.  Each
+ * record header is validated; a CKPT_HDR_ERROR record aborts the
+ * restart using the error text carried in the record.  Any read or
+ * write error aborts as well.  Returns at end of input.
+ */
+static void ckpt_read_write_inspect(struct ckpt_ctx *ctx)
+{
+	struct ckpt_hdr h;
+	int len, ret;
+
+	while (1) {
+		ret = _ckpt_read(STDIN_FILENO, &h, sizeof(h));
+		if (ret == 0)
+			break;
+		if (ret < 0)
+			ckpt_abort(ctx, "read input");
+		/* only look at the header once we know it was read */
+		ckpt_dbg("ret %d len %d type %d\n", ret, h.len, h.type);
+		if (h.len < sizeof(h)) {
+			errno = EINVAL;
+			ckpt_abort(ctx, "invalid record");
+		}
+
+		ret = ckpt_write(STDOUT_FILENO, &h, sizeof(h));
+		if (ret < 0)
+			ckpt_abort(ctx, "write output");
+
+		/* from here on h.len counts the remaining payload bytes */
+		h.len -= sizeof(h);
+		if (h.type == CKPT_HDR_ERROR) {
+			len = (h.len > BUFSIZE ? BUFSIZE : h.len);
+			ret = read(STDIN_FILENO, ctx->buf, len);
+			if (ret < 0)
+				ckpt_abort(ctx, "error record");
+			errno = EIO;
+			/*
+			 * The message is reported from the second byte on,
+			 * so at least two bytes are needed; terminate at
+			 * what was actually read, never outside the buffer.
+			 */
+			if (ret < 2)
+				ckpt_abort(ctx, "error record");
+			ctx->buf[ret - 1] = '\0';
+			ckpt_abort(ctx, &ctx->buf[1]);
+		}
+		ckpt_dbg("c/r read input %d\n", h.len);
+
+		while (h.len) {
+			len = (h.len > BUFSIZE ? BUFSIZE : h.len);
+			ret = read(STDIN_FILENO, ctx->buf, len);
+			if (ret == 0)
+				ckpt_abort(ctx, "short record");
+			if (ret < 0)
+				ckpt_abort(ctx, "read input");
+
+			h.len -= ret;
+			ret = ckpt_write(STDOUT_FILENO, ctx->buf, ret);
+			ckpt_dbg("write len %d (%d)\n", len, ret);
+			if (ret < 0)
+				ckpt_abort(ctx, "write output");
+		}
+	}
+}
+
+/*
+ * feeder process: delegates checkpoint image stream to the kernel.
+ * In '--no-pids' mode, transform the pids array (struct ckpt_pids)
+ * on the fly and feed the result to the "init" task of the restart
+ *
+ * Runs as the clone() entry point with @data being the restart
+ * context.  Every failure is fatal and goes through ckpt_abort().
+ */
+static int ckpt_do_feeder(void *data)
+{
+	struct ckpt_ctx *ctx = (struct ckpt_ctx *) data;
+
+	/* children pipe: the feeder keeps only the read end */
+	close(ctx->pipe_child[1]);
+	ctx->pipe_in = ctx->pipe_child[0];
+	/* feeder pipe: the feeder keeps only the write end, as its stdout */
+	close(ctx->pipe_feed[0]);
+	if (ctx->pipe_feed[1] != STDOUT_FILENO) {
+		if (dup2(ctx->pipe_feed[1], STDOUT_FILENO) < 0)
+			ckpt_abort(ctx, "dup2");
+		close(ctx->pipe_feed[1]);
+	}
+
+	/* wait for all tasks to report, then replay the saved headers */
+	if (ckpt_adjust_pids(ctx) < 0)
+		ckpt_abort(ctx, "collect pids");
+
+	if (ckpt_write_header(ctx) < 0)
+		ckpt_abort(ctx, "write c/r header");
+
+	if (ckpt_write_header_arch(ctx) < 0)
+		ckpt_abort(ctx, "write c/r header arch");
+
+	if (ckpt_write_container(ctx) < 0)
+		ckpt_abort(ctx, "write container section");
+
+	if (ckpt_write_tree(ctx) < 0)
+		ckpt_abort(ctx, "write c/r tree");
+
+	if (ckpt_write_vpids(ctx) < 0)
+		ckpt_abort(ctx, "write vpids");
+
+	/* read rest -> write rest */
+	if (ctx->args->inspect)
+		ckpt_read_write_inspect(ctx);
+	else
+		ckpt_read_write_blind(ctx);
+
+	/* All is well: feeder thread is done.  However, we must
+	 * invoke the exit system call directly. Otherwise, upon
+	 * return from this function, glibc's clone wrapper will call
+	 * _exit, which calls exit_group, which will terminate the
+	 * whole process, which is not what we want.
+	 */
+	syscall(SYS_exit, 0);
+
+	/* not reached */
+	return 0;
+}
+
+/*
+ * ckpt_adjust_pids: transform the pids array (struct ckpt_pids) by
+ * substituting actual pid values for original pid values.
+ *
+ * Collect pids reported by the newly created tasks; each task sends
+ * a 'struct pid_swap' indicating old- and new-pid. Then modify the
+ * pids array accordingly.
+ */
+static int ckpt_adjust_pids(struct ckpt_ctx *ctx)
+{
+	struct pid_swap swap;
+	int n, m, len, ret;
+
+	/*
+	 * Make a copy of the original array to fix a nifty bug where
+	 * two tasks in original image with pids A, B, restart with
+	 * pids swapped (B and A), for instance -
+	 *    original pid array:    [][][A][][B][]...
+	 *    first, swap B with A:  [][][A][][A][]...
+	 *    then, swap A with B:   [][][B][][B][]...
+	 *    but correct should be: [][][B][][A][]...
+	 */
+
+	len = sizeof(struct ckpt_pids) * ctx->pids_nr;
+
+#ifdef CHECKPOINT_DEBUG
+	ckpt_dbg("====== PIDS ARRAY\n");
+	for (m = 0; m < ctx->pids_nr; m++) {
+		struct ckpt_pids *p;
+		p = &ctx->pids_arr[m];
+		ckpt_dbg("[%d] pid %d ppid %d sid %d pgid %d\n",
+			 m, p->vpid, p->vppid, p->vsid, p->vpgid);
+	}
+	ckpt_dbg("............\n");
+#endif
+
+	memcpy(ctx->copy_arr, ctx->pids_arr, len);
+
+	/* read in 'pid_swap' data and adjust ctx->pids_arr */
+	for (n = 0; n < ctx->tasks_nr; n++) {
+		/* get pid info from next task */
+		ret = read(ctx->pipe_in, &swap, sizeof(swap));
+		if (ret != sizeof(swap)) {
+			/*
+			 * EOF or a truncated report means a task went
+			 * away before reporting: the hierarchy is not
+			 * complete, so do not go on feeding the image.
+			 */
+			if (ret >= 0)
+				errno = EIO;
+			ckpt_abort(ctx, "read pipe");
+		}
+
+		/* swapping isn't needed with '--pids' */
+		if (ctx->args->pids)
+			continue;
+
+		/* match against the original, update only the copy */
+		ckpt_dbg("c/r swap old %d new %d\n", swap.old, swap.new);
+		for (m = 0; m < ctx->pids_nr; m++) {
+			if (ctx->pids_arr[m].vpid == swap.old)
+				ctx->copy_arr[m].vpid = swap.new;
+			if (ctx->pids_arr[m].vtgid == swap.old)
+				ctx->copy_arr[m].vtgid = swap.new;
+			if (ctx->pids_arr[m].vsid == swap.old)
+				ctx->copy_arr[m].vsid = swap.new;
+			if (ctx->pids_arr[m].vpgid == swap.old)
+				ctx->copy_arr[m].vpgid = swap.new;
+		}
+	}
+
+	memcpy(ctx->pids_arr, ctx->copy_arr, len);
+
+#ifdef CHECKPOINT_DEBUG
+	if (!ctx->args->pids) {
+		ckpt_dbg("====== PIDS ARRAY (swaped)\n");
+		for (m = 0; m < ctx->pids_nr; m++) {
+			struct ckpt_pids *p;
+			p = &ctx->pids_arr[m];
+			ckpt_dbg("[%d] pid %d ppid %d sid %d pgid %d\n",
+				 m, p->vpid, p->vppid, p->vsid, p->vpgid);
+		}
+		ckpt_dbg("............\n");
+	}
+#endif
+
+	close(ctx->pipe_in);
+	return 0;
+}
+
+/*
+ * low-level write
+ *   ckpt_write - write 'count' bytes from 'buf'
+ *   ckpt_write_obj - write object
+ *   ckpt_write_obj_ptr - write a header followed by 'n' bytes of 'buf'
+ */
+/*
+ * Write exactly 'count' bytes from 'buf' to 'fd', retrying on short
+ * writes and on EAGAIN/EINTR.  Returns 0 on success, -1 on error
+ * (errno is set by write()).
+ */
+static int ckpt_write(int fd, void *buf, int count)
+{
+	char *ptr = buf;
+	ssize_t nwrite;
+	int nleft = count;
+
+	/*
+	 * The byte count is adjusted only after a successful write: a
+	 * 'continue' inside a for-loop would still run its increment
+	 * expression with nwrite == -1 and grow the count.
+	 */
+	while (nleft > 0) {
+		nwrite = write(fd, ptr, nleft);
+		if (nwrite < 0) {
+			if (errno == EAGAIN || errno == EINTR)
+				continue;
+			return -1;
+		}
+		ptr += nwrite;
+		nleft -= nwrite;
+	}
+	return 0;
+}
+
+/*
+ * Write the object that starts with header @h to stdout; h->len bytes
+ * are written starting at @h.  @ctx is not used here.  Returns the
+ * result of ckpt_write().
+ */
+int ckpt_write_obj(struct ckpt_ctx *ctx, struct ckpt_hdr *h)
+{
+	return ckpt_write(STDOUT_FILENO, h, h->len);
+}
+
+/*
+ * Emit an object of the given 'type' to stdout: first a header whose
+ * length covers the header plus 'n' payload bytes, then the 'n' bytes
+ * at 'buf'.  Returns 0 on success, otherwise the failing write's result.
+ */
+int ckpt_write_obj_ptr(struct ckpt_ctx *ctx, void *buf, int n, int type)
+{
+	struct ckpt_hdr hdr;
+	int err;
+
+	hdr.type = type;
+	hdr.len = n + sizeof(hdr);
+
+	err = ckpt_write(STDOUT_FILENO, &hdr, sizeof(hdr));
+	if (err)
+		return err;
+
+	return ckpt_write(STDOUT_FILENO, buf, n);
+}
+
+/*
+ * low-level read
+ *   _ckpt_read - read 'count' bytes to 'buf', or EOF
+ *   ckpt_read - read 'count' bytes to 'buf' (EOF disallowed)
+ *   ckpt_read_obj - read up to 'n' bytes of object into 'buf'
+ *   ckpt_read_obj_type - read up to 'n' bytes of object type 'type' into 'buf'
+ *   ckpt_read_obj_ptr - like ckpt_read_obj_type, but discards header
+ */
+/*
+ * Read exactly 'count' bytes from 'fd' into 'buf', retrying on short
+ * reads and on EAGAIN/EINTR.
+ *
+ * Returns 'count' on success, 0 if end-of-file is hit before any byte
+ * was read, and -1 on error or if end-of-file cuts the data short.
+ */
+static int _ckpt_read(int fd, void *buf, int count)
+{
+	char *ptr = buf;
+	ssize_t nread;
+	int nleft = count;
+
+	/*
+	 * The byte count is adjusted only after a successful read: a
+	 * 'continue' inside a for-loop would still run its increment
+	 * expression with nread == -1 and grow the count past 'buf'.
+	 */
+	while (nleft > 0) {
+		nread = read(fd, ptr, nleft);
+		if (nread < 0) {
+			if (errno == EAGAIN || errno == EINTR)
+				continue;
+			return -1;
+		}
+		if (nread == 0) {
+			/* EOF: clean only if nothing was read so far */
+			return (nleft == count) ? 0 : -1;
+		}
+		ptr += nread;
+		nleft -= nread;
+	}
+	return count;
+}
+
+/*
+ * Like _ckpt_read(), but end-of-file is an error (EINVAL) whenever
+ * data was expected.  Returns 0 on success, a negative value on error.
+ */
+static int ckpt_read(int fd, void *buf, int count)
+{
+	int nread = _ckpt_read(fd, buf, count);
+
+	if (nread < 0)
+		return nread;
+
+	if (nread == 0 && count) {
+		/* premature end of input */
+		errno = EINVAL;
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Read one object from stdin: the header goes into 'h', the payload
+ * (if any) into 'buf'.  The object's total length must be at least a
+ * header and at most 'n' bytes, else EINVAL.  Returns 0 on success and
+ * a negative value on error.
+ */
+static int ckpt_read_obj(struct ckpt_ctx *ctx,
+			 struct ckpt_hdr *h, void *buf, int n)
+{
+	int err;
+
+	err = ckpt_read(STDIN_FILENO, h, sizeof(*h));
+	if (err < 0)
+		return err;
+
+	if (h->len < sizeof(*h) || h->len > n) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	/* header-only object: nothing more to read */
+	if (h->len == sizeof(*h))
+		return 0;
+
+	return ckpt_read(STDIN_FILENO, buf, h->len - sizeof(*h));
+}
+
+/*
+ * Read an object of at most 'n' bytes into 'buf', header included,
+ * and verify that it is of the expected 'type' (EINVAL otherwise).
+ * Returns 0 on success and a negative value on error.
+ */
+static int ckpt_read_obj_type(struct ckpt_ctx *ctx, void *buf, int n, int type)
+{
+	struct ckpt_hdr *hdr = (struct ckpt_hdr *) buf;
+	int err;
+
+	/* the header lands at the start of buf, the payload right after */
+	err = ckpt_read_obj(ctx, hdr, (void *) (hdr + 1), n);
+	if (err < 0)
+		return err;
+
+	if (hdr->type == type)
+		return 0;
+
+	errno = EINVAL;
+	return -1;
+}
+
+/*
+ * Read an object with at most 'n' payload bytes: only the payload is
+ * stored in 'buf', the header is checked against 'type' (EINVAL on
+ * mismatch) and dropped.  Returns 0 on success, negative on error.
+ */
+static int ckpt_read_obj_ptr(struct ckpt_ctx *ctx, void *buf, int n, int type)
+{
+	struct ckpt_hdr hdr;
+	int err;
+
+	/* the size limit applies to header and payload together */
+	err = ckpt_read_obj(ctx, &hdr, buf, n + sizeof(hdr));
+	if (err < 0)
+		return err;
+
+	if (hdr.type == type)
+		return 0;
+
+	errno = EINVAL;
+	return -1;
+}
+
+/*
+ * Read an object of type CKPT_HDR_BUFFER (header included) into 'buf'.
+ *
+ * NOTE(review): 'n' is not used; the size limit handed down is BUFSIZE
+ * regardless of what the caller asked for - confirm this is intended.
+ */
+static int ckpt_read_obj_buffer(struct ckpt_ctx *ctx, void *buf, int n)
+{
+	return ckpt_read_obj_type(ctx, buf, BUFSIZE, CKPT_HDR_BUFFER);
+}
+
+/*
+ * read/write the checkpoint image: similar to in-kernel code
+ */
+
+/*
+ * Read the image header into ctx->header, followed by the three uts
+ * buffer objects (release, version, machine) which are stored back to
+ * back right behind it.  Returns 0 on success, negative on error.
+ */
+static int ckpt_read_header(struct ckpt_ctx *ctx)
+{
+	struct ckpt_hdr_header *hdr = (struct ckpt_hdr_header *) ctx->header;
+	char *cursor = (char *) hdr;
+	int lens[3];
+	int i, err;
+
+	err = ckpt_read_obj_type(ctx, hdr, sizeof(*hdr), CKPT_HDR_HEADER);
+	if (err < 0)
+		return err;
+
+	if (h_too_long:
+	    0) { }
+	return 0;
+}
+
+/*
+ * Read the architecture-specific header object into ctx->header_arch.
+ * Returns 0 on success and a negative value on error.
+ */
+static int ckpt_read_header_arch(struct ckpt_ctx *ctx)
+{
+	struct ckpt_hdr_header_arch *arch;
+	int err;
+
+	arch = (struct ckpt_hdr_header_arch *) ctx->header_arch;
+	err = ckpt_read_obj_type(ctx, arch, sizeof(*arch),
+				 CKPT_HDR_HEADER_ARCH);
+
+	return (err < 0) ? err : 0;
+}
+
+/*
+ * Read the container section into ctx->container: the container
+ * header, then the lsm name buffer and the lsm info object, each stored
+ * right behind the previous one.  Returns 0 on success, negative on
+ * error.
+ */
+static int ckpt_read_container(struct ckpt_ctx *ctx)
+{
+	struct ckpt_hdr_container *cont;
+	char *cursor;
+	int err;
+
+	cont = (struct ckpt_hdr_container *) ctx->container;
+	err = ckpt_read_obj_type(ctx, cont, sizeof(*cont), CKPT_HDR_CONTAINER);
+	if (err < 0)
+		return err;
+
+	/* lsm name buffer follows the container header */
+	cursor = (char *) cont;
+	cursor += ((struct ckpt_hdr *) cursor)->len;
+	err = ckpt_read_obj_buffer(ctx, cursor, CHECKPOINT_LSM_NAME_MAX + 1);
+	if (err < 0)
+		return err;
+
+	/* lsm info follows the name buffer */
+	cursor += ((struct ckpt_hdr *) cursor)->len;
+	return ckpt_read_obj_type(ctx, cursor, 200, CKPT_HDR_LSM_INFO);
+}
+
+/*
+ * Read the tree header into ctx->tree and the pids array that follows
+ * it into a newly allocated ctx->pids_arr; ctx->copy_arr is allocated
+ * with the same size as scratch space for ckpt_adjust_pids().
+ *
+ * Returns 0 on success and a negative value on error, in which case
+ * both arrays are released and their pointers reset to NULL.
+ */
+static int ckpt_read_tree(struct ckpt_ctx *ctx)
+{
+	struct ckpt_hdr_tree *h;
+	size_t bytes;
+	int len, ret;
+	int saved_errno;
+
+	h = (struct ckpt_hdr_tree *) ctx->tree;
+	ret = ckpt_read_obj_type(ctx, h, sizeof(*h), CKPT_HDR_TREE);
+	if (ret < 0)
+		return ret;
+
+	ckpt_dbg("number of tasks: %d\n", h->nr_tasks);
+
+	if (h->nr_tasks <= 0) {
+		ckpt_err("invalid number of tasks %d", h->nr_tasks);
+		errno = EINVAL;
+		return -1;
+	}
+
+	/* get a working a copy of header */
+	memcpy(ctx->buf, ctx->tree, BUFSIZE);
+
+	ctx->pids_nr = h->nr_tasks;
+
+	/* the count comes from the image: make sure the size fits an int */
+	bytes = sizeof(struct ckpt_pids) * (size_t) ctx->pids_nr;
+	len = (int) bytes;
+	if (len <= 0 || (size_t) len != bytes) {
+		ckpt_err("Length of pids array overflowed");
+		errno = EINVAL;
+		return -1;
+	}
+
+	ctx->pids_arr = malloc(len);
+	ctx->copy_arr = malloc(len);
+	if (!ctx->pids_arr || !ctx->copy_arr) {
+		ret = -1;
+		goto err_free;
+	}
+
+	ret = ckpt_read_obj_ptr(ctx, ctx->pids_arr, len, CKPT_HDR_BUFFER);
+	if (ret < 0)
+		goto err_free;
+
+	return ret;
+
+err_free:
+	/* drop both arrays and leave no dangling pointers behind */
+	saved_errno = errno;
+	free(ctx->pids_arr);
+	free(ctx->copy_arr);
+	ctx->pids_arr = NULL;
+	ctx->copy_arr = NULL;
+	errno = saved_errno;
+	return ret;
+}
+
+/*
+ * set the vpids pointers in all the tasks
+ *
+ * For each task the pid nesting depth is taken from the pids array;
+ * tasks with depth 0 get vidx -1, the others get the index of their
+ * first entry in ctx->vpids_arr.  Returns 0 on success, -1 if a depth
+ * is negative or the entries would not fit in the vpids array.
+ */
+static int assign_vpids(struct ckpt_ctx *ctx)
+{
+	int d, i, hidx, tidx;
+	struct task *t;
+
+	for (hidx = 0, tidx = 0; tidx < ctx->pids_nr; tidx++) {
+		t = &ctx->tasks_arr[tidx];
+		d = t->piddepth = ctx->pids_arr[tidx].depth;
+		if (d < 0) {
+			ckpt_err("Invalid pid depth %d for task %d", d, tidx);
+			return -1;
+		}
+		if (!d) {
+			ckpt_dbg("task[%d].vidx = -1\n", tidx);
+			t->vidx = -1;
+			continue;
+		}
+		/* validate the range before any entry of it is looked at */
+		if (d > ctx->vpids_nr - hidx) {
+			ckpt_err("Error parsing vpids array");
+			return -1;
+		}
+		t->vidx = hidx;
+		ckpt_dbg("task[%d].vidx = %d (depth %d, rpid %d)\n",
+			tidx, hidx, t->piddepth, ctx->pids_arr[tidx].vpid);
+		for (i = 0; i < t->piddepth; i++)
+			ckpt_dbg("task[%d].vpid[%d] = %d\n", tidx, i,
+				ctx->vpids_arr[hidx + i]);
+		hidx += d;
+	}
+
+	return 0;
+}
+
+/*
+ * Add up the pid depths of all tasks into ctx->vpids_nr and, if the
+ * total is non-zero, read that many vpids from the image into a newly
+ * allocated ctx->vpids_arr.
+ *
+ * Returns 0 on success and a negative value on error; on a failed read
+ * the array is released and ctx->vpids_arr reset to NULL.
+ */
+static int ckpt_read_vpids(struct ckpt_ctx *ctx)
+{
+	size_t bytes;
+	int i, len, ret;
+
+	for (i = 0; i < ctx->pids_nr; i++)
+		ctx->vpids_nr += ctx->pids_arr[i].depth;
+
+	ckpt_dbg("number of vpids: %d\n", ctx->vpids_nr);
+
+	if (ctx->vpids_nr < 0) {
+		ckpt_err("Invalid number of vpids %d", ctx->vpids_nr);
+		errno = EINVAL;
+		return -1;
+	}
+
+	if (!ctx->vpids_nr)
+		return 0;
+
+	/* the count comes from the image: make sure the size fits an int */
+	bytes = sizeof(__s32) * (size_t) ctx->vpids_nr;
+	len = (int) bytes;
+	if (len < 0 || (size_t) len != bytes) {
+		ckpt_err("Length of vpids array overflowed");
+		errno = EINVAL;
+		return -1;
+	}
+
+	ctx->vpids_arr = malloc(len);
+	if (!ctx->vpids_arr)
+		return -1;
+
+	ret = ckpt_read_obj_ptr(ctx, ctx->vpids_arr, len, CKPT_HDR_BUFFER);
+	if (ret < 0) {
+		free(ctx->vpids_arr);
+		ctx->vpids_arr = NULL;
+	}
+
+	return ret;
+}
+
+/*
+ * Write out the four objects stored back to back in ctx->header (the
+ * header and the three buffers read after it), stopping at the first
+ * failure.  Returns the result of the last write attempted.
+ */
+static int ckpt_write_header(struct ckpt_ctx *ctx)
+{
+	char *cursor = (char *) ctx->header;
+	int i, err = 0;
+
+	for (i = 0; i < 4; i++) {
+		/* step over the object written in the previous round */
+		if (i > 0)
+			cursor += ((struct ckpt_hdr *) cursor)->len;
+
+		err = ckpt_write_obj(ctx, (struct ckpt_hdr *) cursor);
+		if (err < 0)
+			break;
+	}
+
+	return err;
+}
+
+/* write out the architecture header object kept in ctx->header_arch */
+static int ckpt_write_header_arch(struct ckpt_ctx *ctx)
+{
+	struct ckpt_hdr *hdr = (struct ckpt_hdr *) ctx->header_arch;
+
+	return ckpt_write_obj(ctx, hdr);
+}
+
+/*
+ * Write out the three objects stored back to back in ctx->container:
+ * the container info section, the lsm name buffer and the lsm policy
+ * section.  Stops at the first failure and returns the result of the
+ * last write attempted.
+ */
+static int ckpt_write_container(struct ckpt_ctx *ctx)
+{
+	char *cursor = (char *) ctx->container;
+	int i, err = 0;
+
+	for (i = 0; i < 3; i++) {
+		/* step over the object written in the previous round */
+		if (i > 0)
+			cursor += ((struct ckpt_hdr *) cursor)->len;
+
+		err = ckpt_write_obj(ctx, (struct ckpt_hdr *) cursor);
+		if (err < 0)
+			break;
+	}
+
+	return err;
+}
+
+/*
+ * Write out the tree header kept in ctx->tree followed by the pids
+ * array as a buffer object.  A failed write aborts the restart;
+ * otherwise returns 0.
+ */
+static int ckpt_write_tree(struct ckpt_ctx *ctx)
+{
+	struct ckpt_hdr *hdr = (struct ckpt_hdr *) ctx->tree;
+	int nbytes = sizeof(struct ckpt_pids) * ctx->pids_nr;
+
+	if (ckpt_write_obj(ctx, hdr) < 0)
+		ckpt_abort(ctx, "write tree");
+
+	if (ckpt_write_obj_ptr(ctx, ctx->pids_arr, nbytes, CKPT_HDR_BUFFER) < 0)
+		ckpt_abort(ctx, "write pids");
+
+	return 0;
+}
+
+/*
+ * Write out the vpids array as a buffer object; nothing is written
+ * when there are no vpids.  A failed write aborts the restart;
+ * otherwise returns 0.
+ */
+static int ckpt_write_vpids(struct ckpt_ctx *ctx)
+{
+	int nbytes;
+
+	if (ctx->vpids_nr == 0)
+		return 0;
+
+	nbytes = sizeof(__s32) * ctx->vpids_nr;
+	if (ckpt_write_obj_ptr(ctx, ctx->vpids_arr, nbytes, CKPT_HDR_BUFFER) < 0)
+		ckpt_abort(ctx, "write vpids");
+
+	ckpt_dbg("wrote %d bytes for %d vpids\n", nbytes, ctx->vpids_nr);
+	return 0;
+}
+
+/*
+ * a simple hash implementation
+ */
+
+#define HASH_BITS	11
+#define HASH_BUCKETS	(2 << (HASH_BITS - 1))
+
+/*
+ * allocate the bucket array with every bucket empty;
+ * returns 0 on success, -1 if the allocation failed
+ */
+static int hash_init(struct ckpt_ctx *ctx)
+{
+	ctx->hash_arr = calloc(HASH_BUCKETS, sizeof(struct hashent *));
+	if (ctx->hash_arr)
+		return 0;
+
+	ckpt_perror("malloc hash table");
+	return -1;
+}
+
+/* release every entry of every bucket, then the bucket array itself */
+static void hash_exit(struct ckpt_ctx *ctx)
+{
+	struct hashent *ent, *dead;
+	int bucket;
+
+	for (bucket = 0; bucket < HASH_BUCKETS; bucket++) {
+		ent = ctx->hash_arr[bucket];
+		while (ent) {
+			dead = ent;
+			ent = ent->next;
+			free(dead);
+		}
+	}
+
+	free(ctx->hash_arr);
+}
+
+/* see linux kernel's include/linux/hash.h */
+
+/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
+#define GOLDEN_RATIO_PRIME_32 0x9e370001UL
+
+/*
+ * Map 'key' to a bucket index in [0, HASH_BUCKETS).
+ *
+ * The multiplier is a 32-bit constant, so the product is reduced to 32
+ * bits and its top HASH_BITS bits are used.  Shifting by the full width
+ * of a 64-bit long instead would send practically every pid-sized key
+ * to bucket 0 or 1.  Where long is 32 bits the result is unchanged.
+ */
+static inline int hash_func(long key)
+{
+	unsigned long hash;
+
+	hash = ((unsigned long) key * GOLDEN_RATIO_PRIME_32) & 0xffffffffUL;
+	return (hash >> (32 - HASH_BITS));
+}
+
+/*
+ * add a (key, data) pair at the front of the key's bucket;
+ * returns 0 on success, -1 if the entry could not be allocated
+ */
+static int hash_insert(struct ckpt_ctx *ctx, long key, void *data)
+{
+	struct hashent *ent = malloc(sizeof(*ent));
+	struct hashent **head;
+
+	if (!ent) {
+		ckpt_perror("malloc hash");
+		return -1;
+	}
+
+	head = &ctx->hash_arr[hash_func(key)];
+
+	ent->key = key;
+	ent->data = data;
+	ent->next = *head;
+	*head = ent;
+
+	return 0;
+}
+
+/* return the data stored under 'key', or NULL if there is no such key */
+static void *hash_lookup(struct ckpt_ctx *ctx, long key)
+{
+	struct hashent *ent = ctx->hash_arr[hash_func(key)];
+
+	while (ent) {
+		if (ent->key == key)
+			return ent->data;
+		ent = ent->next;
+	}
+
+	return NULL;
+}
+
+/*
+ * cr_checkpoint: checkpoint the task (or container) identified by @pid.
+ *
+ * @args selects the output descriptor (stdout if negative), the log
+ * descriptor (none if negative), whether a whole container or only a
+ * subtree is checkpointed, and verbosity.  The defaults are stored
+ * back into @args.
+ *
+ * Returns the result of checkpoint(): negative on failure, otherwise
+ * the value reported as the checkpoint id.
+ */
+int cr_checkpoint(int pid, struct cr_checkpoint_args *args)
+{
+	int ret;
+	/* must start out empty: flag bits are only ever OR-ed in below */
+	unsigned long flags = 0;
+
+	global_uerrfd = args->uerrfd;
+
+	/* output file descriptor (default: stdout) */
+	if (args->outfd < 0)
+		args->outfd = STDOUT_FILENO;
+
+	/* log file descriptor (default: none) */
+	if (args->logfd < 0)
+		args->logfd = CHECKPOINT_FD_NONE;
+
+	if (!args->container)
+		flags |= CHECKPOINT_SUBTREE;
+
+	ret = checkpoint(pid, args->outfd, flags, args->logfd);
+
+	if (ret < 0) {
+		ckpt_perror("checkpoint");
+		ckpt_err("(you may use 'ckptinfo -e' for more info)\n");
+	} else if (args->verbose) {
+		ckpt_err("checkpoint id %d\n", ret);
+	}
+
+	return ret;
+}
diff --git a/restart.c b/restart.c
deleted file mode 100644
index e01c08e..0000000
--- a/restart.c
+++ /dev/null
@@ -1,2619 +0,0 @@
-/*
- *  restart.c: restart process(es) from a checkpoint
- *
- *  Copyright (C) 2008-2009 Oren Laadan
- *
- *  This file is subject to the terms and conditions of the GNU General Public
- *  License.  See the file COPYING in the main directory of the Linux
- *  distribution for more details.
- */
-
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <getopt.h>
-#include <limits.h>
-#include <unistd.h>
-#include <sched.h>
-#include <pthread.h>
-#include <signal.h>
-#include <dirent.h>
-#include <time.h>
-#include <fcntl.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <asm/unistd.h>
-#include <sys/syscall.h>
-#include <sys/prctl.h>
-#include <sys/mount.h>
-#include <stdarg.h>
-
-#include <linux/sched.h>
-#include <linux/checkpoint.h>
-#include <linux/checkpoint_hdr.h>
-
-#include "eclone.h"
-#include "genstack.h"
-#include "compat.h"
-#include "cr_checkpoint.h"
-#include "common.h"
-
-/*
- * By default, 'restart' creates a new pid namespace in which the
- * restart takes place, using the original pids from the time of the
- * checkpoint. This requires that CLONE_NEWPID and eclone() be enabled.
- *
- * Restart can also occur in the current namespace, however pids from
- * the time of the checkpoint may be already in use then. Therefore,
- * by default, 'restart' creates an equivalen tree without restoring
- * the original pids, assuming that the application can tolerate this.
- * For this, the 'ckpt_pids' array is transformed on-the-fly before it
- * is fed to the kernel.
- *
- * By default, "--pids" implied "--pidns" and vice-versa. The user can
- * use "--pids --no-pidns" for a restart in the currnet namespace -
- * 'restart' will attempt to create the new tree with the original pids
- * from the time of the checkpoint, if possible. This requires that
- * eclone() be enabled.
- *
- * To re-create the tasks tree in user space, 'restart' reads the
- * header and tree data from the checkpoint image tree. It makes up
- * for the data that was consumed by using a helper process that
- * provides the data back to the restart syscall, followed by the rest
- * of the checkpoint image stream.
- */
-
-struct hashent {
-	long key;
-	void *data;
-	struct hashent *next;
-};
-
-struct task;
-struct ckpt_ctx;
-
-struct task {
-	int flags;		/* state and (later) actions */
-
-	struct task *children;	/* pointers to first child, next and prev */
-	struct task *next_sib;	/*   sibling, and the creator of a process */
-	struct task *prev_sib;
-	struct task *creator;
-
-	struct task *phantom;	/* pointer to place-holdler task (if any) */
-
-	int vidx;		/* index into vpid array, -1 if none */
-	int piddepth;
-
-	pid_t pid;		/* process IDs, our bread-&-butter */
-	pid_t ppid;
-	pid_t tgid;
-	pid_t sid;
-	
-	pid_t rpid;		/* [restart without vpids] actual (real) pid */
-
-	struct ckpt_ctx *ctx;	/* points back to the c/r context */
-
-	pid_t real_parent;	/* pid of task's real parent */
-};
-
-/* zero_task represents creator of root_task (all pids 0) */
-struct task zero_task;
-
-#define TASK_ROOT	0x1	/* root task */
-#define TASK_GHOST	0x2	/* dead task (pid used as sid/pgid) */
-#define TASK_THREAD	0x4	/* thread (non leader) */
-#define TASK_SIBLING	0x8	/* creator's sibling (use CLONE_PARENT) */
-#define TASK_SESSION	0x10	/* inherits creator's original sid */
-#define TASK_NEWPID	0x20	/* starts a new pid namespace */
-#define TASK_DEAD	0x40	/* dead task (dummy) */
-#define TASK_NEWROOT	0x80	/* task must chroot() */
-#define TASK_NEWPTS	0x100	/* remount devpts */
-#define TASK_NEWNS	0x200	/* unshare namespace/file-system */
-
-struct ckpt_ctx {
-	pid_t root_pid;
-	int pipe_in;
-	int pipe_out;
-	int pids_nr;
-	int vpids_nr;
-
-	int pipe_child[2];	/* for children to report status */
-	int pipe_feed[2];	/* for feeder to provide input */
-	int pipe_coord[2];	/* for coord to report status (if needed) */
-
-	struct ckpt_pids *pids_arr;
-	struct ckpt_pids *copy_arr;
-	__s32 *vpids_arr;
-
-	struct task *tasks_arr;
-	int tasks_nr;
-	int tasks_max;
-	int tasks_pid;
-
-	struct hashent **hash_arr;
-	
-	char header[BUFSIZE];
-	char header_arch[BUFSIZE];
-	char container[BUFSIZE];
-	char tree[BUFSIZE];
-	char vpids[BUFSIZE];
-	char buf[BUFSIZE];
-	struct cr_restart_args *args;
-
-	char *freezer;
-};
-
-struct pid_swap {
-	pid_t old;
-	pid_t new;
-};
-
-/*
- * TODO: Do we need to direct user-space restart messages to two different
- * 	 fds (like stdout and stderr) or can we just use one ?
- */
-static int global_ulogfd;
-static int global_uerrfd;
-static int global_debug;
-static int global_verbose;
-static pid_t global_child_pid;
-static int global_child_status;
-static int global_child_collected;
-static int global_sent_sigint;
-static struct signal_array signal_array[] = INIT_SIGNAL_ARRAY;
-
-/*
- * TODO: Implement an API to let callers choose if/how an interrupt be sent
- * 	 and remove global_send_sigint.
- */
-int global_send_sigint = -1;
-
-static int ckpt_remount_proc(struct ckpt_ctx *ctx);
-static int ckpt_remount_devpts(struct ckpt_ctx *ctx);
-
-static int ckpt_build_tree(struct ckpt_ctx *ctx);
-static int ckpt_init_tree(struct ckpt_ctx *ctx);
-static int assign_vpids(struct ckpt_ctx *ctx);
-static int ckpt_set_creator(struct ckpt_ctx *ctx, struct task *task);
-static int ckpt_placeholder_task(struct ckpt_ctx *ctx, struct task *task);
-static int ckpt_propagate_session(struct ckpt_ctx *ctx, struct task *session);
-
-static int ckpt_coordinator_pidns(struct ckpt_ctx *ctx);
-static int ckpt_coordinator(struct ckpt_ctx *ctx);
-
-static int ckpt_make_tree(struct ckpt_ctx *ctx, struct task *task);
-static int ckpt_collect_child(struct ckpt_ctx *ctx);
-static pid_t ckpt_fork_child(struct ckpt_ctx *ctx, struct task *child);
-static int ckpt_adjust_pids(struct ckpt_ctx *ctx);
-
-static void ckpt_abort(struct ckpt_ctx *ctx, char *str);
-static int ckpt_do_feeder(void *data);
-static int ckpt_fork_feeder(struct ckpt_ctx *ctx);
-
-static int ckpt_write(int fd, void *buf, int count);
-static int ckpt_write_obj(struct ckpt_ctx *ctx, struct ckpt_hdr *h);
-
-static int ckpt_write_header(struct ckpt_ctx *ctx);
-static int ckpt_write_header_arch(struct ckpt_ctx *ctx);
-static int ckpt_write_container(struct ckpt_ctx *ctx);
-static int ckpt_write_tree(struct ckpt_ctx *ctx);
-static int ckpt_write_vpids(struct ckpt_ctx *ctx);
-
-static int _ckpt_read(int fd, void *buf, int count);
-static int ckpt_read(int fd, void *buf, int count);
-static int ckpt_read_obj(struct ckpt_ctx *ctx,
-			 struct ckpt_hdr *h, void *buf, int n);
-static int ckpt_read_obj_type(struct ckpt_ctx *ctx, void *b, int n, int type);
-
-static int ckpt_read_header(struct ckpt_ctx *ctx);
-static int ckpt_read_header_arch(struct ckpt_ctx *ctx);
-static int ckpt_read_container(struct ckpt_ctx *ctx);
-static int ckpt_read_tree(struct ckpt_ctx *ctx);
-static int ckpt_read_vpids(struct ckpt_ctx *ctx);
-
-static int hash_init(struct ckpt_ctx *ctx);
-static void hash_exit(struct ckpt_ctx *ctx);
-static int hash_insert(struct ckpt_ctx *ctx, long key, void *data);
-static void *hash_lookup(struct ckpt_ctx *ctx, long key);
-
-static inline pid_t _gettid(void)
-{
-	return syscall(__NR_gettid);
-}
-
-static inline pid_t _getpid(void)
-{
-	return syscall(__NR_getpid);
-}
-
-inline static int checkpoint(pid_t pid, int fd, unsigned long flags, int logfd)
-{
-	return syscall(__NR_checkpoint, pid, fd, flags, logfd);
-}
-
-static inline int restart(pid_t pid, int fd, unsigned long flags, int klogfd)
-{
-	return syscall(__NR_restart, pid, fd, flags, klogfd);
-}
-
-static inline int ckpt_cond_warn(struct ckpt_ctx *ctx, long mask)
-{
-	return (ctx->args->warn & mask);
-}
-		
-static inline int ckpt_cond_fail(struct ckpt_ctx *ctx, long mask)
-{
-	return (ctx->args->fail & mask);
-}
-
-static void report_exit_status(int status, char *str, int debug)
-{
-	char msg[64];
-
-	if (WIFEXITED(status))
-		sprintf(msg, "%s exited status %d", str, WEXITSTATUS(status));
-	else if (WIFSIGNALED(status))
-		sprintf(msg, "%s killed signal %d", str, WTERMSIG(status));
-	else
-		sprintf(msg, "%s dies somehow ... raw status %d", str, status);
-
-	if (debug)
-		ckpt_dbg("%s\n", msg);
-	else
-		ckpt_err("%s\n", msg);
-}
-
-static char *sig2str(int sig)
-{
-	int i = 0;
-
-	do {
-		if (signal_array[i].signum == sig)
-			return signal_array[i].sigstr;
-	} while (signal_array[++i].signum >= 0);
-	return "UNKNOWN SIGNAL";
-}
-
-static void sigchld_handler(int sig)
-{
-	int collected = 0;
-	int status;
-	pid_t pid;
-
-	while (1) {
-		pid = waitpid(-1, &status, WNOHANG | __WALL);
-		if (pid == 0) {
-			ckpt_dbg("SIGCHLD: child not ready\n");
-			break;
-		} else if (pid > 0) {
-			/* inform collection coordinator or root-task */
-			if (pid == global_child_pid) {
-				global_child_status = status;
-				global_child_collected = 1;
-				report_exit_status(status, "SIGCHLD: ", 1);
-			}
-			ckpt_dbg("SIGCHLD: collected child %d\n", pid);
-			collected = 1;
-		} else if (errno == EINTR) {
-			ckpt_dbg("SIGCHLD: waitpid interrupted\n");
-		} else if (errno == ECHILD) {
-			break;
-		} else {
-			ckpt_perror("WEIRD !! child collection failed");
-			exit(1);
-		}
-	}
-
-	if (!collected)
-		ckpt_dbg("SIGCHLD: already collected\n");
-}
-
-static void sigint_handler(int sig)
-{
-	pid_t pid = global_child_pid;
-
-	sig = global_send_sigint;
-	if (!sig) {
-		ckpt_verbose("Interrupt attempt .. ignored.\n");
-		return;
-	}
-
-	ckpt_verbose("Interrupted: sent SIG%s to "
-		     "restarted tasks\n", sig2str(sig));
-
-	if (pid) {
-		ckpt_dbg("delegating SIG%s to child %d "
-			 "(coordinator/root task)\n",
-			 sig2str(sig), pid);
-		kill(-pid, sig);
-		kill(pid, sig);
-	}
-}
-
-static int freezer_prepare(struct ckpt_ctx *ctx)
-{
-	char *freezer;
-	int fd, n, ret;
-
-#define FREEZER_THAWED  "THAWED"
-
-	freezer = malloc(strlen(ctx->args->freezer) + 32);
-	if (!freezer) {
-		ckpt_perror("malloc freezer buf");
-		return -1;
-	}
-
-	ret = -1;
-	sprintf(freezer, "%s/freezer.state", ctx->args->freezer);
-
-	fd = open(freezer, O_WRONLY, 0);
-	if (fd < 0) {
-		ckpt_perror("freezer path");
-		goto out_free;
-	}
-
-	n = write(fd, FREEZER_THAWED, sizeof(FREEZER_THAWED)); 
-	if (n != sizeof(FREEZER_THAWED)) {
-		ckpt_perror("thawing freezer");
-		goto out_close;
-	}
-
-	sprintf(freezer, "%s/tasks", ctx->args->freezer);
-	ctx->freezer = freezer;
-	ret = 0;
-
-out_close:
-	close(fd);
-
-out_free:
-	free(freezer);
-	return ret;
-}
-
-static int freezer_register(struct ckpt_ctx *ctx, pid_t pid)
-{
-	char pidstr[16];
-	int fd, n, ret;
-
-	fd = open(ctx->freezer, O_WRONLY, 0);
-	if (fd < 0) {
-		ckpt_perror("freezer path");
-		return -1;
-	}
-
-	n = sprintf(pidstr, "%d", pid);
-	ret = write(fd, pidstr, n);
-	if (ret != n) {
-		ckpt_perror("adding pid %d to freezer");
-		ret = -1;
-	} else {
-		ret = 0;
-	}
-
-	close(fd);
-	return ret;
-}
-
-/*
- * Validate the specified arguments and initialize globals based on the
- * arguments. Return 0 on success.
- */
-int process_args(struct cr_restart_args *args)
-{
-	global_debug = args->debug;
-	global_verbose = args->verbose;
-	global_ulogfd = args->ulogfd;
-	global_uerrfd = args->uerrfd;
-
-	/* input file descriptor (default: stdin) */
-	if (args->infd >= 0) {
-		if (dup2(args->infd, STDIN_FILENO) < 0) {
-			ckpt_perror("dup2 input file");
-			return -1;
-		}
-		if (args->infd != STDIN_FILENO)
-			close(args->infd);
-	}
-
-	/* output file descriptor (default: none) */
-	if (args->klogfd < 0)
-		args->klogfd = CHECKPOINT_FD_NONE;
-
-	if (args->mnt_pty)
-		args->mntns = 1;
-
-#ifndef CLONE_NEWPID
-	if (args->pidns) {
-		ckpt_err("This version of restart was compiled without "
-		       "support for --pidns.\n");
-		return -1;
-	}
-#endif
-
-#ifndef CHECKPOINT_DEBUG
-	if (global_debug) {
-		ckpt_err("This version of restart was compiled without "
-		       "support for --debug.\n");
-		return -1;
-	}
-#endif
-
-	if (args->pidns)
-		args->pids = 1;
-
-#if 0   /* Defered until __NR_eclone makes it to standard headers */
-#ifndef __NR_eclone
-	if (args->pids) {
-		ckpt_err("This version of restart was compiled without "
-		       "support for --pids.\n");
-		return -1;
-	}
-#endif
-#endif
-
-	if (args->self &&
-	    (args->pids || args->pidns || args->show_status ||
-	     args->copy_status || args->freezer)) {
-		ckpt_err("Invalid mix of --self with multiprocess options\n");
-		return -1;
-	}
-
-	return 0;
-}
-
-int cr_restart(struct cr_restart_args *args)
-{
-	struct ckpt_ctx ctx;
-	int ret;
-
-	memset(&ctx, 0, sizeof(ctx));
-	ctx.args = args;
-
-	ret = process_args(args);
-	if (ret < 0)
-		return ret;
-
-	/* freezer preparation */
-	if (args->freezer && freezer_prepare(&ctx) < 0)
-		return -1;
-
-	ret = -1;
-	/* self-restart ends here: */
-	if (args->self) {
-		/* private mounts namespace ? */
-		if (args->mntns && unshare(CLONE_NEWNS | CLONE_FS) < 0) {
-			ckpt_perror("unshare");
-			goto cleanup_freezer;
-		}
-		/* chroot ? */
-		if (args->root && chroot(args->root) < 0) {
-			ckpt_perror("chroot");
-			goto cleanup_freezer;
-		}
-		/* remount /dev/pts ? */
-		if (args->mnt_pty && ckpt_remount_devpts(&ctx) < 0)
-			goto cleanup_freezer;
-
-		restart(getpid(), STDIN_FILENO, RESTART_TASKSELF, args->klogfd);
-
-		/* reach here if restart(2) failed ! */
-		ckpt_perror("restart");
-		goto cleanup_freezer;
-	}
-
-	setpgrp();
-
-	ret = ckpt_read_header(&ctx);
-	if (ret < 0) {
-		ckpt_perror("read c/r header");
-		goto cleanup_freezer;
-	}
-		
-	ret = ckpt_read_header_arch(&ctx);
-	if (ret < 0) {
-		ckpt_perror("read c/r header arch");
-		goto cleanup_freezer;
-	}
-
-	ret = ckpt_read_container(&ctx);
-	if (ret < 0) {
-		ckpt_perror("read c/r container section");
-		goto cleanup_freezer;
-	}
-
-	ret = ckpt_read_tree(&ctx);
-	if (ret < 0) {
-		ckpt_perror("read c/r tree");
-		goto cleanup_freezer;
-	}
-
-	ret = ckpt_read_vpids(&ctx);
-	if (ret < 0) {
-		ckpt_perror("read c/r tree");
-		goto cleanup_freezer;
-	}
-
-	/* build creator-child-relationship tree */
-	ret = hash_init(&ctx);
-	if (ret < 0)
-		goto cleanup_freezer;
-
-	ret = ckpt_build_tree(&ctx);
-	hash_exit(&ctx);
-	if (ret < 0)
-		goto cleanup_freezer;
-
-	ret = assign_vpids(&ctx);
-	if (ret < 0)
-		goto cleanup_freezer;
-
-	ret = ckpt_fork_feeder(&ctx);
-	if (ret < 0)
-		goto cleanup_freezer;
-
-	/*
-	 * Have the first child in the restarted process tree
-	 * setup devpts, root-dir and /proc if necessary, ...
-	 */
-	if (ctx.args->mnt_pty)
-		ctx.tasks_arr[0].flags |= TASK_NEWPTS;
-	if (ctx.args->mntns)
-		ctx.tasks_arr[0].flags |= TASK_NEWNS;
-	if (ctx.args->root)
-		ctx.tasks_arr[0].flags |= TASK_NEWROOT;
-
-	if (ctx.args->pidns && ctx.tasks_arr[0].pid != 1) {
-		ckpt_dbg("new pidns without init\n");
-		if (global_send_sigint == -1)
-			global_send_sigint = SIGINT;
-		/*
-		 * ...unless we have an explicit coordinator, in which case
-		 * the coordinator should set up the filesystems and
-		 * not the first process in the application process tree.
-		 */
-		ctx.tasks_arr[0].flags &=
-			~(TASK_NEWPTS | TASK_NEWROOT |TASK_NEWNS);
-		ret = ckpt_coordinator_pidns(&ctx);
-	} else if (ctx.args->pidns) {
-		ckpt_dbg("new pidns with init\n");
-		ctx.tasks_arr[0].flags |= TASK_NEWPID | TASK_NEWNS;
-		if (global_send_sigint == -1)
-			global_send_sigint = SIGKILL;
-		ret = ckpt_coordinator(&ctx);
-	} else {
-		ckpt_dbg("subtree (existing pidns)\n");
-		if (global_send_sigint == -1)
-			global_send_sigint = SIGINT;
-		ret = ckpt_coordinator(&ctx);
-	}
-
-	/*
-	 * On success, return pid of root of the restart process tree.
-	 */
-	if (ret >= 0)
-		ret = global_child_pid;
-
-cleanup_freezer:
-	free(ctx.freezer);
-	return ret;
-}
-
-static int ckpt_parse_status(int status, int mimic, int verbose)
-{
-	int sig = 0;
-	int ret = 0;
-
-	if (verbose && global_sent_sigint)
-		ckpt_verbose("Terminated\n");
-	if (WIFSIGNALED(status)) {
-		sig = WTERMSIG(status);
-		if (verbose && !global_sent_sigint)
-			ckpt_verbose("Killed %d\n", sig);
-		ckpt_dbg("task terminated with signal %d\n", sig);
-	} else if (WIFEXITED(status)) {
-		ret = WEXITSTATUS(status);
-		if (verbose)
-			ckpt_verbose("Exited %d\n", ret);
-		ckpt_dbg("task exited with status %d\n", ret);
-	}
-
-	if (mimic) {
-		if (sig) {
-			ckpt_dbg("mimic sig %d\n", sig);
-			signal(sig, SIG_DFL);  /* so kill() affects us */
-			kill(_getpid(), sig);
-		} else {
-			ckpt_dbg("mimic ret %d\n", ret);
-			return ret;
-		}
-	}
-
-	return 0;
-}
-
-static int ckpt_collect_child(struct ckpt_ctx *ctx)
-{
-	int mimic = ctx->args->copy_status;
-	int verbose = ctx->args->show_status;
-	int status;
-	pid_t pid;
-
-	/*
-	 * if sigchld_handler() collected the child by now, it set
-	 * @global_child_collected and then left the status inside
-	 * @global_child_status.
-	 */
-	while (!global_child_collected) {
-		pid = waitpid(global_child_pid, &status, __WALL);
-		if (pid == global_child_pid)
-			break;
-	}
-	/*
-	 * moreover, the child may have terminated right before the
-	 * call to waitpid()
-	 */
-	if (global_child_collected) {
-		status = global_child_status;
-	} else if (pid < 0) {
-		ckpt_perror("WEIRD: collect child task");
-		return -1;
-	}
-
-	return ckpt_parse_status(status, mimic, verbose);
-}
-
-static int ckpt_remount_devpts(struct ckpt_ctx *ctx)
-{
-	struct stat ptystat;
-
-	/* make sure /dev/ptmx is a link else we'll just break */
-	if (lstat("/dev/ptmx", &ptystat) < 0) {
-		ckpt_perror("stat /dev/ptmx");
-		return -1;
-	}
-	if ((ptystat.st_mode & S_IFMT) != S_IFLNK) {
-		ckpt_err("Error: /dev/ptmx must be a link to /dev/pts/ptmx\n");
-		return -1;
-	}
-
-	/* this is unlikely, but maybe we don't want to fail */
-	if (umount2("/dev/pts", MNT_DETACH) < 0) {
-		if (ckpt_cond_fail(ctx, CKPT_COND_MNTPTY)) {
-			ckpt_perror("umount -l /dev/pts");
-			return -1;
-		}
-		if (ckpt_cond_warn(ctx, CKPT_COND_MNTPTY))
-			ckpt_err("[warn] failed to un-mount old /dev/pts\n");
-	}
-	if (mount("pts", "/dev/pts", "devpts", 0,
-		  "ptmxmode=666,newinstance") < 0) {
-		ckpt_perror("mount -t devpts -o newinstance");
-		return -1;
-	}
-
-	return 0;
-}
-
-static int ckpt_close_files(void)
-{
-	char fdpath[64], *endp;
-        struct dirent *dent;
-        DIR *dirp;
-	int fd;
-
-	/*
-	 * Close all the open files reported in /proc/self/task/TID/fd.
-	 * If that file is unavailable, do it the traditional way...
-	 */
-        snprintf(fdpath, PATH_MAX, "/proc/self/task/%d/fd", _gettid());
-	dirp = opendir(fdpath);
-	if (dirp) {
-		while ((dent = readdir(dirp))) {
-			fd = strtol(dent->d_name, &endp, 10);
-			if (dent->d_name != endp && *endp == '\0')
-				close(fd);
-		}
-		closedir(dirp);
-	} else {
-		struct rlimit rlim;
-		getrlimit(RLIMIT_NOFILE, &rlim);
-		for (fd = 0; fd < rlim.rlim_max; fd++)
-			close(fd);
-	}
-
-	return 0;
-}
-
-static int ckpt_pretend_reaper(struct ckpt_ctx *ctx)
-{
-	int status;
-	pid_t pid;
-
-	while (1) {
-		pid = waitpid(-1, &status, __WALL);
-		if (pid < 0 && errno == ECHILD)
-			break;
-		if (!global_child_collected && pid == global_child_pid) {
-			global_child_collected = 1;
-			global_child_status = status;
-		}
-	}
-
-	return ckpt_parse_status(global_child_status, 1, 0);
-}
-
-static int ckpt_probe_child(pid_t pid, char *str)
-{
-	int status, ret;
-
-	/* use waitpid() to probe that a child is still alive */
-	ret = waitpid(pid, &status, WNOHANG);
-	if (ret == pid) {
-		report_exit_status(status, str, 0);
-		return -1;
-	} else if (ret < 0 && errno == ECHILD) {
-		ckpt_err("WEIRD: %s exited without trace (%s)\n",
-			 str, strerror(errno));
-		return -1;
-	} else if (ret != 0) {
-		ckpt_err("waitpid for %s (%s)", str, strerror(errno));
-		return -1;
-	}
-	return 0;
-}
-
-/*
- * Remount the /proc with a new instance: tasks that start a new
- * pid-ns need a fresh mount of /proc to reflect their pid-ns.
- */
-static int ckpt_remount_proc(struct ckpt_ctx *ctx)
-{
-	/* this is unlikely, but we don't want to fail */
-	if (umount2("/proc", MNT_DETACH) < 0) {
-		if (ckpt_cond_fail(ctx, CKPT_COND_MNTPROC)) {
-			ckpt_perror("umount -l /proc");
-			return -1;
-		}
-		if (ckpt_cond_warn(ctx, CKPT_COND_MNTPROC))
-			ckpt_err("[warn] failed to un-mount old /proc\n");
-	}
-	if (mount("proc", "/proc", "proc", 0, NULL) < 0) {
-		ckpt_perror("mount -t proc");
-		return -1;
-	}
-
-	return 0;
-}
-
-#ifdef CLONE_NEWPID
-static int __ckpt_coordinator(void *arg)
-{
-	struct ckpt_ctx *ctx = (struct ckpt_ctx *) arg;
-
-	/* chroot ? */
-	if (ctx->args->root && chroot(ctx->args->root) < 0) {
-		ckpt_perror("chroot");
-		_exit(1);
-	}
-	/* tasks with new pid-ns need new /proc mount */
-	if (ckpt_remount_proc(ctx) < 0)
-		_exit(1);
-	/* remount /dev/pts ? */
-	if (ctx->args->mnt_pty && ckpt_remount_devpts(ctx) < 0)
-		_exit(1);
-
-	if (!ctx->args->wait)
-		close(ctx->pipe_coord[0]);
-
-	return ckpt_coordinator(ctx);
-}
-
-static int ckpt_coordinator_status(struct ckpt_ctx *ctx)
-{
-	int status = -1;
-	int ret;
-
-	close(ctx->pipe_coord[1]);
-
-	ret = read(ctx->pipe_coord[0], &status, sizeof(status));
-	if (ret < 0)
-		ckpt_perror("read coordinator status");
-	else if (ret == 0) {
-		/* coordinator failed to report */
-		ckpt_dbg("Coordinator failed to report status.");
-	} else
-		status = 0;
-
-	close(ctx->pipe_coord[0]);
-	return status;
-}
-
-static int ckpt_coordinator_pidns(struct ckpt_ctx *ctx)
-{
-	unsigned long flags;
-	pid_t coord_pid;
-	int copy, ret;
-	genstack stk;
-	void *sp;
-
-	ckpt_dbg("forking coordinator in new pidns\n");
-
-	/*
-	 * The coordinator report restart susccess/failure via pipe.
-	 * (It cannot use return value, because in the default case
-	 * of --wait --copy-status it is already used to report the
-	 * root-task's return value).
-	 */
-	if (pipe(ctx->pipe_coord) < 0) {
-		ckpt_perror("pipe");
-		return -1;
-	}
-
-	ret = -1;
-	stk = genstack_alloc(PTHREAD_STACK_MIN);
-	if (!stk) {
-		ckpt_perror("coordinator genstack_alloc");
-		goto out_close;
-	}
-	sp = genstack_sp(stk);
-
-	copy = ctx->args->copy_status;
-	ctx->args->copy_status = 1;
-
-	/* in new pidns, we need these: */
-	flags = SIGCHLD | CLONE_NEWPID | CLONE_NEWNS;
-
-	coord_pid = clone(__ckpt_coordinator, sp, flags, ctx);
-	genstack_release(stk);
-	if (coord_pid < 0) {
-		ckpt_perror("clone coordinator");
-		goto out_close;
-	}
-	global_child_pid = coord_pid;
-
-	/* catch SIGCHLD to detect errors in coordinator */
-	signal(SIGCHLD, sigchld_handler);
-	/* catch SIGINT to propagate ctrl-c to the coordinator */
-	signal(SIGINT, sigint_handler);
-
-	/*
-	 * The child (coordinator) may have already exited before the
-	 * signal handler was plugged; verify that it's still there.
-	 */
-	if (ckpt_probe_child(coord_pid, "coordinator") < 0)
-		goto out_close;
-
-	ctx->args->copy_status = copy;
-
-	ret = ckpt_coordinator_status(ctx);
-
-	if (ret == 0 && ctx->args->wait)
-		ret = ckpt_collect_child(ctx);
-
-out_close:
-	if (ret < 0) {
-		close(ctx->pipe_coord[0]);
-		close(ctx->pipe_coord[1]);
-	}
-	return ret;
-}
-#else /* CLONE_NEWPID */
-static int ckpt_coordinator_pidns(struct ckpt_ctx *ctx)
-{
-	ckpt_err("logical error: ckpt_coordinator_pidns unexpected\n");
-	return -1;
-}
-#endif /* CLONE_NEWPID */
-
-static int ckpt_coordinator(struct ckpt_ctx *ctx)
-{
-	unsigned long flags = 0;
-	pid_t root_pid;
-	int ret;
-
-	root_pid = ckpt_fork_child(ctx, &ctx->tasks_arr[0]);
-	if (root_pid < 0)
-		return -1;
-	global_child_pid = root_pid;
-
-	/* catch SIGCHLD to detect errors during hierarchy creation */
-	signal(SIGCHLD, sigchld_handler);
-	/* catch SIGINT to propagate ctrl-c to the restarted tasks */
-	signal(SIGINT, sigint_handler);
-
-	/*
-	 * The child (root_task) may have already exited before the
-	 * signal handler was plugged; verify that it's still there.
-	 */
-	if (ckpt_probe_child(root_pid, "root task") < 0)
-		return -1;
-
-	if (ctx->args->keep_frozen)
-		flags |= RESTART_FROZEN;
-	if (ctx->args->keep_lsm)
-		flags |= RESTART_KEEP_LSM;
-
-	ret = restart(root_pid, STDIN_FILENO, flags, ctx->args->klogfd);
-
-	if (ret < 0) {
-		ckpt_perror("restart failed");
-		ckpt_verbose("Failed\n");
-		ckpt_dbg("restart failed ?\n");
-		return -1;
-	}
-
-	ckpt_verbose("Success\n");
-	ckpt_dbg("restart succeeded\n");
-
-	ret = 0;
-
-	if (ctx->args->pidns && ctx->tasks_arr[0].pid != 1) {
-		/* Report success/failure to the parent */
-		if (write(ctx->pipe_coord[1], &ret, sizeof(ret)) < 0) {
-			ckpt_perror("failed to report status");
-			return -1;
-		}
-
-		/*
-		 * Close all open files to eliminate dependencies on
-		 * the outside of the container. Else, a subsequent
-		 * container-checkpoint will fail due to leaks. (Skip
-		 * when debugging to keep output fro us visible).
-		 */
-		if (!global_debug) {
-			ckpt_close_files();
-			global_verbose = 0;
-		}
-
-		/*
-		 * If root task isn't container init, we must stay
-		 * around and be reaper until all tasks are gone.
-		 * Otherwise, container will die as soon as we exit.
-		 */
-		ret = ckpt_pretend_reaper(ctx);
-	} else if (ctx->args->wait) {
-		ret = ckpt_collect_child(ctx);
-	}
-
-	if (ret < 0)
-		ckpt_dbg("c/r failed ?\n");
-	else
-		ckpt_dbg("c/r succeeded\n");
-
-	return ret;
-}
-
-static inline struct task *ckpt_init_task(struct ckpt_ctx *ctx)
-{
-	return (&ctx->tasks_arr[0]);
-}
-
-/*
- * ckpt_build_tree - build the task tree data structure which provides
- * the "instructions" to re-create the task tree
- */
-static int ckpt_build_tree(struct ckpt_ctx *ctx)
-{
-	struct task *task;
-	int i;
-
-	/*
-	 * Allow for additional tasks to be added on demand for
-	 * referenced pids of dead tasks (each task can introduce at
-	 * most two: session and process group IDs), as well as for
-	 * placeholder tasks (each session id may have at most one)
-	 */
-	ctx->tasks_max = ctx->pids_nr * 4;
-	ctx->tasks_arr = malloc(sizeof(*ctx->tasks_arr) * ctx->tasks_max);
-	if (!ctx->tasks_arr) {
-		ckpt_perror("malloc tasks array");
-		return -1;
-	}
-
-	/* initialize tree */
-	if (ckpt_init_tree(ctx) < 0) {
-		free(ctx->tasks_arr);
-		ctx->tasks_arr = NULL;
-		return -1;
-	}
-
-	/* assign a creator to each task */
-	for (i = 0; i < ctx->tasks_nr; i++) {
-		task = &ctx->tasks_arr[i];
-		if (task->creator)
-			continue;
-		if (ckpt_set_creator(ctx, task) < 0) {
-			free(ctx->tasks_arr);
-			ctx->tasks_arr = NULL;
-			return -1;
-		}
-	}
-
-#ifdef CHECKPOINT_DEBUG
-	ckpt_dbg("====== TASKS\n");
-	for (i = 0; i < ctx->tasks_nr; i++) {
-		task = &ctx->tasks_arr[i];
-		ckpt_dbg("\t[%d] pid %d ppid %d sid %d creator %d",
-			 i, task->pid, task->ppid, task->sid,
-			 task->creator->pid);
-		if (task->next_sib)
-			ckpt_dbg_cont(" next %d", task->next_sib->pid);
-		if (task->prev_sib)
-			ckpt_dbg_cont(" prev %d", task->prev_sib->pid);
-		if (task->phantom)
-			ckpt_dbg_cont(" placeholder %d", task->phantom->pid);
-		ckpt_dbg_cont(" %c%c%c%c%c%c",
-		       (task->flags & TASK_THREAD) ? 'T' : ' ',
-		       (task->flags & TASK_SIBLING) ? 'P' : ' ',
-		       (task->flags & TASK_SESSION) ? 'S' : ' ',
-		       (task->flags & TASK_NEWPID) ? 'N' : ' ',
-		       (task->flags & TASK_GHOST) ? 'G' : ' ',
-		       (task->flags & TASK_DEAD) ? 'D' : ' ');
-		ckpt_dbg_cont("\n");
-	}
-	ckpt_dbg("............\n");
-#endif
-
-	return 0;
-}		
-
-static int ckpt_setup_task(struct ckpt_ctx *ctx, pid_t pid, pid_t ppid)
-{
-	struct task *task;
-
-	if (pid == 0)  /* ignore if outside namespace */
-		return 0;
-
-	if (hash_lookup(ctx, pid))  /* already handled */
-		return 0;
-
-	task = &ctx->tasks_arr[ctx->tasks_nr++];
-
-	task->flags = TASK_GHOST;
-
-	task->pid = pid;
-	task->ppid = ppid;
-	task->tgid = pid;
-	task->sid = ppid;
-
-	task->children = NULL;
-	task->next_sib = NULL;
-	task->prev_sib = NULL;
-	task->creator = NULL;
-	task->phantom = NULL;
-
-	task->rpid = -1;
-	task->ctx = ctx;
-
-	if (hash_insert(ctx, pid, task) < 0)
-		return -1;
-
-	/* remember the max pid seen */
-	if (task->pid > ctx->tasks_pid)
-		ctx->tasks_pid = task->pid;
-
-	return 0;
-}
-
-static int ckpt_valid_pid(struct ckpt_ctx *ctx, pid_t pid, char *which, int i)
-{
-	if (pid < 0) {
-		ckpt_err("Invalid %s %d (for task#%d)\n", which, pid, i);
-		return 0;
-	}
-	if (!ctx->args->pidns && pid == 0) {
-		if (ckpt_cond_fail(ctx, CKPT_COND_PIDZERO)) {
-			ckpt_err("[err] task # %d with %s zero"
-				 " (requires --pidns)\n", i + 1, which);
-			return 0;
-		} else if (ckpt_cond_warn(ctx, CKPT_COND_PIDZERO)) {
-			ckpt_err("[warn] task # %d with %s zero"
-				 " (consider --pidns)\n", i + 1, which);
-		}
-	}
-	return 1;
-}
-
-static int ckpt_alloc_pid(struct ckpt_ctx *ctx)
-{
-	int n = 0;
-
-	/*
-	 * allocate an unused pid for the placeholder
-	 * (this will become inefficient if pid-space is exhausted)
-	 */
-	do {
-		if (ctx->tasks_pid == INT_MAX)
-			ctx->tasks_pid = 2;
-		else
-			ctx->tasks_pid++;
-
-		if (n++ == INT_MAX) {	/* ohhh... */
-			ckpt_err("pid namsepace exhausted");
-			return -1;
-		}
-	} while (hash_lookup(ctx, ctx->tasks_pid));
-
-	return ctx->tasks_pid;
-}
-
-static int ckpt_zero_pid(struct ckpt_ctx *ctx)
-{
-	pid_t pid;
-
-	pid = ckpt_alloc_pid(ctx);
-	if (pid < 0)
-		return -1;
-	if (ckpt_setup_task(ctx, pid, ctx->pids_arr[0].vpid) < 0)
-		return -1;
-	return pid;
-}
-
-static int ckpt_init_tree(struct ckpt_ctx *ctx)
-{
-	struct ckpt_pids *pids_arr = ctx->pids_arr;
-	int pids_nr = ctx->pids_nr;
-	struct task *task;
-	pid_t root_pid;
-	pid_t root_sid;
-	pid_t zero_pid = 0;
-	int i;
-
-	root_pid = pids_arr[0].vpid;
-	root_sid = pids_arr[0].vsid;
-
-	/*
-	 * The case where root_sid != root_pid is special. It must be
-	 * from a subtree checkpoint (in container, root_sid is either
-	 * same as root_pid or 0), and root_sid was inherited from an
-	 * ancestor of that subtree.
-	 *
-	 * If we restart with --pidns, make the root-task also inherit
-	 * sid from its ancestor (== coordinator), whatever 'restart'
-	 * task currently has.  For that, we force the root-task's sid
-	 * and all references to it from other tasks (via sid and
-	 * pgid), to 0. Later, the feeder will substitute the
-	 * cooridnator's sid for them.
-	 *
-	 * (Note that this still works even if the coordinator's sid
-	 * is "used" by a restarting task: a new-pidns restart will
-	 * fail because the pid is in use, and in an old-pidns restart
-	 * the task will be assigned a new pid anyway).
-	 *
-	 * If we restart with --no-pidns, we'll add a ghost task below
-	 * whose pid will be used instead of these zeroed entried.
-	 */
-
-	/* forcing root_sid to -1, will make comparisons below fail */
-	if (root_sid == root_pid)
-		root_sid = -1;
-
-	/* populate with known tasks */
-	for (i = 0; i < pids_nr; i++) {
-		task = &ctx->tasks_arr[i];
-
-		task->flags = 0;
-
-		if (!ckpt_valid_pid(ctx, pids_arr[i].vpid, "pid", i))
-			return -1;
-		else if (!ckpt_valid_pid(ctx, pids_arr[i].vtgid, "tgid", i))
-			return -1;
-		else if (!ckpt_valid_pid(ctx, pids_arr[i].vsid, "sid", i))
-			return -1;
-		else if (!ckpt_valid_pid(ctx, pids_arr[i].vpgid, "pgid", i))
-			return -1;
-
-		if (pids_arr[i].vsid == root_sid)
-			pids_arr[i].vsid = 0;
-		if (pids_arr[i].vpgid == root_sid)
-			pids_arr[i].vpgid = 0;
-
-		task->pid = pids_arr[i].vpid;
-		task->ppid = pids_arr[i].vppid;
-		task->tgid = pids_arr[i].vtgid;
-		task->sid = pids_arr[i].vsid;
-
-		task->children = NULL;
-		task->next_sib = NULL;
-		task->prev_sib = NULL;
-		task->creator = NULL;
-		task->phantom = NULL;
-
-		task->rpid = -1;
-		task->ctx = ctx;
-
-		if (hash_insert(ctx, task->pid, task) < 0)
-			return -1;
-	}
-
-	ctx->tasks_nr = pids_nr;
-
-	/* add pids unaccounted for (no tasks) */
-	for (i = 0; i < pids_nr; i++) {
-		pid_t sid;
-
-		sid = pids_arr[i].vsid;
-
-		/* Remember if we find any vsid/vpgid - see below */
-		if (pids_arr[i].vsid == 0 || pids_arr[i].vpgid == 0)
-			zero_pid = 1;
-		/*
-		 * An unaccounted-for sid belongs to a task that was a
-		 * session leader and died. We can safe set its parent
-		 * (and creator) to be the root task.
-		 */
-		if (ckpt_setup_task(ctx, sid, root_pid) < 0)
-			return -1;
-
-		/*
-		 * An sid == 0 means that the session was inherited an
-		 * ancestor of root_task, and more specifically, via
-		 * root_task itself: make root_task our parent.
-		 */
-		if (sid == 0)
-			sid = root_pid;
-
-		/*
-		 * If a pid belongs to a dead thread group leader, we
-		 * need to add it with the same sid as current (and
-		 * other) threads.
-		 */
-		if (ckpt_setup_task(ctx, pids_arr[i].vtgid, sid) < 0)
-			return -1;
-
-		/*
-		 * If pgrp == sid, then the pgrp/sid will already have
-		 * been hashed by now (e.g. by the call above) and the
-		 * ckpt_setup_task() will return promptly.
-		 * If pgrp != sid, then the pgrp 'owner' must have the
-		 * same sid as us: all tasks with same pgrp must have
-		 * their sid matching.
-		 */
-		if (ckpt_setup_task(ctx, pids_arr[i].vpgid, sid) < 0)
-			return -1;
-	}
-
-	/*
-	 * Zero sid/pgid is disallowed in --no-pidns mode. If there
-	 * were any, we invent a new ghost-zero task and substitute
-	 * its pid for those any sid/pgid.
-	 */
-	if (zero_pid && !ctx->args->pidns) {
-		zero_pid = ckpt_zero_pid(ctx);
-		if (zero_pid < 0)
-			return -1;
-		for (i = 0; i < pids_nr; i++) {
-			if (pids_arr[i].vsid == 0) {
-				pids_arr[i].vsid = zero_pid;
-				pids_arr[i].vppid = zero_pid;
-			}
-			if (pids_arr[i].vpgid == 0) {
-				pids_arr[i].vpgid = zero_pid;
-				pids_arr[i].vppid = zero_pid;
-			}
-		}
-	}
-
-	/* mark root task(s), and set its "creator" to be zero_task */
-	ckpt_init_task(ctx)->flags |= TASK_ROOT;
-	ckpt_init_task(ctx)->creator = &zero_task;
-
-	ckpt_dbg("total tasks (including ghosts): %d\n", ctx->tasks_nr);
-	return 0;
-}
-
-/*
- * Algorithm DumpForest
- * "Transparent Checkpoint/Restart of Multiple Processes on Commodity
- * Operating Systems" in USENIX 2007
- * http://www.usenix.org/events/usenix07/tech/full_papers/laadan/laadan_html/paper.html
- *
- * The algorithm captures the state of the task forest. It considers
- * all pid values, even of dead tasks (appearing in sid/pgid of other
- * tasks). The input is a table with all pids; the output is a tree
- * structure imposed on that table. The goal is to determine the
- * creating parent (creator) of each task. At restart, the init task
- * will recursively create the remaining tasks as instructed by the
- * table.
- *
- * Each entry in the table consists of the following set of fields:
- * flags, pid, tgid, sid, pgid, and pointers to a creator, next
- * and previous sibling, and first child task. Note that the creator
- * may not necessarily correspond to the parent. The possible flags
- * are TASK_ROOT, TASK_GHOST, TASK_THREAD, TASK_SIBLING (that asks to
- * inherit the parent via CLONE_PARENT), TASK_SESSION (that asks to
- * inherit a session id), TASK_NEWPID (that asks to start a new pid
- * namespace), and TASK_DEAD. The algorithm loops through all the
- * entries in the table:
- *
- * If the entry is a thread and not the thread group leader, we set
- * the creator to be the thread group leader and set TASK_THREAD.
- *
- * Otherwise, if the entry is a session leader, it must have called
- * setsid(), and does not need to inherit its session. The creator is
- * set to its real parent.
- *
- * Otherwise, if the entry is a dead task (no current task exists
- * with the given pid), the only constraint is that it inherit the
- * correct session id. The session leader is set as its creator.
- *
- * Otherwise, if the entry is an orphan task, it cannot inherit the
- * correct session id from init. We add a placeholder task in the
- * table whose function on restart is to inherit the session id from
- * the session leader, create the task, then terminate so that the
- * task will be orphaned. The placeholder is given an arbitrary pid
- * not already in the table, and the sid identifying the session, and
- * is marked TASK_DEAD.
- *
- * Otherwise, if the entry's sid is equal to its parent's, the only
- * constraint is that it inherit the correct session id from its
- * parent. This is simply done by setting its parent as its creator.
- *
- * Otherwise, the entry corresponds to a task which is not a session
- * leader, does not share the session id with its parent, and hence
- * whose session id must be inherited from an ancestor further up the
- * tree forest. The task was forked by its parent before the parent
- * changed its own sid. Its creator is set to be its parent, and it is
- * marked TASK_SESSION. This flag is propagated up its ancestry until
- * reaching an entry with that session id. This ensures that the sid
- * correctly descend via inheritance to the current entry.
- *
- * If the traversal fails to find an entry with the same sid, it will
- * stop at an entry of a leader of another session. This entry must
- * have formerly been a descendant of the original session leader.
- * Its creator will have already been set to init.  Because we now know
- * that it needs to pass the original sid to its own descendants, we
- * re-parent the entry to become a descendant of the original session
- * leader.  This is done using a placeholder in a manner similar to
- * how we handle orphans that are not session leaders.
- */
-/*
- * ckpt_set_creator: choose the creator of 'task' and push 'task' onto the
- * head of that creator's children list.
- *
- * The branches below are tried in order: thread, session leader, dead
- * task, same-sid-as-parent, orphan (ppid == 1), and finally the case
- * where the sid has to come from further up the tree. The last case may
- * recurse to resolve the session leader's creator first.
- *
- * Returns 0 on success, -1 on error (an error message is logged).
- */
-static int ckpt_set_creator(struct ckpt_ctx *ctx, struct task *task)
-{
-	struct task *session = hash_lookup(ctx, task->sid);
-	struct task *parent = hash_lookup(ctx, task->ppid);
-	struct task *creator;
-
-	/* the init task never gets a creator assigned through this path */
-	if (task == ckpt_init_task(ctx)) {
-		ckpt_err("pid %d: logical error\n", ckpt_init_task(ctx)->pid);
-		return -1;
-	}
-
-	/* sid == 0 must have been inherited from outside the container */
-	if (task->sid == 0)
-		session = ckpt_init_task(ctx);
-
-	if (task->tgid != task->pid) {
-		/* thread: creator is thread-group-leader */
-		ckpt_dbg("pid %d: thread tgid %d\n", task->pid, task->tgid);
-		creator = hash_lookup(ctx, task->tgid);
-		if (!creator) {
-			/* oops... thread group leader MIA */
-			ckpt_err("pid %d: no leader %d\n", task->pid, task->tgid);
-			return -1;
-		}
-		task->flags |= TASK_THREAD;
-	} else if (task->ppid == 0 || !parent) {
-		/* only root_task can have ppid == 0, parent must always exist */
-		ckpt_err("pid %d: invalid ppid %d\n", task->pid, task->ppid);
-		return -1;
-	} else if (task->pid == task->sid) {
-		/* session leader: creator is parent */
-		ckpt_dbg("pid %d: session leader\n", task->pid);
-		creator = parent;
-	} else if (task->flags & TASK_DEAD) {
-		/* dead: creator is session leader */
-		ckpt_dbg("pid %d: task is dead\n", task->pid);
-		creator = session;
-	} else if (task->sid == parent->sid) {
-		/* (non-session-leader) inherit: creator is parent */
-		ckpt_dbg("pid %d: inherit sid %d\n", task->pid, task->sid);
-		creator = parent;
-	} else if (task->ppid == 1) {
-		/* (non-session-leader) orphan: creator is dummy */
-		/*
-		 * NOTE(review): 'session' comes from hash_lookup() and is
-		 * dereferenced here (and in the branch below) without a NULL
-		 * check - confirm the lookup cannot fail at this point.
-		 */
-		ckpt_dbg("pid %d: orphan session %d\n", task->pid, task->sid);
-		if (!session->phantom)
-			if (ckpt_placeholder_task(ctx, task) < 0)
-				return -1;
-		creator = session->phantom;
-	} else {
-		/* first make sure we know the session's creator */
-		if (!session->creator) {
-			/* (non-session-leader) recursive: session's creator */
-			ckpt_dbg("pid %d: recursive session creator %d\n",
-			       task->pid, task->sid);
-			if (ckpt_set_creator(ctx, session) < 0)
-				return -1;
-		}
-		/* then use it to decide what to do */
-		if (session->creator->pid == task->ppid) {
-			/* init must not be sibling creator (CLONE_PARENT) */
-			if (session == ckpt_init_task(ctx)) {
-				ckpt_err("pid %d: sibling session prohibited"
-				       " with init as creator\n", task->pid);
-				return -1;
-			}
-			/* (non-session-leader) sibling: creator is sibling */
-			ckpt_dbg("pid %d: sibling session %d\n",
-			       task->pid, task->sid);
-			creator = session;
-			task->flags |= TASK_SIBLING;
-		} else {
-			/* (non-session-leader) session: fork before setsid */
-			ckpt_dbg("pid %d: propagate session %d\n",
-			       task->pid, task->sid);
-			creator = parent;
-			task->flags |= TASK_SESSION;
-		}
-	}
-
-	/* push 'task' onto the head of the creator's children list */
-	if (creator->children) {
-		struct task *next = creator->children;
-
-		task->next_sib = next;
-		next->prev_sib = task;
-	}
-
-	ckpt_dbg("pid %d: creator set to %d\n", task->pid, creator->pid);
-	task->creator = creator;
-	creator->children = task;
-
-	/* a TASK_SESSION task needs its ancestors marked as well */
-	if (task->flags & TASK_SESSION)
-		if (ckpt_propagate_session(ctx, task) < 0)
-			return -1;
-
-	return 0;
-}
-
-/*
- * ckpt_placeholder_task: append a TASK_DEAD placeholder to the task table
- * and make it the creator of 'task'.
- *
- * The placeholder gets a freshly allocated pid, the sid of 'task', and is
- * linked as a child (and as the 'phantom') of the session leader of
- * 'task'. If 'task' is already linked under a creator it is unlinked from
- * that creator's children list first.
- *
- * Returns 0 on success, -1 if the session leader is unknown, the task
- * table is full, or no pid could be allocated.
- */
-static int ckpt_placeholder_task(struct ckpt_ctx *ctx, struct task *task)
-{
-	struct task *session = hash_lookup(ctx, task->sid);
-	struct task *holder;
-	pid_t pid;
-
-	if (!session) {
-		ckpt_err("pid %d: no session leader %d\n", task->pid, task->sid);
-		return -1;
-	}
-
-	/* check for room before consuming a slot in the table */
-	if (ctx->tasks_nr >= ctx->tasks_max) {
-		/* shouldn't happen, because we prepared enough */
-		ckpt_err("out of space in task table !");
-		return -1;
-	}
-	holder = &ctx->tasks_arr[ctx->tasks_nr++];
-
-	pid = ckpt_alloc_pid(ctx);
-	if (pid < 0)
-		return -1;
-
-	holder->flags = TASK_DEAD;
-
-	holder->pid = pid;
-	holder->ppid = ckpt_init_task(ctx)->pid;
-	holder->tgid = pid;
-	holder->sid = task->sid;
-
-	holder->children = NULL;
-	holder->next_sib = NULL;
-	holder->prev_sib = NULL;
-	holder->phantom = NULL;
-
-	holder->rpid = -1;
-	holder->ctx = ctx;
-
-	/* link the placeholder at the head of the session leader's children */
-	holder->creator = session;
-	if (session->children) {
-		holder->next_sib = session->children;
-		session->children->prev_sib = holder;
-	}
-	session->children = holder;
-	session->phantom = holder;
-
-	/* reparent entry if necessary: unlink it from its current siblings */
-	if (task->next_sib)
-		task->next_sib->prev_sib = task->prev_sib;
-	if (task->prev_sib)
-		task->prev_sib->next_sib = task->next_sib;
-	/*
-	 * Only move the creator's head pointer when 'task' is the head;
-	 * otherwise the siblings in front of 'task' would be dropped.
-	 */
-	if (task->creator && task->creator->children == task)
-		task->creator->children = task->next_sib;
-
-	/*
-	 * NOTE(review): 'task' is not added to holder->children here; the
-	 * caller ckpt_set_creator() links it afterwards - confirm the
-	 * ckpt_propagate_session() path does not need the same.
-	 */
-	task->creator = holder;
-	task->next_sib = NULL;
-	task->prev_sib = NULL;
-
-	return 0;
-}
-
-/*
- * ckpt_propagate_session: walk up the creator chain starting at 'task',
- * setting TASK_SESSION on each entry visited.
- *
- * The walk stops once the entry reached has the sid of the starting task,
- * is the init task, already carries TASK_SESSION, or has the session
- * leader as its creator. Entries on the way that have no creator yet get
- * one via ckpt_set_creator().
- *
- * Returns 0 on success, -1 on error.
- */
-static int ckpt_propagate_session(struct ckpt_ctx *ctx, struct task *task)
-{
-	struct task *session = hash_lookup(ctx, task->sid);
-	struct task *creator;
-	pid_t sid = task->sid;
-
-	do {
-		ckpt_dbg("pid %d: set session\n", task->pid);
-		task->flags |= TASK_SESSION;
-
-		/*
-		 * Remember the current creator: ckpt_placeholder_task()
-		 * re-points task->creator at the placeholder, but the walk
-		 * below continues from the creator saved here.
-		 */
-		creator = task->creator;
-		if (creator->pid == 1) {
-			if (ckpt_placeholder_task(ctx, task) < 0)
-				return -1;
-		}
-
-		ckpt_dbg("pid %d: moving up to %d\n", task->pid, creator->pid);
-		task = creator;
-
-		/* make sure the next step up the chain exists */
-		if(!task->creator) {
-			if (ckpt_set_creator(ctx, task) < 0)
-				return -1;
-		}
-	} while (task->sid != sid &&
-		 task != ckpt_init_task(ctx) &&
-		 !(task->flags & TASK_SESSION) &&
-		 task->creator != session);
-
-	return 0;
-}
-
-/*
- * Algorithm MakeForest
- * "Transparent Checkpoint/Restart of Multiple Processes on Commodity
- * Operating Systems" in USENIX 2007
- * http://www.usenix.org/events/usenix07/tech/full_papers/laadan/laadan_html/paper.html
- *
- * The algorithm reconstructs the task hierarchy and relationships.
- * It works in a recursive manner by following the instructions set
- * forth by the task forest data structure. It begins with a single
- * init task, that will fork the tasks that have init set as their
- * creator. Each task then creates its own children.
- *
- * The algorithm loops through the list of children of a task three
- * times, during which the children are forked or cleaned up.  Each
- * child that is forked executes the same algorithm recursively until
- * all tasks have been created.
- *
- * In the first pass the current task spawns children that are marked
- * with TASK_SESSION and thereby need to be forked before the current
- * session id is changed.  The task then changes its session id if
- * needed. In the second pass the task forks the remainder of the
- * children. In both passes, a child that is marked TASK_THREAD is
- * created as a thread and a child that is marked TASK_SIBLING is
- * created with parent inheritance. Finally, the task will invoke
- * sys_restart() which does not return (if successful).
- *
- * Tasks marked TASK_DEAD or TASK_GHOST are both destined to terminate
- * anyway; both use flag RESTART_GHOST for sys_restart(), which will
- * result in a call to do_exit().
- */
-/*
- * ckpt_make_tree: runs in each newly created task; forks the children
- * listed for 'task', reports the old/new pid pair over ctx->pipe_out and
- * finally calls restart().
- *
- * Returns -1 on error, otherwise the return value of restart().
- */
-static int ckpt_make_tree(struct ckpt_ctx *ctx, struct task *task)
-{
-	struct task *child;
-	struct pid_swap swap;
-	unsigned long flags = 0;
-	pid_t newpid;
-	int ret;
-
-	ckpt_dbg("pid %d: pid %d sid %d parent %d\n",
-	       task->pid, _gettid(), getsid(0), getppid());
-
-	/* 1st pass: fork children that inherit our old session-id */
-	for (child = task->children; child; child = child->next_sib) {
-		if (child->flags & TASK_SESSION) {
-			ckpt_dbg("pid %d: fork child %d with session\n",
-			       task->pid, child->pid);
-			newpid = ckpt_fork_child(ctx, child);
-			if (newpid < 0)
-				return -1;
-			child->rpid = newpid;
-		}
-	}
-
-	/* change session id, if necessary */
-	if (task->pid == task->sid) {
-		ret = setsid();
-		/* a setsid() failure is tolerated only for the init task */
-		if (ret < 0 && task != ckpt_init_task(ctx)) {
-			ckpt_perror("setsid");
-			return -1;
-		}
-	}
-
-	/* 2nd pass: fork children that inherit our new session-id */
-	for (child = task->children; child; child = child->next_sib) {
-		if (!(child->flags & TASK_SESSION)) {
-			ckpt_dbg("pid %d: fork child %d without session\n",
-			       task->pid, child->pid);
-			newpid = ckpt_fork_child(ctx, child);
-			if (newpid < 0)
-				return -1;
-			child->rpid = newpid;
-		}
-	}
-	
-	/*
-	 * In '--no-pidns' (and not '--pids') mode we restart with
-	 * pids different than originals, so report old/new pids via
-	 * pipe to the feeder.
-	 *
-	 * However, even with '--pids', coordinator needs to create
-	 * entire hierarchy before (in kernel) reading the image.
-	 * Always reporting pids ensures that feeder only feeds data
-	 * (and coordinator reads data) when the hierarchy is ready.
-	 */
-
-	/* communicate via pipe that all is well */
-	swap.old = task->pid;
-	swap.new = _gettid();
-	ret = write(ctx->pipe_out, &swap, sizeof(swap));
-	if (ret != sizeof(swap)) {
-		ckpt_perror("write swap");
-		return -1;
-	}
-	close(ctx->pipe_out);
-
-	/*
-	 * At this point restart may have already begun in the kernel.
-	 * We shouldn't be doing much until sys_restart() below. With
-	 * threads, sys_restart() ensures that all members of a thread
-	 * group are ready before restoring any of them.
-	 */
-
-	/*
-	 * Ghost tasks are not restarted and end up dead, but their
-	 * pids are referred to by other tasks' pgids (also sids, that
-	 * are already properly set by now). Therefore, they stick
-	 * around until those tasks actually restore their pgrp, and
-	 * then exit (more precisely, killed). The RESTART_GHOST flag
-	 * tells the kernel that they are not to be restored.
-	 */
-	if (task->flags & (TASK_GHOST | TASK_DEAD))
-		flags |= RESTART_GHOST;
-
-	/* on success this doesn't return */
-	ckpt_dbg("about to call sys_restart(), flags %#lx\n", flags);
-	ret = restart(0, STDIN_FILENO, flags, CHECKPOINT_FD_NONE);
-	if (ret < 0)
-		ckpt_perror("task restore failed");
-	return ret;
-}
-
-/*
- * ckpt_fork_stub: entry point of every task created by ckpt_fork_child().
- * 'data' is the struct task describing the new task. Performs the
- * per-task setup selected by the task flags and restart arguments, then
- * continues with ckpt_make_tree().
- *
- * Returns -1 if any setup step fails, else the result of ckpt_make_tree().
- */
-int ckpt_fork_stub(void *data)
-{
-	struct task *task = (struct task *) data;
-	struct ckpt_ctx *ctx = task->ctx;
-
-	/* chroot ? */
-	if ((task->flags & TASK_NEWROOT) && chroot(ctx->args->root) < 0)
-		return -1;
-	/* tasks with new pid-ns need new /proc mount */
-	if ((task->flags & TASK_NEWPID) && ckpt_remount_proc(ctx) < 0)
-		return -1;
-	/* remount /dev/pts ? */
-	if ((task->flags & TASK_NEWPTS) && ckpt_remount_devpts(ctx) < 0)
-		return -1;
-
-	/*
-	 * In restart into a new pid namespace (--pidns), coordinator
-	 * is the container init, hence if it terminated prematurely
-	 * then the task hierarchy will be cleaned up automagically.
-	 *
-	 * In restart into existing namespace (--no-pidns) we ensure
-	 * proper cleanup of the new hierarchy in case of coordinator
-	 * death by asking to be killed then. When restart succeeds,
-	 * it will have replaced this with the original value.
-	 *
-	 * This works because in the --no-pidns case, the hierarchy of
-	 * tasks does not contain zombies (else, there must also be a
-	 * container init, whose pid (==1) is clearly already taken).
-	 *
-	 * Thus, if the parent of this task dies before this prctl()
-	 * call, it suffices to test getppid() == task->real_parent.
-	 */
-	if (!ctx->args->pidns) {
-		if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0) < 0) {
-			ckpt_perror("prctl");
-			return -1;
-		}
-		/* real_parent was recorded by ckpt_fork_child() before eclone() */
-		if (getppid() != task->real_parent) {
-			ckpt_err("[%d]: parent is MIA (%d != %d)\n",
-				 _getpid(), getppid(), task->real_parent);
-			return -1;
-		}
-	}
-
-	/* if user requested freeze at end - add ourself to cgroup */
-	if (ctx->args->freezer && freezer_register(ctx, _getpid())) {
-		ckpt_err("[%d]: failed add to freezer cgroup\n", _getpid());
-		return -1;
-	}
-
-	/* root has some extra work */
-	if (task->flags & TASK_ROOT) {
-		ctx->root_pid = _getpid();
-		ckpt_dbg("root task pid %d\n", _getpid());
-	}
-
-	return ckpt_make_tree(ctx, task);
-}
-
-/*
- * ckpt_fork_child: create the task described by 'child'.
- *
- * Allocates a stack, translates the TASK_* flags of 'child' into clone
- * flags and calls eclone() with ckpt_fork_stub() as the entry point.
- * With '--pids' the pid of 'child' is requested explicitly, followed by
- * one entry from ctx->vpids_arr per level of child->piddepth.
- *
- * Returns the value returned by eclone() (negative on failure), or -1 if
- * an earlier step fails. Every early-error path releases the stack and
- * the pids array and leaves errno describing the failure.
- */
-static pid_t ckpt_fork_child(struct ckpt_ctx *ctx, struct task *child)
-{
-	struct clone_args clone_args;
-	genstack stk;
-	unsigned long flags = SIGCHLD;
-	pid_t pid = 0;
-	pid_t *pids = &pid;
-	int i, j, depth, saved_errno;
-
-	ckpt_dbg("forking child vpid %d flags %#x\n", child->pid, child->flags);
-
-	stk = genstack_alloc(PTHREAD_STACK_MIN);
-	if (!stk) {
-		ckpt_perror("ckpt_fork_child genstack_alloc");
-		return -1;
-	}
-
-	if (child->flags & TASK_THREAD)
-		flags |= CLONE_THREAD | CLONE_SIGHAND | CLONE_VM;
-	else if (child->flags & TASK_SIBLING)
-		flags |= CLONE_PARENT;
-
-	memset(&clone_args, 0, sizeof(clone_args));
-	clone_args.nr_pids = 1;
-	/* select pid if --pids, otherwise it's 0 */
-	if (ctx->args->pids) {
-		depth = child->piddepth + 1;
-		clone_args.nr_pids = depth;
-
-		pids = calloc(depth, sizeof(*pids));
-		if (!pids) {
-			ckpt_perror("ckpt_fork_child pids malloc");
-			pids = &pid;	/* nothing to free on the way out */
-			errno = ENOMEM;
-			goto err;
-		}
-
-		pids[0] = child->pid;
-		for (j = 0; j < child->piddepth; j++)
-			pids[j + 1] = ctx->vpids_arr[child->vidx + j];
-
-#ifndef CLONE_NEWPID
-		if (child->piddepth > child->creator->piddepth) {
-			ckpt_err("nested pidns but CLONE_NEWPID undefined");
-			errno = EINVAL;
-			goto err;
-		} else if (child->flags & TASK_NEWPID) {
-			ckpt_err("TASK_NEWPID set but CLONE_NEWPID undefined");
-			errno = EINVAL;
-			goto err;
-		}
-#else /* CLONE_NEWPID */
-		if (child->piddepth > child->creator->piddepth) {
-			child->flags |= TASK_NEWPID;
-			flags |= CLONE_NEWPID;
-			clone_args.nr_pids--;
-		} else if (child->flags & TASK_NEWPID) {
-			/* The TASK_NEWPID could have been set for root task */
-			pids[0] = 0;
-			flags |= CLONE_NEWPID;
-		}
-		if (flags & CLONE_NEWPID && !ctx->args->pidns) {
-			ckpt_err("Must use --pidns for nested pidns container");
-			errno = EINVAL;
-			goto err;
-		}
-#endif /* CLONE_NEWPID */
-	}
-
-	if (child->flags & TASK_NEWNS)
-		flags |= CLONE_NEWNS;
-
-	/* siblings and threads share our parent; others are our children */
-	if (child->flags & (TASK_SIBLING | TASK_THREAD))
-		child->real_parent = getppid();
-	else
-		child->real_parent = _getpid();
-
-	clone_args.child_stack = (unsigned long)genstack_base(stk);
-	clone_args.child_stack_size = genstack_size(stk);
-
-	ckpt_dbg("task %d forking with flags %lx numpids %d\n",
-		child->pid, flags, clone_args.nr_pids);
-	for (i = 0; i < clone_args.nr_pids; i++)
-		ckpt_dbg("task %d pid[%d]=%d\n", child->pid, i, pids[i]);
-
-	pid = eclone(ckpt_fork_stub, child, flags, &clone_args, pids);
-	if (pid < 0)
-		ckpt_perror("eclone");
-
-	/* a thread keeps running on this stack, so only release otherwise */
-	if (pid < 0 || !(child->flags & TASK_THREAD))
-		genstack_release(stk);
-
-	if (pids != &pid)
-		free(pids);
-
-	ckpt_dbg("forked child vpid %d (asked %d)\n", pid, child->pid);
-	return pid;
-
-err:
-	/* release everything acquired so far, keeping errno for the caller */
-	saved_errno = errno;
-	if (pids != &pid)
-		free(pids);
-	genstack_release(stk);
-	errno = saved_errno;
-	return -1;
-}
-
-/*
- * ckpt_fork_feeder: create the feeder thread and set a pipe to deliver
- * the feeder's stdout to our stdin.
- *
- * Also setup another pipe through which new tasks will report their
- * old- and new-pid (see ckpt_adjust_pids). This was originally used
- * only for '--no-pids', but now also ensures that all restarting
- * tasks are created by the time coordinator calls restart(2).
- *
- * Returns 0 on success and -1 on failure. If the pipes, the stack or
- * the clone() fail, everything set up so far is released again.
- */
-static int ckpt_fork_feeder(struct ckpt_ctx *ctx)
-{
-	genstack stk;
-	pid_t pid;
-
-	if (pipe(ctx->pipe_feed) < 0) {
-		ckpt_perror("pipe");
-		return -1;
-	}
-
-	if (pipe(ctx->pipe_child) < 0) {
-		ckpt_perror("pipe");
-		goto err_close_feed;
-	}
-
-	/*
-	 * Use clone() without SIGCHLD so that when the feeder
-	 * terminates it does not notify the parent (coordinator), as
-	 * this may interfere with the restart.
-	 */
-
-	stk = genstack_alloc(PTHREAD_STACK_MIN);
-	if (!stk) {
-		ckpt_perror("ckpt_fork_feeder genstack_alloc");
-		goto err_close_child;
-	}
-
-	pid = clone(ckpt_do_feeder, genstack_sp(stk),
-		    CLONE_THREAD | CLONE_SIGHAND | CLONE_VM, ctx);
-	if (pid < 0) {
-		ckpt_perror("feeder thread");
-		/* no thread was created, so nobody uses the stack */
-		genstack_release(stk);
-		goto err_close_child;
-	}
-
-	/* children pipe */
-	close(ctx->pipe_child[0]);
-	ctx->pipe_out = ctx->pipe_child[1];
-	/* feeder pipe */
-	close(ctx->pipe_feed[1]);
-	if (ctx->pipe_feed[0] != STDIN_FILENO) {
-		if (dup2(ctx->pipe_feed[0], STDIN_FILENO) < 0) {
-			ckpt_perror("dup2");
-			return -1;
-		}
-		close(ctx->pipe_feed[0]);
-	}
-
-	return 0;
-
-err_close_child:
-	close(ctx->pipe_child[0]);
-	close(ctx->pipe_child[1]);
-err_close_feed:
-	close(ctx->pipe_feed[0]);
-	close(ctx->pipe_feed[1]);
-	return -1;
-}
-
-/*
- * ckpt_abort: report 'str' through ckpt_perror(), send SIGKILL using the
- * negated ctx->root_pid as the kill() target, and exit with status 1.
- * Does not return.
- * NOTE(review): a negative kill() target addresses a process group;
- * presumably root_pid has been set by the time this runs - confirm.
- */
-static void ckpt_abort(struct ckpt_ctx *ctx, char *str)
-{
-	ckpt_perror(str);
-	kill(-(ctx->root_pid), SIGKILL);
-	exit(1);
-}
-
-/*
- * Copy the image from stdin to stdout verbatim, one buffer at a time,
- * until end of input. Any read or write error aborts the restart.
- */
-static void ckpt_read_write_blind(struct ckpt_ctx *ctx)
-{
-	int nbytes;
-
-	for (;;) {
-		nbytes = read(STDIN_FILENO, ctx->buf, BUFSIZE);
-		ckpt_dbg("c/r read input %d\n", nbytes);
-		if (nbytes == 0)
-			return;
-		if (nbytes < 0)
-			ckpt_abort(ctx, "read input");
-		if (ckpt_write(STDOUT_FILENO, ctx->buf, nbytes) < 0)
-			ckpt_abort(ctx, "write output");
-	}
-}
-
-/*
- * Read/write image data while inspecting it.
- *
- * Copies the image from stdin to stdout record by record. Each record
- * header is checked for a sane length. A record of type CKPT_HDR_ERROR
- * is not passed on as data: its payload is reported through
- * ckpt_abort(), which terminates the restart. End of input on a record
- * boundary ends the copy; any other short read or error aborts.
- */
-static void ckpt_read_write_inspect(struct ckpt_ctx *ctx)
-{
-	struct ckpt_hdr h;
-	int len, ret, end;
-
-	while (1) {
-		ret = _ckpt_read(STDIN_FILENO, &h, sizeof(h));
-		if (ret == 0)
-			break;
-		if (ret < 0)
-			ckpt_abort(ctx, "read input");
-		/* only look at the header once we know it was read */
-		ckpt_dbg("ret %d len %d type %d\n", ret, h.len, h.type);
-		if (h.len < sizeof(h)) {
-			errno = EINVAL;
-			ckpt_abort(ctx, "invalid record");
-		}
-
-		ret = ckpt_write(STDOUT_FILENO, &h, sizeof(h));
-		if (ret < 0)
-			ckpt_abort(ctx, "write output");
-
-		h.len -= sizeof(h);
-		if (h.type == CKPT_HDR_ERROR) {
-			len = (h.len > BUFSIZE ? BUFSIZE : h.len);
-			ret = read(STDIN_FILENO, ctx->buf, len);
-			if (ret < 0)
-				ckpt_abort(ctx, "error record");
-			/*
-			 * Terminate right after the bytes actually read.
-			 * The message is taken from buf[1] on, so the
-			 * terminator must not land before index 1 (this
-			 * also covers an empty payload).
-			 */
-			end = (ret < BUFSIZE ? ret : BUFSIZE - 1);
-			if (end < 1)
-				end = 1;
-			ctx->buf[end] = '\0';
-			errno = EIO;
-			ckpt_abort(ctx, &ctx->buf[1]);
-		}
-		ckpt_dbg("c/r read input %d\n", h.len);
-
-		/* pass the payload through, at most one buffer at a time */
-		while (h.len) {
-			len = (h.len > BUFSIZE ? BUFSIZE : h.len);
-			ret = read(STDIN_FILENO, ctx->buf, len);
-			if (ret == 0)
-				ckpt_abort(ctx, "short record");
-			if (ret < 0)
-				ckpt_abort(ctx, "read input");
-
-			h.len -= ret;
-			ret = ckpt_write(STDOUT_FILENO, ctx->buf, ret);
-			ckpt_dbg("write len %d (%d)\n", len, ret);
-			if (ret < 0)
-				ckpt_abort(ctx, "write output");
-		}
-	}
-}
-
-/*
- * feeder thread: delegates checkpoint image stream to the kernel.
- * Waits for the pid reports of the new tasks (ckpt_adjust_pids), then
- * writes the previously read headers, container section, tree and vpids
- * to stdout, and finally copies the rest of the image from stdin to
- * stdout. Any failure ends in ckpt_abort(). 'data' is the struct
- * ckpt_ctx of the restart.
- */
-static int ckpt_do_feeder(void *data)
-{
-	struct ckpt_ctx *ctx = (struct ckpt_ctx *) data;
-
-	/* children pipe */
-	close(ctx->pipe_child[1]);
-	ctx->pipe_in = ctx->pipe_child[0];
-	/* feeder pipe */
-	close(ctx->pipe_feed[0]);
-	/* make the write end of the feeder pipe our stdout */
-	if (ctx->pipe_feed[1] != STDOUT_FILENO) {
-		dup2(ctx->pipe_feed[1], STDOUT_FILENO);
-		close(ctx->pipe_feed[1]);
-	}
-
-	if (ckpt_adjust_pids(ctx) < 0)
-		ckpt_abort(ctx, "collect pids");
-
-	if (ckpt_write_header(ctx) < 0)
-		ckpt_abort(ctx, "write c/r header");
-
-	if (ckpt_write_header_arch(ctx) < 0)
-		ckpt_abort(ctx, "write c/r header arch");
-
-	if (ckpt_write_container(ctx) < 0)
-		ckpt_abort(ctx, "write container section");
-
-	if (ckpt_write_tree(ctx) < 0)
-		ckpt_abort(ctx, "write c/r tree");
-
-	if (ckpt_write_vpids(ctx) < 0)
-		ckpt_abort(ctx, "write vpids");
-
-	/* read rest -> write rest */
-	if (ctx->args->inspect)
-		ckpt_read_write_inspect(ctx);
-	else
-		ckpt_read_write_blind(ctx);
-
-	/* All is well: feeder thread is done.  However, we must
-	 * invoke the exit system call directly. Otherwise, upon
-	 * return from this function, glibc's clone wrapper will call
-	 * _exit, which calls exit_group, which will terminate the
-	 * whole process, which is not what we want.
-	 */
-	syscall(SYS_exit, 0);
-
-	/* not reached */
-	return 0;
-}
-
-/*
- * ckpt_adjust_pids: transform the pids array (struct ckpt_pids) by
- * substituting actual pid values for original pid values.
- *
- * Collect pids reported by the newly created tasks; each task sends
- * a 'struct pid_swap' indicating old- and new-pid. Then modify the
- * pids array accordingly. One report is read per entry in the task
- * table even with '--pids', where the values are not used: reading
- * them all is what makes the feeder wait for the whole hierarchy.
- *
- * Returns 0. A failed, truncated or missing report aborts the restart
- * through ckpt_abort().
- */
-static int ckpt_adjust_pids(struct ckpt_ctx *ctx)
-{
-	struct pid_swap swap;
-	int n, m, len, ret;
-
-	/*
-	 * Make a copy of the original array to fix a nifty bug where
-	 * two tasks in original image with pids A, B, restart with
-	 * pids swapped (B and A), for instance -
-	 *    original pid array:    [][][A][][B][]...
-	 *    first, swap B with A:  [][][A][][A][]...
-	 *    then, swap A with B:   [][][B][][B][]...
-	 *    but correct should be: [][][B][][A][]...
-	 */
-
-	len = sizeof(struct ckpt_pids) * ctx->pids_nr;
-
-#ifdef CHECKPOINT_DEBUG
-	ckpt_dbg("====== PIDS ARRAY\n");
-	for (m = 0; m < ctx->pids_nr; m++) {
-		struct ckpt_pids *p;
-		p = &ctx->pids_arr[m];
-		ckpt_dbg("[%d] pid %d ppid %d sid %d pgid %d\n",
-			 m, p->vpid, p->vppid, p->vsid, p->vpgid);
-	}
-	ckpt_dbg("............\n");
-#endif
-
-	memcpy(ctx->copy_arr, ctx->pids_arr, len);
-
-	/* read in 'pid_swap' data and adjust ctx->pids_arr */
-	for (n = 0; n < ctx->tasks_nr; n++) {
-		/* get pid info from next task */
-		ret = read(ctx->pipe_in, &swap, sizeof(swap));
-		if (ret < 0)
-			ckpt_abort(ctx, "read pipe");
-		if (ret != sizeof(swap)) {
-			/* EOF or a truncated report: 'swap' is not valid */
-			errno = EIO;
-			ckpt_abort(ctx, "read pipe");
-		}
-
-		/* swapping isn't needed with '--pids' */
-		if (ctx->args->pids)
-			continue;
-
-		/* match against the original array, update only the copy */
-		ckpt_dbg("c/r swap old %d new %d\n", swap.old, swap.new);
-		for (m = 0; m < ctx->pids_nr; m++) {
-			if (ctx->pids_arr[m].vpid == swap.old)
-				ctx->copy_arr[m].vpid = swap.new;
-			if (ctx->pids_arr[m].vtgid == swap.old)
-				ctx->copy_arr[m].vtgid = swap.new;
-			if (ctx->pids_arr[m].vsid == swap.old)
-				ctx->copy_arr[m].vsid = swap.new;
-			if (ctx->pids_arr[m].vpgid == swap.old)
-				ctx->copy_arr[m].vpgid = swap.new;
-		}
-	}
-
-	memcpy(ctx->pids_arr, ctx->copy_arr, len);
-
-#ifdef CHECKPOINT_DEBUG
-	if (!ctx->args->pids) {
-		ckpt_dbg("====== PIDS ARRAY (swaped)\n");
-		for (m = 0; m < ctx->pids_nr; m++) {
-			struct ckpt_pids *p;
-			p = &ctx->pids_arr[m];
-			ckpt_dbg("[%d] pid %d ppid %d sid %d pgid %d\n",
-				 m, p->vpid, p->vppid, p->vsid, p->vpgid);
-		}
-		ckpt_dbg("............\n");
-	}
-#endif
-
-	close(ctx->pipe_in);
-	return 0;
-}
-
-/*
- * low-level write
- *   ckpt_write - write 'count' bytes from 'buf'
- *   ckpt_write_obj - write object
- *   ckpt_write_obj_ptr - write a header followed by a buffer
- */
-/*
- * ckpt_write: write exactly 'count' bytes from 'buf' to 'fd'.
- *
- * Short writes are continued until everything is written; EAGAIN and
- * EINTR are retried without touching the progress accounting.
- *
- * Returns 0 on success, -1 on any other write error (errno is set by
- * write()).
- */
-static int ckpt_write(int fd, void *buf, int count)
-{
-	const char *ptr = buf;
-	ssize_t nwrite;
-	int nleft = count;
-
-	while (nleft > 0) {
-		nwrite = write(fd, ptr, nleft);
-		if (nwrite < 0) {
-			/* transient: retry with the same pointer and length */
-			if (errno == EAGAIN || errno == EINTR)
-				continue;
-			return -1;
-		}
-		ptr += nwrite;
-		nleft -= nwrite;
-	}
-	return 0;
-}
-
-/*
- * ckpt_write_obj: write the object starting at header 'h' to stdout;
- * h->len is used as the total number of bytes to write.
- * Returns the result of ckpt_write(). 'ctx' is not used.
- */
-int ckpt_write_obj(struct ckpt_ctx *ctx, struct ckpt_hdr *h)
-{
-	return ckpt_write(STDOUT_FILENO, h, h->len);
-}
-
-/*
- * ckpt_write_obj_ptr: emit to stdout a header of the given 'type'
- * followed by the 'n' payload bytes at 'buf'. The length stored in the
- * header covers header plus payload.
- * Returns the result of the first ckpt_write() that fails, else that of
- * the payload write.
- */
-int ckpt_write_obj_ptr(struct ckpt_ctx *ctx, void *buf, int n, int type)
-{
-	struct ckpt_hdr hdr;
-	int err;
-
-	hdr.type = type;
-	hdr.len = n + sizeof(hdr);
-
-	err = ckpt_write(STDOUT_FILENO, &hdr, sizeof(hdr));
-	if (err)
-		return err;
-
-	return ckpt_write(STDOUT_FILENO, buf, n);
-}
-
-/*
- * low-level read
- *   _ckpt_read - read 'count' bytes to 'buf', or EOF
- *   ckpt_read - read 'count' bytes to 'buf' (EOF disallowed)
- *   ckpt_read_obj - read up to 'n' bytes of object into 'buf'
- *   ckpt_read_obj_type - read up to 'n' bytes of object type 'type' into 'buf'
- *   ckpt_read_obj_ptr - like ckpt_read_obj_type, but discards header
- */
-/*
- * _ckpt_read: read exactly 'count' bytes from 'fd' into 'buf'.
- *
- * Short reads are continued; EAGAIN and EINTR are retried without
- * touching the progress accounting.
- *
- * Returns 'count' on success, 0 if end of file is hit before any byte
- * was read, and -1 on a read error or if end of file is hit in the
- * middle of the requested data.
- */
-static int _ckpt_read(int fd, void *buf, int count)
-{
-	char *ptr = buf;
-	ssize_t nread;
-	int nleft = count;
-
-	while (nleft > 0) {
-		nread = read(fd, ptr, nleft);
-		if (nread < 0) {
-			/* transient: retry with the same pointer and length */
-			if (errno == EAGAIN || errno == EINTR)
-				continue;
-			return -1;
-		}
-		if (nread == 0) {
-			/* EOF is only acceptable before the first byte */
-			return (nleft == count) ? 0 : -1;
-		}
-		ptr += nread;
-		nleft -= nread;
-	}
-	return count;
-}
-
-/*
- * ckpt_read: like _ckpt_read(), but end of file is an error (EINVAL)
- * unless 'count' is zero. Returns 0 on success, negative on failure.
- */
-static int ckpt_read(int fd, void *buf, int count)
-{
-	int nread = _ckpt_read(fd, buf, count);
-
-	if (nread < 0)
-		return nread;
-
-	if (nread == 0 && count != 0) {
-		errno = EINVAL;
-		return -1;
-	}
-
-	return 0;
-}
-
-/*
- * ckpt_read_obj: read one object from stdin - its header into 'h' and
- * the payload, if any, into 'buf'. The total length announced by the
- * header must be at least a header and at most 'n', else EINVAL.
- * Returns 0 on success, negative on failure.
- */
-static int ckpt_read_obj(struct ckpt_ctx *ctx,
-			 struct ckpt_hdr *h, void *buf, int n)
-{
-	int err = ckpt_read(STDIN_FILENO, h, sizeof(*h));
-
-	if (err < 0)
-		return err;
-
-	if (!(h->len >= sizeof(*h) && h->len <= n)) {
-		errno = EINVAL;
-		return -1;
-	}
-
-	/* anything beyond the header is payload */
-	if (h->len != sizeof(*h))
-		return ckpt_read(STDIN_FILENO, buf, h->len - sizeof(*h));
-
-	return 0;
-}
-
-/*
- * ckpt_read_obj_type: read one object of at most 'n' bytes into 'buf',
- * header first and payload right behind it, and insist that the header
- * carries 'type' (EINVAL otherwise).
- * Returns 0 on success, negative on failure.
- */
-static int ckpt_read_obj_type(struct ckpt_ctx *ctx, void *buf, int n, int type)
-{
-	struct ckpt_hdr *hdr = buf;
-	int err;
-
-	err = ckpt_read_obj(ctx, hdr, (void *) (hdr + 1), n);
-	if (err < 0)
-		return err;
-
-	if (hdr->type == type)
-		return 0;
-
-	errno = EINVAL;
-	return -1;
-}
-
-/*
- * ckpt_read_obj_ptr: read one object whose payload is at most 'n'
- * bytes; only the payload is stored in 'buf', the header is checked
- * against 'type' (EINVAL on mismatch) and then dropped.
- * Returns 0 on success, negative on failure.
- */
-static int ckpt_read_obj_ptr(struct ckpt_ctx *ctx, void *buf, int n, int type)
-{
-	struct ckpt_hdr hdr;
-	int err;
-
-	err = ckpt_read_obj(ctx, &hdr, buf, n + sizeof(hdr));
-	if (err < 0)
-		return err;
-
-	if (hdr.type == type)
-		return 0;
-
-	errno = EINVAL;
-	return -1;
-}
-
-/*
- * ckpt_read_obj_buffer: read an object of type CKPT_HDR_BUFFER, header
- * included, into 'buf'.
- * NOTE(review): 'n' is not used - the size limit handed down is always
- * BUFSIZE. Confirm every caller's 'buf' really has that much room.
- */
-static int ckpt_read_obj_buffer(struct ckpt_ctx *ctx, void *buf, int n)
-{
-	return ckpt_read_obj_type(ctx, buf, BUFSIZE, CKPT_HDR_BUFFER);
-}
-
-/*
- * read/write the checkpoint image: similar to in-kernel code
- */
-
-/*
- * ckpt_read_header: read the image header object into ctx->header,
- * followed by three buffer objects (uts release, version, machine)
- * stored back to back right behind it.
- * Returns 0 on success, negative on failure (EINVAL if one of the
- * announced uts lengths exceeds BUFSIZE / 4).
- */
-static int ckpt_read_header(struct ckpt_ctx *ctx)
-{
-	struct ckpt_hdr_header *h;
-	char *ptr;
-	int ret;
-
-	h = (struct ckpt_hdr_header *) ctx->header;
-	ret = ckpt_read_obj_type(ctx, h, sizeof(*h), CKPT_HDR_HEADER);
-	if (ret < 0)
-		return ret;
-
-	if (h->constants.uts_release_len > BUFSIZE / 4 ||
-	    h->constants.uts_version_len > BUFSIZE / 4 ||
-	    h->constants.uts_machine_len > BUFSIZE / 4) {
-		errno = EINVAL;
-		return -1;
-	}
-
-	ptr = (char *) h;
-
-	/* each object is placed 'len' bytes after the start of the previous */
-	ptr += ((struct ckpt_hdr *) ptr)->len;
-	ret = ckpt_read_obj_buffer(ctx, ptr, h->constants.uts_release_len);
-	if (ret < 0)
-		return ret;
-	ptr += ((struct ckpt_hdr *) ptr)->len;
-	ret = ckpt_read_obj_buffer(ctx, ptr, h->constants.uts_version_len);
-	if (ret < 0)
-		return ret;
-	ptr += ((struct ckpt_hdr *) ptr)->len;
-	ret = ckpt_read_obj_buffer(ctx, ptr, h->constants.uts_machine_len);
-	if (ret < 0)
-		return ret;
-
-	/* FIXME: skip version validation for now */
-
-	return 0;
-}
-
-/*
- * ckpt_read_header_arch: read the CKPT_HDR_HEADER_ARCH object into
- * ctx->header_arch. Returns 0 on success, negative on failure.
- */
-static int ckpt_read_header_arch(struct ckpt_ctx *ctx)
-{
-	struct ckpt_hdr_header_arch *arch =
-		(struct ckpt_hdr_header_arch *) ctx->header_arch;
-	int err;
-
-	err = ckpt_read_obj_type(ctx, arch, sizeof(*arch), CKPT_HDR_HEADER_ARCH);
-	return (err < 0) ? err : 0;
-}
-
-/*
- * ckpt_read_container: read the container section into ctx->container:
- * the CKPT_HDR_CONTAINER object, then a buffer object, then a
- * CKPT_HDR_LSM_INFO object, each stored right behind the previous one.
- * Returns 0 on success, negative on failure.
- */
-static int ckpt_read_container(struct ckpt_ctx *ctx)
-{
-	struct ckpt_hdr_container *cont =
-		(struct ckpt_hdr_container *) ctx->container;
-	char *cursor = (char *) cont;
-	int err;
-
-	err = ckpt_read_obj_type(ctx, cont, sizeof(*cont), CKPT_HDR_CONTAINER);
-	if (err < 0)
-		return err;
-
-	/* step over the object just read */
-	cursor += ((struct ckpt_hdr *) cursor)->len;
-	err = ckpt_read_obj_buffer(ctx, cursor, CHECKPOINT_LSM_NAME_MAX + 1);
-	if (err < 0)
-		return err;
-
-	cursor += ((struct ckpt_hdr *) cursor)->len;
-	return ckpt_read_obj_type(ctx, cursor, 200, CKPT_HDR_LSM_INFO);
-}
-
-/*
- * ckpt_read_tree: read the tree header into ctx->tree and the array of
- * pids that follows it into a newly allocated ctx->pids_arr. A second
- * array of the same size, ctx->copy_arr, is allocated as scratch space.
- *
- * Returns 0 on success and a negative value on failure. On failure
- * both arrays are released and the pointers reset to NULL; errno
- * describes the error.
- */
-static int ckpt_read_tree(struct ckpt_ctx *ctx)
-{
-	const int int_max = (int) (~0U >> 1);
-	struct ckpt_hdr_tree *h;
-	int len, ret, saved_errno;
-
-	h = (struct ckpt_hdr_tree *) ctx->tree;
-	ret = ckpt_read_obj_type(ctx, h, sizeof(*h), CKPT_HDR_TREE);
-	if (ret < 0)
-		return ret;
-
-	ckpt_dbg("number of tasks: %d\n", h->nr_tasks);
-
-	if (h->nr_tasks <= 0) {
-		ckpt_err("invalid number of tasks %d", h->nr_tasks);
-		errno = EINVAL;
-		return -1;
-	}
-
-	/* get a working a copy of header */
-	memcpy(ctx->buf, ctx->tree, BUFSIZE);
-
-	ctx->pids_nr = h->nr_tasks;
-
-	/* the array plus its object header must fit in an int length */
-	if (ctx->pids_nr > (int) ((int_max - sizeof(struct ckpt_hdr)) /
-				  sizeof(struct ckpt_pids))) {
-		ckpt_err("invalid number of tasks %d", h->nr_tasks);
-		errno = EINVAL;
-		return -1;
-	}
-
-	len = sizeof(struct ckpt_pids) * ctx->pids_nr;
-
-	ctx->pids_arr = malloc(len);
-	ctx->copy_arr = malloc(len);
-	if (!ctx->pids_arr || !ctx->copy_arr) {
-		errno = ENOMEM;
-		ret = -1;
-		goto err_free;
-	}
-
-	ret = ckpt_read_obj_ptr(ctx, ctx->pids_arr, len, CKPT_HDR_BUFFER);
-	if (ret < 0)
-		goto err_free;
-
-	return ret;
-
-err_free:
-	/* drop both arrays and do not leave stale pointers behind */
-	saved_errno = errno;
-	free(ctx->pids_arr);
-	free(ctx->copy_arr);
-	ctx->pids_arr = NULL;
-	ctx->copy_arr = NULL;
-	errno = saved_errno;
-	return ret;
-}
-
-/* set the vpids pointers in all the tasks */
-static int assign_vpids(struct ckpt_ctx *ctx)
-{
-	int d, hidx, tidx;
-	struct task *t;
-
-	for (hidx = 0, tidx = 0; tidx < ctx->pids_nr; tidx++) {
-		t = &ctx->tasks_arr[tidx];
-		d = t->piddepth = ctx->pids_arr[tidx].depth;
-		if (!d) {
-			ckpt_dbg("task[%d].vidx = -1\n", tidx);
-			t->vidx = -1;
-			continue;
-		}
-		t->vidx = hidx;
-		ckpt_dbg("task[%d].vidx = %d (depth %d, rpid %d)\n",
-			tidx, hidx, t->piddepth, ctx->pids_arr[tidx].vpid);
-		int i;
-		for (i=0; i<t->piddepth; i++)
-			ckpt_dbg("task[%d].vpid[%d] = %d\n", tidx, i,
-				ctx->vpids_arr[hidx+i]);
-		hidx += d;
-		if (hidx > ctx->vpids_nr) {
-			ckpt_err("Error parsing vpids array");
-			return -1;
-		}
-	}
-
-	return 0;
-}
-
-static int ckpt_read_vpids(struct ckpt_ctx *ctx)
-{
-	int i, len, ret;
-
-	for (i = 0; i < ctx->pids_nr; i++)
-		ctx->vpids_nr += ctx->pids_arr[i].depth;
-
-	ckpt_dbg("number of vpids: %d\n", ctx->vpids_nr);
-
-	if (ctx->vpids_nr < 0) {
-		ckpt_err("Invalid number of vpids %d", ctx->vpids_nr);
-		errno = -EINVAL;
-		return -1;
-	}
-
-	if (!ctx->vpids_nr)
-		return 0;
-
-	len = sizeof(__s32) * ctx->vpids_nr;
-	if (len < 0) {
-		ckpt_err("Length of vpids array overflowed");
-		errno = -EINVAL;
-		return -1;
-	}
-
-	ctx->vpids_arr = malloc(len);
-	if (!ctx->pids_arr)
-		return -1;
-
-	ret = ckpt_read_obj_ptr(ctx, ctx->vpids_arr, len, CKPT_HDR_BUFFER);
-	if (ret < 0)
-		free(ctx->vpids_arr);
-
-	return ret;
-}
-
-static int ckpt_write_header(struct ckpt_ctx *ctx)
-{
-	char *ptr;
-	int ret;
-
-	ptr = (char *) ctx->header;
-	ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) ptr);
-	if (ret < 0)
-		return ret;
-
-	ptr += ((struct ckpt_hdr *) ptr)->len;
-	ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) ptr);
-	if (ret < 0)
-		return ret;
-	ptr += ((struct ckpt_hdr *) ptr)->len;
-	ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) ptr);
-	if (ret < 0)
-		return ret;
-	ptr += ((struct ckpt_hdr *) ptr)->len;
-	ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) ptr);
-
-	return ret;
-}
-
-static int ckpt_write_header_arch(struct ckpt_ctx *ctx)
-{
-	struct ckpt_hdr_header_arch *h;
-
-	h = (struct ckpt_hdr_header_arch *) ctx->header_arch;
-	return ckpt_write_obj(ctx, (struct ckpt_hdr *) h);
-}
-
-static int ckpt_write_container(struct ckpt_ctx *ctx)
-{
-	char *ptr;
-	int ret;
-
-	ptr = (char *) ctx->container;
-	/* write the container info section */
-	ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) ptr);
-	if (ret < 0)
-		return ret;
-
-	/* write the lsm name buffer */
-	ptr += ((struct ckpt_hdr *) ptr)->len;
-	ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) ptr);
-	if (ret < 0)
-		return ret;
-
-	/* write the lsm policy section */
-	ptr += ((struct ckpt_hdr *) ptr)->len;
-	return ckpt_write_obj(ctx, (struct ckpt_hdr *) ptr);
-}
-
-static int ckpt_write_tree(struct ckpt_ctx *ctx)
-{
-	struct ckpt_hdr_tree *h;
-	int len;
-
-	h = (struct ckpt_hdr_tree *) ctx->tree;
-	if (ckpt_write_obj(ctx, (struct ckpt_hdr *) h) < 0)
-		ckpt_abort(ctx, "write tree");
-
-	len = sizeof(struct ckpt_pids) * ctx->pids_nr;
-	if (ckpt_write_obj_ptr(ctx, ctx->pids_arr, len, CKPT_HDR_BUFFER) < 0)
-		ckpt_abort(ctx, "write pids");
-
-	return 0;
-}
-
-static int ckpt_write_vpids(struct ckpt_ctx *ctx)
-{
-	int len;
-
-	if (!ctx->vpids_nr)
-		return 0;
-	len = sizeof(__s32) * ctx->vpids_nr;
-	if (ckpt_write_obj_ptr(ctx, ctx->vpids_arr, len, CKPT_HDR_BUFFER) < 0)
-		ckpt_abort(ctx, "write vpids");
-	ckpt_dbg("wrote %d bytes for %d vpids\n", len, ctx->vpids_nr);
-
-	return 0;
-}
-
-/*
- * a simple hash implementation
- */
-
-#define HASH_BITS	11
-#define HASH_BUCKETS	(2 << (HASH_BITS - 1))
-
-static int hash_init(struct ckpt_ctx *ctx)
-{
-	struct hashent **hash;
-
-	ctx->hash_arr = malloc(sizeof(*hash) * HASH_BUCKETS);
-	if (!ctx->hash_arr) {
-		ckpt_perror("malloc hash table");
-		return -1;
-	}
-	memset(ctx->hash_arr, 0, sizeof(*hash) * HASH_BUCKETS);
-	return 0;
-}
-
-static void hash_exit(struct ckpt_ctx *ctx)
-{
-	struct hashent *hash, *next;
-	int i;
-
-	for (i = 0; i < HASH_BUCKETS; i++) {
-		for (hash = ctx->hash_arr[i]; hash; hash = next) {
-			next = hash->next;
-			free(hash);
-		}
-	}
-
-	free(ctx->hash_arr);
-}
-
-/* see linux kernel's include/linux/hash.h */
-
-/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
-#define GOLDEN_RATIO_PRIME_32 0x9e370001UL
-
-static inline int hash_func(long key)
-{
-	unsigned long hash = key * GOLDEN_RATIO_PRIME_32;
-	return (hash >> (sizeof(key)*8 - HASH_BITS));
-}
-
-static int hash_insert(struct ckpt_ctx *ctx, long key, void *data)
-{
-	struct hashent *hash;
-	int bucket;
-
-	hash = malloc(sizeof(*hash));
-	if (!hash) {
-		ckpt_perror("malloc hash");
-		return -1;
-	}
-	hash->key = key;
-	hash->data = data;
-
-	bucket = hash_func(key);
-	hash->next = ctx->hash_arr[bucket];
-	ctx->hash_arr[bucket] = hash;
-
-	return 0;
-}
-
-static void *hash_lookup(struct ckpt_ctx *ctx, long key)
-{
-	struct hashent *hash;
-	int bucket;
-
-	bucket = hash_func(key);
-	for (hash = ctx->hash_arr[bucket]; hash; hash = hash->next) {
-		if (hash->key == key)
-			return hash->data;
-	}
-	return NULL;
-}
-
-int cr_checkpoint(int pid, struct cr_checkpoint_args *args)
-{
-	int ret;
-	unsigned long flags;
-
-	global_uerrfd = args->uerrfd;
-
-	/* output file descriptor (default: stdout) */
-	if (args->outfd < 0)
-		args->outfd = STDOUT_FILENO;
-
-	/* log file descriptor (default: none) */
-	if (args->logfd < 0)
-		args->logfd = CHECKPOINT_FD_NONE;
-
-	if (!args->container)
-		flags |= CHECKPOINT_SUBTREE;
-
-	ret = checkpoint(pid, args->outfd, flags, args->logfd);
-
-	if (ret < 0) {
-		ckpt_perror("checkpoint");
-		ckpt_err("(you may use 'ckptinfo -e' for more info)\n"); 
-	} else if (args->verbose) {
-		ckpt_err("checkpoint id %d\n", ret);
-	}
-
-	return ret;
-}
-- 
1.6.6.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [RFC][PATCH 6/6][usercr] Rename common.h to cr_log.h
       [not found] ` <20100414004927.GA13012-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
                     ` (4 preceding siblings ...)
  2010-04-14  0:53   ` [RFC][PATCH 5/6][usercr] Rename restart.c to cr_checkpoint.c Sukadev Bhattiprolu
@ 2010-04-14  0:53   ` Sukadev Bhattiprolu
  2010-04-20 18:07   ` [RFC][PATCH 0/6][usercr]: Rename/reorg usercr code Sukadev Bhattiprolu
  6 siblings, 0 replies; 12+ messages in thread
From: Sukadev Bhattiprolu @ 2010-04-14  0:53 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Containers


From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
Date: Tue, 13 Apr 2010 17:12:04 -0700
Subject: [RFC][PATCH 6/6][usercr] Rename common.h to cr_log.h

common.h used to be shared between checkpoint.c and restart.c.  Since all
the code is now in one file (cr_checkpoint.c), the name 'common.h' does not
make much sense.

The definitions in the file relate to logging, so rename to cr_log.h.

Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 Makefile          |    2 +-
 checkpoint-main.c |    2 +-
 common.h          |  112 -----------------------------------------------------
 cr_checkpoint.c   |    2 +-
 cr_log.h          |  110 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 restart-main.c    |    2 +-
 6 files changed, 114 insertions(+), 116 deletions(-)
 delete mode 100644 common.h
 create mode 100644 cr_log.h

diff --git a/Makefile b/Makefile
index 205320a..f2c6b18 100644
--- a/Makefile
+++ b/Makefile
@@ -52,7 +52,7 @@ $(LIB_ECLONE):
 # restart needs to be thread-safe
 restart: CFLAGS += -D__REENTRANT -pthread
 
-$(CR_OBJS): common.h cr_checkpoint.h
+$(CR_OBJS): cr_log.h cr_checkpoint.h
 
 restart: cr_checkpoint.o restart-main.o
 	$(CC) -o $@ $^
diff --git a/checkpoint-main.c b/checkpoint-main.c
index b8f96a7..34eb827 100644
--- a/checkpoint-main.c
+++ b/checkpoint-main.c
@@ -10,7 +10,7 @@
 #include <linux/checkpoint.h>
 
 #include "cr_checkpoint.h"
-#include "common.h"
+#include "cr_log.h"
 
 static int global_uerrfd = -1;
 
diff --git a/common.h b/common.h
deleted file mode 100644
index 99b224d..0000000
--- a/common.h
+++ /dev/null
@@ -1,112 +0,0 @@
-#include <stdio.h>
-#include <signal.h>
-
-#define BUFSIZE  (4 * 4096)
-
-static inline void ckpt_msg(int fd, char *format, ...)
-{
-	va_list ap;
-	char *bufp;
-	if (fd < 0)
-		return;
-
-	va_start(ap, format);
-
-	bufp = malloc(BUFSIZE);
-	if(bufp) {
-		vsnprintf(bufp, BUFSIZE, format, ap);
-		write(fd, bufp, strlen(bufp));
-	}
-	free(bufp);
-
-	va_end(ap);
-}
-
-#define ckpt_perror(s) 							\
-	do {								\
-		ckpt_msg(global_uerrfd, s);				\
-		ckpt_msg(global_uerrfd, ": %s\n", strerror(errno));	\
-	} while (0)
-
-#ifdef CHECKPOINT_DEBUG
-#define ckpt_dbg(_format, _args...)					\
-	do {								\
-		if (global_debug)					\
-			ckpt_msg(global_uerrfd, "<%d>" _format, 	\
-					_gettid(), ##_args); 		\
-	} while (0)
-#define ckpt_dbg_cont(_format, _args...)				\
-	do {								\
-		if (global_debug)					\
-			ckpt_msg(global_uerrfd, _format, ##_args);	\
-	} while (0)
-#else
-#define ckpt_dbg(_format, _args...)  \
-	do { } while (0)
-#define ckpt_dbg_cont(_format, _args...)  \
-	do { } while (0)
-#endif
-
-#define ckpt_err(...)  \
-	ckpt_msg(global_uerrfd, __VA_ARGS__)
-
-#define ckpt_verbose(...)					\
-	do {							\
-		if (global_verbose)				\
-			ckpt_msg(global_ulogfd, __VA_ARGS__);	\
-	} while(0)
-
-struct signal_array {
-	int signum;
-	char *sigstr;
-};
-
-#define SIGNAL_ENTRY(signal)  { SIG ## signal, #signal }
-
-#define INIT_SIGNAL_ARRAY { 	\
-	{ 0, "NONE" },		\
-	SIGNAL_ENTRY(ALRM),	\
-	SIGNAL_ENTRY(HUP),	\
-	SIGNAL_ENTRY(INT),	\
-	SIGNAL_ENTRY(KILL),	\
-	SIGNAL_ENTRY(PIPE),	\
-	SIGNAL_ENTRY(POLL),	\
-	SIGNAL_ENTRY(PROF),	\
-	SIGNAL_ENTRY(TERM),	\
-	SIGNAL_ENTRY(USR1),	\
-	SIGNAL_ENTRY(USR2),	\
-	SIGNAL_ENTRY(VTALRM),	\
-	SIGNAL_ENTRY(STKFLT),	\
-	SIGNAL_ENTRY(PWR),	\
-	SIGNAL_ENTRY(WINCH),	\
-	SIGNAL_ENTRY(CHLD),	\
-	SIGNAL_ENTRY(URG),	\
-	SIGNAL_ENTRY(TTIN),	\
-	SIGNAL_ENTRY(TTOU),	\
-	SIGNAL_ENTRY(STOP),	\
-	SIGNAL_ENTRY(CONT),	\
-	SIGNAL_ENTRY(ABRT),	\
-	SIGNAL_ENTRY(FPE),	\
-	SIGNAL_ENTRY(ILL),	\
-	SIGNAL_ENTRY(QUIT),	\
-	SIGNAL_ENTRY(SEGV),	\
-	SIGNAL_ENTRY(TRAP),	\
-	SIGNAL_ENTRY(SYS),	\
-	SIGNAL_ENTRY(BUS),	\
-	SIGNAL_ENTRY(XCPU),	\
-	SIGNAL_ENTRY(XFSZ),	\
-	{ -1, "LAST" },		\
-}
-
-#define CKPT_COND_PIDZERO  0x1
-#define CKPT_COND_MNTPROC  0x2
-#define CKPT_COND_MNTPTY   0x4
-
-#define CKPT_COND_NONE     0
-#define CKPT_COND_ANY      ULONG_MAX
-
-/* default for skip/warn/fail */
-#define CKPT_COND_WARN     (CKPT_COND_MNTPROC | \
-			    CKPT_COND_MNTPTY)
-#define CKPT_COND_FAIL     (CKPT_COND_NONE)
-
diff --git a/cr_checkpoint.c b/cr_checkpoint.c
index e01c08e..272c54b 100644
--- a/cr_checkpoint.c
+++ b/cr_checkpoint.c
@@ -41,7 +41,7 @@
 #include "genstack.h"
 #include "compat.h"
 #include "cr_checkpoint.h"
-#include "common.h"
+#include "cr_log.h"
 
 /*
  * By default, 'restart' creates a new pid namespace in which the
diff --git a/cr_log.h b/cr_log.h
new file mode 100644
index 0000000..c971b74
--- /dev/null
+++ b/cr_log.h
@@ -0,0 +1,110 @@
+#include <stdio.h>
+#include <signal.h>
+
+#define BUFSIZE  (4 * 4096)
+
+static inline void ckpt_msg(int fd, char *format, ...)
+{
+	va_list ap;
+	char *bufp;
+	if (fd < 0)
+		return;
+
+	va_start(ap, format);
+
+	bufp = malloc(BUFSIZE);
+	if(bufp) {
+		vsnprintf(bufp, BUFSIZE, format, ap);
+		write(fd, bufp, strlen(bufp));
+	}
+	free(bufp);
+
+	va_end(ap);
+}
+
+#define ckpt_perror(s) 							\
+	do {								\
+		ckpt_msg(global_uerrfd, s);				\
+		ckpt_msg(global_uerrfd, ": %s\n", strerror(errno));	\
+	} while (0)
+
+#ifdef CHECKPOINT_DEBUG
+#define ckpt_dbg(_format, _args...)					\
+	do {								\
+		if (global_debug)					\
+			ckpt_msg(global_uerrfd, "<%d>" _format, 	\
+					_gettid(), ##_args); 		\
+	} while (0)
+#define ckpt_dbg_cont(_format, _args...)				\
+	do {								\
+		if (global_debug)					\
+			ckpt_msg(global_uerrfd, _format, ##_args);	\
+	} while (0)
+#else
+#define ckpt_dbg(_format, _args...)  \
+	do { } while (0)
+#define ckpt_dbg_cont(_format, _args...)  \
+	do { } while (0)
+#endif
+
+#define ckpt_err(...)  \
+	ckpt_msg(global_uerrfd, __VA_ARGS__)
+
+#define ckpt_verbose(...)					\
+	do {							\
+		if (global_verbose)				\
+			ckpt_msg(global_ulogfd, __VA_ARGS__);	\
+	} while(0)
+
+struct signal_array {
+	int signum;
+	char *sigstr;
+};
+
+#define SIGNAL_ENTRY(signal)  { SIG ## signal, #signal }
+
+#define INIT_SIGNAL_ARRAY { 	\
+	{ 0, "NONE" },		\
+	SIGNAL_ENTRY(ALRM),	\
+	SIGNAL_ENTRY(HUP),	\
+	SIGNAL_ENTRY(INT),	\
+	SIGNAL_ENTRY(KILL),	\
+	SIGNAL_ENTRY(PIPE),	\
+	SIGNAL_ENTRY(POLL),	\
+	SIGNAL_ENTRY(PROF),	\
+	SIGNAL_ENTRY(TERM),	\
+	SIGNAL_ENTRY(USR1),	\
+	SIGNAL_ENTRY(USR2),	\
+	SIGNAL_ENTRY(VTALRM),	\
+	SIGNAL_ENTRY(STKFLT),	\
+	SIGNAL_ENTRY(PWR),	\
+	SIGNAL_ENTRY(WINCH),	\
+	SIGNAL_ENTRY(CHLD),	\
+	SIGNAL_ENTRY(URG),	\
+	SIGNAL_ENTRY(TTIN),	\
+	SIGNAL_ENTRY(TTOU),	\
+	SIGNAL_ENTRY(STOP),	\
+	SIGNAL_ENTRY(CONT),	\
+	SIGNAL_ENTRY(ABRT),	\
+	SIGNAL_ENTRY(FPE),	\
+	SIGNAL_ENTRY(ILL),	\
+	SIGNAL_ENTRY(QUIT),	\
+	SIGNAL_ENTRY(SEGV),	\
+	SIGNAL_ENTRY(TRAP),	\
+	SIGNAL_ENTRY(SYS),	\
+	SIGNAL_ENTRY(BUS),	\
+	SIGNAL_ENTRY(XCPU),	\
+	SIGNAL_ENTRY(XFSZ),	\
+	{ -1, "LAST" },		\
+}
+
+#define CKPT_COND_PIDZERO  0x1
+#define CKPT_COND_MNTPROC  0x2
+#define CKPT_COND_MNTPTY   0x4
+
+#define CKPT_COND_NONE     0
+#define CKPT_COND_ANY      ULONG_MAX
+
+/* default for skip/warn/fail */
+#define CKPT_COND_WARN     (CKPT_COND_MNTPROC | CKPT_COND_MNTPTY)
+#define CKPT_COND_FAIL     (CKPT_COND_NONE)
diff --git a/restart-main.c b/restart-main.c
index 1f1a749..c3100ac 100644
--- a/restart-main.c
+++ b/restart-main.c
@@ -9,7 +9,7 @@
 #include <getopt.h>
 
 #include "cr_checkpoint.h"
-#include "common.h"
+#include "cr_log.h"
 
 static int global_ulogfd;
 static int global_uerrfd;
-- 
1.6.6.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH 0/6][usercr]: Rename/reorg usercr code
       [not found] ` <20100414004927.GA13012-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
                     ` (5 preceding siblings ...)
  2010-04-14  0:53   ` [RFC][PATCH 6/6][usercr] Rename common.h to cr_log.h Sukadev Bhattiprolu
@ 2010-04-20 18:07   ` Sukadev Bhattiprolu
       [not found]     ` <20100420180733.GA13874-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  6 siblings, 1 reply; 12+ messages in thread
From: Sukadev Bhattiprolu @ 2010-04-20 18:07 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Containers

Sukadev Bhattiprolu [sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org] wrote:
| 
| Change the prefix of the USERCR apis to 'cr_' and reorg the code to avoid
| duplication and reduce code size.
| 
| 	[PATCH 1/6] Change API prefix to cr_
| 	[PATCH 2/6] Remove flags parameter to cr_checkpoint()
| 	[PATCH 3/6] Minor reorg of restart.c
| 	[PATCH 4/6] Move checkpoint() into restart.c
| 	[PATCH 5/6] Rename restart.c to cr_checkpoint.c
| 	[PATCH 6/6] Rename common.h to cr_log.h
| 
| With this change, USERCR for now "exports" just the following two files:
| 
| 	cr_checkpoint.o
| 	cr_checkpoint.h
| 
| But this patchset does cause some churn, let me know if you think any of it
| is unnecessary/noise.  I have tested for now along with the patch that removes
| most exits() from cr_restart(). If the reorg makes sense, will run all tests
| once more and resubmit.

All tests in cr-tests pass with these two patchsets (on usercr ckpt-v20-dev
and linux ckpt-v21-rc2).

Pls let me know if there are comments about the patchsets.

Thanks,

Sukadev

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH 0/6][usercr]: Rename/reorg usercr code
       [not found]     ` <20100420180733.GA13874-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2010-04-25 20:24       ` Oren Laadan
       [not found]         ` <4BD4A501.6070106-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Oren Laadan @ 2010-04-25 20:24 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: Containers

Hi Suka,

Thanks for the patchset. A couple of comments:

* I prefer the header exported to users to be checkpoint.h - this
is consistent with kernel headers, and with the future name of a c/r
library if we opt for libcheckpoint.a

* I also prefer to leave checkpoint.c as a separate file, as is.
It may gain more functionality in the future. If the goal was to
only export a single .o file, then the solution IMHO is to instead
export a single library: libcheckpoint.a

Unless you have a strong opinion against the above, I'll go ahead
and pull patches 1-3, leave out 4-6, and update the Makefile to
create a libcheckpoint.a library.

Oren.


Sukadev Bhattiprolu wrote:
> Sukadev Bhattiprolu [sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org] wrote:
> | 
> | Change the prefix of the USERCR apis to 'cr_' and reorg the code to avoid
> | duplication and reduce code size.
> | 
> | 	[PATCH 1/6] Change API prefix to cr_
> | 	[PATCH 2/6] Remove flags parameter to cr_checkpoint()
> | 	[PATCH 3/6] Minor reorg of restart.c
> | 	[PATCH 4/6] Move checkpoint() into restart.c
> | 	[PATCH 5/6] Rename restart.c to cr_checkpoint.c
> | 	[PATCH 6/6] Rename common.h to cr_log.h
> | 
> | With this change, USERCR for now "exports" just the following two files:
> | 
> | 	cr_checkpoint.o
> | 	cr_checkpoint.h
> | 
> | But this patchset does cause some churn, let me know if you think any of it
> | is unnecessary/noise.  I have tested for now along with the patch that removes
> | most exits() from cr_restart(). If the reorg makes sense, will run all tests
> | once more and resubmit.
> 
> All tests in cr-tests pass with these two patchsets (on usercr ckpt-v20-dev
> and linux ckpt-v21-rc2).
> 
> Pls let me know if there are comments about the patchsets.
> 
> Thanks,
> 
> Sukadev
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH 0/6][usercr]: Rename/reorg usercr code
       [not found]         ` <4BD4A501.6070106-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
@ 2010-04-26 17:16           ` Sukadev Bhattiprolu
       [not found]             ` <20100426171603.GB27542-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2010-06-14 21:31           ` Oren Laadan
  1 sibling, 1 reply; 12+ messages in thread
From: Sukadev Bhattiprolu @ 2010-04-26 17:16 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Containers

Oren Laadan [orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org] wrote:
> Hi Suka,
>
> Thanks for the patchset. A couple of comments:
>
> * I prefer the header exported to users to be checkpoint.h - this
> is consistent with kernel headers, and with the future name of a c/r
> library if we opt for libcheckpoint.a

That is fine. Will this checkpoint.h include <linux/checkpoint.h> ?

>
> * I also prefer to leave checkpoint.c as a separate file, as is.
> It may gain more functionality in the future. If the goal was to
> only export a single .o file, then the solution IMHO is to instead
> export a single library: libcheckpoint.a

A single .o was not really a goal. Sure, if we are ready for libcheckpoint.a
then that's a better way.

>
> Unless you have a strong opinion against the above, I'll go ahead
> and pull patches 1-3, leave out 4-6, and update the Makefile to
> create a libcheckpoint.a library.

How about patch 6, renaming 'common.h' to 'cr_log.h' (file internal to
usercr) ? Let me know if you want me to resend the patches dropping 4 and 5.

Sukadev

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH 0/6][usercr]: Rename/reorg usercr code
       [not found]             ` <20100426171603.GB27542-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2010-04-26 18:46               ` Oren Laadan
  0 siblings, 0 replies; 12+ messages in thread
From: Oren Laadan @ 2010-04-26 18:46 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: Containers



Sukadev Bhattiprolu wrote:
> Oren Laadan [orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org] wrote:
>> Hi Suka,
>>
>> Thanks for the patchset. A couple of comments:
>>
>> * I prefer the header exported to users to be checkpoint.h - this
>> is consistent with kernel headers, and with the future name of a c/r
>> library if we opt for libcheckpoint.a
> 
> That is fine. Will this checkpoint.h include <linux/checkpoint.h> ?

Definitely.

> 
>> * I also prefer to leave checkpoint.c as a separate file, as is.
>> It may gain more functionality in the future. If the goal was to
>> only export a single .o file, then the solution IMHO is to instead
>> export a single library: libcheckpoint.a
> 
> A single .o was not really a goal. Sure, if we are ready for libcheckpoint.a
> then that's a better way.
> 
>> Unless you have a strong opinion against the above, I'll go ahead
>> and pull patches 1-3, leave out 4-6, and update the Makefile to
>> create a libcheckpoint.a library.
> 
> How about patch 6, renaming 'common.h' to 'cr_log.h' (file internal to
> usercr) ? Let me know if you want me to resend the patches dropping 4 and 5.

It's not strictly limited to logging affairs, and it may have more
in the future, so I'd rather keep it as is.

No need to resend. I'll just pull and edit.

Thanks!

Oren.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH 0/6][usercr]: Rename/reorg usercr code
       [not found]         ` <4BD4A501.6070106-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
  2010-04-26 17:16           ` Sukadev Bhattiprolu
@ 2010-06-14 21:31           ` Oren Laadan
  1 sibling, 0 replies; 12+ messages in thread
From: Oren Laadan @ 2010-06-14 21:31 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: Containers


Patches 1-3 applied (and will be pushed shortly).

Oren.

On 04/25/2010 04:24 PM, Oren Laadan wrote:
> Hi Suka,
> 
> Thanks for the patchset. A couple of comments:
> 
> * I prefer the header exported to users to be checkpoint.h - this
> is consistent with kernel headers, and with the future name of a c/r
> library if we opt for libcheckpoint.a
> 
> * I also prefer to leave checkpoint.c as a separate file, as is.
> It may gain more functionality in the future. If the goal was to
> only export a single .o file, then the solution IMHO is to instead
> export a single library: libcheckpoint.a
> 
> Unless you have a strong opinion against the above, I'll go ahead
> and pull patches 1-3, leave out 4-6, and update the Makefile to
> create a libcheckpoint.a library.
> 
> Oren.
> 
> 
> Sukadev Bhattiprolu wrote:
>> Sukadev Bhattiprolu [sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org] wrote:
>> | 
>> | Change the prefix of the USERCR apis to 'cr_' and reorg the code to avoid
>> | duplication and reduce code size.
>> | 
>> | 	[PATCH 1/6] Change API prefix to cr_
>> | 	[PATCH 2/6] Remove flags parameter to cr_checkpoint()
>> | 	[PATCH 3/6] Minor reorg of restart.c
>> | 	[PATCH 4/6] Move checkpoint() into restart.c
>> | 	[PATCH 5/6] Rename restart.c to cr_checkpoint.c
>> | 	[PATCH 6/6] Rename common.h to cr_log.h
>> | 
>> | With this change, USERCR for now "exports" just the following two files:
>> | 
>> | 	cr_checkpoint.o
>> | 	cr_checkpoint.h
>> | 
>> | But this patchset does cause some churn, let me know if you think any of it
>> | is unnecessary/noise.  I have tested for now along with the patch that removes
>> | most exits() from cr_restart(). If the reorg makes sense, will run all tests
>> | once more and resubmit.
>>
>> All tests in cr-tests pass with these two patchsets (on usercr ckpt-v20-dev
>> and linux ckpt-v21-rc2).
>>
>> Pls let me know if there are comments about the patchsets.
>>
>> Thanks,
>>
>> Sukadev
>>
> _______________________________________________
> Containers mailing list
> Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
> https://lists.linux-foundation.org/mailman/listinfo/containers
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2010-06-14 21:31 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-04-14  0:49 [RFC][PATCH 0/6][usercr]: Rename/reorg usercr code Sukadev Bhattiprolu
     [not found] ` <20100414004927.GA13012-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-04-14  0:52   ` [RFC][PATCH 1/6][usercr] Change API prefix to cr_ Sukadev Bhattiprolu
2010-04-14  0:52   ` [RFC][PATCH 2/6][usercr] Remove flags parameter to cr_checkpoint() Sukadev Bhattiprolu
2010-04-14  0:52   ` [RFC][PATCH 3/6][usercr] Minor reorg of restart.c Sukadev Bhattiprolu
2010-04-14  0:52   ` [RFC][PATCH 4/6][usercr] Move checkpoint() into restart.c Sukadev Bhattiprolu
2010-04-14  0:53   ` [RFC][PATCH 5/6][usercr] Rename restart.c to cr_checkpoint.c Sukadev Bhattiprolu
2010-04-14  0:53   ` [RFC][PATCH 6/6][usercr] Rename common.h to cr_log.h Sukadev Bhattiprolu
2010-04-20 18:07   ` [RFC][PATCH 0/6][usercr]: Rename/reorg usercr code Sukadev Bhattiprolu
     [not found]     ` <20100420180733.GA13874-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-04-25 20:24       ` Oren Laadan
     [not found]         ` <4BD4A501.6070106-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-04-26 17:16           ` Sukadev Bhattiprolu
     [not found]             ` <20100426171603.GB27542-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-04-26 18:46               ` Oren Laadan
2010-06-14 21:31           ` Oren Laadan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.