All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] restart: make sure all tasks are in sys_restart
@ 2009-09-29 16:53 Serge E. Hallyn
       [not found] ` <20090929165342.GA10076-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Serge E. Hallyn @ 2009-09-29 16:53 UTC (permalink / raw)
  To: Linux Containers

Make sure that all prepared tasks are in sys_restart before the
coordinator proceeds.  Otherwise, it was possible for the last
task actually in sys_restart to sync before the straggler was in
sys_restart, sending that task a signal and causing that task to
exit.  Then since there are not enough tasks completing restart,
the restart operation ends up hanging, waiting for the killed task.

Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 checkpoint/restart.c             |   53 ++++++++++++++++++++++++++++++++++++++
 checkpoint/sys.c                 |    1 +
 include/linux/checkpoint_types.h |    2 +
 3 files changed, 56 insertions(+), 0 deletions(-)

diff --git a/checkpoint/restart.c b/checkpoint/restart.c
index 543b380..849bda5 100644
--- a/checkpoint/restart.c
+++ b/checkpoint/restart.c
@@ -655,13 +655,35 @@ static struct ckpt_ctx *wait_checkpoint_ctx(void)
 static int do_ghost_task(void)
 {
 	struct ckpt_ctx *ctx;
+	int ret;
 
 	ctx = wait_checkpoint_ctx();
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
+	/*
+	 * Wait until coordinator task has completed prepare_descendants().
+	 * Note that prepare_descendants() wakes each task one a time
+	 * finishing the wait_checkpoint_ctx() above.  We may want to change
+	 * that flow so tasks only sleep once, but this works for now.
+	 */
+	ret = wait_event_interruptible(ctx->waitq,
+					atomic_read(&ctx->nr_running));
+	if (ret) {
+		ckpt_debug("wait for coordinator prep returned %d\n", ret);
+		return ret;
+	}
+	/*
+	 * Now each prepared task decrements nr_running, and, when that
+	 * hits zero, wakes the coordinator.  That way the coordinator
+	 * knows that all tasks are in sys_restart(0
+	 */
+	if (atomic_dec_and_test(&ctx->nr_running))
+		complete(&ctx->all_ready);
+
 	current->flags |= PF_RESTARTING;
 
+	/* Finally wait for all ghosts to be woken up - to die */
 	wait_event_interruptible(ctx->ghostq,
 				 all_tasks_activated(ctx) ||
 				 ckpt_test_ctx_error(ctx));
@@ -682,6 +704,26 @@ static int do_restore_task(void)
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
+	/*
+	 * Wait until coordinator task has completed prepare_descendants().
+	 * Note that prepare_descendants() wakes each task one a time
+	 * finishing the wait_checkpoint_ctx() above.  We may want to change
+	 * that flow so tasks only sleep once, but this works for now.
+	 */
+	ret = wait_event_interruptible(ctx->waitq,
+					atomic_read(&ctx->nr_running));
+	if (ret) {
+		ckpt_debug("wait for coordinator prep returned %d\n", ret);
+		return ret;
+	}
+	/*
+	 * Now each prepared task decrements nr_running, and, when that
+	 * hits zero, wakes the coordinator.  That way the coordinator
+	 * knows that all tasks are in sys_restart(0
+	 */
+	if (atomic_dec_and_test(&ctx->nr_running))
+		complete(&ctx->all_ready);
+
 	current->flags |= PF_RESTARTING;
 
 	/* wait for our turn, do the restore, and tell next task in line */
@@ -814,6 +856,7 @@ static int prepare_descendants(struct ckpt_ctx *ctx, struct task_struct *root)
 		ret = -ESRCH;
 
 	atomic_set(&ctx->nr_total, nr_pids);
+	atomic_set(&ctx->nr_running, nr_pids);
 	return ret;
 }
 
@@ -948,6 +991,16 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
 		ret = prepare_descendants(ctx, ctx->root_task);
 		if (ret < 0)
 			goto out;
+
+		/* tell tasks we're ready for them to dec  ctx->nr_running */
+		wake_up_all(&ctx->waitq);
+		/* ctx->nr_running hit 0, all our tasks are in sys_restart */
+		ret = wait_for_completion_interruptible(&ctx->all_ready);
+		if (ret) {
+			ckpt_debug("waiting on all_ready returned %d\n", ret);
+			goto out;
+		}
+
 		/* wait for all other tasks to complete do_restore_task() */
 		ret = wait_all_tasks_finish(ctx);
 		if (ret < 0)
diff --git a/checkpoint/sys.c b/checkpoint/sys.c
index 76a3fa9..d48e261 100644
--- a/checkpoint/sys.c
+++ b/checkpoint/sys.c
@@ -233,6 +233,7 @@ static struct ckpt_ctx *ckpt_ctx_alloc(int fd, unsigned long uflags,
 	ctx->uflags = uflags;
 	ctx->kflags = kflags;
 	ctx->ktime_begin = ktime_get();
+	init_completion(&ctx->all_ready);
 
 	atomic_set(&ctx->refcount, 0);
 	INIT_LIST_HEAD(&ctx->pgarr_list);
diff --git a/include/linux/checkpoint_types.h b/include/linux/checkpoint_types.h
index 9b7b4dd..2a854e0 100644
--- a/include/linux/checkpoint_types.h
+++ b/include/linux/checkpoint_types.h
@@ -69,8 +69,10 @@ struct ckpt_ctx {
 	struct ckpt_pids *pids_arr;	/* array of all pids [restart] */
 	int nr_pids;			/* size of pids array */
 	atomic_t nr_total;		/* total tasks count (with ghosts) */
+	atomic_t nr_running;		/* countdown of tasks in sys_restart */
 	int active_pid;			/* (next) position in pids array */
 	struct completion complete;	/* completion for container root */
+	struct completion all_ready;	/* all tasks are in sys_restart */
 	wait_queue_head_t waitq;	/* waitqueue for restarting tasks */
 	wait_queue_head_t ghostq;	/* waitqueue for ghost tasks */
 	struct cred *realcred, *ecred;	/* tmp storage for cred at restart */
-- 
1.6.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/3] restart debug: add final process tree status
       [not found] ` <20090929165342.GA10076-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-09-29 16:54   ` Serge E. Hallyn
       [not found]     ` <20090929165402.GA10114-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-09-29 16:54   ` [PATCH 3/3] restart debug: splatter more ckpt_debugs about Serge E. Hallyn
  2009-10-01  1:53   ` [PATCH 1/3] restart: make sure all tasks are in sys_restart Oren Laadan
  2 siblings, 1 reply; 8+ messages in thread
From: Serge E. Hallyn @ 2009-09-29 16:54 UTC (permalink / raw)
  To: Linux Containers

Have tasks in sys_restart keep some status in a list off
of checkpoint_ctx, and print this info when the checkpoint_ctx
is freed.

This is mostly an RFC - in particular the error tracking is
pretty half-hearted so far.  But the info it does spit out
helped me to figured out the coordinator syncing problem
fixed by the previous patch.

Sample dmesg output:
[4568:4568:c/r:free_per_task_status:200] 4 tasks registered, nr_tasks was 0 nr_total 0
[4568:4568:c/r:free_per_task_status:202] active pid was 1, ctx->errno 0
[4568:4568:c/r:free_per_task_status:204] kflags 6 uflags 0 oflags 1
[4568:4568:c/r:free_per_task_status:206] task 0 to run was 4568
[4568:4568:c/r:free_per_task_status:209] pid 4566
[4568:4568:c/r:free_per_task_status:211] it was coordinator
[4568:4568:c/r:free_per_task_status:219] it was running
[4568:4568:c/r:free_per_task_status:209] pid 4570
[4568:4568:c/r:free_per_task_status:213] it was a ghost
[4568:4568:c/r:free_per_task_status:209] pid 4569
[4568:4568:c/r:free_per_task_status:213] it was a ghost
[4568:4568:c/r:free_per_task_status:209] pid 4568
[4568:4568:c/r:free_per_task_status:215] it was the root task
[4568:4568:c/r:free_per_task_status:221] it was a normal task

So, when one task died before hitting sys_restart, the first line would
show '3 tasks registered'.

Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 checkpoint/restart.c             |  105 ++++++++++++++++++++++++++++++++++++++
 checkpoint/sys.c                 |   49 ++++++++++++++++++
 include/linux/checkpoint_types.h |   20 +++++++
 3 files changed, 174 insertions(+), 0 deletions(-)

diff --git a/checkpoint/restart.c b/checkpoint/restart.c
index 849bda5..1085ed5 100644
--- a/checkpoint/restart.c
+++ b/checkpoint/restart.c
@@ -26,6 +26,98 @@
 #include <linux/checkpoint.h>
 #include <linux/checkpoint_hdr.h>
 
+#ifdef CONFIG_CHECKPOINT_DEBUG
+static struct ckpt_task_status *ckpt_debug_checkin(struct ckpt_ctx *ctx)
+{
+	struct ckpt_task_status *s;
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		return NULL;
+	s->pid = current->pid;
+	s->error = 0;
+	s->flags = RESTART_DBG_WAITING;
+	if (current == ctx->root_task)
+		s->flags |= RESTART_DBG_ROOT;
+	list_add_tail(&s->list, &ctx->per_task_status);
+	return s;
+}
+
+static struct ckpt_task_status *getme(struct ckpt_ctx *ctx)
+{
+	struct ckpt_task_status *s = NULL;
+	list_for_each_entry(s, &ctx->per_task_status, list) {
+		if (s->pid == current->pid)
+			break;
+	}
+	if (!s || s->pid != current->pid)
+		return NULL;
+	return s;
+}
+
+static void ckpt_debug_coord(struct ckpt_ctx *ctx)
+{
+	struct ckpt_task_status *s;
+
+	s = ckpt_debug_checkin(ctx);
+	if (s)
+		s->flags |= RESTART_DBG_COORD;
+}
+
+static void ckpt_debug_ghost(struct ckpt_ctx *ctx)
+{
+	struct ckpt_task_status *s;
+
+	s = ckpt_debug_checkin(ctx);
+	if (s)
+		s->flags |= RESTART_DBG_GHOST;
+}
+
+static void ckpt_debug_normal(struct ckpt_ctx *ctx)
+{
+	struct ckpt_task_status *s;
+
+	s = ckpt_debug_checkin(ctx);
+	if (s)
+		s->flags |= RESTART_DBG_NORMAL;
+}
+
+static void ckpt_debug_log_error(struct ckpt_ctx *ctx, int err)
+{
+	struct ckpt_task_status *s = getme(ctx);
+	if (!s)
+		return;
+	s->error = err;
+	s->flags &= ~RESTART_DBG_WAITING;
+	s->flags &= ~RESTART_DBG_RUNNING;
+	if (err)
+		s->flags |= RESTART_DBG_FAILED;
+	else
+		s->flags |= RESTART_DBG_SUCCESS;
+}
+
+static void ckpt_debug_log_running(struct ckpt_ctx *ctx)
+{
+	struct ckpt_task_status *s = getme(ctx);
+	if (!s)
+		return;
+	s->flags &= ~RESTART_DBG_WAITING;
+	s->flags |= RESTART_DBG_RUNNING;
+}
+#else
+static inline void ckpt_debug_checkin(struct ckpt_ctx *ctx)
+{}
+static inline void ckpt_debug_coord(struct ckpt_ctx *ctx)
+{}
+static inline void ckpt_debug_ghost(struct ckpt_ctx *ctx)
+{}
+static inline void ckpt_debug_normal(struct ckpt_ctx *ctx)
+{}
+static inline void ckpt_debug_log_error(struct ckpt_ctx *ctx, int err)
+{}
+static inline void ckpt_debug_log_running(struct ckpt_ctx *ctx)
+{}
+#endif
+
 static int _ckpt_read_err(struct ckpt_ctx *ctx, struct ckpt_hdr *h)
 {
 	char *ptr;
@@ -661,6 +753,8 @@ static int do_ghost_task(void)
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
+	ckpt_debug_ghost(ctx);
+
 	/*
 	 * Wait until coordinator task has completed prepare_descendants().
 	 * Note that prepare_descendants() wakes each task one a time
@@ -681,6 +775,8 @@ static int do_ghost_task(void)
 	if (atomic_dec_and_test(&ctx->nr_running))
 		complete(&ctx->all_ready);
 
+	ckpt_debug_log_running(ctx);
+
 	current->flags |= PF_RESTARTING;
 
 	/* Finally wait for all ghosts to be woken up - to die */
@@ -688,6 +784,7 @@ static int do_ghost_task(void)
 				 all_tasks_activated(ctx) ||
 				 ckpt_test_ctx_error(ctx));
 
+	ckpt_debug_log_error(ctx, 0);
 	current->exit_signal = -1;
 	ckpt_ctx_put(ctx);
 	do_exit(0);
@@ -704,6 +801,8 @@ static int do_restore_task(void)
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
+	ckpt_debug_normal(ctx);
+
 	/*
 	 * Wait until coordinator task has completed prepare_descendants().
 	 * Note that prepare_descendants() wakes each task one a time
@@ -731,6 +830,8 @@ static int do_restore_task(void)
 	if (ret < 0)
 		goto out;
 
+	ckpt_debug_log_running(ctx);
+
 	zombie = restore_task(ctx);
 	if (zombie < 0) {
 		ret = zombie;
@@ -759,6 +860,7 @@ static int do_restore_task(void)
 	if (old_ctx)
 		ckpt_ctx_put(old_ctx);
 
+	ckpt_debug_log_error(ctx, ret);
 	/* if we're first to fail - notify others */
 	if (ret < 0 && !ckpt_test_ctx_error(ctx)) {
 		restore_notify_error(ctx, ret);
@@ -945,6 +1047,9 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
 	struct ckpt_ctx *old_ctx;
 	int ret;
 
+	ckpt_debug_coord(ctx);
+	ckpt_debug_log_running(ctx);
+
 	ret = restore_read_header(ctx);
 	if (ret < 0)
 		return ret;
diff --git a/checkpoint/sys.c b/checkpoint/sys.c
index d48e261..fb3332a 100644
--- a/checkpoint/sys.c
+++ b/checkpoint/sys.c
@@ -188,10 +188,56 @@ static void task_arr_free(struct ckpt_ctx *ctx)
 	kfree(ctx->tasks_arr);
 }
 
+#ifdef CONFIG_CHECKPOINT_DEBUG
+static void free_per_task_status(struct ckpt_ctx *ctx)
+{
+	struct ckpt_task_status *s, *p;
+	int i, count = 0;
+
+	list_for_each_entry(s, &ctx->per_task_status, list)
+		count++;
+	ckpt_debug("%d tasks registered, nr_tasks was %d nr_total %d\n",
+		count, ctx->nr_tasks, atomic_read(&ctx->nr_total));
+	ckpt_debug("active pid was %d, ctx->errno %d\n", ctx->active_pid,
+		ctx->errno);
+	ckpt_debug("kflags %lu uflags %lu oflags %lu", ctx->kflags,
+		ctx->uflags, ctx->oflags);
+	for (i = 0; i < ctx->active_pid; i++)
+		ckpt_debug("task %d to run was %d\n", i, ctx->pids_arr[i].vpid);
+
+	list_for_each_entry_safe(s, p, &ctx->per_task_status, list) {
+		ckpt_debug("pid %d\n", s->pid);
+		if (s->flags & RESTART_DBG_COORD)
+			ckpt_debug("it was coordinator\n");
+		if (s->flags & RESTART_DBG_GHOST)
+			ckpt_debug("it was a ghost\n");
+		if (s->flags & RESTART_DBG_ROOT)
+			ckpt_debug("it was the root task\n");
+		if (s->flags & RESTART_DBG_WAITING)
+			ckpt_debug("it was still waiting to run restart\n");
+		if (s->flags & RESTART_DBG_RUNNING)
+			ckpt_debug("it was running\n");
+		if (s->flags & RESTART_DBG_NORMAL)
+			ckpt_debug("it was a normal task\n");
+		if (s->flags & RESTART_DBG_FAILED)
+			ckpt_debug("it finished with error %d\n", s->error);
+		if (s->flags & RESTART_DBG_FAILED)
+			ckpt_debug("it finished successfully");
+		list_del(&s->list);
+		kfree(s);
+	}
+}
+#else
+static inline void free_per_task_status(struct ckpt_ctx *ctx)
+{ }
+#endif
+
 static void ckpt_ctx_free(struct ckpt_ctx *ctx)
 {
 	BUG_ON(atomic_read(&ctx->refcount));
 
+	free_per_task_status(ctx);
+
 	if (ctx->deferqueue)
 		deferqueue_destroy(ctx->deferqueue);
 
@@ -237,6 +283,9 @@ static struct ckpt_ctx *ckpt_ctx_alloc(int fd, unsigned long uflags,
 
 	atomic_set(&ctx->refcount, 0);
 	INIT_LIST_HEAD(&ctx->pgarr_list);
+#ifdef CONFIG_CHECKPOINT_DEBUG
+	INIT_LIST_HEAD(&ctx->per_task_status);
+#endif
 	INIT_LIST_HEAD(&ctx->pgarr_pool);
 	init_waitqueue_head(&ctx->waitq);
 	init_waitqueue_head(&ctx->ghostq);
diff --git a/include/linux/checkpoint_types.h b/include/linux/checkpoint_types.h
index 2a854e0..1cda085 100644
--- a/include/linux/checkpoint_types.h
+++ b/include/linux/checkpoint_types.h
@@ -76,10 +76,30 @@ struct ckpt_ctx {
 	wait_queue_head_t waitq;	/* waitqueue for restarting tasks */
 	wait_queue_head_t ghostq;	/* waitqueue for ghost tasks */
 	struct cred *realcred, *ecred;	/* tmp storage for cred at restart */
+#ifdef CONFIG_CHECKPOINT_DEBUG
+	struct list_head per_task_status;
+#endif
 
 	struct ckpt_stats stats;	/* statistics */
 };
 
+#ifdef CONFIG_CHECKPOINT_DEBUG
+struct ckpt_task_status {
+	pid_t pid;
+#define RESTART_DBG_ROOT	(1 << 0)
+#define RESTART_DBG_GHOST	(1 << 1)
+#define RESTART_DBG_COORD	(1 << 2)
+#define RESTART_DBG_NORMAL	(1 << 3)
+#define RESTART_DBG_WAITING	(1 << 4)
+#define RESTART_DBG_RUNNING	(1 << 5)
+#define RESTART_DBG_FAILED	(1 << 6)
+#define RESTART_DBG_SUCCESS	(1 << 7)
+	int flags;
+	int error;
+	struct list_head list;
+};
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_CHECKPOINT_TYPES_H_ */
-- 
1.6.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/3] restart debug: splatter more ckpt_debugs about
       [not found] ` <20090929165342.GA10076-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-09-29 16:54   ` [PATCH 2/3] restart debug: add final process tree status Serge E. Hallyn
@ 2009-09-29 16:54   ` Serge E. Hallyn
       [not found]     ` <20090929165415.GB10114-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-10-01  1:53   ` [PATCH 1/3] restart: make sure all tasks are in sys_restart Oren Laadan
  2 siblings, 1 reply; 8+ messages in thread
From: Serge E. Hallyn @ 2009-09-29 16:54 UTC (permalink / raw)
  To: Linux Containers

Alas they uglify the error paths during restart, but I can't
count on two hands the number of times I've had to add these and
recompile to find one silly bug or other, so I suggest that it
may be worth having these in the code anyway.

Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 checkpoint/restart.c |   50 +++++++++++++++++++++++++++++++++++++++-----------
 1 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/checkpoint/restart.c b/checkpoint/restart.c
index 1085ed5..ebdd5ba 100644
--- a/checkpoint/restart.c
+++ b/checkpoint/restart.c
@@ -662,6 +662,7 @@ static int restore_activate_next(struct ckpt_ctx *ctx)
 		rcu_read_unlock();
 
 		if (!task) {
+			ckpt_debug("could not find task %d\n", pid);
 			restore_notify_error(ctx, -ESRCH);
 			return -ESRCH;
 		}
@@ -685,6 +686,7 @@ static int wait_task_active(struct ckpt_ctx *ctx)
 	ckpt_debug("active %d < %d (ret %d)\n",
 		   ctx->active_pid, ctx->nr_pids, ret);
 	if (!ret && ckpt_test_ctx_error(ctx)) {
+		ckpt_debug("wait_event_interruptible returned %d\n", ret);
 		force_sig(SIGKILL, current);
 		ret = -EBUSY;
 	}
@@ -827,20 +829,25 @@ static int do_restore_task(void)
 
 	/* wait for our turn, do the restore, and tell next task in line */
 	ret = wait_task_active(ctx);
-	if (ret < 0)
+	if (ret < 0) {
+		ckpt_debug("wait_task_active returned %d\n", ret);
 		goto out;
+	}
 
 	ckpt_debug_log_running(ctx);
 
 	zombie = restore_task(ctx);
 	if (zombie < 0) {
+		ckpt_debug("restore_task returned %d\n", ret);
 		ret = zombie;
 		goto out;
 	}
 
 	ret = restore_activate_next(ctx);
-	if (ret < 0)
+	if (ret < 0) {
+		ckpt_debug("restore_activate_next returned %d\n", ret);
 		goto out;
+	}
 
 	/*
 	 * zombie: we're done here; do_exit() will notice the @ctx on
@@ -855,6 +862,8 @@ static int do_restore_task(void)
 
 	restore_task_done(ctx);
 	ret = wait_task_sync(ctx);
+	if (ret)
+		ckpt_debug("wait_task_sync returned %d\n", ret);
  out:
 	old_ctx = xchg(&current->checkpoint_ctx, NULL);
 	if (old_ctx)
@@ -970,12 +979,14 @@ static int wait_all_tasks_finish(struct ckpt_ctx *ctx)
 
 	BUG_ON(ctx->active_pid != -1);
 	ret = restore_activate_next(ctx);
-	if (ret < 0)
+	if (ret < 0) {
+		ckpt_debug("restore_activate_next returned %d\n", ret);
 		return ret;
+	}
 
 	ret = wait_for_completion_interruptible(&ctx->complete);
 
-	ckpt_debug("final sync kflags %#lx\n", ctx->kflags);
+	ckpt_debug("final sync kflags %#lx (ret %d)\n", ctx->kflags, ret);
 	/*
 	 * Usually when restart fails, the restarting task will first
 	 * set @ctx->errno before waking us up. In the rare event that
@@ -1051,18 +1062,24 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
 	ckpt_debug_log_running(ctx);
 
 	ret = restore_read_header(ctx);
-	if (ret < 0)
+	if (ret < 0) {
+		ckpt_debug("restore_read_header returned %d\n", ret);
 		return ret;
+	}
 	ret = restore_read_tree(ctx);
-	if (ret < 0)
+	if (ret < 0) {
+		ckpt_debug("restore_read_tree returned %d\n", ret);
 		return ret;
+	}
 
 	if ((ctx->uflags & RESTART_TASKSELF) && ctx->nr_pids != 1)
 		return -EINVAL;
 
 	ret = init_restart_ctx(ctx, pid);
-	if (ret < 0)
+	if (ret < 0) {
+		ckpt_debug("init_restart_ctx returned %d\n", ret);
 		return ret;
+	}
 
 	/*
 	 * Populate own ->checkpoint_ctx: if an ancestor attempts to
@@ -1094,8 +1111,10 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
 	} else {
 		/* prepare descendants' t->checkpoint_ctx point to coord */
 		ret = prepare_descendants(ctx, ctx->root_task);
-		if (ret < 0)
+		if (ret < 0) {
+			ckpt_debug("prepare_descendants returned %d", ret);
 			goto out;
+		}
 
 		/* tell tasks we're ready for them to dec  ctx->nr_running */
 		wake_up_all(&ctx->waitq);
@@ -1108,17 +1127,23 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
 
 		/* wait for all other tasks to complete do_restore_task() */
 		ret = wait_all_tasks_finish(ctx);
-		if (ret < 0)
+		if (ret < 0) {
+			ckpt_debug("wait_all_tasks_finish returned %d", ret);
 			goto out;
+		}
 	}
 
 	ret = deferqueue_run(ctx->deferqueue);  /* run deferred work */
-	if (ret < 0)
+	if (ret < 0) {
+		ckpt_debug("deferqueue_run returned %d\n", ret);
 		goto out;
+	}
 
 	ret = restore_read_tail(ctx);
-	if (ret < 0)
+	if (ret < 0) {
+		ckpt_debug("restore_read_tail returned %d\n", ret);
 		goto out;
+	}
 
 	if (ctx->uflags & RESTART_FROZEN) {
 		ret = cgroup_freezer_make_frozen(ctx->root_task);
@@ -1202,6 +1227,9 @@ long do_restart(struct ckpt_ctx *ctx, pid_t pid, unsigned long flags)
 	else
 		ret = do_restore_task();
 
+	if (ret < 0)
+		ckpt_debug("ret %ld\n", ret);
+
 	/* restart(2) isn't idempotent: should not be auto-restarted */
 	if (ret == -ERESTARTSYS || ret == -ERESTARTNOINTR ||
 	    ret == -ERESTARTNOHAND || ret == -ERESTART_RESTARTBLOCK)
-- 
1.6.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/3] restart: make sure all tasks are in sys_restart
       [not found] ` <20090929165342.GA10076-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-09-29 16:54   ` [PATCH 2/3] restart debug: add final process tree status Serge E. Hallyn
  2009-09-29 16:54   ` [PATCH 3/3] restart debug: splatter more ckpt_debugs about Serge E. Hallyn
@ 2009-10-01  1:53   ` Oren Laadan
  2 siblings, 0 replies; 8+ messages in thread
From: Oren Laadan @ 2009-10-01  1:53 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Linux Containers


There is no need the add another sync point at the beginning of
restart.

The bug was elsewhere - in reparent_thread() the code tested
the child's flags for PF_RESTARTING instead of the parent's.

Oren.

Serge E. Hallyn wrote:
> Make sure that all prepared tasks are in sys_restart before the
> coordinator proceeds.  Otherwise, it was possible for the last
> task actually in sys_restart to sync before the straggler was in
> sys_restart, sending that task a signal and causing that task to
> exit.  Then since there are not enough tasks completing restart,
> the restart operation ends up hanging, waiting for the killed task.
> 
> Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
> ---

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 3/3] restart debug: splatter more ckpt_debugs about
       [not found]     ` <20090929165415.GB10114-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-10-01  1:54       ` Oren Laadan
  0 siblings, 0 replies; 8+ messages in thread
From: Oren Laadan @ 2009-10-01  1:54 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Linux Containers


I used this as a reference in one of patch 1/5 in the recent series.

Oren.

Serge E. Hallyn wrote:
> Alas they uglify the error paths during restart, but I can't
> count on two hands the number of times I've had to add these and
> recompile to find one silly bug or other, so I suggest that it
> may be worth having these in the code anyway.
> 
> Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
> ---
>  checkpoint/restart.c |   50 +++++++++++++++++++++++++++++++++++++++-----------
>  1 files changed, 39 insertions(+), 11 deletions(-)
> 
> diff --git a/checkpoint/restart.c b/checkpoint/restart.c
> index 1085ed5..ebdd5ba 100644
> --- a/checkpoint/restart.c
> +++ b/checkpoint/restart.c
> @@ -662,6 +662,7 @@ static int restore_activate_next(struct ckpt_ctx *ctx)
>  		rcu_read_unlock();
>  
>  		if (!task) {
> +			ckpt_debug("could not find task %d\n", pid);
>  			restore_notify_error(ctx, -ESRCH);
>  			return -ESRCH;
>  		}
> @@ -685,6 +686,7 @@ static int wait_task_active(struct ckpt_ctx *ctx)
>  	ckpt_debug("active %d < %d (ret %d)\n",
>  		   ctx->active_pid, ctx->nr_pids, ret);
>  	if (!ret && ckpt_test_ctx_error(ctx)) {
> +		ckpt_debug("wait_event_interruptible returned %d\n", ret);
>  		force_sig(SIGKILL, current);
>  		ret = -EBUSY;
>  	}
> @@ -827,20 +829,25 @@ static int do_restore_task(void)
>  
>  	/* wait for our turn, do the restore, and tell next task in line */
>  	ret = wait_task_active(ctx);
> -	if (ret < 0)
> +	if (ret < 0) {
> +		ckpt_debug("wait_task_active returned %d\n", ret);
>  		goto out;
> +	}
>  
>  	ckpt_debug_log_running(ctx);
>  
>  	zombie = restore_task(ctx);
>  	if (zombie < 0) {
> +		ckpt_debug("restore_task returned %d\n", ret);
>  		ret = zombie;
>  		goto out;
>  	}
>  
>  	ret = restore_activate_next(ctx);
> -	if (ret < 0)
> +	if (ret < 0) {
> +		ckpt_debug("restore_activate_next returned %d\n", ret);
>  		goto out;
> +	}
>  
>  	/*
>  	 * zombie: we're done here; do_exit() will notice the @ctx on
> @@ -855,6 +862,8 @@ static int do_restore_task(void)
>  
>  	restore_task_done(ctx);
>  	ret = wait_task_sync(ctx);
> +	if (ret)
> +		ckpt_debug("wait_task_sync returned %d\n", ret);
>   out:
>  	old_ctx = xchg(&current->checkpoint_ctx, NULL);
>  	if (old_ctx)
> @@ -970,12 +979,14 @@ static int wait_all_tasks_finish(struct ckpt_ctx *ctx)
>  
>  	BUG_ON(ctx->active_pid != -1);
>  	ret = restore_activate_next(ctx);
> -	if (ret < 0)
> +	if (ret < 0) {
> +		ckpt_debug("restore_activate_next returned %d\n", ret);
>  		return ret;
> +	}
>  
>  	ret = wait_for_completion_interruptible(&ctx->complete);
>  
> -	ckpt_debug("final sync kflags %#lx\n", ctx->kflags);
> +	ckpt_debug("final sync kflags %#lx (ret %d)\n", ctx->kflags, ret);
>  	/*
>  	 * Usually when restart fails, the restarting task will first
>  	 * set @ctx->errno before waking us up. In the rare event that
> @@ -1051,18 +1062,24 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
>  	ckpt_debug_log_running(ctx);
>  
>  	ret = restore_read_header(ctx);
> -	if (ret < 0)
> +	if (ret < 0) {
> +		ckpt_debug("restore_read_header returned %d\n", ret);
>  		return ret;
> +	}
>  	ret = restore_read_tree(ctx);
> -	if (ret < 0)
> +	if (ret < 0) {
> +		ckpt_debug("restore_read_tree returned %d\n", ret);
>  		return ret;
> +	}
>  
>  	if ((ctx->uflags & RESTART_TASKSELF) && ctx->nr_pids != 1)
>  		return -EINVAL;
>  
>  	ret = init_restart_ctx(ctx, pid);
> -	if (ret < 0)
> +	if (ret < 0) {
> +		ckpt_debug("init_restart_ctx returned %d\n", ret);
>  		return ret;
> +	}
>  
>  	/*
>  	 * Populate own ->checkpoint_ctx: if an ancestor attempts to
> @@ -1094,8 +1111,10 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
>  	} else {
>  		/* prepare descendants' t->checkpoint_ctx point to coord */
>  		ret = prepare_descendants(ctx, ctx->root_task);
> -		if (ret < 0)
> +		if (ret < 0) {
> +			ckpt_debug("prepare_descendants returned %d", ret);
>  			goto out;
> +		}
>  
>  		/* tell tasks we're ready for them to dec  ctx->nr_running */
>  		wake_up_all(&ctx->waitq);
> @@ -1108,17 +1127,23 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
>  
>  		/* wait for all other tasks to complete do_restore_task() */
>  		ret = wait_all_tasks_finish(ctx);
> -		if (ret < 0)
> +		if (ret < 0) {
> +			ckpt_debug("wait_all_tasks_finish returned %d", ret);
>  			goto out;
> +		}
>  	}
>  
>  	ret = deferqueue_run(ctx->deferqueue);  /* run deferred work */
> -	if (ret < 0)
> +	if (ret < 0) {
> +		ckpt_debug("deferqueue_run returned %d\n", ret);
>  		goto out;
> +	}
>  
>  	ret = restore_read_tail(ctx);
> -	if (ret < 0)
> +	if (ret < 0) {
> +		ckpt_debug("restore_read_tail returned %d\n", ret);
>  		goto out;
> +	}
>  
>  	if (ctx->uflags & RESTART_FROZEN) {
>  		ret = cgroup_freezer_make_frozen(ctx->root_task);
> @@ -1202,6 +1227,9 @@ long do_restart(struct ckpt_ctx *ctx, pid_t pid, unsigned long flags)
>  	else
>  		ret = do_restore_task();
>  
> +	if (ret < 0)
> +		ckpt_debug("ret %ld\n", ret);
> +
>  	/* restart(2) isn't idempotent: should not be auto-restarted */
>  	if (ret == -ERESTARTSYS || ret == -ERESTARTNOINTR ||
>  	    ret == -ERESTARTNOHAND || ret == -ERESTART_RESTARTBLOCK)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/3] restart debug: add final process tree status
       [not found]     ` <20090929165402.GA10114-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-10-01  1:57       ` Oren Laadan
       [not found]         ` <4AC40CA0.8020305-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Oren Laadan @ 2009-10-01  1:57 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Linux Containers



Serge E. Hallyn wrote:
> Have tasks in sys_restart keep some status in a list off
> of checkpoint_ctx, and print this info when the checkpoint_ctx
> is freed.
> 
> This is mostly an RFC - in particular the error tracking is
> pretty half-hearted so far.  But the info it does spit out
> helped me to figured out the coordinator syncing problem
> fixed by the previous patch.
> 
> Sample dmesg output:
> [4568:4568:c/r:free_per_task_status:200] 4 tasks registered, nr_tasks was 0 nr_total 0
> [4568:4568:c/r:free_per_task_status:202] active pid was 1, ctx->errno 0
> [4568:4568:c/r:free_per_task_status:204] kflags 6 uflags 0 oflags 1
> [4568:4568:c/r:free_per_task_status:206] task 0 to run was 4568
> [4568:4568:c/r:free_per_task_status:209] pid 4566
> [4568:4568:c/r:free_per_task_status:211] it was coordinator
> [4568:4568:c/r:free_per_task_status:219] it was running
> [4568:4568:c/r:free_per_task_status:209] pid 4570
> [4568:4568:c/r:free_per_task_status:213] it was a ghost
> [4568:4568:c/r:free_per_task_status:209] pid 4569
> [4568:4568:c/r:free_per_task_status:213] it was a ghost
> [4568:4568:c/r:free_per_task_status:209] pid 4568
> [4568:4568:c/r:free_per_task_status:215] it was the root task
> [4568:4568:c/r:free_per_task_status:221] it was a normal task
> 
> So, when one task died before hitting sys_restart, the first line would
> show '3 tasks registered'.
> 
> Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>

[...]

This looks pretty useful. Any chance you can you make it work on top
of the 5-patch series I posted ... ?

Thanks,

Oren.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/3] restart debug: add final process tree status
       [not found]         ` <4AC40CA0.8020305-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
@ 2009-10-01 15:33           ` Serge E. Hallyn
       [not found]             ` <20091001153356.GA20565-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Serge E. Hallyn @ 2009-10-01 15:33 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Linux Containers

Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):
> 
> 
> Serge E. Hallyn wrote:
> > Have tasks in sys_restart keep some status in a list off
> > of checkpoint_ctx, and print this info when the checkpoint_ctx
> > is freed.
> > 
> > This is mostly an RFC - in particular the error tracking is
> > pretty half-hearted so far.  But the info it does spit out
> > helped me to figured out the coordinator syncing problem
> > fixed by the previous patch.
> > 
> > Sample dmesg output:
> > [4568:4568:c/r:free_per_task_status:200] 4 tasks registered, nr_tasks was 0 nr_total 0
> > [4568:4568:c/r:free_per_task_status:202] active pid was 1, ctx->errno 0
> > [4568:4568:c/r:free_per_task_status:204] kflags 6 uflags 0 oflags 1
> > [4568:4568:c/r:free_per_task_status:206] task 0 to run was 4568
> > [4568:4568:c/r:free_per_task_status:209] pid 4566
> > [4568:4568:c/r:free_per_task_status:211] it was coordinator
> > [4568:4568:c/r:free_per_task_status:219] it was running
> > [4568:4568:c/r:free_per_task_status:209] pid 4570
> > [4568:4568:c/r:free_per_task_status:213] it was a ghost
> > [4568:4568:c/r:free_per_task_status:209] pid 4569
> > [4568:4568:c/r:free_per_task_status:213] it was a ghost
> > [4568:4568:c/r:free_per_task_status:209] pid 4568
> > [4568:4568:c/r:free_per_task_status:215] it was the root task
> > [4568:4568:c/r:free_per_task_status:221] it was a normal task
> > 
> > So, when one task died before hitting sys_restart, the first line would
> > show '3 tasks registered'.
> > 
> > Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
> 
> [...]
> 
> This looks pretty useful. Any chance you can you make it work on top
> of the 5-patch series I posted ... ?

Here:

From 8cf006a1bf26a4b280841401302c99689d629e0a Mon Sep 17 00:00:00 2001
From: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
Date: Thu, 1 Oct 2009 11:09:40 -0400
Subject: [PATCH 1/1] restart debug: add final process tree status (v2)

Have tasks in sys_restart keep some status in a list off
of checkpoint_ctx, and print this info when the checkpoint_ctx
is freed.

This version is mainly just ported against ckpt-v18-hallyn.

Sample output:

[3519:2:c/r:free_per_task_status:207] 3 tasks registered, nr_tasks was 0 nr_total 0
[3519:2:c/r:free_per_task_status:210] active pid was 1, ctx->errno 0
[3519:2:c/r:free_per_task_status:212] kflags 6 uflags 0 oflags 1
[3519:2:c/r:free_per_task_status:214] task 0 to run was 2
[3519:2:c/r:free_per_task_status:217] pid 3517
[3519:2:c/r:free_per_task_status:219] it was coordinator
[3519:2:c/r:free_per_task_status:227] it was running
[3519:2:c/r:free_per_task_status:217] pid 3519
[3519:2:c/r:free_per_task_status:223] it was the root task
[3519:2:c/r:free_per_task_status:229] it was a normal task
[3519:2:c/r:free_per_task_status:217] pid 3520
[3519:2:c/r:free_per_task_status:221] it was a ghost

Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 checkpoint/restart.c             |  106 ++++++++++++++++++++++++++++++++++++++
 checkpoint/sys.c                 |   57 ++++++++++++++++++++
 include/linux/checkpoint_types.h |   20 +++++++
 3 files changed, 183 insertions(+), 0 deletions(-)

diff --git a/checkpoint/restart.c b/checkpoint/restart.c
index b12c8bd..1f356c0 100644
--- a/checkpoint/restart.c
+++ b/checkpoint/restart.c
@@ -26,6 +26,98 @@
 #include <linux/checkpoint.h>
 #include <linux/checkpoint_hdr.h>
 
+#ifdef CONFIG_CHECKPOINT_DEBUG
+static struct ckpt_task_status *ckpt_debug_checkin(struct ckpt_ctx *ctx)
+{
+	struct ckpt_task_status *s;
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		return NULL;
+	s->pid = current->pid;
+	s->error = 0;
+	s->flags = RESTART_DBG_WAITING;
+	if (current == ctx->root_task)
+		s->flags |= RESTART_DBG_ROOT;
+	list_add_tail(&s->list, &ctx->per_task_status);
+	return s;
+}
+
+static struct ckpt_task_status *getme(struct ckpt_ctx *ctx)
+{
+	struct ckpt_task_status *s = NULL;
+	list_for_each_entry(s, &ctx->per_task_status, list) {
+		if (s->pid == current->pid)
+			break;
+	}
+	if (!s || s->pid != current->pid)
+		return NULL;
+	return s;
+}
+
+static void ckpt_debug_coord(struct ckpt_ctx *ctx)
+{
+	struct ckpt_task_status *s;
+
+	s = ckpt_debug_checkin(ctx);
+	if (s)
+		s->flags |= RESTART_DBG_COORD;
+}
+
+static void ckpt_debug_ghost(struct ckpt_ctx *ctx)
+{
+	struct ckpt_task_status *s;
+
+	s = ckpt_debug_checkin(ctx);
+	if (s)
+		s->flags |= RESTART_DBG_GHOST;
+}
+
+static void ckpt_debug_normal(struct ckpt_ctx *ctx)
+{
+	struct ckpt_task_status *s;
+
+	s = ckpt_debug_checkin(ctx);
+	if (s)
+		s->flags |= RESTART_DBG_NORMAL;
+}
+
+static void ckpt_debug_log_error(struct ckpt_ctx *ctx, int err)
+{
+	struct ckpt_task_status *s = getme(ctx);
+	if (!s)
+		return;
+	s->error = err;
+	s->flags &= ~RESTART_DBG_WAITING;
+	s->flags &= ~RESTART_DBG_RUNNING;
+	if (err)
+		s->flags |= RESTART_DBG_FAILED;
+	else
+		s->flags |= RESTART_DBG_SUCCESS;
+}
+
+static void ckpt_debug_log_running(struct ckpt_ctx *ctx)
+{
+	struct ckpt_task_status *s = getme(ctx);
+	if (!s)
+		return;
+	s->flags &= ~RESTART_DBG_WAITING;
+	s->flags |= RESTART_DBG_RUNNING;
+}
+#else
+static inline void ckpt_debug_checkin(struct ckpt_ctx *ctx)
+{}
+static inline void ckpt_debug_coord(struct ckpt_ctx *ctx)
+{}
+static inline void ckpt_debug_ghost(struct ckpt_ctx *ctx)
+{}
+static inline void ckpt_debug_normal(struct ckpt_ctx *ctx)
+{}
+static inline void ckpt_debug_log_error(struct ckpt_ctx *ctx, int err)
+{}
+static inline void ckpt_debug_log_running(struct ckpt_ctx *ctx)
+{}
+#endif
+
 static int _ckpt_read_err(struct ckpt_ctx *ctx, struct ckpt_hdr *h)
 {
 	char *ptr;
@@ -680,11 +772,17 @@ static int do_ghost_task(void)
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
+	ckpt_debug_ghost(ctx);
+
+	ckpt_debug_log_running(ctx);
+
 	current->flags |= PF_RESTARTING;
 
 	ret = wait_event_interruptible(ctx->ghostq,
 				       all_tasks_activated(ctx) ||
 				       ckpt_test_ctx_error(ctx));
+
+	ckpt_debug_log_error(ctx, 0);
 	if (ret < 0)
 		restore_notify_error(ctx, ret);
 
@@ -752,6 +850,8 @@ static int do_restore_task(void)
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
+	ckpt_debug_normal(ctx);
+
 	current->flags |= PF_RESTARTING;
 
 	ret = wait_sync_threads();
@@ -767,6 +867,8 @@ static int do_restore_task(void)
 	if (ret < 0)
 		goto out;
 
+	ckpt_debug_log_running(ctx);
+
 	zombie = restore_task(ctx);
 	if (zombie < 0) {
 		ret = zombie;
@@ -791,6 +893,7 @@ static int do_restore_task(void)
 	restore_task_done(ctx);
 	ret = wait_task_sync(ctx);
  out:
+	ckpt_debug_log_error(ctx, ret);
 	if (ret < 0)
 		restore_notify_error(ctx, ret);
 
@@ -964,6 +1067,9 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
 {
 	int ret;
 
+	ckpt_debug_coord(ctx);
+	ckpt_debug_log_running(ctx);
+
 	ret = restore_read_header(ctx);
 	ckpt_debug("restore header: %d\n", ret);
 	if (ret < 0)
diff --git a/checkpoint/sys.c b/checkpoint/sys.c
index 7604089..b98812b 100644
--- a/checkpoint/sys.c
+++ b/checkpoint/sys.c
@@ -188,10 +188,64 @@ static void task_arr_free(struct ckpt_ctx *ctx)
 	kfree(ctx->tasks_arr);
 }
 
+#ifdef CONFIG_CHECKPOINT_DEBUG
+static void free_per_task_status(struct ckpt_ctx *ctx)
+{
+	struct ckpt_task_status *s, *p;
+	int i, count = 0;
+
+	/* The per-task debug info is for restart only */
+	if (!(ctx->kflags & CKPT_CTX_RESTART))
+		return;
+
+	/* See how many tasks registered.  Tasks which didn't reach
+	 * sys_restart() won't have registered.  So if this count is
+	 * not the same as ctx->nr_total, that's a warning bell */
+	list_for_each_entry(s, &ctx->per_task_status, list)
+		count++;
+	ckpt_debug("%d tasks registered, nr_tasks was %d nr_total %d\n",
+		count, ctx->nr_tasks, atomic_read(&ctx->nr_total));
+
+	ckpt_debug("active pid was %d, ctx->errno %d\n", ctx->active_pid,
+		ctx->errno);
+	ckpt_debug("kflags %lu uflags %lu oflags %lu", ctx->kflags,
+		ctx->uflags, ctx->oflags);
+	for (i = 0; i < ctx->active_pid; i++)
+		ckpt_debug("task %d to run was %d\n", i, ctx->pids_arr[i].vpid);
+
+	list_for_each_entry_safe(s, p, &ctx->per_task_status, list) {
+		ckpt_debug("pid %d\n", s->pid);
+		if (s->flags & RESTART_DBG_COORD)
+			ckpt_debug("it was coordinator\n");
+		if (s->flags & RESTART_DBG_GHOST)
+			ckpt_debug("it was a ghost\n");
+		if (s->flags & RESTART_DBG_ROOT)
+			ckpt_debug("it was the root task\n");
+		if (s->flags & RESTART_DBG_WAITING)
+			ckpt_debug("it was still waiting to run restart\n");
+		if (s->flags & RESTART_DBG_RUNNING)
+			ckpt_debug("it was running\n");
+		if (s->flags & RESTART_DBG_NORMAL)
+			ckpt_debug("it was a normal task\n");
+		if (s->flags & RESTART_DBG_FAILED)
+			ckpt_debug("it finished with error %d\n", s->error);
+		if (s->flags & RESTART_DBG_FAILED)
+			ckpt_debug("it finished successfully");
+		list_del(&s->list);
+		kfree(s);
+	}
+}
+#else
+static inline void free_per_task_status(struct ckpt_ctx *ctx)
+{ }
+#endif
+
 static void ckpt_ctx_free(struct ckpt_ctx *ctx)
 {
 	BUG_ON(atomic_read(&ctx->refcount));
 
+	free_per_task_status(ctx);
+
 	if (ctx->deferqueue)
 		deferqueue_destroy(ctx->deferqueue);
 
@@ -235,6 +289,9 @@ static struct ckpt_ctx *ckpt_ctx_alloc(int fd, unsigned long uflags,
 	ctx->ktime_begin = ktime_get();
 
 	atomic_set(&ctx->refcount, 0);
+#ifdef CONFIG_CHECKPOINT_DEBUG
+	INIT_LIST_HEAD(&ctx->per_task_status);
+#endif
 	INIT_LIST_HEAD(&ctx->pgarr_list);
 	INIT_LIST_HEAD(&ctx->pgarr_pool);
 	init_waitqueue_head(&ctx->waitq);
diff --git a/include/linux/checkpoint_types.h b/include/linux/checkpoint_types.h
index b9393f4..198d4e9 100644
--- a/include/linux/checkpoint_types.h
+++ b/include/linux/checkpoint_types.h
@@ -78,10 +78,30 @@ struct ckpt_ctx {
 	wait_queue_head_t waitq;	/* waitqueue for restarting tasks */
 	wait_queue_head_t ghostq;	/* waitqueue for ghost tasks */
 	struct cred *realcred, *ecred;	/* tmp storage for cred at restart */
+#ifdef CONFIG_CHECKPOINT_DEBUG
+	struct list_head per_task_status; /* list of status for each task */
+#endif
 
 	struct ckpt_stats stats;	/* statistics */
 };
 
+#ifdef CONFIG_CHECKPOINT_DEBUG
+struct ckpt_task_status {
+	pid_t pid;
+#define RESTART_DBG_ROOT	(1 << 0)
+#define RESTART_DBG_GHOST	(1 << 1)
+#define RESTART_DBG_COORD	(1 << 2)
+#define RESTART_DBG_NORMAL	(1 << 3)
+#define RESTART_DBG_WAITING	(1 << 4)
+#define RESTART_DBG_RUNNING	(1 << 5)
+#define RESTART_DBG_FAILED	(1 << 6)
+#define RESTART_DBG_SUCCESS	(1 << 7)
+	int flags;
+	int error;
+	struct list_head list;
+};
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_CHECKPOINT_TYPES_H_ */
-- 
1.6.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/3] restart debug: add final process tree status
       [not found]             ` <20091001153356.GA20565-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-10-01 23:29               ` Oren Laadan
  0 siblings, 0 replies; 8+ messages in thread
From: Oren Laadan @ 2009-10-01 23:29 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Linux Containers



Serge E. Hallyn wrote:
> 
> Here:
> 
> From 8cf006a1bf26a4b280841401302c99689d629e0a Mon Sep 17 00:00:00 2001
> From: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
> Date: Thu, 1 Oct 2009 11:09:40 -0400
> Subject: [PATCH 1/1] restart debug: add final process tree status (v2)
> 
> Have tasks in sys_restart keep some status in a list off
> of checkpoint_ctx, and print this info when the checkpoint_ctx
> is freed.
> 
> This version is mainly just ported against ckpt-v18-hallyn.
> 
> Sample output:
> 
> [3519:2:c/r:free_per_task_status:207] 3 tasks registered, nr_tasks was 0 nr_total 0
> [3519:2:c/r:free_per_task_status:210] active pid was 1, ctx->errno 0
> [3519:2:c/r:free_per_task_status:212] kflags 6 uflags 0 oflags 1
> [3519:2:c/r:free_per_task_status:214] task 0 to run was 2
> [3519:2:c/r:free_per_task_status:217] pid 3517
> [3519:2:c/r:free_per_task_status:219] it was coordinator
> [3519:2:c/r:free_per_task_status:227] it was running
> [3519:2:c/r:free_per_task_status:217] pid 3519
> [3519:2:c/r:free_per_task_status:223] it was the root task
> [3519:2:c/r:free_per_task_status:229] it was a normal task
> [3519:2:c/r:free_per_task_status:217] pid 3520
> [3519:2:c/r:free_per_task_status:221] it was a ghost
> 
> Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>

Looks good.. I'll massage it a bit and add. Meanwhile, a
couple of questions:

[...]

> ---
>  checkpoint/restart.c             |  106 ++++++++++++++++++++++++++++++++++++++
>  checkpoint/sys.c                 |   57 ++++++++++++++++++++
>  include/linux/checkpoint_types.h |   20 +++++++
>  3 files changed, 183 insertions(+), 0 deletions(-)
> 
> diff --git a/checkpoint/restart.c b/checkpoint/restart.c
> index b12c8bd..1f356c0 100644
> --- a/checkpoint/restart.c
> +++ b/checkpoint/restart.c
> @@ -26,6 +26,98 @@
>  #include <linux/checkpoint.h>
>  #include <linux/checkpoint_hdr.h>
>  
> +#ifdef CONFIG_CHECKPOINT_DEBUG
> +static struct ckpt_task_status *ckpt_debug_checkin(struct ckpt_ctx *ctx)
> +{
> +	struct ckpt_task_status *s;
> +	s = kmalloc(sizeof(*s), GFP_KERNEL);
> +	if (!s)
> +		return NULL;
> +	s->pid = current->pid;
> +	s->error = 0;
> +	s->flags = RESTART_DBG_WAITING;
> +	if (current == ctx->root_task)
> +		s->flags |= RESTART_DBG_ROOT;
> +	list_add_tail(&s->list, &ctx->per_task_status);
> +	return s;
> +}

The logic would be a bit simpler if you allow check-in to fail
(and then fail the restart) - you then don't need to test for
validity of @s everywhere.

> +
> +static struct ckpt_task_status *getme(struct ckpt_ctx *ctx)
> +{
> +	struct ckpt_task_status *s = NULL;
> +	list_for_each_entry(s, &ctx->per_task_status, list) {
> +		if (s->pid == current->pid)
> +			break;
> +	}
> +	if (!s || s->pid != current->pid)
> +		return NULL;

Note that here @s is never NULL.

[...]

> @@ -680,11 +772,17 @@ static int do_ghost_task(void)
>  	if (IS_ERR(ctx))
>  		return PTR_ERR(ctx);
>  
> +	ckpt_debug_ghost(ctx);
> +
> +	ckpt_debug_log_running(ctx);
> +
>  	current->flags |= PF_RESTARTING;
>  
>  	ret = wait_event_interruptible(ctx->ghostq,
>  				       all_tasks_activated(ctx) ||
>  				       ckpt_test_ctx_error(ctx));
> +
> +	ckpt_debug_log_error(ctx, 0);

Did you mean s/0/ret/ ?

[...]

> +	list_for_each_entry_safe(s, p, &ctx->per_task_status, list) {
> +		ckpt_debug("pid %d\n", s->pid);
> +		if (s->flags & RESTART_DBG_COORD)
> +			ckpt_debug("it was coordinator\n");
> +		if (s->flags & RESTART_DBG_GHOST)
> +			ckpt_debug("it was a ghost\n");
> +		if (s->flags & RESTART_DBG_ROOT)
> +			ckpt_debug("it was the root task\n");
> +		if (s->flags & RESTART_DBG_WAITING)
> +			ckpt_debug("it was still waiting to run restart\n");
> +		if (s->flags & RESTART_DBG_RUNNING)
> +			ckpt_debug("it was running\n");
> +		if (s->flags & RESTART_DBG_NORMAL)
> +			ckpt_debug("it was a normal task\n");
> +		if (s->flags & RESTART_DBG_FAILED)
> +			ckpt_debug("it finished with error %d\n", s->error);
> +		if (s->flags & RESTART_DBG_FAILED)

s/FAILED/SUCCESS/ ... :p

[...]

Oren.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2009-10-01 23:29 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-09-29 16:53 [PATCH 1/3] restart: make sure all tasks are in sys_restart Serge E. Hallyn
     [not found] ` <20090929165342.GA10076-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-09-29 16:54   ` [PATCH 2/3] restart debug: add final process tree status Serge E. Hallyn
     [not found]     ` <20090929165402.GA10114-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-10-01  1:57       ` Oren Laadan
     [not found]         ` <4AC40CA0.8020305-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
2009-10-01 15:33           ` Serge E. Hallyn
     [not found]             ` <20091001153356.GA20565-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-10-01 23:29               ` Oren Laadan
2009-09-29 16:54   ` [PATCH 3/3] restart debug: splatter more ckpt_debugs about Serge E. Hallyn
     [not found]     ` <20090929165415.GB10114-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-10-01  1:54       ` Oren Laadan
2009-10-01  1:53   ` [PATCH 1/3] restart: make sure all tasks are in sys_restart Oren Laadan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.