From: Oren Laadan <orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
To: "Serge E. Hallyn" <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
Cc: Linux Containers <containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org>
Subject: Re: [PATCH 3/3] restart debug: splatter more ckpt_debugs about
Date: Wed, 30 Sep 2009 21:54:09 -0400 [thread overview]
Message-ID: <4AC40BC1.6010802@librato.com> (raw)
In-Reply-To: <20090929165415.GB10114-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
I used this as a reference in one of patch 1/5 in the recent series.
Oren.
Serge E. Hallyn wrote:
> Alas they uglify the error paths during restart, but I can't
> count on two hands the number of times I've had to add these and
> recompile to find one silly bug or other, so I suggest that it
> may be worth having these in the code anyway.
>
> Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
> ---
> checkpoint/restart.c | 50 +++++++++++++++++++++++++++++++++++++++-----------
> 1 files changed, 39 insertions(+), 11 deletions(-)
>
> diff --git a/checkpoint/restart.c b/checkpoint/restart.c
> index 1085ed5..ebdd5ba 100644
> --- a/checkpoint/restart.c
> +++ b/checkpoint/restart.c
> @@ -662,6 +662,7 @@ static int restore_activate_next(struct ckpt_ctx *ctx)
> rcu_read_unlock();
>
> if (!task) {
> + ckpt_debug("could not find task %d\n", pid);
> restore_notify_error(ctx, -ESRCH);
> return -ESRCH;
> }
> @@ -685,6 +686,7 @@ static int wait_task_active(struct ckpt_ctx *ctx)
> ckpt_debug("active %d < %d (ret %d)\n",
> ctx->active_pid, ctx->nr_pids, ret);
> if (!ret && ckpt_test_ctx_error(ctx)) {
> + ckpt_debug("wait_event_interruptible returned %d\n", ret);
> force_sig(SIGKILL, current);
> ret = -EBUSY;
> }
> @@ -827,20 +829,25 @@ static int do_restore_task(void)
>
> /* wait for our turn, do the restore, and tell next task in line */
> ret = wait_task_active(ctx);
> - if (ret < 0)
> + if (ret < 0) {
> + ckpt_debug("wait_task_active returned %d\n", ret);
> goto out;
> + }
>
> ckpt_debug_log_running(ctx);
>
> zombie = restore_task(ctx);
> if (zombie < 0) {
> + ckpt_debug("restore_task returned %d\n", ret);
> ret = zombie;
> goto out;
> }
>
> ret = restore_activate_next(ctx);
> - if (ret < 0)
> + if (ret < 0) {
> + ckpt_debug("restore_activate_next returned %d\n", ret);
> goto out;
> + }
>
> /*
> * zombie: we're done here; do_exit() will notice the @ctx on
> @@ -855,6 +862,8 @@ static int do_restore_task(void)
>
> restore_task_done(ctx);
> ret = wait_task_sync(ctx);
> + if (ret)
> + ckpt_debug("wait_task_sync returned %d\n", ret);
> out:
> old_ctx = xchg(¤t->checkpoint_ctx, NULL);
> if (old_ctx)
> @@ -970,12 +979,14 @@ static int wait_all_tasks_finish(struct ckpt_ctx *ctx)
>
> BUG_ON(ctx->active_pid != -1);
> ret = restore_activate_next(ctx);
> - if (ret < 0)
> + if (ret < 0) {
> + ckpt_debug("restore_activate_next returned %d\n", ret);
> return ret;
> + }
>
> ret = wait_for_completion_interruptible(&ctx->complete);
>
> - ckpt_debug("final sync kflags %#lx\n", ctx->kflags);
> + ckpt_debug("final sync kflags %#lx (ret %d)\n", ctx->kflags, ret);
> /*
> * Usually when restart fails, the restarting task will first
> * set @ctx->errno before waking us up. In the rare event that
> @@ -1051,18 +1062,24 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
> ckpt_debug_log_running(ctx);
>
> ret = restore_read_header(ctx);
> - if (ret < 0)
> + if (ret < 0) {
> + ckpt_debug("restore_read_header returned %d\n", ret);
> return ret;
> + }
> ret = restore_read_tree(ctx);
> - if (ret < 0)
> + if (ret < 0) {
> + ckpt_debug("restore_read_tree returned %d\n", ret);
> return ret;
> + }
>
> if ((ctx->uflags & RESTART_TASKSELF) && ctx->nr_pids != 1)
> return -EINVAL;
>
> ret = init_restart_ctx(ctx, pid);
> - if (ret < 0)
> + if (ret < 0) {
> + ckpt_debug("init_restart_ctx returned %d\n", ret);
> return ret;
> + }
>
> /*
> * Populate own ->checkpoint_ctx: if an ancestor attempts to
> @@ -1094,8 +1111,10 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
> } else {
> /* prepare descendants' t->checkpoint_ctx point to coord */
> ret = prepare_descendants(ctx, ctx->root_task);
> - if (ret < 0)
> + if (ret < 0) {
> + ckpt_debug("prepare_descendants returned %d", ret);
> goto out;
> + }
>
> /* tell tasks we're ready for them to dec ctx->nr_running */
> wake_up_all(&ctx->waitq);
> @@ -1108,17 +1127,23 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
>
> /* wait for all other tasks to complete do_restore_task() */
> ret = wait_all_tasks_finish(ctx);
> - if (ret < 0)
> + if (ret < 0) {
> + ckpt_debug("wait_all_tasks_finish returned %d", ret);
> goto out;
> + }
> }
>
> ret = deferqueue_run(ctx->deferqueue); /* run deferred work */
> - if (ret < 0)
> + if (ret < 0) {
> + ckpt_debug("deferqueue_run returned %d\n", ret);
> goto out;
> + }
>
> ret = restore_read_tail(ctx);
> - if (ret < 0)
> + if (ret < 0) {
> + ckpt_debug("restore_read_tail returned %d\n", ret);
> goto out;
> + }
>
> if (ctx->uflags & RESTART_FROZEN) {
> ret = cgroup_freezer_make_frozen(ctx->root_task);
> @@ -1202,6 +1227,9 @@ long do_restart(struct ckpt_ctx *ctx, pid_t pid, unsigned long flags)
> else
> ret = do_restore_task();
>
> + if (ret < 0)
> + ckpt_debug("ret %ld\n", ret);
> +
> /* restart(2) isn't idempotent: should not be auto-restarted */
> if (ret == -ERESTARTSYS || ret == -ERESTARTNOINTR ||
> ret == -ERESTARTNOHAND || ret == -ERESTART_RESTARTBLOCK)
next prev parent reply other threads:[~2009-10-01 1:54 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-09-29 16:53 [PATCH 1/3] restart: make sure all tasks are in sys_restart Serge E. Hallyn
[not found] ` <20090929165342.GA10076-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-09-29 16:54 ` [PATCH 2/3] restart debug: add final process tree status Serge E. Hallyn
[not found] ` <20090929165402.GA10114-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-10-01 1:57 ` Oren Laadan
[not found] ` <4AC40CA0.8020305-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
2009-10-01 15:33 ` Serge E. Hallyn
[not found] ` <20091001153356.GA20565-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-10-01 23:29 ` Oren Laadan
2009-09-29 16:54 ` [PATCH 3/3] restart debug: splatter more ckpt_debugs about Serge E. Hallyn
[not found] ` <20090929165415.GB10114-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-10-01 1:54 ` Oren Laadan [this message]
2009-10-01 1:53 ` [PATCH 1/3] restart: make sure all tasks are in sys_restart Oren Laadan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4AC40BC1.6010802@librato.com \
--to=orenl-rdfvbdnroixbdgjk7y7tuq@public.gmane.org \
--cc=containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org \
--cc=serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox