All of lore.kernel.org
 help / color / mirror / Atom feed
From: Oren Laadan <orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
To: "Serge E. Hallyn" <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
Cc: Linux Containers <containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org>
Subject: Re: [PATCH 3/3] restart debug: splatter more ckpt_debugs about
Date: Wed, 30 Sep 2009 21:54:09 -0400	[thread overview]
Message-ID: <4AC40BC1.6010802@librato.com> (raw)
In-Reply-To: <20090929165415.GB10114-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>


I used this as a reference in one of patch 1/5 in the recent series.

Oren.

Serge E. Hallyn wrote:
> Alas they uglify the error paths during restart, but I can't
> count on two hands the number of times I've had to add these and
> recompile to find one silly bug or other, so I suggest that it
> may be worth having these in the code anyway.
> 
> Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
> ---
>  checkpoint/restart.c |   50 +++++++++++++++++++++++++++++++++++++++-----------
>  1 files changed, 39 insertions(+), 11 deletions(-)
> 
> diff --git a/checkpoint/restart.c b/checkpoint/restart.c
> index 1085ed5..ebdd5ba 100644
> --- a/checkpoint/restart.c
> +++ b/checkpoint/restart.c
> @@ -662,6 +662,7 @@ static int restore_activate_next(struct ckpt_ctx *ctx)
>  		rcu_read_unlock();
>  
>  		if (!task) {
> +			ckpt_debug("could not find task %d\n", pid);
>  			restore_notify_error(ctx, -ESRCH);
>  			return -ESRCH;
>  		}
> @@ -685,6 +686,7 @@ static int wait_task_active(struct ckpt_ctx *ctx)
>  	ckpt_debug("active %d < %d (ret %d)\n",
>  		   ctx->active_pid, ctx->nr_pids, ret);
>  	if (!ret && ckpt_test_ctx_error(ctx)) {
> +		ckpt_debug("wait_event_interruptible returned %d\n", ret);
>  		force_sig(SIGKILL, current);
>  		ret = -EBUSY;
>  	}
> @@ -827,20 +829,25 @@ static int do_restore_task(void)
>  
>  	/* wait for our turn, do the restore, and tell next task in line */
>  	ret = wait_task_active(ctx);
> -	if (ret < 0)
> +	if (ret < 0) {
> +		ckpt_debug("wait_task_active returned %d\n", ret);
>  		goto out;
> +	}
>  
>  	ckpt_debug_log_running(ctx);
>  
>  	zombie = restore_task(ctx);
>  	if (zombie < 0) {
> +		ckpt_debug("restore_task returned %d\n", ret);
>  		ret = zombie;
>  		goto out;
>  	}
>  
>  	ret = restore_activate_next(ctx);
> -	if (ret < 0)
> +	if (ret < 0) {
> +		ckpt_debug("restore_activate_next returned %d\n", ret);
>  		goto out;
> +	}
>  
>  	/*
>  	 * zombie: we're done here; do_exit() will notice the @ctx on
> @@ -855,6 +862,8 @@ static int do_restore_task(void)
>  
>  	restore_task_done(ctx);
>  	ret = wait_task_sync(ctx);
> +	if (ret)
> +		ckpt_debug("wait_task_sync returned %d\n", ret);
>   out:
>  	old_ctx = xchg(&current->checkpoint_ctx, NULL);
>  	if (old_ctx)
> @@ -970,12 +979,14 @@ static int wait_all_tasks_finish(struct ckpt_ctx *ctx)
>  
>  	BUG_ON(ctx->active_pid != -1);
>  	ret = restore_activate_next(ctx);
> -	if (ret < 0)
> +	if (ret < 0) {
> +		ckpt_debug("restore_activate_next returned %d\n", ret);
>  		return ret;
> +	}
>  
>  	ret = wait_for_completion_interruptible(&ctx->complete);
>  
> -	ckpt_debug("final sync kflags %#lx\n", ctx->kflags);
> +	ckpt_debug("final sync kflags %#lx (ret %d)\n", ctx->kflags, ret);
>  	/*
>  	 * Usually when restart fails, the restarting task will first
>  	 * set @ctx->errno before waking us up. In the rare event that
> @@ -1051,18 +1062,24 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
>  	ckpt_debug_log_running(ctx);
>  
>  	ret = restore_read_header(ctx);
> -	if (ret < 0)
> +	if (ret < 0) {
> +		ckpt_debug("restore_read_header returned %d\n", ret);
>  		return ret;
> +	}
>  	ret = restore_read_tree(ctx);
> -	if (ret < 0)
> +	if (ret < 0) {
> +		ckpt_debug("restore_read_tree returned %d\n", ret);
>  		return ret;
> +	}
>  
>  	if ((ctx->uflags & RESTART_TASKSELF) && ctx->nr_pids != 1)
>  		return -EINVAL;
>  
>  	ret = init_restart_ctx(ctx, pid);
> -	if (ret < 0)
> +	if (ret < 0) {
> +		ckpt_debug("init_restart_ctx returned %d\n", ret);
>  		return ret;
> +	}
>  
>  	/*
>  	 * Populate own ->checkpoint_ctx: if an ancestor attempts to
> @@ -1094,8 +1111,10 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
>  	} else {
>  		/* prepare descendants' t->checkpoint_ctx point to coord */
>  		ret = prepare_descendants(ctx, ctx->root_task);
> -		if (ret < 0)
> +		if (ret < 0) {
> +			ckpt_debug("prepare_descendants returned %d", ret);
>  			goto out;
> +		}
>  
>  		/* tell tasks we're ready for them to dec  ctx->nr_running */
>  		wake_up_all(&ctx->waitq);
> @@ -1108,17 +1127,23 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
>  
>  		/* wait for all other tasks to complete do_restore_task() */
>  		ret = wait_all_tasks_finish(ctx);
> -		if (ret < 0)
> +		if (ret < 0) {
> +			ckpt_debug("wait_all_tasks_finish returned %d", ret);
>  			goto out;
> +		}
>  	}
>  
>  	ret = deferqueue_run(ctx->deferqueue);  /* run deferred work */
> -	if (ret < 0)
> +	if (ret < 0) {
> +		ckpt_debug("deferqueue_run returned %d\n", ret);
>  		goto out;
> +	}
>  
>  	ret = restore_read_tail(ctx);
> -	if (ret < 0)
> +	if (ret < 0) {
> +		ckpt_debug("restore_read_tail returned %d\n", ret);
>  		goto out;
> +	}
>  
>  	if (ctx->uflags & RESTART_FROZEN) {
>  		ret = cgroup_freezer_make_frozen(ctx->root_task);
> @@ -1202,6 +1227,9 @@ long do_restart(struct ckpt_ctx *ctx, pid_t pid, unsigned long flags)
>  	else
>  		ret = do_restore_task();
>  
> +	if (ret < 0)
> +		ckpt_debug("ret %ld\n", ret);
> +
>  	/* restart(2) isn't idempotent: should not be auto-restarted */
>  	if (ret == -ERESTARTSYS || ret == -ERESTARTNOINTR ||
>  	    ret == -ERESTARTNOHAND || ret == -ERESTART_RESTARTBLOCK)

  parent reply	other threads:[~2009-10-01  1:54 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-09-29 16:53 [PATCH 1/3] restart: make sure all tasks are in sys_restart Serge E. Hallyn
     [not found] ` <20090929165342.GA10076-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-09-29 16:54   ` [PATCH 2/3] restart debug: add final process tree status Serge E. Hallyn
     [not found]     ` <20090929165402.GA10114-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-10-01  1:57       ` Oren Laadan
     [not found]         ` <4AC40CA0.8020305-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
2009-10-01 15:33           ` Serge E. Hallyn
     [not found]             ` <20091001153356.GA20565-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-10-01 23:29               ` Oren Laadan
2009-09-29 16:54   ` [PATCH 3/3] restart debug: splatter more ckpt_debugs about Serge E. Hallyn
     [not found]     ` <20090929165415.GB10114-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-10-01  1:54       ` Oren Laadan [this message]
2009-10-01  1:53   ` [PATCH 1/3] restart: make sure all tasks are in sys_restart Oren Laadan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4AC40BC1.6010802@librato.com \
    --to=orenl-rdfvbdnroixbdgjk7y7tuq@public.gmane.org \
    --cc=containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org \
    --cc=serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.