From: "Serge E. Hallyn" <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
To: Oren Laadan <orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
Subject: Re: [PATCH 3/5] c/r: introduce walk_task_subtree() to iterate through descendants
Date: Thu, 1 Oct 2009 11:41:09 -0500 [thread overview]
Message-ID: <20091001164109.GD20565@us.ibm.com> (raw)
In-Reply-To: <1254361634-30076-4-git-send-email-orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):
> Introduce a helper to iterate over the descendants of a given
> task. The prototype is:
>
> int walk_task_subtree(struct task_struct *root,
> int (*func)(struct task_struct *, void *),
> void *data)
>
> The function will start with @root, and iterate through all the
> descendants, including threads, in a DFS manner. Children of a task
> are traversed before proceeding to the next thread of that task.
>
> For each task, the callback @func will be called providing the task
> pointer and the @data. The callback is invoked while holding the
> tasklist_lock for reading. If the callback fails it should return a
> negative error, and the traversal ends. If the callback succeeds, it
> returns a non-negative number, and these values are summed.
>
> On success, walk_task_subtree() returns the total summed. On failure,
> it returns a negative value.
>
> The code in checkpoint/checkpoint.c and checkpoint/restart.c has
> been converted to use this helper.
>
> Signed-off-by: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
Acked-by: Serge Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
> ---
> checkpoint/checkpoint.c | 97 ++++++++++++++--------------------------
> checkpoint/restart.c | 105 +++++++++++++++++++-------------------------
> checkpoint/sys.c | 55 +++++++++++++++++++++++
> include/linux/checkpoint.h | 3 +
> 4 files changed, 137 insertions(+), 123 deletions(-)
>
> diff --git a/checkpoint/checkpoint.c b/checkpoint/checkpoint.c
> index dbe9e10..1eeb557 100644
> --- a/checkpoint/checkpoint.c
> +++ b/checkpoint/checkpoint.c
> @@ -520,80 +520,51 @@ static int collect_objects(struct ckpt_ctx *ctx)
> return ret;
> }
>
> +struct ckpt_cnt_tasks {
> + struct ckpt_ctx *ctx;
> + int nr;
> +};
> +
> /* count number of tasks in tree (and optionally fill pid's in array) */
> -static int tree_count_tasks(struct ckpt_ctx *ctx)
> +static int __tree_count_tasks(struct task_struct *task, void *data)
> {
> - struct task_struct *root;
> - struct task_struct *task;
> - struct task_struct *parent;
> - struct task_struct **tasks_arr = ctx->tasks_arr;
> - int nr_tasks = ctx->nr_tasks;
> - int nr = 0;
> + struct ckpt_cnt_tasks *d = (struct ckpt_cnt_tasks *) data;
> + struct ckpt_ctx *ctx = d->ctx;
> int ret;
>
> - read_lock(&tasklist_lock);
> -
> - /* we hold the lock, so root_task->real_parent can't change */
> - task = ctx->root_task;
> - if (ctx->root_init) {
> - /* container-init: start from container parent */
> - parent = task->real_parent;
> - root = parent;
> - } else {
> - /* non-container-init: start from root task and down */
> - parent = NULL;
> - root = task;
> - }
> -
> - /* count tasks via DFS scan of the tree */
> - while (1) {
> - ctx->tsk = task; /* (for ckpt_write_err) */
> + ctx->tsk = task; /* (for ckpt_write_err) */
>
> - /* is this task cool ? */
> - ret = may_checkpoint_task(ctx, task);
> - if (ret < 0) {
> - nr = ret;
> - break;
> - }
> - if (tasks_arr) {
> - /* unlikely... but if so then try again later */
> - if (nr == nr_tasks) {
> - nr = -EBUSY; /* cleanup in ckpt_ctx_free() */
> - break;
> - }
> - tasks_arr[nr] = task;
> - get_task_struct(task);
> - }
> - nr++;
> - /* if has children - proceed with child */
> - if (!list_empty(&task->children)) {
> - parent = task;
> - task = list_entry(task->children.next,
> - struct task_struct, sibling);
> - continue;
> - }
> - while (task != root) {
> - /* if has sibling - proceed with sibling */
> - if (!list_is_last(&task->sibling, &parent->children)) {
> - task = list_entry(task->sibling.next,
> - struct task_struct, sibling);
> - break;
> - }
> + /* is this task cool ? */
> + ret = may_checkpoint_task(ctx, task);
> + if (ret < 0)
> + goto out;
>
> - /* else, trace back to parent and proceed */
> - task = parent;
> - parent = parent->real_parent;
> + if (ctx->tasks_arr) {
> + if (d->nr == ctx->nr_tasks) { /* unlikely... try again later */
> + __ckpt_write_err(ctx, "TE", "bad task count\n", -EBUSY);
> + ret = -EBUSY;
> + goto out;
> }
> - if (task == root)
> - break;
> + ctx->tasks_arr[d->nr++] = task;
> + get_task_struct(task);
> }
> +
> + ret = 1;
> + out:
> + if (ret < 0)
> + ckpt_write_err(ctx, "", NULL);
> ctx->tsk = NULL;
> + return ret;
> +}
>
> - read_unlock(&tasklist_lock);
> +static int tree_count_tasks(struct ckpt_ctx *ctx)
> +{
> + struct ckpt_cnt_tasks data;
>
> - if (nr < 0)
> - ckpt_write_err(ctx, "", NULL);
> - return nr;
> + data.ctx = ctx;
> + data.nr = 0;
> +
> + return walk_task_subtree(ctx->root_task, __tree_count_tasks, &data);
> }
>
> /*
> diff --git a/checkpoint/restart.c b/checkpoint/restart.c
> index 37454c5..c021eaf 100644
> --- a/checkpoint/restart.c
> +++ b/checkpoint/restart.c
> @@ -804,77 +804,56 @@ static int do_restore_task(void)
> * Called by the coodinator to set the ->checkpoint_ctx pointer of the
> * root task and all its descendants.
> */
> -static int prepare_descendants(struct ckpt_ctx *ctx, struct task_struct *root)
> +static int __prepare_descendants(struct task_struct *task, void *data)
> {
> - struct task_struct *leader = root;
> - struct task_struct *parent = NULL;
> - struct task_struct *task = root;
> - int nr_pids = 0;
> - int ret = -EBUSY;
> + struct ckpt_ctx *ctx = (struct ckpt_ctx *) data;
>
> - read_lock(&tasklist_lock);
> - while (1) {
> - ckpt_debug("consider task %d\n", task_pid_vnr(task));
> - if (task_ptrace(task) & PT_PTRACED)
> - break;
> - /*
> - * Set task->checkpoint_ctx of all non-zombie descendants.
> - * If a descendant already has a ->checkpoint_ctx, it
> - * must be a coordinator (for a different restart ?) so
> - * we fail.
> - *
> - * Note that own ancestors cannot interfere since they
> - * won't descend past us, as own ->checkpoint_ctx must
> - * already be set.
> - */
> - if (!task->exit_state) {
> - if (!set_task_ctx(task, ctx))
> - break;
> - ckpt_debug("prepare task %d\n", task_pid_vnr(task));
> - wake_up_process(task);
> - nr_pids++;
> - }
> + ckpt_debug("consider task %d\n", task_pid_vnr(task));
>
> - /* if has children - proceed with child */
> - if (!list_empty(&task->children)) {
> - parent = task;
> - task = list_entry(task->children.next,
> - struct task_struct, sibling);
> - continue;
> - }
> - while (task != root) {
> - /* if has sibling - proceed with sibling */
> - if (!list_is_last(&task->sibling, &parent->children)) {
> - task = list_entry(task->sibling.next,
> - struct task_struct, sibling);
> - break;
> - }
> -
> - /* else, trace back to parent and proceed */
> - task = parent;
> - parent = parent->real_parent;
> - }
> - if (task == root) {
> - /* in case root task is multi-threaded */
> - root = task = next_thread(task);
> - if (root == leader) {
> - ret = 0;
> - break;
> - }
> - }
> + if (task_ptrace(task) & PT_PTRACED) {
> + ckpt_debug("ptraced task %d\n", task_pid_vnr(task));
> + return -EBUSY;
> }
> - read_unlock(&tasklist_lock);
> - ckpt_debug("nr %d/%d ret %d\n", ctx->nr_pids, nr_pids, ret);
> +
> + /*
> + * Set task->checkpoint_ctx of all non-zombie descendants.
> + * If a descendant already has a ->checkpoint_ctx, it
> + * must be a coordinator (for a different restart ?) so
> + * we fail.
> + *
> + * Note that own ancestors cannot interfere since they
> + * won't descend past us, as own ->checkpoint_ctx must
> + * already be set.
> + */
> + if (!task->exit_state) {
> + if (!set_task_ctx(task, ctx))
> + return -EBUSY;
> + ckpt_debug("prepare task %d\n", task_pid_vnr(task));
> + wake_up_process(task);
> + return 1;
> + }
> +
> + return 0;
> +}
> +
> +static int prepare_descendants(struct ckpt_ctx *ctx, struct task_struct *root)
> +{
> + int nr_pids;
> +
> + nr_pids = walk_task_subtree(root, __prepare_descendants, ctx);
> + ckpt_debug("nr %d/%d\n", ctx->nr_pids, nr_pids);
> + if (nr_pids < 0)
> + return nr_pids;
>
> /*
> * Actual tasks count may exceed ctx->nr_pids due of 'dead'
> * tasks used as place-holders for PGIDs, but not fall short.
> */
> - if (!ret && (nr_pids < ctx->nr_pids))
> - ret = -ESRCH;
> + if (nr_pids < ctx->nr_pids)
> + return -ESRCH;
>
> atomic_set(&ctx->nr_total, nr_pids);
> - return ret;
> + return nr_pids;
> }
>
> static int wait_all_tasks_finish(struct ckpt_ctx *ctx)
> @@ -991,6 +970,12 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
> return -EBUSY;
> }
>
> + /*
> + * From now on we are committed to the restart. If anything
> + * fails, we'll wipe out the entire subtree below us, to
> + * ensure proper cleanup.
> + */
> +
> if (ctx->uflags & RESTART_TASKSELF) {
> ret = restore_task(ctx);
> ckpt_debug("restore task: %d\n", ret);
> diff --git a/checkpoint/sys.c b/checkpoint/sys.c
> index 77613d7..7604089 100644
> --- a/checkpoint/sys.c
> +++ b/checkpoint/sys.c
> @@ -276,6 +276,61 @@ void ckpt_ctx_put(struct ckpt_ctx *ctx)
> ckpt_ctx_free(ctx);
> }
>
> +int walk_task_subtree(struct task_struct *root,
> + int (*func)(struct task_struct *, void *),
> + void *data)
> +{
> +
> + struct task_struct *leader = root;
> + struct task_struct *parent = NULL;
> + struct task_struct *task = root;
> + int total = 0;
> + int ret;
> +
> + read_lock(&tasklist_lock);
> + while (1) {
> + /* invoke callback on this task */
> + ret = func(task, data);
> + if (ret < 0)
> + break;
> +
> + total += ret;
> +
> + /* if has children - proceed with child */
> + if (!list_empty(&task->children)) {
> + parent = task;
> + task = list_entry(task->children.next,
> + struct task_struct, sibling);
> + continue;
> + }
> +
> + while (task != root) {
> + /* if has sibling - proceed with sibling */
> + if (!list_is_last(&task->sibling, &parent->children)) {
> + task = list_entry(task->sibling.next,
> + struct task_struct, sibling);
> + break;
> + }
> +
> + /* else, trace back to parent and proceed */
> + task = parent;
> + parent = parent->real_parent;
> + }
> +
> + if (task == root) {
> + /* in case root task is multi-threaded */
> + root = task = next_thread(task);
> + if (root == leader)
> + break;
> + }
> + }
> + read_unlock(&tasklist_lock);
> +
> + ckpt_debug("total %d ret %d\n", total, ret);
> + return (ret < 0 ? ret : total);
> +}
> +
> +
> /**
> * sys_checkpoint - checkpoint a container
> * @pid: pid of the container init(1) process
> diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
> index b7f1796..12210e4 100644
> --- a/include/linux/checkpoint.h
> +++ b/include/linux/checkpoint.h
> @@ -50,6 +50,9 @@
> RESTART_FROZEN | \
> RESTART_GHOST)
>
> +extern int walk_task_subtree(struct task_struct *task,
> + int (*func)(struct task_struct *, void *),
> + void *data);
> extern void exit_checkpoint(struct task_struct *tsk);
>
> extern int ckpt_kwrite(struct ckpt_ctx *ctx, void *buf, int count);
> --
> 1.6.0.4
>
> _______________________________________________
> Containers mailing list
> Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
> https://lists.linux-foundation.org/mailman/listinfo/containers
next prev parent reply other threads:[~2009-10-01 16:41 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-10-01 1:47 Simplify restart logic and fix races Oren Laadan
[not found] ` <1254361634-30076-1-git-send-email-orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
2009-10-01 1:47 ` [PATCH 1/5] c/r: simplify logic of tracking restarting tasks Oren Laadan
[not found] ` <1254361634-30076-2-git-send-email-orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
2009-10-01 15:58 ` Serge E. Hallyn
[not found] ` <20091001155823.GB20565-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-10-01 16:21 ` Oren Laadan
2009-10-01 1:47 ` [PATCH 2/5] c/r: let entire thread group in sys_restart before restoring a thread Oren Laadan
[not found] ` <1254361634-30076-3-git-send-email-orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
2009-10-01 16:20 ` Serge E. Hallyn
[not found] ` <20091001162026.GC20565-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-10-01 19:03 ` Oren Laadan
2009-10-01 1:47 ` [PATCH 3/5] c/r: introduce walk_task_subtree() to iterate through descendants Oren Laadan
[not found] ` <1254361634-30076-4-git-send-email-orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
2009-10-01 16:41 ` Serge E. Hallyn [this message]
2009-10-01 1:47 ` [PATCH 4/5] c/r: fix restart failure due to a race with ghost/dead tasks Oren Laadan
[not found] ` <1254361634-30076-5-git-send-email-orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
2009-10-01 17:21 ` Serge E. Hallyn
2009-10-01 1:47 ` [PATCH 5/5] c/r: defer restore of blocked signals mask during restart Oren Laadan
[not found] ` <1254361634-30076-6-git-send-email-orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
2009-10-01 17:48 ` Serge E. Hallyn
2009-10-01 3:39 ` Simplify restart logic and fix races Serge E. Hallyn
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20091001164109.GD20565@us.ibm.com \
--to=serue-r/jw6+rmf7hqt0dzr+alfa@public.gmane.org \
--cc=containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org \
--cc=orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.