From: Oren Laadan <orenl@cs.columbia.edu>
To: Andrey Mirkin <major@openvz.org>
Cc: containers@lists.linux-foundation.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH 2/2] Add support for in-kernel process creation during restart
Date: Tue, 25 Nov 2008 15:17:56 -0500 [thread overview]
Message-ID: <492C5D74.7040302@cs.columbia.edu> (raw)
In-Reply-To: <1227541175-30301-3-git-send-email-major@openvz.org>
Hi,
Andrey Mirkin wrote:
> All work (process tree creation and process state restore) now can be
> done in kernel.
>
> Task structure in image file is extended with 2 fields to make in-kernel
> process creation more easy.
>
> Signed-off-by: Andrey Mirkin <major@openvz.org>
> ---
> checkpoint/checkpoint.c | 17 ++++
> checkpoint/restart.c | 4 +-
> checkpoint/rstr_process.c | 201 +++++++++++++++++++++++++++++++++++++++-
> include/linux/checkpoint.h | 2 +
> include/linux/checkpoint_hdr.h | 2 +
> 5 files changed, 223 insertions(+), 3 deletions(-)
>
> diff --git a/checkpoint/checkpoint.c b/checkpoint/checkpoint.c
> index 04b0c4a..ae3326e 100644
> --- a/checkpoint/checkpoint.c
> +++ b/checkpoint/checkpoint.c
> @@ -173,6 +173,21 @@ static int cr_write_tail(struct cr_ctx *ctx)
> return ret;
> }
>
> +static int cr_count_children(struct cr_ctx *ctx, struct task_struct *tsk)
> +{
> + int num = 0;
> + struct task_struct *child;
> +
> + read_lock(&tasklist_lock);
> + list_for_each_entry(child, &tsk->children, sibling) {
> + if (child->parent != tsk)
> + continue;
> + num++;
> + }
> + read_unlock(&tasklist_lock);
> + return num;
> +}
> +
This information (and the pids of children) is already available in
the ctx->pids_arr.
> /* dump the task_struct of a given task */
> static int cr_write_task_struct(struct cr_ctx *ctx, struct task_struct *t)
> {
> @@ -189,6 +204,8 @@ static int cr_write_task_struct(struct cr_ctx *ctx, struct task_struct *t)
> hh->exit_code = t->exit_code;
> hh->exit_signal = t->exit_signal;
>
> + hh->vpid = task_pid_nr_ns(t, ctx->root_nsproxy->pid_ns);
> + hh->children_nr = cr_count_children(ctx, t);
> hh->task_comm_len = TASK_COMM_LEN;
Same here (hmm... well, if it weren't skipped below, actually...)
>
> /* FIXME: save remaining relevant task_struct fields */
> diff --git a/checkpoint/restart.c b/checkpoint/restart.c
[...]
> +static int restart_thread(void *arg)
> +{
> + struct thr_context *thr_ctx = arg;
> + struct cr_ctx *ctx;
> + struct cr_hdr_task *ht;
> + int ret;
> + int i;
> +
> + current->state = TASK_UNINTERRUPTIBLE;
Why do you not want it to be interruptible ?
> +
> + ctx = thr_ctx->ctx;
> + ht = thr_ctx->ht;
> +
> + if (ht->vpid == 1) {
> + ctx->root_task = current;
> + ctx->root_nsproxy = current->nsproxy;
> +
> + get_task_struct(ctx->root_task);
> + get_nsproxy(ctx->root_nsproxy);
> + }
> +
> + ret = cr_rstr_task_struct(ctx, ht);
> + cr_debug("rstr_task_struct: ret %d\n", ret);
> + if (ret < 0)
> + goto out;
> + ret = cr_read_mm(ctx);
> + cr_debug("memory: ret %d\n", ret);
> + if (ret < 0)
> + goto out;
> + ret = cr_read_files(ctx);
> + cr_debug("files: ret %d\n", ret);
> + if (ret < 0)
> + goto out;
> + ret = cr_read_thread(ctx);
> + cr_debug("thread: ret %d\n", ret);
> + if (ret < 0)
> + goto out;
> + ret = cr_read_cpu(ctx);
> + cr_debug("cpu: ret %d\n", ret);
> +
> + for (i = 0; i < ht->children_nr; i++) {
> + ret = cr_restart_process(ctx);
> + if (ret < 0)
> + break;
> + }
Here, eventually we'll need the pids of those processes; instead of
keeping 'ht->children_nr', you could loop through the pids array in
the ctx and create those who have their parent set to us.
> +
> +out:
> + thr_ctx->error = ret;
> + complete(&thr_ctx->complete);
> +
> + if (!ret && (ht->state & (EXIT_ZOMBIE|EXIT_DEAD))) {
> + do_exit(ht->exit_code);
> + } else {
> + __set_current_state(TASK_UNINTERRUPTIBLE);
> + }
> + schedule();
Using TASK_UNINTERRUPTIBLE, may keep a task unkill-able for potentially
long time. Any reason not to use TASK_INTERRUPTIBLE ?
> +
> + cr_debug("leaked %d/%d %p\n", task_pid_nr(current), task_pid_vnr(current), current->mm);
> +
> + complete_and_exit(NULL, 0);
> + return ret;
> +}
> +
> +static int cr_restart_process(struct cr_ctx *ctx)
> +{
> + struct thr_context thr_ctx;
> + struct task_struct *tsk;
> + struct cr_hdr_task *ht = cr_hbuf_get(ctx, sizeof(*ht));
> + int pid, parent, ret = -EINVAL;
> +
> + thr_ctx.ctx = ctx;
> + thr_ctx.error = 0;
> + init_completion(&thr_ctx.complete);
> +
> + parent = cr_read_obj_type(ctx, ht, sizeof(*ht), CR_HDR_TASK);
> + if (parent < 0) {
> + ret = parent;
> + goto out;
> + } else if (parent != 0)
> + goto out;
> +
> + thr_ctx.ht = ht;
> +
> + if (ht->vpid == 1) {
> + /* We should also create container here */
> + pid = cr_kernel_thread(restart_thread, &thr_ctx,
> + CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
> + CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNET, 0);
why do you want to start the container in the kernel ?
instead, we can create a new containers in user space, and have the init task
call the restart syscall from inside there.
> + } else {
> + /* We should fork here a child with saved pid and
> + correct flags */
> + pid = cr_kernel_thread(restart_thread, &thr_ctx, 0, ht->vpid);
> + }
> + if (pid < 0) {
> + ret = pid;
> + goto out;
> + }
> + read_lock(&tasklist_lock);
> + tsk = find_task_by_vpid(pid);
> + if (tsk)
> + get_task_struct(tsk);
> + read_unlock(&tasklist_lock);
> + if (tsk == NULL) {
> + ret = -ESRCH;
> + goto out;
> + }
> +
> + wait_for_completion(&thr_ctx.complete);
> + wait_task_inactive(tsk, 0);
> + ret = thr_ctx.error;
> + put_task_struct(tsk);
> +
> +out:
> + cr_hbuf_put(ctx, sizeof(*ht));
> + return ret;
> +}
> +
>
> int do_restart_in_kernel(struct cr_ctx *ctx)
> {
> - return -ENOSYS;
> + int ret, size, parent;
> + struct cr_hdr_tree *hh = cr_hbuf_get(ctx, sizeof(*hh));
> +
> + ret = cr_read_head(ctx);
> + if (ret < 0)
> + goto out;
> +
> + ret = -EINVAL;
> + parent = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_TREE);
> + if (parent < 0) {
> + ret = parent;
> + goto out;
> + } else if (parent != 0)
> + goto out;
> +
> + size = sizeof(*ctx->pids_arr) * hh->tasks_nr;
> + if (size < 0)
> + goto out;
> + ctx->file->f_pos += size;
You can't do that - the file may be non-seekable (e.g. a socket); must read
the data in.
Besides, if instead of skipping this data you read it in, then you could use
it as noted above.
> +
> + ret = cr_restart_process(ctx);
> + if (ret < 0)
> + goto out;
> +
> + ret = cr_read_tail(ctx);
[...]
Thanks,
Oren.
next prev parent reply other threads:[~2008-11-25 20:18 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-11-24 15:39 [PATCH 0/2] In-kernel process restart Andrey Mirkin
2008-11-24 15:39 ` [PATCH 1/2] Add flags for user-space and in-kernel process creation Andrey Mirkin
[not found] ` <1227541175-30301-2-git-send-email-major-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
2008-11-24 15:39 ` [PATCH 2/2] Add support for in-kernel process creation during restart Andrey Mirkin
2008-11-24 16:02 ` [PATCH 1/2] Add flags for user-space and in-kernel process creation Louis Rilling
2008-11-24 16:02 ` Louis Rilling
2008-11-26 4:55 ` Andrey Mirkin
[not found] ` <20081124160211.GE27238-Hu8+6S1rdjywhHL9vcZdMVaTQe2KTcn/@public.gmane.org>
2008-11-26 4:55 ` Andrey Mirkin
2008-11-24 15:39 ` [PATCH 2/2] Add support for in-kernel process creation during restart Andrey Mirkin
2008-11-25 0:45 ` Alexey Dobriyan
2008-11-26 5:07 ` Andrey Mirkin
[not found] ` <20081125004024.GA4440-2ev+ksY9ol182hYKe6nXyg@public.gmane.org>
2008-11-26 5:07 ` Andrey Mirkin
2008-11-25 20:17 ` Oren Laadan [this message]
2008-11-26 11:58 ` Andrey Mirkin
[not found] ` <492C5D74.7040302-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2008-11-26 11:58 ` Andrey Mirkin
[not found] ` <1227541175-30301-3-git-send-email-major-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
2008-11-25 0:45 ` Alexey Dobriyan
2008-11-25 20:17 ` Oren Laadan
2008-11-25 20:02 ` [PATCH 0/2] In-kernel process restart Oren Laadan
[not found] ` <492C59DD.5070401-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2008-11-26 11:44 ` Andrey Mirkin
2008-11-26 11:44 ` Andrey Mirkin
[not found] ` <1227541175-30301-1-git-send-email-major-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
2008-11-24 15:39 ` [PATCH 1/2] Add flags for user-space and in-kernel process creation Andrey Mirkin
2008-11-25 20:02 ` [PATCH 0/2] In-kernel process restart Oren Laadan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=492C5D74.7040302@cs.columbia.edu \
--to=orenl@cs.columbia.edu \
--cc=containers@lists.linux-foundation.org \
--cc=linux-kernel@vger.kernel.org \
--cc=major@openvz.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.