Linux Container Development
 help / color / mirror / Atom feed
From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
To: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org,
	Dave Hansen
	<dave-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
Subject: Re: [RFC v14-rc2][PATCH 14/29] Checkpoint multiple processes
Date: Mon, 6 Apr 2009 20:31:11 -0700	[thread overview]
Message-ID: <20090407033111.GI12316@us.ibm.com> (raw)
In-Reply-To: <1238477349-11029-15-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>

Oren Laadan [orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org] wrote:
| From ee2f3b5c8548136229cc2f41c5271b0a81ab8a4d Mon Sep 17 00:00:00 2001
| From: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
| Date: Mon, 30 Mar 2009 15:06:13 -0400
| Subject: [PATCH 14/29] Checkpoint multiple processes
| 
| Checkpointing of multiple processes works by recording the tasks tree
| structure below a given task (usually this task is the container init).
| 
| For a given task, do a DFS scan of the tasks tree and collect them
| into an array (keeping a reference to each task). Using DFS simplifies
| the recreation of tasks either in user space or kernel space. For each
| task collected, test if it can be checkpointed, and save its pid, tgid,
| and ppid.
| 
| The actual work is divided into two passes: a first scan counts the
| tasks, then memory is allocated and a second scan fills the array.
| 
| The logic is suitable for creation of processes during restart either
| in userspace or by the kernel.
| 
| Currently we ignore threads and zombies, as well as session ids.
| 
| Changelog[v14]:
|   - Refuse non-self checkpoint if target task isn't frozen
|   - Revert change to pr_debug(), back to cr_debug()
|   - Use only unsigned fields in checkpoint headers
|   - Check retval of cr_tree_count_tasks() in cr_build_tree()
|   - Discard 'h.parent' field
|   - Check whether calls to cr_hbuf_get() fail
| 
| Changelog[v13]:
|   - Release tasklist_lock in error path in cr_tree_count_tasks()
|   - Use separate index for 'tasks_arr' and 'hh' in cr_write_pids()
| 
| Changelog[v12]:
|   - Replace obsolete cr_debug() with pr_debug()
| 
| Signed-off-by: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
| Acked-by: Serge Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
| ---
|  checkpoint/checkpoint.c        |  228 ++++++++++++++++++++++++++++++++++++++--
|  checkpoint/sys.c               |   16 +++
|  include/linux/checkpoint.h     |    3 +
|  include/linux/checkpoint_hdr.h |   13 ++-
|  4 files changed, 248 insertions(+), 12 deletions(-)
| 
| diff --git a/checkpoint/checkpoint.c b/checkpoint/checkpoint.c
| index 25229d3..7f5eee6 100644
| --- a/checkpoint/checkpoint.c
| +++ b/checkpoint/checkpoint.c
| @@ -244,11 +244,6 @@ static int cr_write_task(struct cr_ctx *ctx, struct task_struct *t)
|  {
|  	int ret;
|  
| -	if (t->state == TASK_DEAD) {
| -		pr_warning("c/r: task may not be in state TASK_DEAD\n");
| -		return -EAGAIN;
| -	}
| -
|  	ret = cr_write_task_struct(ctx, t);
|  	cr_debug("task_struct: ret %d\n", ret);
|  	if (ret < 0)
| @@ -271,6 +266,211 @@ static int cr_write_task(struct cr_ctx *ctx, struct task_struct *t)
|  	return ret;
|  }
|  
| +/* dump all tasks in ctx->tasks_arr[] */
| +static int cr_write_all_tasks(struct cr_ctx *ctx)
| +{
| +	int n, ret = 0;
| +
| +	for (n = 0; n < ctx->tasks_nr; n++) {
| +		cr_debug("dumping task #%d\n", n);
| +		ret = cr_write_task(ctx, ctx->tasks_arr[n]);
| +		if (ret < 0)
| +			break;
| +	}
| +
| +	return ret;
| +}
| +
| +static int cr_may_checkpoint_task(struct task_struct *t, struct cr_ctx *ctx)
| +{
| +	cr_debug("check %d\n", task_pid_nr_ns(t, ctx->root_nsproxy->pid_ns));
| +
| +	if (t->state == TASK_DEAD) {
| +		pr_warning("c/r: task %d is TASK_DEAD\n", task_pid_vnr(t));
| +		return -EAGAIN;
| +	}
| +
| +	if (!ptrace_may_access(t, PTRACE_MODE_READ))
| +		return -EPERM;
| +
| +	/* verify that the task is frozen (unless self) */
| +	if (t != current && !frozen(t))
| +		return -EBUSY;
| +
| +	/* FIXME: change this for nested containers */
| +	if (task_nsproxy(t) != ctx->root_nsproxy)
| +		return -EPERM;
| +
| +	return 0;
| +}
| +
| +#define CR_HDR_PIDS_CHUNK	256
| +
| +static int cr_write_pids(struct cr_ctx *ctx)
| +{
| +	struct cr_hdr_pids *hh;
| +	struct pid_namespace *ns;
| +	struct task_struct *task;
| +	struct task_struct **tasks_arr;
| +	int tasks_nr, n, pos = 0, ret = 0;
| +
| +	ns = ctx->root_nsproxy->pid_ns;
| +	tasks_arr = ctx->tasks_arr;
| +	tasks_nr = ctx->tasks_nr;
| +	BUG_ON(tasks_nr <= 0);
| +
| +	hh = cr_hbuf_get(ctx, sizeof(*hh) * CR_HDR_PIDS_CHUNK);
| +	if (!hh)
| +		return -ENOMEM;
| +
| +	do {
| +		rcu_read_lock();
| +		for (n = 0; n < min(tasks_nr, CR_HDR_PIDS_CHUNK); n++) {
| +			task = tasks_arr[pos];
| +
| +			/* is this task cool ? */
| +			ret = cr_may_checkpoint_task(task, ctx);
| +			if (ret < 0) {
| +				rcu_read_unlock();
| +				goto out;
| +			}
| +			hh[n].vpid = task_pid_nr_ns(task, ns);
| +			hh[n].vtgid = task_tgid_nr_ns(task, ns);
| +			hh[n].vppid = task_tgid_nr_ns(task->real_parent, ns);
| +			cr_debug("task[%d]: vpid %d vtgid %d parent %d\n", pos,
| +				 hh[n].vpid, hh[n].vtgid, hh[n].vppid);
| +			pos++;
| +		}
| +		rcu_read_unlock();
| +
| +		n = min(tasks_nr, CR_HDR_PIDS_CHUNK);
| +		ret = cr_kwrite(ctx, hh, n * sizeof(*hh));
| +		if (ret < 0)
| +			break;
| +
| +		tasks_nr -= n;
| +	} while (tasks_nr > 0);
| + out:
| +	cr_hbuf_put(ctx, sizeof(*hh));
| +	return ret;
| +}
| +
| +/* count number of tasks in tree (and optionally fill pid's in array) */
| +static int cr_tree_count_tasks(struct cr_ctx *ctx)
| +{
| +	struct task_struct *root = ctx->root_task;
| +	struct task_struct *task = root;
| +	struct task_struct *parent = NULL;
| +	struct task_struct **tasks_arr = ctx->tasks_arr;
| +	int tasks_nr = ctx->tasks_nr;
| +	int nr = 0;
| +
| +	read_lock(&tasklist_lock);
| +
| +	/* count tasks via DFS scan of the tree */
| +	while (1) {
| +		if (tasks_arr) {
| +			/* unlikely... but if so then try again later */
| +			if (nr == tasks_nr) {
| +				nr = -EAGAIN;	/* cleanup in cr_ctx_free() */
| +				break;
| +			}
| +			tasks_arr[nr] = task;
| +			get_task_struct(task);

Can we do an early cr_may_checkpoint_task() check here?

Sukadev

  parent reply	other threads:[~2009-04-07  3:31 UTC|newest]

Thread overview: 66+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-03-31  5:28 [RFC v14-rc2][PATCH 00/29] Kernel based checkpoint/restart Oren Laadan
     [not found] ` <1238477349-11029-1-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-03-31  5:28   ` [RFC v14-rc2][PATCH 01/29] Create syscalls: sys_checkpoint, sys_restart Oren Laadan
2009-03-31  5:28   ` [RFC v14-rc2][PATCH 02/29] Checkpoint/restart: initial documentation Oren Laadan
     [not found]     ` <1238477349-11029-3-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-04-07  3:22       ` Sukadev Bhattiprolu
2009-03-31  5:28   ` [RFC v14-rc2][PATCH 03/29] Make file_pos_read/write() public Oren Laadan
2009-03-31  5:28   ` [RFC v14-rc2][PATCH 04/29] General infrastructure for checkpoint restart Oren Laadan
     [not found]     ` <1238477349-11029-5-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-04-07  3:24       ` Sukadev Bhattiprolu
2009-03-31  5:28   ` [RFC v14-rc2][PATCH 05/29] x86 support for checkpoint/restart Oren Laadan
     [not found]     ` <1238477349-11029-6-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-04-07  3:25       ` Sukadev Bhattiprolu
2009-03-31  5:28   ` [RFC v14-rc2][PATCH 06/29] Dump memory address space Oren Laadan
     [not found]     ` <1238477349-11029-7-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-04-07  3:26       ` Sukadev Bhattiprolu
     [not found]         ` <20090407032636.GD12316-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-04-07  4:57           ` Oren Laadan
2009-03-31  5:28   ` [RFC v14-rc2][PATCH 07/29] Restore " Oren Laadan
2009-03-31  5:28   ` [RFC v14-rc2][PATCH 08/29] Infrastructure for shared objects Oren Laadan
2009-03-31  5:28   ` [RFC v14-rc2][PATCH 09/29] Dump open file descriptors Oren Laadan
     [not found]     ` <1238477349-11029-10-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-04-07  3:28       ` Sukadev Bhattiprolu
2009-03-31  5:28   ` [RFC v14-rc2][PATCH 10/29] actually use f_op in checkpoint code Oren Laadan
     [not found]     ` <1238477349-11029-11-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-03-31 18:31       ` Oren Laadan
2009-04-01 18:54       ` Serge E. Hallyn
2009-04-07  3:29       ` Sukadev Bhattiprolu
     [not found]         ` <20090407032912.GF12316-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-04-07  5:36           ` Oren Laadan
2009-03-31  5:28   ` [RFC v14-rc2][PATCH 11/29] add generic checkpoint f_op to ext fses Oren Laadan
2009-03-31  5:28   ` [RFC v14-rc2][PATCH 12/29] Restore open file descriptors Oren Laadan
     [not found]     ` <1238477349-11029-13-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-04-07  3:29       ` Sukadev Bhattiprolu
2009-03-31  5:28   ` [RFC v14-rc2][PATCH 13/29] External checkpoint of a task other than ourself Oren Laadan
     [not found]     ` <1238477349-11029-14-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-04-07  3:30       ` Sukadev Bhattiprolu
2009-03-31  5:28   ` [RFC v14-rc2][PATCH 14/29] Checkpoint multiple processes Oren Laadan
     [not found]     ` <1238477349-11029-15-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-04-07  3:31       ` Sukadev Bhattiprolu [this message]
     [not found]         ` <20090407033111.GI12316-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-04-07  5:12           ` Oren Laadan
2009-03-31  5:28   ` [RFC v14-rc2][PATCH 15/29] Restart " Oren Laadan
     [not found]     ` <1238477349-11029-16-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-04-07  3:33       ` Sukadev Bhattiprolu
     [not found]         ` <20090407033315.GJ12316-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-04-07  5:31           ` Oren Laadan
     [not found]             ` <49DAE526.6010900-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-04-07 16:29               ` Sukadev Bhattiprolu
2009-03-31  5:28   ` [RFC v14-rc2][PATCH 16/29] A new file type (CR_FD_OBJREF) for a file descriptor already setup Oren Laadan
     [not found]     ` <1238477349-11029-17-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-04-01 13:59       ` Serge E. Hallyn
     [not found]         ` <20090401135952.GA16973-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-04-01 14:13           ` Oren Laadan
2009-04-01 18:36       ` Serge E. Hallyn
2009-04-03 15:46       ` Dan Smith
     [not found]         ` <87y6uhyc3j.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
2009-04-03 16:25           ` Oren Laadan
     [not found]             ` <49D63865.1030807-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-04-03 16:30               ` Dan Smith
2009-04-03 16:54               ` Dave Hansen
2009-03-31  5:28   ` [RFC v14-rc2][PATCH 17/29] Checkpoint open pipes Oren Laadan
     [not found]     ` <1238477349-11029-18-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-04-01 19:47       ` Serge E. Hallyn
2009-03-31  5:28   ` [RFC v14-rc2][PATCH 18/29] Restore " Oren Laadan
     [not found]     ` <1238477349-11029-19-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-04-01 20:34       ` Serge E. Hallyn
2009-03-31  5:28   ` [RFC v14-rc2][PATCH 19/29] Record 'struct file' object instead of the file name for VMAs Oren Laadan
     [not found]     ` <1238477349-11029-20-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-04-01 21:45       ` Serge E. Hallyn
2009-03-31  5:29   ` [RFC v14-rc2][PATCH 20/29] Prepare to support shared memory Oren Laadan
2009-03-31  5:29   ` [RFC v14-rc2][PATCH 21/29] Dump anonymous- and file-mapped- " Oren Laadan
     [not found]     ` <1238477349-11029-22-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-04-01 23:06       ` Serge E. Hallyn
     [not found]         ` <20090401230657.GB27725-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-04-01 23:18           ` Oren Laadan
     [not found]             ` <49D3F636.1070303-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-04-01 23:32               ` Serge E. Hallyn
2009-03-31  5:29   ` [RFC v14-rc2][PATCH 22/29] Restore " Oren Laadan
     [not found]     ` <1238477349-11029-23-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-04-02 16:59       ` Serge E. Hallyn
2009-03-31  5:29   ` [RFC v14-rc2][PATCH 23/29] s390: Expose a constant for the number of words representing the CRs Oren Laadan
2009-03-31  5:29   ` [RFC v14-rc2][PATCH 24/29] c/r: Add CR_COPY() macro (v4) Oren Laadan
     [not found]     ` <1238477349-11029-25-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-04-01 23:20       ` Serge E. Hallyn
     [not found]         ` <20090401232013.GA31361-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-04-02 19:00           ` Dan Smith
     [not found]             ` <87vdpmnan2.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
2009-04-02 19:06               ` Serge E. Hallyn
     [not found]                 ` <20090402190612.GA24390-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-04-02 20:22                   ` Dan Smith
     [not found]                     ` <87r60an6us.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
2009-04-05 20:25                       ` Oren Laadan
2009-03-31  5:29   ` [RFC v14-rc2][PATCH 25/29] s390: define s390-specific checkpoint-restart code (v7) Oren Laadan
2009-03-31  5:29   ` [RFC v14-rc2][PATCH 26/29] powerpc: provide APIs for validating and updating DABR Oren Laadan
2009-03-31  5:29   ` [RFC v14-rc2][PATCH 27/29] powerpc: checkpoint/restart implementation Oren Laadan
2009-03-31  5:29   ` [RFC v14-rc2][PATCH 28/29] powerpc: wire up checkpoint and restart syscalls Oren Laadan
2009-03-31  5:29   ` [RFC v14-rc2][PATCH 29/29] powerpc: enable checkpoint support in Kconfig Oren Laadan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090407033111.GI12316@us.ibm.com \
    --to=sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org \
    --cc=containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org \
    --cc=dave-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org \
    --cc=orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox