All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Serge E. Hallyn" <serue@us.ibm.com>
To: Oren Laadan <orenl@cs.columbia.edu>
Cc: dave@linux.vnet.ibm.com, containers@lists.linux-foundation.org,
	jeremy@goop.org, linux-kernel@vger.kernel.org, arnd@arndb.de
Subject: Re: [RFC v5][PATCH 2/8] General infrastructure for checkpoint restart
Date: Mon, 15 Sep 2008 16:15:25 -0500	[thread overview]
Message-ID: <20080915211525.GC28683@us.ibm.com> (raw)
In-Reply-To: <1221347167-9956-3-git-send-email-orenl@cs.columbia.edu>

Quoting Oren Laadan (orenl@cs.columbia.edu):
> Add those interfaces, as well as helpers needed to easily manage the
> file format. The code is roughly broken out as follows:
> 
> checkpoint/sys.c - user/kernel data transfer, as well as setup of the
> checkpoint/restart context (a per-checkpoint data structure for
> housekeeping)
> 
> checkpoint/checkpoint.c - output wrappers and basic checkpoint handling
> 
> checkpoint/restart.c - input wrappers and basic restart handling
> 
> Patches to add the per-architecture support as well as the actual
> work to do the memory checkpoint follow in subsequent patches.
> 
> Signed-off-by: Oren Laadan <orenl@cs.columbia.edu>
> ---
>  Makefile                       |    2 +-
>  checkpoint/Makefile            |    2 +-
>  checkpoint/checkpoint.c        |  174 ++++++++++++++++++++++++++++++++
>  checkpoint/restart.c           |  189 ++++++++++++++++++++++++++++++++++
>  checkpoint/sys.c               |  218 +++++++++++++++++++++++++++++++++++++++-
>  include/linux/checkpoint.h     |   60 +++++++++++
>  include/linux/checkpoint_hdr.h |   75 ++++++++++++++
>  include/linux/magic.h          |    3 +
>  8 files changed, 717 insertions(+), 6 deletions(-)
>  create mode 100644 checkpoint/checkpoint.c
>  create mode 100644 checkpoint/restart.c
>  create mode 100644 include/linux/checkpoint.h
>  create mode 100644 include/linux/checkpoint_hdr.h
> 
> diff --git a/Makefile b/Makefile
> index f448e00..a558ad2 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -619,7 +619,7 @@ export mod_strip_cmd
> 
> 
>  ifeq ($(KBUILD_EXTMOD),)
> -core-y		+= kernel/ mm/ fs/ ipc/ security/ crypto/ block/
> +core-y		+= kernel/ mm/ fs/ ipc/ security/ crypto/ block/ checkpoint/
> 
>  vmlinux-dirs	:= $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
>  		     $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
> diff --git a/checkpoint/Makefile b/checkpoint/Makefile
> index 07d018b..d2df68c 100644
> --- a/checkpoint/Makefile
> +++ b/checkpoint/Makefile
> @@ -2,4 +2,4 @@
>  # Makefile for linux checkpoint/restart.
>  #
> 
> -obj-$(CONFIG_CHECKPOINT_RESTART) += sys.o
> +obj-$(CONFIG_CHECKPOINT_RESTART) += sys.o checkpoint.o restart.o
> diff --git a/checkpoint/checkpoint.c b/checkpoint/checkpoint.c
> new file mode 100644
> index 0000000..e5e188f
> --- /dev/null
> +++ b/checkpoint/checkpoint.c
> @@ -0,0 +1,174 @@
> +/*
> + *  Checkpoint logic and helpers
> + *
> + *  Copyright (C) 2008 Oren Laadan
> + *
> + *  This file is subject to the terms and conditions of the GNU General Public
> + *  License.  See the file COPYING in the main directory of the Linux
> + *  distribution for more details.
> + */
> +
> +#include <linux/version.h>
> +#include <linux/sched.h>
> +#include <linux/time.h>
> +#include <linux/fs.h>
> +#include <linux/file.h>
> +#include <linux/dcache.h>
> +#include <linux/mount.h>
> +#include <linux/utsname.h>
> +#include <linux/magic.h>
> +#include <linux/checkpoint.h>
> +#include <linux/checkpoint_hdr.h>
> +
> +/**
> + * cr_write_obj - write a record described by a cr_hdr
> + * @ctx: checkpoint context
> + * @h: record descriptor
> + * @buf: record buffer
> + */
> +int cr_write_obj(struct cr_ctx *ctx, struct cr_hdr *h, void *buf)
> +{
> +	int ret;
> +
> +	ret = cr_kwrite(ctx, h, sizeof(*h));
> +	if (ret < 0)
> +		return ret;
> +	return cr_kwrite(ctx, buf, h->len);
> +}
> +
> +/**
> + * cr_write_string - write a string
> + * @ctx: checkpoint context
> + * @str: string pointer
> + * @len: string length
> + */
> +int cr_write_string(struct cr_ctx *ctx, char *str, int len)
> +{
> +	struct cr_hdr h;
> +
> +	h.type = CR_HDR_STRING;
> +	h.len = len;
> +	h.parent = 0;
> +
> +	return cr_write_obj(ctx, &h, str);
> +}
> +
> +/* write the checkpoint header */
> +static int cr_write_head(struct cr_ctx *ctx)
> +{
> +	struct cr_hdr h;
> +	struct cr_hdr_head *hh = cr_hbuf_get(ctx, sizeof(*hh));
> +	struct new_utsname *uts;
> +	struct timeval ktv;
> +	int ret;
> +
> +	h.type = CR_HDR_HEAD;
> +	h.len = sizeof(*hh);
> +	h.parent = 0;
> +
> +	do_gettimeofday(&ktv);
> +
> +	hh->magic = CHECKPOINT_MAGIC_HEAD;
> +	hh->major = (LINUX_VERSION_CODE >> 16) & 0xff;
> +	hh->minor = (LINUX_VERSION_CODE >> 8) & 0xff;
> +	hh->patch = (LINUX_VERSION_CODE) & 0xff;
> +
> +	hh->rev = CR_VERSION;
> +
> +	hh->flags = ctx->flags;
> +	hh->time = ktv.tv_sec;
> +
> +	uts = utsname();
> +	memcpy(hh->release, uts->release, __NEW_UTS_LEN);
> +	memcpy(hh->version, uts->version, __NEW_UTS_LEN);
> +	memcpy(hh->machine, uts->machine, __NEW_UTS_LEN);
> +
> +	ret = cr_write_obj(ctx, &h, hh);
> +	cr_hbuf_put(ctx, sizeof(*hh));
> +	return ret;
> +}
> +
> +/* write the checkpoint trailer */
> +static int cr_write_tail(struct cr_ctx *ctx)
> +{
> +	struct cr_hdr h;
> +	struct cr_hdr_tail *hh = cr_hbuf_get(ctx, sizeof(*hh));
> +	int ret;
> +
> +	h.type = CR_HDR_TAIL;
> +	h.len = sizeof(*hh);
> +	h.parent = 0;
> +
> +	hh->magic = CHECKPOINT_MAGIC_TAIL;
> +
> +	ret = cr_write_obj(ctx, &h, hh);
> +	cr_hbuf_put(ctx, sizeof(*hh));
> +	return ret;
> +}
> +
> +/* dump the task_struct of a given task */
> +static int cr_write_task_struct(struct cr_ctx *ctx, struct task_struct *t)
> +{
> +	struct cr_hdr h;
> +	struct cr_hdr_task *hh = cr_hbuf_get(ctx, sizeof(*hh));
> +	int ret;
> +
> +	h.type = CR_HDR_TASK;
> +	h.len = sizeof(*hh);
> +	h.parent = 0;
> +
> +	hh->state = t->state;
> +	hh->exit_state = t->exit_state;
> +	hh->exit_code = t->exit_code;
> +	hh->exit_signal = t->exit_signal;
> +
> +	hh->task_comm_len = TASK_COMM_LEN;
> +
> +	/* FIXME: save remaining relevant task_struct fields */
> +
> +	ret = cr_write_obj(ctx, &h, hh);
> +	cr_hbuf_put(ctx, sizeof(*hh));
> +	if (ret < 0)
> +		return ret;
> +
> +	return cr_write_string(ctx, t->comm, TASK_COMM_LEN);
> +}
> +
> +/* dump the entire state of a given task */
> +static int cr_write_task(struct cr_ctx *ctx, struct task_struct *t)
> +{
> +	int ret ;
> +
> +	if (t->state == TASK_DEAD) {
> +		pr_warning("CR: task may not be in state TASK_DEAD\n");
> +		return -EAGAIN;
> +	}
> +
> +	ret = cr_write_task_struct(ctx, t);
> +	cr_debug("ret %d\n", ret);
> +
> +	return ret;
> +}
> +
> +int do_checkpoint(struct cr_ctx *ctx)
> +{
> +	int ret;
> +
> +	/* FIX: need to test whether container is checkpointable */
> +
> +	ret = cr_write_head(ctx);
> +	if (ret < 0)
> +		goto out;
> +	ret = cr_write_task(ctx, current);
> +	if (ret < 0)
> +		goto out;
> +	ret = cr_write_tail(ctx);
> +	if (ret < 0)
> +		goto out;
> +
> +	/* on success, return (unique) checkpoint identifier */
> +	ret = ctx->crid;

This feels a little weird, since the ctx->crid actually was calculated
in cr_ctx_alloc(), called from sys_checkpoint().  I'd almost prefer
do_checkpoint() return 0 on success.

It may seem silly, but it seems to make the flow and layering clearer.

> +
> + out:
> +	return ret;
> +}
> diff --git a/checkpoint/restart.c b/checkpoint/restart.c
> new file mode 100644
> index 0000000..ef6bc37
> --- /dev/null
> +++ b/checkpoint/restart.c
> @@ -0,0 +1,189 @@
> +/*
> + *  Restart logic and helpers
> + *
> + *  Copyright (C) 2008 Oren Laadan
> + *
> + *  This file is subject to the terms and conditions of the GNU General Public
> + *  License.  See the file COPYING in the main directory of the Linux
> + *  distribution for more details.
> + */
> +
> +#include <linux/version.h>
> +#include <linux/sched.h>
> +#include <linux/file.h>
> +#include <linux/magic.h>
> +#include <linux/checkpoint.h>
> +#include <linux/checkpoint_hdr.h>
> +
> +/**
> + * cr_read_obj - read a whole record (cr_hdr followed by payload)
> + * @ctx: checkpoint context
> + * @h: record descriptor
> + * @buf: record buffer
> + * @n: available buffer size
> + *
> + * Returns size of payload
> + */
> +int cr_read_obj(struct cr_ctx *ctx, struct cr_hdr *h, void *buf, int n)
> +{
> +	int ret;
> +
> +	ret = cr_kread(ctx, h, sizeof(*h));
> +	if (ret < 0)
> +		return ret;
> +
> +	cr_debug("type %d len %d parent %d\n", h->type, h->len, h->parent);
> +
> +	if (h->len < 0 || h->len > n)
> +		return -EINVAL;

If h->len > n should we return -ENOSPC so the caller can theoretically
deal with it?

> +
> +	return cr_kread(ctx, buf, h->len);
> +}
> +
> +/**
> + * cr_read_obj_type - read a whole record of expected type
> + * @ctx: checkpoint context
> + * @buf: record buffer
> + * @n: available buffer size
> + * @type: expected record type
> + *
> + * Returns object reference of the parent object
> + */
> +int cr_read_obj_type(struct cr_ctx *ctx, void *buf, int n, int type)
> +{
> +	struct cr_hdr h;
> +	int ret;
> +
> +	ret = cr_read_obj(ctx, &h, buf, n);
> +	if (!ret) {
> +		if (h.type == type)
> +			ret = h.parent;
> +		else
> +			ret = -EINVAL;
> +	}
> +	return ret;
> +}
> +
> +/**
> + * cr_read_string - read a string
> + * @ctx: checkpoint context
> + * @str: string buffer
> + * @len: buffer buffer length
> + */
> +int cr_read_string(struct cr_ctx *ctx, void *str, int len)
> +{
> +	return cr_read_obj_type(ctx, str, len, CR_HDR_STRING);
> +}
> +
> +/* read the checkpoint header */
> +static int cr_read_head(struct cr_ctx *ctx)
> +{
> +	struct cr_hdr_head *hh = cr_hbuf_get(ctx, sizeof(*hh));
> +	int parent;
> +
> +	parent = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_HEAD);
> +	if (parent < 0)
> +		return parent;
> +	else if (parent != 0)
> +		return -EINVAL;
> +
> +	if (hh->magic != CHECKPOINT_MAGIC_HEAD || hh->rev != CR_VERSION ||
> +	    hh->major != ((LINUX_VERSION_CODE >> 16) & 0xff) ||
> +	    hh->minor != ((LINUX_VERSION_CODE >> 8) & 0xff) ||
> +	    hh->patch != ((LINUX_VERSION_CODE) & 0xff))
> +		return -EINVAL;
> +
> +	if (hh->flags & ~CR_CTX_CKPT)
> +		return -EINVAL;
> +
> +	ctx->oflags = hh->flags;
> +
> +	/* FIX: verify compatibility of release, version and machine */
> +
> +	cr_hbuf_put(ctx, sizeof(*hh));
> +	return 0;
> +}
> +
> +/* read the checkpoint trailer */
> +static int cr_read_tail(struct cr_ctx *ctx)
> +{
> +	struct cr_hdr_tail *hh = cr_hbuf_get(ctx, sizeof(*hh));
> +	int parent;
> +
> +	parent = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_TAIL);
> +	if (parent < 0)
> +		return parent;
> +	else if (parent != 0)
> +		return -EINVAL;
> +
> +	if (hh->magic != CHECKPOINT_MAGIC_TAIL)
> +		return -EINVAL;
> +
> +	cr_hbuf_put(ctx, sizeof(*hh));
> +	return 0;
> +}
> +
> +/* read the task_struct into the current task */
> +static int cr_read_task_struct(struct cr_ctx *ctx)
> +{
> +	struct cr_hdr_task *hh = cr_hbuf_get(ctx, sizeof(*hh));
> +	struct task_struct *t = current;
> +	char *buf;
> +	int parent, ret;
> +
> +	parent = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_TASK);
> +	if (parent < 0)
> +		return parent;
> +	else if (parent != 0)
> +		return -EINVAL;
> +
> +	/* upper limit for task_comm_len to prevent DoS */
> +	if (hh->task_comm_len < 0 || hh->task_comm_len > PAGE_SIZE)
> +		return -EINVAL;
> +
> +	buf = kmalloc(hh->task_comm_len, GFP_KERNEL);
> +	if (!buf)
> +		return -ENOMEM;
> +	ret = cr_read_string(ctx, buf, hh->task_comm_len);

Here I am getting confused about return values.

cr_read_obj() says it returns size of payload.

cr_read_obj_type() says it returns "object reference of the parent
object", though if cr_read_obj() returned non-zero then it returns its
return value, which could be size of object read.

cr_read_string() returns cr_read_obj_type()'s return value.

Now here you only copy buf into t->comm if return value was 0.

Am I misreading?  Or is this code wrong?

Annoying as it may be (causing patch conflicts in patches 3-9), could
you please put the return values of all these functions in the comments
above them?

> +	if (!ret) {
> +		/* if t->comm is too long, silently truncate */
> +		memset(t->comm, 0, TASK_COMM_LEN);
> +		memcpy(t->comm, buf, min(hh->task_comm_len, TASK_COMM_LEN));
> +	}
> +	kfree(buf);
> +
> +	/* FIXME: restore remaining relevant task_struct fields */
> +
> +	cr_hbuf_put(ctx, sizeof(*hh));
> +	return ret;
> +}
> +
> +/* read the entire state of the current task */
> +static int cr_read_task(struct cr_ctx *ctx)
> +{
> +	int ret;
> +
> +	ret = cr_read_task_struct(ctx);
> +	cr_debug("ret %d\n", ret);
> +
> +	return ret;
> +}
> +
> +int do_restart(struct cr_ctx *ctx)
> +{
> +	int ret;
> +
> +	ret = cr_read_head(ctx);
> +	if (ret < 0)
> +		goto out;
> +	ret = cr_read_task(ctx);
> +	if (ret < 0)
> +		goto out;
> +	ret = cr_read_tail(ctx);
> +	if (ret < 0)
> +		goto out;
> +
> +	/* on success, adjust the return value if needed [TODO] */
> + out:
> +	return ret;
> +}
> diff --git a/checkpoint/sys.c b/checkpoint/sys.c
> index 375129c..30863c6 100644
> --- a/checkpoint/sys.c
> +++ b/checkpoint/sys.c
> @@ -10,6 +10,189 @@
> 
>  #include <linux/sched.h>
>  #include <linux/kernel.h>
> +#include <linux/fs.h>
> +#include <linux/file.h>
> +#include <linux/uaccess.h>
> +#include <linux/capability.h>
> +#include <linux/checkpoint.h>
> +
> +/*
> + * helpers to write/read to/from the image file descriptor
> + *
> + *   cr_uwrite() - write a user-space buffer to the checkpoint image
> + *   cr_kwrite() - write a kernel-space buffer to the checkpoint image
> + *   cr_uread() - read from the checkpoint image to a user-space buffer
> + *   cr_kread() - read from the checkpoint image to a kernel-space buffer
> + */
> +
> +/*
> + * FIXME: (temporarily added file_pos_read() and file_pos_write() because
> + * they are static in fs/read_write.c... should cleanup and remove later)
> + */
> +static inline loff_t file_pos_read(struct file *file)
> +{
> +	return file->f_pos;
> +}
> +
> +static inline void file_pos_write(struct file *file, loff_t pos)
> +{
> +	file->f_pos = pos;
> +}
> +
> +int cr_uwrite(struct cr_ctx *ctx, void *buf, int count)
> +{
> +	struct file *file = ctx->file;
> +	ssize_t nwrite;
> +	int nleft;
> +
> +	for (nleft = count; nleft; nleft -= nwrite) {
> +		loff_t pos = file_pos_read(file);
> +		nwrite = vfs_write(file, (char __user *) buf, nleft, &pos);
> +		file_pos_write(file, pos);
> +		if (nwrite <= 0) {
> +			if (nwrite == -EAGAIN)
> +				nwrite = 0;
> +			else
> +				return nwrite;
> +		}
> +		buf += nwrite;
> +	}
> +
> +	ctx->total += count;
> +	return 0;
> +}
> +
> +int cr_kwrite(struct cr_ctx *ctx, void *buf, int count)
> +{
> +	mm_segment_t oldfs;
> +	int ret;
> +
> +	oldfs = get_fs();
> +	set_fs(KERNEL_DS);
> +	ret = cr_uwrite(ctx, buf, count);
> +	set_fs(oldfs);
> +
> +	return ret;
> +}
> +
> +int cr_uread(struct cr_ctx *ctx, void *buf, int count)
> +{
> +	struct file *file = ctx->file;
> +	ssize_t nread;
> +	int nleft;
> +
> +	for (nleft = count; nleft; nleft -= nread) {
> +		loff_t pos = file_pos_read(file);
> +		nread = vfs_read(file, (char __user *) buf, nleft, &pos);
> +		file_pos_write(file, pos);
> +		if (nread <= 0) {
> +			if (nread == -EAGAIN)
> +				nread = 0;
> +			else
> +				return nread;
> +		}
> +		buf += nread;
> +	}
> +
> +	ctx->total += count;
> +	return 0;
> +}
> +
> +int cr_kread(struct cr_ctx *ctx, void *buf, int count)
> +{
> +	mm_segment_t oldfs;
> +	int ret;
> +
> +	oldfs = get_fs();
> +	set_fs(KERNEL_DS);
> +	ret = cr_uread(ctx, buf, count);
> +	set_fs(oldfs);
> +
> +	return ret;
> +}
> +
> +
> +/*
> + * helpers to manage CR contexts: allocated for each checkpoint and/or
> + * restart operation, and persists until the operation is completed.
> + */
> +
> +/* unique checkpoint identifier (FIXME: should be per-container) */
> +static atomic_t cr_ctx_count;
> +
> +void cr_ctx_free(struct cr_ctx *ctx)
> +{
> +	if (ctx->file)
> +		fput(ctx->file);
> +
> +	free_pages((unsigned long) ctx->hbuf, CR_HBUF_ORDER);
> +
> +	kfree(ctx);
> +}
> +
> +struct cr_ctx *cr_ctx_alloc(pid_t pid, int fd, unsigned long flags)
> +{
> +	struct cr_ctx *ctx;
> +
> +	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
> +	if (!ctx)
> +		return ERR_PTR(-ENOMEM);
> +
> +	ctx->file = fget(fd);
> +	if (!ctx->file) {
> +		cr_ctx_free(ctx);
> +		return ERR_PTR(-EBADF);
> +	}
> +
> +	ctx->hbuf = (void *) __get_free_pages(GFP_KERNEL, CR_HBUF_ORDER);
> +	if (!ctx->hbuf) {
> +		cr_ctx_free(ctx);
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	ctx->pid = pid;
> +	ctx->flags = flags;
> +
> +	ctx->crid = atomic_inc_return(&cr_ctx_count);
> +
> +	return ctx;
> +}
> +
> +/*
> + * During checkpoint and restart the code writes outs/reads in data
> + * to/from the chekcpoint image from/to a temporary buffer (ctx->hbuf).
> + * Because operations can be nested, one should call cr_hbuf_get() to
> + * reserve space in the buffer, and then cr_hbuf_put() when no longer
> + * needs that space.
> + */
> +
> +/**
> + * cr_hbuf_get - reserve space on the hbuf
> + * @ctx: checkpoint context
> + * @n: number of bytes to reserve
> + *
> + * Returns pointer to reserved space
> + */
> +void *cr_hbuf_get(struct cr_ctx *ctx, int n)
> +{
> +	void *ptr;
> +
> +	BUG_ON(ctx->hpos + n > CR_HBUF_TOTAL);
> +	ptr = (void *) (((char *) ctx->hbuf) + ctx->hpos);
> +	ctx->hpos += n;
> +	return ptr;
> +}
> +
> +/**
> + * cr_hbuf_put - unreserve space on the hbuf
> + * @ctx: checkpoint context
> + * @n: number of bytes to reserve
> + */
> +void cr_hbuf_put(struct cr_ctx *ctx, int n)
> +{
> +	BUG_ON(ctx->hpos < n);
> +	ctx->hpos -= n;
> +}
> 
>  /**
>   * sys_checkpoint - checkpoint a container
> @@ -22,9 +205,23 @@
>   */
>  asmlinkage long sys_checkpoint(pid_t pid, int fd, unsigned long flags)
>  {
> -	pr_debug("sys_checkpoint not implemented yet\n");
> -	return -ENOSYS;
> +	struct cr_ctx *ctx;
> +	int ret;
> +
> +	/* no flags for now */
> +	if (flags)
> +		return -EINVAL;
> +
> +	ctx = cr_ctx_alloc(pid, fd, flags | CR_CTX_CKPT);
> +	if (IS_ERR(ctx))
> +		return PTR_ERR(ctx);
> +
> +	ret = do_checkpoint(ctx);
> +
> +	cr_ctx_free(ctx);
> +	return ret;
>  }
> +
>  /**
>   * sys_restart - restart a container
>   * @crid: checkpoint image identifier
> @@ -36,6 +233,19 @@ asmlinkage long sys_checkpoint(pid_t pid, int fd, unsigned long flags)
>   */
>  asmlinkage long sys_restart(int crid, int fd, unsigned long flags)
>  {
> -	pr_debug("sys_restart not implemented yet\n");
> -	return -ENOSYS;
> +	struct cr_ctx *ctx;
> +	int ret;
> +
> +	/* no flags for now */
> +	if (flags)
> +		return -EINVAL;
> +
> +	ctx = cr_ctx_alloc(crid, fd, flags | CR_CTX_RSTR);
> +	if (IS_ERR(ctx))
> +		return PTR_ERR(ctx);
> +
> +	ret = do_restart(ctx);
> +
> +	cr_ctx_free(ctx);
> +	return ret;
>  }
> diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
> new file mode 100644
> index 0000000..5e53ae6
> --- /dev/null
> +++ b/include/linux/checkpoint.h
> @@ -0,0 +1,60 @@
> +#ifndef _CHECKPOINT_CKPT_H_
> +#define _CHECKPOINT_CKPT_H_
> +/*
> + *  Generic container checkpoint-restart
> + *
> + *  Copyright (C) 2008 Oren Laadan
> + *
> + *  This file is subject to the terms and conditions of the GNU General Public
> + *  License.  See the file COPYING in the main directory of the Linux
> + *  distribution for more details.
> + */
> +
> +#define CR_VERSION  1
> +
> +struct cr_ctx {
> +	pid_t pid;		/* container identifier */
> +	int crid;		/* unique checkpoint id */
> +
> +	unsigned long flags;
> +	unsigned long oflags;	/* restart: old flags */
> +
> +	struct file *file;
> +	int total;		/* total read/written */
> +
> +	void *hbuf;		/* temporary buffer for headers */
> +	int hpos;		/* position in headers buffer */
> +};
> +
> +/* cr_ctx: flags */
> +#define CR_CTX_CKPT	0x1
> +#define CR_CTX_RSTR	0x2
> +
> +/* allocation defaults */
> +#define CR_HBUF_ORDER  1
> +#define CR_HBUF_TOTAL  (PAGE_SIZE << CR_HBUF_ORDER)
> +
> +extern int cr_uwrite(struct cr_ctx *ctx, void *buf, int count);
> +extern int cr_kwrite(struct cr_ctx *ctx, void *buf, int count);
> +extern int cr_uread(struct cr_ctx *ctx, void *buf, int count);
> +extern int cr_kread(struct cr_ctx *ctx, void *buf, int count);
> +
> +extern void *cr_hbuf_get(struct cr_ctx *ctx, int n);
> +extern void cr_hbuf_put(struct cr_ctx *ctx, int n);
> +
> +struct cr_hdr;
> +
> +extern int cr_write_obj(struct cr_ctx *ctx, struct cr_hdr *h, void *buf);
> +extern int cr_write_string(struct cr_ctx *ctx, char *str, int len);
> +
> +extern int cr_read_obj(struct cr_ctx *ctx, struct cr_hdr *h, void *buf, int n);
> +extern int cr_read_obj_type(struct cr_ctx *ctx, void *buf, int n, int type);
> +extern int cr_read_string(struct cr_ctx *ctx, void *str, int len);
> +
> +extern int do_checkpoint(struct cr_ctx *ctx);
> +extern int do_restart(struct cr_ctx *ctx);
> +
> +#define cr_debug(fmt, args...)  \
> +	pr_debug("[CR:%s] " fmt, __func__, ## args)
> +
> +#endif /* _CHECKPOINT_CKPT_H_ */
> diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
> new file mode 100644
> index 0000000..79e4df2
> --- /dev/null
> +++ b/include/linux/checkpoint_hdr.h
> @@ -0,0 +1,75 @@
> +#ifndef _CHECKPOINT_CKPT_HDR_H_
> +#define _CHECKPOINT_CKPT_HDR_H_
> +/*
> + *  Generic container checkpoint-restart
> + *
> + *  Copyright (C) 2008 Oren Laadan
> + *
> + *  This file is subject to the terms and conditions of the GNU General Public
> + *  License.  See the file COPYING in the main directory of the Linux
> + *  distribution for more details.
> + */
> +
> +#include <linux/types.h>
> +#include <linux/utsname.h>
> +
> +/*
> + * To maintain compatibility between 32-bit and 64-bit architecture flavors,
> + * keep data 64-bit aligned: use padding for structure members, and use
> + * __attribute__ ((aligned (8))) for the entire structure.
> + */
> +
> +/* records: generic header */
> +
> +struct cr_hdr {
> +	__s16 type;
> +	__s16 len;
> +	__u32 parent;
> +};
> +
> +/* header types */
> +enum {
> +	CR_HDR_HEAD = 1,
> +	CR_HDR_STRING,
> +
> +	CR_HDR_TASK = 101,
> +	CR_HDR_THREAD,
> +	CR_HDR_CPU,
> +
> +	CR_HDR_MM = 201,
> +	CR_HDR_VMA,
> +	CR_HDR_MM_CONTEXT,
> +
> +	CR_HDR_TAIL = 5001
> +};
> +
> +struct cr_hdr_head {
> +	__u64 magic;
> +
> +	__u16 major;
> +	__u16 minor;
> +	__u16 patch;
> +	__u16 rev;
> +
> +	__u64 time;	/* when checkpoint taken */
> +	__u64 flags;	/* checkpoint options */
> +
> +	char release[__NEW_UTS_LEN];
> +	char version[__NEW_UTS_LEN];
> +	char machine[__NEW_UTS_LEN];
> +} __attribute__((aligned(8)));
> +
> +struct cr_hdr_tail {
> +	__u64 magic;
> +} __attribute__((aligned(8)));
> +
> +struct cr_hdr_task {
> +	__u32 state;
> +	__u32 exit_state;
> +	__u32 exit_code;
> +	__u32 exit_signal;
> +
> +	__s32 task_comm_len;
> +} __attribute__((aligned(8)));
> +
> +#endif /* _CHECKPOINT_CKPT_HDR_H_ */
> diff --git a/include/linux/magic.h b/include/linux/magic.h
> index 1fa0c2c..c2b811c 100644
> --- a/include/linux/magic.h
> +++ b/include/linux/magic.h
> @@ -42,4 +42,7 @@
>  #define FUTEXFS_SUPER_MAGIC	0xBAD1DEA
>  #define INOTIFYFS_SUPER_MAGIC	0x2BAD1DEA
> 
> +#define CHECKPOINT_MAGIC_HEAD  0x00feed0cc0a2d200LL
> +#define CHECKPOINT_MAGIC_TAIL  0x002d2a0cc0deef00LL
> +
>  #endif /* __LINUX_MAGIC_H__ */
> -- 
> 1.5.4.3
> 
> _______________________________________________
> Containers mailing list
> Containers@lists.linux-foundation.org
> https://lists.linux-foundation.org/mailman/listinfo/containers

  parent reply	other threads:[~2008-09-15 21:33 UTC|newest]

Thread overview: 87+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-09-13 23:05 [RFC v5][PATCH 0/9] Kernel based checkpoint/restart Oren Laadan
2008-09-13 23:06 ` [RFC v5][PATCH 2/8] General infrastructure for checkpoint restart Oren Laadan
2008-09-15 17:54   ` Dave Hansen
2008-09-15 17:59   ` Dave Hansen
2008-09-15 18:00   ` Dave Hansen
2008-09-15 18:02   ` Dave Hansen
2008-09-15 18:52     ` Oren Laadan
2008-09-15 18:52     ` Oren Laadan
     [not found]       ` <48CEAEF2.1050901-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2008-09-15 19:13         ` Dave Hansen
2008-09-15 19:13       ` Dave Hansen
2008-09-16 12:27     ` Bastian Blank
2008-09-16 12:27     ` Bastian Blank
     [not found]   ` <1221347167-9956-3-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2008-09-15 17:54     ` Dave Hansen
2008-09-15 17:59     ` Dave Hansen
2008-09-15 18:00     ` Dave Hansen
2008-09-15 18:02     ` Dave Hansen
2008-09-15 21:15     ` Serge E. Hallyn
2008-09-15 21:15   ` Serge E. Hallyn [this message]
2008-09-13 23:06 ` [RFC v5][PATCH 4/8] Dump memory address space Oren Laadan
2008-09-17  6:48   ` MinChan Kim
     [not found]   ` <1221347167-9956-5-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2008-09-17  6:48     ` MinChan Kim
     [not found] ` <1221347167-9956-1-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2008-09-13 23:05   ` [RFC v5][PATCH 1/8] Create syscalls: sys_checkpoint, sys_restart Oren Laadan
2008-09-13 23:05     ` Oren Laadan
     [not found]     ` <1221347167-9956-2-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2008-09-15 20:28       ` Serge E. Hallyn
2008-09-15 20:28         ` Serge E. Hallyn
2008-09-13 23:06   ` [RFC v5][PATCH 2/8] General infrastructure for checkpoint restart Oren Laadan
2008-09-13 23:06   ` [RFC v5][PATCH 3/8] x86 support for checkpoint/restart Oren Laadan
2008-09-13 23:06     ` Oren Laadan
     [not found]     ` <1221347167-9956-4-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2008-09-15 21:31       ` Serge E. Hallyn
2008-09-15 21:31         ` Serge E. Hallyn
2008-09-13 23:06   ` [RFC v5][PATCH 4/8] Dump memory address space Oren Laadan
2008-09-13 23:06   ` [RFC v5][PATCH 5/8] Restore " Oren Laadan
2008-09-13 23:06     ` Oren Laadan
2008-09-15 19:14     ` Dave Hansen
     [not found]     ` <1221347167-9956-6-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2008-09-15 19:14       ` Dave Hansen
2008-09-13 23:06   ` [RFC v5][PATCH 6/8] Checkpoint/restart: initial documentation Oren Laadan
2008-09-13 23:06     ` Oren Laadan
     [not found]     ` <1221347167-9956-7-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2008-09-15 20:26       ` Serge E. Hallyn
2008-09-17  6:23       ` MinChan Kim
2008-09-15 20:26     ` Serge E. Hallyn
2008-09-17  6:23     ` MinChan Kim
2008-09-13 23:06   ` [RFC v5][PATCH 7/8] Infrastructure for shared objects Oren Laadan
2008-09-13 23:06     ` Oren Laadan
     [not found]     ` <1221347167-9956-8-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2008-09-16 16:48       ` Dave Hansen
2008-09-16 16:48         ` Dave Hansen
2008-09-17  7:31         ` MinChan Kim
2008-09-17  7:31         ` MinChan Kim
2008-09-16 20:54       ` Serge E. Hallyn
2008-09-16 20:54     ` Serge E. Hallyn
     [not found]       ` <20080916205459.GA7644-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2008-09-16 21:36         ` Oren Laadan
2008-09-16 21:36           ` Oren Laadan
     [not found]           ` <48D026ED.3080109-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2008-09-16 22:09             ` Serge E. Hallyn
2008-09-16 22:09           ` Serge E. Hallyn
2008-09-13 23:06   ` [RFC v5][PATCH 8/8] Dump open file descriptors Oren Laadan
2008-09-13 23:06     ` Oren Laadan
2008-09-14  9:51     ` Bastian Blank
     [not found]       ` <20080914095106.GA6300-0IJIQSrh9RL9UF0aPl6fsj8Kkb2uy4ct@public.gmane.org>
2008-09-14 15:40         ` Oren Laadan
2008-09-14 15:40       ` Oren Laadan
     [not found]         ` <48CD3069.7080200-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2008-09-16 23:03           ` Serge E. Hallyn
2008-09-16 23:03         ` Serge E. Hallyn
     [not found]           ` <20080916230320.GA25445-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2008-09-22 15:31             ` Dave Hansen
2008-09-22 15:31           ` Dave Hansen
2008-09-16 15:54     ` Dave Hansen
2008-09-16 16:55     ` Dave Hansen
     [not found]     ` <1221347167-9956-9-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2008-09-14  9:51       ` Bastian Blank
2008-09-16 15:54       ` Dave Hansen
2008-09-16 16:55       ` Dave Hansen
2008-09-13 23:06   ` [RFC v5][PATCH 9/9] Restore open file descriprtors Oren Laadan
2008-09-13 23:22   ` Oren Laadan
2008-09-17 14:16   ` [RFC v5][PATCH 0/9] Kernel based checkpoint/restart Serge E. Hallyn
2008-09-24 21:42   ` Serge E. Hallyn
2008-09-13 23:06 ` [RFC v5][PATCH 9/9] Restore open file descriprtors Oren Laadan
2008-09-16 23:08   ` Serge E. Hallyn
2008-09-17  0:11     ` Oren Laadan
     [not found]       ` <48D04B19.9060502-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2008-09-17  4:56         ` Serge E. Hallyn
2008-09-22 16:02         ` Dave Hansen
2008-09-22 16:02           ` Dave Hansen
2008-09-17  4:56       ` Serge E. Hallyn
     [not found]     ` <20080916230850.GB25445-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2008-09-17  0:11       ` Oren Laadan
     [not found]   ` <1221347167-9956-10-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2008-09-16 23:08     ` Serge E. Hallyn
2008-09-13 23:22 ` Oren Laadan
2008-09-17 14:16 ` [RFC v5][PATCH 0/9] Kernel based checkpoint/restart Serge E. Hallyn
2008-10-08  9:59   ` Oren Laadan
     [not found]   ` <20080917141601.GA14010-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2008-10-08  9:59     ` Oren Laadan
2008-09-24 21:42 ` Serge E. Hallyn
     [not found]   ` <20080924214242.GA27875-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2008-09-25 12:58     ` Cedric Le Goater
2008-09-25 12:58   ` Cedric Le Goater

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080915211525.GC28683@us.ibm.com \
    --to=serue@us.ibm.com \
    --cc=arnd@arndb.de \
    --cc=containers@lists.linux-foundation.org \
    --cc=dave@linux.vnet.ibm.com \
    --cc=jeremy@goop.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=orenl@cs.columbia.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.