All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] c/r: Add UTS support (v3)
@ 2009-03-17 16:08 Dan Smith
       [not found] ` <1237306139-23075-1-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 3+ messages in thread
From: Dan Smith @ 2009-03-17 16:08 UTC (permalink / raw)
  To: containers-qjLDD68F18O7TbgM5vRIOg; +Cc: adobriyan-Re5JQEeQqe8AvxtiuMwx3w

This patch adds a "phase" of checkpoint that saves out information about any
namespaces the task(s) may have.  Do this by tracking the nsproxy of the
first task and making sure that the tasks that follow get hooked back to
share the same one on restart.

Restart is handled in userspace by reading the UTS record(s), calling
unshare() and setting the hostname accordingly.  See my changes to
mktree.c for details.

I tested this with single and multiple task restore, on top of Oren's
v13 tree.

Changes:
  - Remove the kernel restore path
  - Punt on nested namespaces
  - Use __NEW_UTS_LEN in nodename and domainname buffers
  - Add a note to Documentation/checkpoint/internals.txt to indicate where
    in the save/restore process the UTS information is kept

Signed-off-by: Dan Smith <danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 Documentation/checkpoint/internals.txt |    2 +
 checkpoint/checkpoint.c                |   80 ++++++++++++++++++++++++++++++++
 checkpoint/objhash.c                   |    7 +++
 checkpoint/restart.c                   |    1 +
 include/linux/checkpoint.h             |    1 +
 include/linux/checkpoint_hdr.h         |   15 ++++++
 6 files changed, 106 insertions(+), 0 deletions(-)

diff --git a/Documentation/checkpoint/internals.txt b/Documentation/checkpoint/internals.txt
index b363e83..7a2488b 100644
--- a/Documentation/checkpoint/internals.txt
+++ b/Documentation/checkpoint/internals.txt
@@ -12,6 +12,8 @@ The order of operations, both save and restore, is as follows:
 
 * Process forest: [TBD] tasks and their relationships
 
+* Namespace section: per-container namespace information
+
 * Per task data (for each task):
   -> task state: elements of task_struct
   -> thread state: elements of thread_struct and thread_info
diff --git a/checkpoint/checkpoint.c b/checkpoint/checkpoint.c
index 64155de..12eb1d5 100644
--- a/checkpoint/checkpoint.c
+++ b/checkpoint/checkpoint.c
@@ -193,6 +193,82 @@ static int cr_write_tail(struct cr_ctx *ctx)
 	return ret;
 }
 
+static int cr_write_ns_uts(struct cr_ctx *ctx, struct task_struct *t)
+{
+	struct cr_hdr h;
+	struct cr_hdr_utsns *hh = cr_hbuf_get(ctx, sizeof(*hh));
+	struct new_utsname *n = &t->nsproxy->uts_ns->name;
+	int ret;
+
+	h.type = CR_HDR_UTSNS;
+	h.len = sizeof(*hh);
+	h.parent = 0;
+
+	memcpy(hh->nodename, n->nodename, sizeof(n->nodename));
+	memcpy(hh->domainname, n->domainname, sizeof(n->domainname));
+
+	ret = cr_write_obj(ctx, &h, hh);
+	cr_hbuf_put(ctx, sizeof(*hh));
+
+	return ret;
+}
+
+static int cr_write_namespaces(struct cr_ctx *ctx, struct task_struct *t)
+{
+	struct cr_hdr h;
+	struct cr_hdr_nsproxy *hh = cr_hbuf_get(ctx, sizeof(*hh));
+	struct nsproxy *nsp = t->nsproxy;
+	int ret;
+	int new;
+
+	if (nsp != ctx->root_nsproxy) {
+		/* Don't allow checkpoint of nested namespaces yet */
+		pr_debug("Task has different nsproxy than root\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	h.type = CR_HDR_NSP;
+	h.len = sizeof(*hh);
+	h.parent = 0;
+
+	new = cr_obj_add_ptr(ctx, nsp, &hh->objref, CR_OBJ_NSP, 0);
+	if (new)
+		hh->types = CR_NSP_UTS; /* Record types we support */
+	else
+		hh->types = 0;
+
+	ret = cr_write_obj(ctx, &h, hh);
+	if (ret)
+		goto out;
+
+	if (new) {
+		ret = cr_write_ns_uts(ctx, t);
+		if (ret < 0)
+			goto out;
+
+		/* FIXME: Write other namespaces here */
+	}
+ out:
+	cr_hbuf_put(ctx, sizeof(*hh));
+
+	return ret;
+}
+
+static int cr_write_all_namespaces(struct cr_ctx *ctx)
+{
+	int n, ret = 0;
+
+	for (n = 0; n < ctx->tasks_nr; n++) {
+		pr_debug("dumping ns for task #%d\n", n);
+		ret = cr_write_namespaces(ctx, ctx->tasks_arr[n]);
+		if (ret < 0)
+			break;
+	}
+
+	return ret;
+}
+
 /* dump the task_struct of a given task */
 static int cr_write_task_struct(struct cr_ctx *ctx, struct task_struct *t)
 {
@@ -549,6 +625,10 @@ int do_checkpoint(struct cr_ctx *ctx, pid_t pid)
 	if (ret < 0)
 		goto out;
 
+	ret = cr_write_all_namespaces(ctx);
+	if (ret < 0)
+		goto out;
+
 	ret = cr_write_all_tasks(ctx);
 	if (ret < 0)
 		goto out;
diff --git a/checkpoint/objhash.c b/checkpoint/objhash.c
index ee31b38..aaaf583 100644
--- a/checkpoint/objhash.c
+++ b/checkpoint/objhash.c
@@ -12,6 +12,7 @@
 #include <linux/file.h>
 #include <linux/hash.h>
 #include <linux/checkpoint.h>
+#include <linux/utsname.h>
 
 struct cr_objref {
 	int objref;
@@ -35,6 +36,9 @@ static void cr_obj_ref_drop(struct cr_objref *obj)
 	case CR_OBJ_FILE:
 		fput((struct file *) obj->ptr);
 		break;
+	case CR_OBJ_NSP:
+		put_nsproxy((struct nsproxy *) obj->ptr);
+		break;
 	default:
 		BUG();
 	}
@@ -46,6 +50,9 @@ static void cr_obj_ref_grab(struct cr_objref *obj)
 	case CR_OBJ_FILE:
 		get_file((struct file *) obj->ptr);
 		break;
+	case CR_OBJ_NSP:
+		get_nsproxy((struct nsproxy *) obj->ptr);
+		break;
 	default:
 		BUG();
 	}
diff --git a/checkpoint/restart.c b/checkpoint/restart.c
index 7ec4de4..0ed01aa 100644
--- a/checkpoint/restart.c
+++ b/checkpoint/restart.c
@@ -15,6 +15,7 @@
 #include <linux/magic.h>
 #include <linux/checkpoint.h>
 #include <linux/checkpoint_hdr.h>
+#include <linux/utsname.h>
 
 #include "checkpoint_arch.h"
 
diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
index 217cf6e..5966275 100644
--- a/include/linux/checkpoint.h
+++ b/include/linux/checkpoint.h
@@ -75,6 +75,7 @@ extern void cr_ctx_put(struct cr_ctx *ctx);
 
 enum {
 	CR_OBJ_FILE = 1,
+	CR_OBJ_NSP,
 	CR_OBJ_MAX
 };
 
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index 6dc739f..1a2fc81 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -49,6 +49,8 @@ enum {
 	CR_HDR_TASK,
 	CR_HDR_THREAD,
 	CR_HDR_CPU,
+	CR_HDR_NSP,
+	CR_HDR_UTSNS,
 
 	CR_HDR_MM = 201,
 	CR_HDR_VMA,
@@ -156,4 +158,17 @@ struct cr_hdr_fd_data {
 	__u64 f_version;
 } __attribute__((aligned(8)));
 
+#define CR_NSP_UTS 1
+
+struct cr_hdr_nsproxy {
+	__u32 objref;
+	__u32 types;
+};
+
+struct cr_hdr_utsns {
+	/* Both of these fields are defined as 65-chars long */
+	char nodename[__NEW_UTS_LEN+1];
+	char domainname[__NEW_UTS_LEN+1];
+};
+
 #endif /* _CHECKPOINT_CKPT_HDR_H_ */
-- 
1.5.6.3

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] c/r: Add UTS support (v3)
       [not found] ` <1237306139-23075-1-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-03-18  9:22   ` Oren Laadan
       [not found]     ` <49C0BD63.7000309-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
  0 siblings, 1 reply; 3+ messages in thread
From: Oren Laadan @ 2009-03-18  9:22 UTC (permalink / raw)
  To: Dan Smith
  Cc: containers-qjLDD68F18O7TbgM5vRIOg,
	adobriyan-Re5JQEeQqe8AvxtiuMwx3w



Dan Smith wrote:
> This patch adds a "phase" of checkpoint that saves out information about any
> namespaces the task(s) may have.  Do this by tracking the nsproxy of the
> first task and making sure that the tasks that follow get hooked back to
> share the same one on restart.
> 
> Restart is handled in userspace by reading the UTS record(s), calling
> unshare() and setting the hostname accordingly.  See my changes to
> mktree.c for details.
> 
> I tested this with single and multiple task restore, on top of Oren's
> v13 tree.

Thanks, Dan.

[..]

> diff --git a/checkpoint/checkpoint.c b/checkpoint/checkpoint.c
> index 64155de..12eb1d5 100644
> --- a/checkpoint/checkpoint.c
> +++ b/checkpoint/checkpoint.c
> @@ -193,6 +193,82 @@ static int cr_write_tail(struct cr_ctx *ctx)
>  	return ret;
>  }
>  
> +static int cr_write_ns_uts(struct cr_ctx *ctx, struct task_struct *t)
> +{
> +	struct cr_hdr h;
> +	struct cr_hdr_utsns *hh = cr_hbuf_get(ctx, sizeof(*hh));
> +	struct new_utsname *n = &t->nsproxy->uts_ns->name;
> +	int ret;
> +
> +	h.type = CR_HDR_UTSNS;
> +	h.len = sizeof(*hh);
> +	h.parent = 0;
> +
> +	memcpy(hh->nodename, n->nodename, sizeof(n->nodename));
> +	memcpy(hh->domainname, n->domainname, sizeof(n->domainname));
> +

The length of ->nodename etc may change in the future. It's probably
safer to also add the length of the field as well (see for instance
how task->comm is saved).

> +	ret = cr_write_obj(ctx, &h, hh);
> +	cr_hbuf_put(ctx, sizeof(*hh));
> +
> +	return ret;
> +}

[...]

> @@ -156,4 +158,17 @@ struct cr_hdr_fd_data {
>  	__u64 f_version;
>  } __attribute__((aligned(8)));
>  
> +#define CR_NSP_UTS 1
> +
> +struct cr_hdr_nsproxy {
> +	__u32 objref;
> +	__u32 types;
> +};
> +
> +struct cr_hdr_utsns {
> +	/* Both of these fields are defined as 65-chars long */
> +	char nodename[__NEW_UTS_LEN+1];
> +	char domainname[__NEW_UTS_LEN+1];
> +};

add ?
	__u16 nodename_len;
	__u16 domainname_len;

> +
>  #endif /* _CHECKPOINT_CKPT_HDR_H_ */

Oren.

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] c/r: Add UTS support (v3)
       [not found]     ` <49C0BD63.7000309-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
@ 2009-03-18 13:30       ` Dan Smith
  0 siblings, 0 replies; 3+ messages in thread
From: Dan Smith @ 2009-03-18 13:30 UTC (permalink / raw)
  To: Oren Laadan
  Cc: containers-qjLDD68F18O7TbgM5vRIOg,
	adobriyan-Re5JQEeQqe8AvxtiuMwx3w

OL> The length of ->nodename etc may change in the future. It's
OL> probably safer to also add the length of the field as well (see
OL> for instance how task->comm is saved).

Ah, good point.  I'll fix it up.

Thanks!

-- 
Dan Smith
IBM Linux Technology Center
email: danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2009-03-18 13:30 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-03-17 16:08 [PATCH] c/r: Add UTS support (v3) Dan Smith
     [not found] ` <1237306139-23075-1-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-03-18  9:22   ` Oren Laadan
     [not found]     ` <49C0BD63.7000309-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-03-18 13:30       ` Dan Smith

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.