From: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
To: "Serge E. Hallyn" <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
Cc: Linux Containers <containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org>
Subject: Re: [PATCH] Restore task fs_root and pwd (v2)
Date: Wed, 20 Jan 2010 15:28:13 -0500 [thread overview]
Message-ID: <4B57675D.8080609@cs.columbia.edu> (raw)
In-Reply-To: <20091228183948.GA19788-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
Very necessary !
See comments inline.
Serge E. Hallyn wrote:
> Checkpoint and restore task->fs. Tasks sharing task->fs will
> share them again after restart.
>
> Changelog:
> Dec 28: Addressed comments by Oren (and Dave)
> 1. define and use {get,put}_fs_struct helpers
> 2. fix locking comment
> 3. define ckpt_read_fname() and use in checkpoint/files.c
>
> Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
> ---
> checkpoint/files.c | 197 +++++++++++++++++++++++++++++++++++++++-
> checkpoint/objhash.c | 9 ++
> checkpoint/process.c | 13 +++
> fs/fs_struct.c | 21 ++++
> fs/open.c | 53 ++++++-----
> include/linux/checkpoint.h | 10 ++-
> include/linux/checkpoint_hdr.h | 10 ++
> include/linux/fs.h | 4 +
> include/linux/fs_struct.h | 2 +
> 9 files changed, 293 insertions(+), 26 deletions(-)
>
> diff --git a/checkpoint/files.c b/checkpoint/files.c
> index b622588..d6cf945 100644
> --- a/checkpoint/files.c
> +++ b/checkpoint/files.c
> @@ -24,6 +24,9 @@
> #include <linux/checkpoint_hdr.h>
> #include <linux/eventpoll.h>
> #include <linux/eventfd.h>
> +#include <linux/fs.h>
> +#include <linux/fs_struct.h>
> +#include <linux/namei.h>
> #include <net/sock.h>
>
>
> @@ -449,10 +452,81 @@ int ckpt_collect_file_table(struct ckpt_ctx *ctx, struct task_struct *t)
> return ret;
> }
>
> +int checkpoint_get_task_fs(struct ckpt_ctx *ctx, struct task_struct *t)
Renaming these to {checkpoint/restore}_obj_filesystem() will be
consistent with existing naming convention there.
> +{
> + struct fs_struct *fs;
> + int fs_objref;
> +
> + task_lock(current);
> + fs = t->fs;
> + get_fs_struct(fs);
> + task_unlock(current);
> +
> + fs_objref = checkpoint_obj(ctx, fs, CKPT_OBJ_TASK_FS);
> + put_fs_struct(fs);
> +
> + return fs_objref;
> +}
> +
> +/*
> + * called with fs refcount bumped so it won't disappear
> + */
> +int checkpoint_obj_task_fs(struct ckpt_ctx *ctx, struct fs_struct *fs)
> +{
> + struct ckpt_hdr_task_fs *h;
> + int ret;
> + struct fs_struct *fscopy;
> +
> + h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_TASK_FS);
> + if (!h)
> + return -ENOMEM;
> + ret = ckpt_write_obj(ctx, &h->h);
> + ckpt_hdr_put(ctx, h);
> + if (ret)
> + return ret;
> +
> + fscopy = copy_fs_struct(fs);
> + if (!fs)
> + return -ENOMEM;
> +
Reverse the order below: cwd first then root - this will allow
the cwd to be outside of the current chroot, in the case that
the task escaped the chroot.
> + ret = checkpoint_fname(ctx, &fscopy->root, &ctx->fs_mnt);
> + if (ret < 0) {
> + ckpt_err(ctx, ret, "%(T)writing name of fs root");
> + goto out;
> + }
> + ret = checkpoint_fname(ctx, &fscopy->pwd, &ctx->fs_mnt);
> + if (ret < 0) {
> + ckpt_err(ctx, ret, "%(T)writing name of pwd");
> + goto out;
> + }
> + ret = 0;
[...]
> +/* this is the fn called by objhash when it runs into a
> + * CKPT_OBJ_TASK_FS entry. Creates an fs_struct and
> + * places it in the hash. */
> +static struct fs_struct *restore_obj_task_fs(struct ckpt_ctx *ctx)
> +{
> + struct ckpt_hdr_task_fs *h;
> + struct fs_struct *fs;
> + int ret = 0;
> + char *root, *cwd;
> +
> + h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_TASK_FS);
> + if (IS_ERR(h))
> + return ERR_PTR(PTR_ERR(h));
> + ckpt_hdr_put(ctx, h);
> +
> + fs = copy_fs_struct(current->fs);
> + if (!fs)
> + return ERR_PTR(-ENOMEM);
> +
Please reverse the order below: restore cwd before chroot.
> + ret = ckpt_read_fname(ctx, &root);
> + if (ret < 0)
> + goto out;
> + ret = restore_chroot(ctx, fs, root);
> + kfree(root);
> + if (ret)
> + goto out;
> +
> + ret = ckpt_read_fname(ctx, &cwd);
> + if (ret < 0)
> + goto out;
> + ret = restore_cwd(ctx, fs, cwd);
> + kfree(cwd);
> +
> +out:
> + if (ret) {
> + free_fs_struct(fs);
> + return ERR_PTR(ret);
> + }
> + return fs;
> +}
> +
> +void *restore_task_fs(struct ckpt_ctx *ctx)
> +{
> + return (void *) restore_obj_task_fs(ctx);
> +}
> +
> +/*
> + * Called by task restore code to set the restarted task's
> + * current->fs to an entry on the hash
> + */
> +int restore_set_task_fs(struct ckpt_ctx *ctx, int fs_objref)
(Please rename this one, too)
> +{
> + struct fs_struct *newfs, *oldfs;
> +
> + newfs = ckpt_obj_fetch(ctx, fs_objref, CKPT_OBJ_TASK_FS);
> + if (IS_ERR(newfs))
> + return PTR_ERR(newfs);
> +
> + task_lock(current);
> + get_fs_struct(newfs);
> + oldfs = current->fs;
> + current->fs = newfs;
> + task_unlock(current);
> + put_fs_struct(oldfs);
> +
> + return 0;
> +}
> diff --git a/checkpoint/objhash.c b/checkpoint/objhash.c
> index 782661d..20fd3e9 100644
> --- a/checkpoint/objhash.c
> +++ b/checkpoint/objhash.c
> @@ -417,6 +417,15 @@ static struct ckpt_obj_ops ckpt_obj_ops[] = {
> .checkpoint = checkpoint_userns,
> .restore = restore_userns,
> },
> + /* struct fs_struct */
> + {
> + .obj_name = "TASK_FS",
> + .obj_type = CKPT_OBJ_TASK_FS,
> + .ref_drop = obj_task_fs_drop,
> + .ref_grab = obj_task_fs_grab,
> + .checkpoint = checkpoint_task_fs,
> + .restore = restore_task_fs,
I think we also need a .collect method to prevent leaks ?
> + },
> /* struct cred */
> {
> .obj_name = "CRED",
> diff --git a/checkpoint/process.c b/checkpoint/process.c
> index 9c0463d..603bbf4 100644
> --- a/checkpoint/process.c
> +++ b/checkpoint/process.c
> @@ -234,6 +234,7 @@ static int checkpoint_task_objs(struct ckpt_ctx *ctx, struct task_struct *t)
> int mm_objref;
> int sighand_objref;
> int signal_objref;
> + int fs_objref;
> int first, ret;
>
> /*
> @@ -253,6 +254,12 @@ static int checkpoint_task_objs(struct ckpt_ctx *ctx, struct task_struct *t)
> if (ret < 0)
> return ret;
>
> + fs_objref = checkpoint_get_task_fs(ctx, t);
> + if (fs_objref < 0) {
> + ckpt_err(ctx, fs_objref, "%(T)process fs\n");
> + return fs_objref;
> + }
> +
> files_objref = checkpoint_obj_file_table(ctx, t);
The fs should be saved and restored after the file-table, or otherwise
I'd expect files that were open before a task changed called chroot()
to be unreachable.
> ckpt_debug("files: objref %d\n", files_objref);
> if (files_objref < 0) {
> @@ -294,6 +301,7 @@ static int checkpoint_task_objs(struct ckpt_ctx *ctx, struct task_struct *t)
> h->mm_objref = mm_objref;
> h->sighand_objref = sighand_objref;
> h->signal_objref = signal_objref;
> + h->fs_objref = fs_objref;
> ret = ckpt_write_obj(ctx, &h->h);
> ckpt_hdr_put(ctx, h);
> if (ret < 0)
> @@ -628,6 +636,11 @@ static int restore_task_objs(struct ckpt_ctx *ctx)
> return PTR_ERR(h);
> }
>
> + ret = restore_set_task_fs(ctx, h->fs_objref);
> + ckpt_debug("restore_task_fs returned %d\n", ret);
> + if (ret < 0)
> + return ret;
> +
> ret = restore_obj_file_table(ctx, h->files_objref);
> ckpt_debug("file_table: ret %d (%p)\n", ret, current->files);
> if (ret < 0)
Here too.
> diff --git a/fs/fs_struct.c b/fs/fs_struct.c
> index eee0590..2a4c6f5 100644
> --- a/fs/fs_struct.c
> +++ b/fs/fs_struct.c
> @@ -6,6 +6,27 @@
> #include <linux/fs_struct.h>
>
> /*
> + * call with owning task locked
> + */
> +void get_fs_struct(struct fs_struct *fs)
> +{
> + write_lock(&fs->lock);
> + fs->users++;
> + write_unlock(&fs->lock);
> +}
> +
> +void put_fs_struct(struct fs_struct *fs)
> +{
> + int kill;
> +
> + write_lock(&fs->lock);
> + kill = !--fs->users;
> + write_unlock(&fs->lock);
> + if (kill)
> + free_fs_struct(fs);
> +}
I didn't look everywhere, however - could there be other users of
these two elsewhere in the kernel ?
> +
> +/*
> * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
> * It can block.
> */
[...]
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 5ae34fc..0efa62a 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1819,9 +1819,13 @@ extern struct vfsmount *collect_mounts(struct path *);
> extern void drop_collected_mounts(struct vfsmount *);
>
> extern int vfs_statfs(struct dentry *, struct kstatfs *);
> +struct fs_struct;
> +extern int do_chdir(struct fs_struct *fs, struct path *path);
> +extern int do_chroot(struct fs_struct *fs, struct path *path);
>
> extern int current_umask(void);
>
> +
:(
> /* /sys/fs */
> extern struct kobject *fs_kobj;
>
> diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
> index 78a05bf..a73cbcb 100644
> --- a/include/linux/fs_struct.h
> +++ b/include/linux/fs_struct.h
> @@ -20,5 +20,7 @@ extern struct fs_struct *copy_fs_struct(struct fs_struct *);
> extern void free_fs_struct(struct fs_struct *);
> extern void daemonize_fs_struct(void);
> extern int unshare_fs_struct(void);
> +extern void get_fs_struct(struct fs_struct *);
> +extern void put_fs_struct(struct fs_struct *);
>
> #endif /* _LINUX_FS_STRUCT_H */
Thanks,
Oren.
next prev parent reply other threads:[~2010-01-20 20:28 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-12-28 18:39 [PATCH] Restore task fs_root and pwd (v2) Serge E. Hallyn
[not found] ` <20091228183948.GA19788-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-12-29 2:58 ` Serge E. Hallyn
[not found] ` <20091229025846.GA16492-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-12-29 19:01 ` Serge E. Hallyn
2010-01-20 20:28 ` Oren Laadan [this message]
[not found] ` <4B57675D.8080609-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-01-20 20:47 ` Oren Laadan
[not found] ` <4B576BF3.9030506-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-01-20 20:55 ` Oren Laadan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4B57675D.8080609@cs.columbia.edu \
--to=orenl-eqauephvms7envbuuze7ea@public.gmane.org \
--cc=containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org \
--cc=serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.