linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC][PATCH 1/5] create fs flag to mark c/r supported fs's
@ 2009-02-19 18:20 Dave Hansen
  2009-02-19 18:20 ` [RFC][PATCH 2/5] file c/r: expose functions to query fs support Dave Hansen
                   ` (5 more replies)
  0 siblings, 6 replies; 14+ messages in thread
From: Dave Hansen @ 2009-02-19 18:20 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: containers, linux-kernel@vger.kernel.org, Serge E. Hallyn,
	Oren Laadan, Alexey Dobriyan, Dave Hansen


There are plenty of filesystems that are not supported for
c/r at this point.  Think of things like hugetlbfs which
are externally visible or pipefs which are kernel-internal.

This provides a quick way to make the "normal" filesystems
which are currently supported.  This is also safe if any
new code gets added.  We assume that a fs is non-supported
unless someone takes explicit action to the contrary.

I bet there are some more filesystems that are OK, but
these probably cover 99% of the users for now.

Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
---

 linux-2.6.git-dave/fs/ext2/super.c              |    2 +-
 linux-2.6.git-dave/fs/ext3/super.c              |    2 +-
 linux-2.6.git-dave/fs/ext4/super.c              |    4 ++--
 linux-2.6.git-dave/fs/fat/namei_msdos.c         |    2 +-
 linux-2.6.git-dave/fs/fat/namei_vfat.c          |    2 +-
 linux-2.6.git-dave/fs/jfs/super.c               |    2 +-
 linux-2.6.git-dave/fs/minix/inode.c             |    2 +-
 linux-2.6.git-dave/fs/reiserfs/super.c          |    2 +-
 linux-2.6.git-dave/fs/xfs/linux-2.6/xfs_super.c |    2 +-
 linux-2.6.git-dave/include/linux/fs.h           |    1 +
 10 files changed, 11 insertions(+), 10 deletions(-)

diff -puN fs/ext2/super.c~sb_op-for-checkpointable-files fs/ext2/super.c
--- linux-2.6.git/fs/ext2/super.c~sb_op-for-checkpointable-files	2009-02-19 10:17:23.000000000 -0800
+++ linux-2.6.git-dave/fs/ext2/super.c	2009-02-19 10:17:23.000000000 -0800
@@ -1401,7 +1401,7 @@ static struct file_system_type ext2_fs_t
 	.name		= "ext2",
 	.get_sb		= ext2_get_sb,
 	.kill_sb	= kill_block_super,
-	.fs_flags	= FS_REQUIRES_DEV,
+	.fs_flags	= FS_REQUIRES_DEV | FS_CHECKPOINTABLE,
 };
 
 static int __init init_ext2_fs(void)
diff -puN fs/ext3/super.c~sb_op-for-checkpointable-files fs/ext3/super.c
--- linux-2.6.git/fs/ext3/super.c~sb_op-for-checkpointable-files	2009-02-19 10:17:23.000000000 -0800
+++ linux-2.6.git-dave/fs/ext3/super.c	2009-02-19 10:17:23.000000000 -0800
@@ -2969,7 +2969,7 @@ static struct file_system_type ext3_fs_t
 	.name		= "ext3",
 	.get_sb		= ext3_get_sb,
 	.kill_sb	= kill_block_super,
-	.fs_flags	= FS_REQUIRES_DEV,
+	.fs_flags	= FS_REQUIRES_DEV | FS_CHECKPOINTABLE,
 };
 
 static int __init init_ext3_fs(void)
diff -puN fs/ext4/super.c~sb_op-for-checkpointable-files fs/ext4/super.c
--- linux-2.6.git/fs/ext4/super.c~sb_op-for-checkpointable-files	2009-02-19 10:17:23.000000000 -0800
+++ linux-2.6.git-dave/fs/ext4/super.c	2009-02-19 10:17:23.000000000 -0800
@@ -3543,7 +3543,7 @@ static struct file_system_type ext4_fs_t
 	.name		= "ext4",
 	.get_sb		= ext4_get_sb,
 	.kill_sb	= kill_block_super,
-	.fs_flags	= FS_REQUIRES_DEV,
+	.fs_flags	= FS_REQUIRES_DEV | FS_CHECKPOINTABLE,
 };
 
 #ifdef CONFIG_EXT4DEV_COMPAT
@@ -3562,7 +3562,7 @@ static struct file_system_type ext4dev_f
 	.name		= "ext4dev",
 	.get_sb		= ext4dev_get_sb,
 	.kill_sb	= kill_block_super,
-	.fs_flags	= FS_REQUIRES_DEV,
+	.fs_flags	= FS_REQUIRES_DEV | FS_CHECKPOINTABLE,
 };
 MODULE_ALIAS("ext4dev");
 #endif
diff -puN fs/fat/namei_msdos.c~sb_op-for-checkpointable-files fs/fat/namei_msdos.c
--- linux-2.6.git/fs/fat/namei_msdos.c~sb_op-for-checkpointable-files	2009-02-19 10:17:23.000000000 -0800
+++ linux-2.6.git-dave/fs/fat/namei_msdos.c	2009-02-19 10:17:23.000000000 -0800
@@ -685,7 +685,7 @@ static struct file_system_type msdos_fs_
 	.name		= "msdos",
 	.get_sb		= msdos_get_sb,
 	.kill_sb	= kill_block_super,
-	.fs_flags	= FS_REQUIRES_DEV,
+	.fs_flags	= FS_REQUIRES_DEV | FS_CHECKPOINTABLE,
 };
 
 static int __init init_msdos_fs(void)
diff -puN fs/fat/namei_vfat.c~sb_op-for-checkpointable-files fs/fat/namei_vfat.c
--- linux-2.6.git/fs/fat/namei_vfat.c~sb_op-for-checkpointable-files	2009-02-19 10:17:23.000000000 -0800
+++ linux-2.6.git-dave/fs/fat/namei_vfat.c	2009-02-19 10:17:23.000000000 -0800
@@ -1077,7 +1077,7 @@ static struct file_system_type vfat_fs_t
 	.name		= "vfat",
 	.get_sb		= vfat_get_sb,
 	.kill_sb	= kill_block_super,
-	.fs_flags	= FS_REQUIRES_DEV,
+	.fs_flags	= FS_REQUIRES_DEV | FS_CHECKPOINTABLE,
 };
 
 static int __init init_vfat_fs(void)
diff -puN fs/jfs/super.c~sb_op-for-checkpointable-files fs/jfs/super.c
--- linux-2.6.git/fs/jfs/super.c~sb_op-for-checkpointable-files	2009-02-19 10:17:23.000000000 -0800
+++ linux-2.6.git-dave/fs/jfs/super.c	2009-02-19 10:17:23.000000000 -0800
@@ -757,7 +757,7 @@ static struct file_system_type jfs_fs_ty
 	.name		= "jfs",
 	.get_sb		= jfs_get_sb,
 	.kill_sb	= kill_block_super,
-	.fs_flags	= FS_REQUIRES_DEV,
+	.fs_flags	= FS_REQUIRES_DEV | FS_CHECKPOINTABLE,
 };
 
 static void init_once(void *foo)
diff -puN fs/minix/inode.c~sb_op-for-checkpointable-files fs/minix/inode.c
--- linux-2.6.git/fs/minix/inode.c~sb_op-for-checkpointable-files	2009-02-19 10:17:23.000000000 -0800
+++ linux-2.6.git-dave/fs/minix/inode.c	2009-02-19 10:17:23.000000000 -0800
@@ -623,7 +623,7 @@ static struct file_system_type minix_fs_
 	.name		= "minix",
 	.get_sb		= minix_get_sb,
 	.kill_sb	= kill_block_super,
-	.fs_flags	= FS_REQUIRES_DEV,
+	.fs_flags	= FS_REQUIRES_DEV | FS_CHECKPOINTABLE,
 };
 
 static int __init init_minix_fs(void)
diff -puN fs/reiserfs/super.c~sb_op-for-checkpointable-files fs/reiserfs/super.c
--- linux-2.6.git/fs/reiserfs/super.c~sb_op-for-checkpointable-files	2009-02-19 10:17:23.000000000 -0800
+++ linux-2.6.git-dave/fs/reiserfs/super.c	2009-02-19 10:17:23.000000000 -0800
@@ -2284,7 +2284,7 @@ struct file_system_type reiserfs_fs_type
 	.name = "reiserfs",
 	.get_sb = get_super_block,
 	.kill_sb = reiserfs_kill_sb,
-	.fs_flags = FS_REQUIRES_DEV,
+	.fs_flags = FS_REQUIRES_DEV | FS_CHECKPOINTABLE,
 };
 
 MODULE_DESCRIPTION("ReiserFS journaled filesystem");
diff -puN fs/xfs/linux-2.6/xfs_super.c~sb_op-for-checkpointable-files fs/xfs/linux-2.6/xfs_super.c
--- linux-2.6.git/fs/xfs/linux-2.6/xfs_super.c~sb_op-for-checkpointable-files	2009-02-19 10:17:23.000000000 -0800
+++ linux-2.6.git-dave/fs/xfs/linux-2.6/xfs_super.c	2009-02-19 10:17:23.000000000 -0800
@@ -1866,7 +1866,7 @@ static struct file_system_type xfs_fs_ty
 	.name			= "xfs",
 	.get_sb			= xfs_fs_get_sb,
 	.kill_sb		= kill_block_super,
-	.fs_flags		= FS_REQUIRES_DEV,
+	.fs_flags		= FS_REQUIRES_DEV | FS_CHECKPOINTABLE,
 };
 
 STATIC int __init
diff -puN include/linux/fs.h~sb_op-for-checkpointable-files include/linux/fs.h
--- linux-2.6.git/include/linux/fs.h~sb_op-for-checkpointable-files	2009-02-19 10:17:23.000000000 -0800
+++ linux-2.6.git-dave/include/linux/fs.h	2009-02-19 10:17:23.000000000 -0800
@@ -104,6 +104,7 @@ extern int dir_notify_enable;
 #define FS_REQUIRES_DEV 1 
 #define FS_BINARY_MOUNTDATA 2
 #define FS_HAS_SUBTYPE 4
+#define FS_CHECKPOINTABLE 8
 #define FS_REVAL_DOT	16384	/* Check the paths ".", ".." for staleness */
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move()
 					 * during rename() internally.
_

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [RFC][PATCH 2/5] file c/r: expose functions to query fs support
  2009-02-19 18:20 [RFC][PATCH 1/5] create fs flag to mark c/r supported fs's Dave Hansen
@ 2009-02-19 18:20 ` Dave Hansen
  2009-02-19 18:20 ` [RFC][PATCH 3/5] check files for checkpointability Dave Hansen
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 14+ messages in thread
From: Dave Hansen @ 2009-02-19 18:20 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: containers, linux-kernel@vger.kernel.org, Serge E. Hallyn,
	Oren Laadan, Alexey Dobriyan, Dave Hansen


This pair of functions will check to see whether a given
'struct file' can be checkpointed.  If it can't be, the
"explain" function can also give a description why.

Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
---

 linux-2.6.git-dave/checkpoint/ckpt_file.c     |   30 ++++++++++++++++++++++++++
 linux-2.6.git-dave/include/linux/checkpoint.h |   18 +++++++++++++++
 2 files changed, 48 insertions(+)

diff -puN checkpoint/ckpt_file.c~cr-explain-unckpt-file checkpoint/ckpt_file.c
--- linux-2.6.git/checkpoint/ckpt_file.c~cr-explain-unckpt-file	2009-02-19 10:17:24.000000000 -0800
+++ linux-2.6.git-dave/checkpoint/ckpt_file.c	2009-02-19 10:17:24.000000000 -0800
@@ -72,6 +72,36 @@ int cr_scan_fds(struct files_struct *fil
 	return n;
 }
 
+int cr_explain_file(struct file *file, char *explain, int left)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct file_system_type *fs_type = inode->i_sb->s_type;
+
+	if (!(fs_type->fs_flags & FS_CHECKPOINTABLE))
+		return snprintf(explain, left,
+				" (%s does not support checkpoint)",
+				fs_type->name);
+
+	if (special_file(inode->i_mode))
+		return snprintf(explain, left, " (special file)");
+
+	return 0;
+}
+
+int cr_file_supported(struct file *file)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct file_system_type *fs_type = inode->i_sb->s_type;
+
+	if (!(fs_type->fs_flags & FS_CHECKPOINTABLE))
+		return 0;
+
+	if (special_file(inode->i_mode))
+		return 0;
+
+	return 1;
+}
+
 /* cr_write_fd_data - dump the state of a given file pointer */
 static int cr_write_fd_data(struct cr_ctx *ctx, struct file *file, int parent)
 {
diff -puN include/linux/checkpoint.h~cr-explain-unckpt-file include/linux/checkpoint.h
--- linux-2.6.git/include/linux/checkpoint.h~cr-explain-unckpt-file	2009-02-19 10:17:24.000000000 -0800
+++ linux-2.6.git-dave/include/linux/checkpoint.h	2009-02-19 10:17:24.000000000 -0800
@@ -13,6 +13,8 @@
 #include <linux/path.h>
 #include <linux/fs.h>
 
+#ifdef CONFIG_CHECKPOINT_RESTART
+
 #define CR_VERSION  2
 
 struct cr_ctx {
@@ -99,4 +101,20 @@ extern int cr_read_files(struct cr_ctx *
 
 #define pr_fmt(fmt) "[%d:c/r:%s] " fmt, task_pid_vnr(current), __func__
 
+int cr_explain_file(struct file *file, char *explain, int left);
+int cr_file_supported(struct file *file);
+
+#else
+
+static inline int cr_explain_file(struct file *file, char *explain, int left)
+{
+	return 0;
+}
+
+int cr_file_supported(struct file *file)
+{
+	return 0;
+}
+
+#endif
 #endif /* _CHECKPOINT_CKPT_H_ */
_

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [RFC][PATCH 3/5] check files for checkpointability
  2009-02-19 18:20 [RFC][PATCH 1/5] create fs flag to mark c/r supported fs's Dave Hansen
  2009-02-19 18:20 ` [RFC][PATCH 2/5] file c/r: expose functions to query fs support Dave Hansen
@ 2009-02-19 18:20 ` Dave Hansen
  2009-02-23 23:49   ` Serge E. Hallyn
  2009-02-19 18:20 ` [RFC][PATCH 4/5] breakout fdinfo sprintf() into its own function Dave Hansen
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 14+ messages in thread
From: Dave Hansen @ 2009-02-19 18:20 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: containers, linux-kernel@vger.kernel.org, Serge E. Hallyn,
	Oren Laadan, Alexey Dobriyan, Dave Hansen


Introduce a files_struct counter to indicate whether a particular
file_struct has ever contained a file which can not be
checkpointed.  This flag is a one-way trip; once it is set, it may
not be unset.

We assume at allocation that a new files_struct is clean and may
be checkpointed.  However, as soon as it has had its files filled
from its parent's, we check it for real in __scan_files_for_cr().
At that point, we mark it if it contained any uncheckpointable
files.

We also check each 'struct file' when it is installed in a fd
slot.  This way, if anyone open()s or managed to dup() an
unsuppored file, we can catch it.

Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
---

 linux-2.6.git-dave/fs/file.c                  |   19 +++++++++++++++++++
 linux-2.6.git-dave/fs/open.c                  |    5 +++++
 linux-2.6.git-dave/include/linux/checkpoint.h |   17 ++++++++++++++++-
 linux-2.6.git-dave/include/linux/fdtable.h    |    3 +++
 4 files changed, 43 insertions(+), 1 deletion(-)

diff -puN fs/file.c~deny-ckpt fs/file.c
--- linux-2.6.git/fs/file.c~deny-ckpt	2009-02-19 10:17:25.000000000 -0800
+++ linux-2.6.git-dave/fs/file.c	2009-02-19 10:17:25.000000000 -0800
@@ -15,6 +15,7 @@
 #include <linux/file.h>
 #include <linux/fdtable.h>
 #include <linux/bitops.h>
+#include <linux/checkpoint.h>
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
 #include <linux/rcupdate.h>
@@ -285,6 +286,20 @@ static int count_open_files(struct fdtab
 	return i;
 }
 
+static void __scan_files_for_cr(struct files_struct *files)
+{
+	int i;
+
+	for (i = 0; i < files->fdtab.max_fds; i++) {
+		struct file *f = fcheck_files(files, i);
+		if (!f)
+			continue;
+		if (cr_file_supported(f))
+			continue;
+		files_deny_checkpointing(files);
+	}
+}
+
 /*
  * Allocate a new files structure and copy contents from the
  * passed in files structure.
@@ -303,6 +318,9 @@ struct files_struct *dup_fd(struct files
 		goto out;
 
 	atomic_set(&newf->count, 1);
+#ifdef CONFIG_CHECKPOINT_RESTART
+	atomic_set(&newf->may_checkpoint, 1);
+#endif
 
 	spin_lock_init(&newf->file_lock);
 	newf->next_fd = 0;
@@ -396,6 +414,7 @@ struct files_struct *dup_fd(struct files
 
 	rcu_assign_pointer(newf->fdt, new_fdt);
 
+	__scan_files_for_cr(newf);
 	return newf;
 
 out_release:
diff -puN fs/open.c~deny-ckpt fs/open.c
--- linux-2.6.git/fs/open.c~deny-ckpt	2009-02-19 10:17:25.000000000 -0800
+++ linux-2.6.git-dave/fs/open.c	2009-02-19 10:17:25.000000000 -0800
@@ -29,6 +29,7 @@
 #include <linux/rcupdate.h>
 #include <linux/audit.h>
 #include <linux/falloc.h>
+#include <linux/checkpoint.h>
 
 int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
@@ -998,6 +999,10 @@ void fd_install(unsigned int fd, struct 
 {
 	struct files_struct *files = current->files;
 	struct fdtable *fdt;
+
+	if (!cr_file_supported(file))
+		files_deny_checkpointing(files);
+
 	spin_lock(&files->file_lock);
 	fdt = files_fdtable(files);
 	BUG_ON(fdt->fd[fd] != NULL);
diff -puN include/linux/checkpoint.h~deny-ckpt include/linux/checkpoint.h
--- linux-2.6.git/include/linux/checkpoint.h~deny-ckpt	2009-02-19 10:17:25.000000000 -0800
+++ linux-2.6.git-dave/include/linux/checkpoint.h	2009-02-19 10:17:25.000000000 -0800
@@ -12,6 +12,7 @@
 
 #include <linux/path.h>
 #include <linux/fs.h>
+#include <linux/fdtable.h>
 
 #ifdef CONFIG_CHECKPOINT_RESTART
 
@@ -101,17 +102,31 @@ extern int cr_read_files(struct cr_ctx *
 
 #define pr_fmt(fmt) "[%d:c/r:%s] " fmt, task_pid_vnr(current), __func__
 
+static inline void __files_deny_checkpointing(struct files_struct *files,
+		char *file, int line)
+{
+	if (!atomic_dec_and_test(&files->may_checkpoint))
+		return;
+	printk(KERN_INFO "process performed an action that can not be "
+			"checkpointed at: %s:%d\n", file, line);
+	WARN_ON(1);
+}
+#define files_deny_checkpointing(f)  \
+	__files_deny_checkpointing(f, __FILE__, __LINE__)
+
 int cr_explain_file(struct file *file, char *explain, int left);
 int cr_file_supported(struct file *file);
 
 #else
 
+static inline void files_deny_checkpointing(struct files_struct *files) {}
+
 static inline int cr_explain_file(struct file *file, char *explain, int left)
 {
 	return 0;
 }
 
-int cr_file_supported(struct file *file)
+static int cr_file_supported(struct file *file)
 {
 	return 0;
 }
diff -puN include/linux/fdtable.h~deny-ckpt include/linux/fdtable.h
--- linux-2.6.git/include/linux/fdtable.h~deny-ckpt	2009-02-19 10:17:25.000000000 -0800
+++ linux-2.6.git-dave/include/linux/fdtable.h	2009-02-19 10:17:25.000000000 -0800
@@ -45,6 +45,9 @@ struct files_struct {
 	atomic_t count;
 	struct fdtable *fdt;
 	struct fdtable fdtab;
+#ifdef CONFIG_CHECKPOINT_RESTART
+	atomic_t may_checkpoint;
+#endif
   /*
    * written part on a separate cache line in SMP
    */
_

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [RFC][PATCH 4/5] breakout fdinfo sprintf() into its own function
  2009-02-19 18:20 [RFC][PATCH 1/5] create fs flag to mark c/r supported fs's Dave Hansen
  2009-02-19 18:20 ` [RFC][PATCH 2/5] file c/r: expose functions to query fs support Dave Hansen
  2009-02-19 18:20 ` [RFC][PATCH 3/5] check files for checkpointability Dave Hansen
@ 2009-02-19 18:20 ` Dave Hansen
  2009-02-19 18:20 ` [RFC][PATCH 5/5] add c/r info to fdinfo Dave Hansen
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 14+ messages in thread
From: Dave Hansen @ 2009-02-19 18:20 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: containers, linux-kernel@vger.kernel.org, Serge E. Hallyn,
	Oren Laadan, Alexey Dobriyan, Dave Hansen


I'll be adding to this in a moment and it is in a bad place
to do that cleanly now.

Also, increase the buffer size.  Most /proc files can
output up to a page, so use the same here.

Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
---

 linux-2.6.git-dave/fs/proc/base.c |   23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff -puN fs/proc/base.c~breakout-fdinfo fs/proc/base.c
--- linux-2.6.git/fs/proc/base.c~breakout-fdinfo	2009-02-19 10:17:25.000000000 -0800
+++ linux-2.6.git-dave/fs/proc/base.c	2009-02-19 10:17:25.000000000 -0800
@@ -1597,7 +1597,18 @@ out:
 	return ~0U;
 }
 
-#define PROC_FDINFO_MAX 64
+#define PROC_FDINFO_MAX PAGE_SIZE
+
+static void proc_fd_write_info(struct file *file, char *info)
+{
+	int max = PROC_FDINFO_MAX;
+	int p = 0;
+	if (!info)
+		return;
+
+	p += snprintf(info+p, max-p, "pos:\t%lli\n", (long long) file->f_pos);
+	p += snprintf(info+p, max-p, "flags:\t0%o\n", file->f_flags);
+}
 
 static int proc_fd_info(struct inode *inode, struct path *path, char *info)
 {
@@ -1622,12 +1633,7 @@ static int proc_fd_info(struct inode *in
 				*path = file->f_path;
 				path_get(&file->f_path);
 			}
-			if (info)
-				snprintf(info, PROC_FDINFO_MAX,
-					 "pos:\t%lli\n"
-					 "flags:\t0%o\n",
-					 (long long) file->f_pos,
-					 file->f_flags);
+			proc_fd_write_info(file, info);
 			spin_unlock(&files->file_lock);
 			put_files_struct(files);
 			return 0;
@@ -1831,10 +1837,11 @@ static int proc_readfd(struct file *filp
 static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
 				      size_t len, loff_t *ppos)
 {
-	char tmp[PROC_FDINFO_MAX];
+	char *tmp = kmalloc(PROC_FDINFO_MAX, GFP_KERNEL);
 	int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
 	if (!err)
 		err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
+	kfree(tmp);
 	return err;
 }
 
_

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [RFC][PATCH 5/5] add c/r info to fdinfo
  2009-02-19 18:20 [RFC][PATCH 1/5] create fs flag to mark c/r supported fs's Dave Hansen
                   ` (2 preceding siblings ...)
  2009-02-19 18:20 ` [RFC][PATCH 4/5] breakout fdinfo sprintf() into its own function Dave Hansen
@ 2009-02-19 18:20 ` Dave Hansen
  2009-02-19 18:23 ` [RFC][PATCH 1/5] create fs flag to mark c/r supported fs's Dave Hansen
  2009-02-19 19:00 ` Christoph Hellwig
  5 siblings, 0 replies; 14+ messages in thread
From: Dave Hansen @ 2009-02-19 18:20 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: containers, linux-kernel@vger.kernel.org, Serge E. Hallyn,
	Oren Laadan, Alexey Dobriyan, Dave Hansen


Use the new checkpoint/restart file functions to query
and report on each fd in the /proc/$$/fdinfo/X file.

This should provide an easy way to examine processes
at runtime to see what exactly is causing their inability
to checkpoint.

Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
---

 linux-2.6.git-dave/fs/proc/base.c |    8 ++++++++
 1 file changed, 8 insertions(+)

diff -puN fs/proc/base.c~add-cr-part-to-fdinfo fs/proc/base.c
--- linux-2.6.git/fs/proc/base.c~add-cr-part-to-fdinfo	2009-02-19 10:17:26.000000000 -0800
+++ linux-2.6.git-dave/fs/proc/base.c	2009-02-19 10:17:26.000000000 -0800
@@ -79,6 +79,7 @@
 #include <linux/oom.h>
 #include <linux/elf.h>
 #include <linux/pid_namespace.h>
+#include <linux/checkpoint.h>
 #include "internal.h"
 
 /* NOTE:
@@ -1601,6 +1602,7 @@ out:
 
 static void proc_fd_write_info(struct file *file, char *info)
 {
+	int checkpointable;
 	int max = PROC_FDINFO_MAX;
 	int p = 0;
 	if (!info)
@@ -1608,6 +1610,12 @@ static void proc_fd_write_info(struct fi
 
 	p += snprintf(info+p, max-p, "pos:\t%lli\n", (long long) file->f_pos);
 	p += snprintf(info+p, max-p, "flags:\t0%o\n", file->f_flags);
+
+	checkpointable = cr_file_supported(file);
+	p += snprintf(info+p, max-p, "checkpointable:\t%d", checkpointable);
+	if (!checkpointable)
+		p += cr_explain_file(file, info+p, max-p);
+	p += snprintf(info+p, max-p, "\n");
 }
 
 static int proc_fd_info(struct inode *inode, struct path *path, char *info)
_

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC][PATCH 1/5] create fs flag to mark c/r supported fs's
  2009-02-19 18:20 [RFC][PATCH 1/5] create fs flag to mark c/r supported fs's Dave Hansen
                   ` (3 preceding siblings ...)
  2009-02-19 18:20 ` [RFC][PATCH 5/5] add c/r info to fdinfo Dave Hansen
@ 2009-02-19 18:23 ` Dave Hansen
  2009-02-19 19:00 ` Christoph Hellwig
  5 siblings, 0 replies; 14+ messages in thread
From: Dave Hansen @ 2009-02-19 18:23 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: containers, linux-kernel@vger.kernel.org, Serge E. Hallyn,
	Oren Laadan, Alexey Dobriyan

BTW, after you apply this and turn on the config option, you do get a
ton of warnings at runtime.  

qemu:~# cat /proc/*/fdinfo/* | grep check  | sort | uniq -c | sort -n 
      1 checkpointable:	0(proc does not support checkpoint)
      6 checkpointable:	0(pipefs does not support checkpoint)
      8 checkpointable:	0(devpts does not support checkpoint)
      9 checkpointable:	0(sockfs does not support checkpoint)
     17 checkpointable:	0(special file)
     17 checkpointable:	1

The special files are /dev/{null,console,xconsole}.  The next step
should probably be to address simple special files like /dev/null.  I
think I'll do that with a new f_op.  Any thoughts?

-- Dave


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC][PATCH 1/5] create fs flag to mark c/r supported fs's
  2009-02-19 18:20 [RFC][PATCH 1/5] create fs flag to mark c/r supported fs's Dave Hansen
                   ` (4 preceding siblings ...)
  2009-02-19 18:23 ` [RFC][PATCH 1/5] create fs flag to mark c/r supported fs's Dave Hansen
@ 2009-02-19 19:00 ` Christoph Hellwig
  2009-02-19 19:24   ` Dave Hansen
  5 siblings, 1 reply; 14+ messages in thread
From: Christoph Hellwig @ 2009-02-19 19:00 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Ingo Molnar, containers, linux-kernel@vger.kernel.org,
	Serge E. Hallyn, Oren Laadan, Alexey Dobriyan

On Thu, Feb 19, 2009 at 10:20:07AM -0800, Dave Hansen wrote:
> 
> There are plenty of filesystems that are not supported for
> c/r at this point.  Think of things like hugetlbfs which
> are externally visible or pipefs which are kernel-internal.
> 
> This provides a quick way to make the "normal" filesystems
> which are currently supported.  This is also safe if any
> new code gets added.  We assume that a fs is non-supported
> unless someone takes explicit action to the contrary.
> 
> I bet there are some more filesystems that are OK, but
> these probably cover 99% of the users for now.

Given that a normal fs should be checkpointable you should
make those exposing internal state, not the other way around.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC][PATCH 1/5] create fs flag to mark c/r supported fs's
  2009-02-19 19:00 ` Christoph Hellwig
@ 2009-02-19 19:24   ` Dave Hansen
  0 siblings, 0 replies; 14+ messages in thread
From: Dave Hansen @ 2009-02-19 19:24 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: containers, linux-kernel@vger.kernel.org, Ingo Molnar,
	Alexey Dobriyan

On Thu, 2009-02-19 at 14:00 -0500, Christoph Hellwig wrote:
> On Thu, Feb 19, 2009 at 10:20:07AM -0800, Dave Hansen wrote:
> > 
> > There are plenty of filesystems that are not supported for
> > c/r at this point.  Think of things like hugetlbfs which
> > are externally visible or pipefs which are kernel-internal.
> > 
> > This provides a quick way to make the "normal" filesystems
> > which are currently supported.  This is also safe if any
> > new code gets added.  We assume that a fs is non-supported
> > unless someone takes explicit action to the contrary.
> > 
> > I bet there are some more filesystems that are OK, but
> > these probably cover 99% of the users for now.
> 
> Given that a normal fs should be checkpointable you should
> make those exposing internal state, not the other way around.

In general I agree with you.  But, I think practicality gets in the way
here.  Here's the cscope output from file_system_type and
FS_REQUIRES_DEV (basically grepping the tree for them):

$ wc -l file_system_type FS_REQUIRES_DEV 
  256 file_system_type
   41 FS_REQUIRES_DEV

So, (very) roughly 1/6 of the filesystems are the "normal" block-based
ones that we all know and love.  The rest are ones that I'd have to at
the very least think about before saying that they're supported.

I guess we could say that FS_REQUIRES_DEV by default implies
FS_CHECKPOINTABLE:

#define __FS_REQUIRES_DEV 1
#define FS_REQUIRES_DEV (__FS_REQUIRES_DEV|FS_CHECKPOINTABLE)

I really don't mind doing it *that* much either way, but I'd sure like
to go specifically tag ~40 filesystems rather than 200.  

-- Dave


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC][PATCH 3/5] check files for checkpointability
  2009-02-19 18:20 ` [RFC][PATCH 3/5] check files for checkpointability Dave Hansen
@ 2009-02-23 23:49   ` Serge E. Hallyn
  2009-02-24  0:30     ` Dave Hansen
  0 siblings, 1 reply; 14+ messages in thread
From: Serge E. Hallyn @ 2009-02-23 23:49 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Ingo Molnar, containers, linux-kernel@vger.kernel.org,
	Oren Laadan, Alexey Dobriyan

Quoting Dave Hansen (dave@linux.vnet.ibm.com):
> 
> Introduce a files_struct counter to indicate whether a particular
> file_struct has ever contained a file which can not be
> checkpointed.  This flag is a one-way trip; once it is set, it may
> not be unset.
> 
> We assume at allocation that a new files_struct is clean and may
> be checkpointed.  However, as soon as it has had its files filled
> from its parent's, we check it for real in __scan_files_for_cr().
> At that point, we mark it if it contained any uncheckpointable
> files.
> 
> We also check each 'struct file' when it is installed in a fd
> slot.  This way, if anyone open()s or managed to dup() an
> unsuppored file, we can catch it.

So what is the point of tagging the files_struct counter and
making it a one-way trip?  Why not just check every file at
checkpoint time?

-serge

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC][PATCH 3/5] check files for checkpointability
  2009-02-23 23:49   ` Serge E. Hallyn
@ 2009-02-24  0:30     ` Dave Hansen
  2009-02-24  1:10       ` Serge E. Hallyn
  0 siblings, 1 reply; 14+ messages in thread
From: Dave Hansen @ 2009-02-24  0:30 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: Alexey Dobriyan, Ingo Molnar, containers,
	linux-kernel@vger.kernel.org

On Mon, 2009-02-23 at 17:49 -0600, Serge E. Hallyn wrote:
> Quoting Dave Hansen (dave@linux.vnet.ibm.com): 
> > Introduce a files_struct counter to indicate whether a particular
> > file_struct has ever contained a file which can not be
> > checkpointed.  This flag is a one-way trip; once it is set, it may
> > not be unset.
> > 
> > We assume at allocation that a new files_struct is clean and may
> > be checkpointed.  However, as soon as it has had its files filled
> > from its parent's, we check it for real in __scan_files_for_cr().
> > At that point, we mark it if it contained any uncheckpointable
> > files.
> > 
> > We also check each 'struct file' when it is installed in a fd
> > slot.  This way, if anyone open()s or managed to dup() an
> > unsuppored file, we can catch it.
> 
> So what is the point of tagging the files_struct counter and
> making it a one-way trip?  Why not just check every file at
> checkpoint time?

We need both.

This allows us to tell where and when we went wrong.  Take a process
that's been running for a month.  After 5 days it did something random
to keep it from being checkpointed.  You're going to have forgotten all
about it 25 days later.  This gives us an opportunity to spit into dmesg
or just plain log it.  It also gives the app some ability to reflect and
see what its uncheckpointable attributes are.  

-- Dave


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC][PATCH 3/5] check files for checkpointability
  2009-02-24  0:30     ` Dave Hansen
@ 2009-02-24  1:10       ` Serge E. Hallyn
  2009-02-24  1:20         ` Dave Hansen
  0 siblings, 1 reply; 14+ messages in thread
From: Serge E. Hallyn @ 2009-02-24  1:10 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Alexey Dobriyan, Ingo Molnar, containers,
	linux-kernel@vger.kernel.org

Quoting Dave Hansen (dave@linux.vnet.ibm.com):
> On Mon, 2009-02-23 at 17:49 -0600, Serge E. Hallyn wrote:
> > Quoting Dave Hansen (dave@linux.vnet.ibm.com): 
> > > Introduce a files_struct counter to indicate whether a particular
> > > file_struct has ever contained a file which can not be
> > > checkpointed.  This flag is a one-way trip; once it is set, it may
> > > not be unset.
> > > 
> > > We assume at allocation that a new files_struct is clean and may
> > > be checkpointed.  However, as soon as it has had its files filled
> > > from its parent's, we check it for real in __scan_files_for_cr().
> > > At that point, we mark it if it contained any uncheckpointable
> > > files.
> > > 
> > > We also check each 'struct file' when it is installed in a fd
> > > slot.  This way, if anyone open()s or managed to dup() an
> > > unsuppored file, we can catch it.
> > 
> > So what is the point of tagging the files_struct counter and
> > making it a one-way trip?  Why not just check every file at
> > checkpoint time?
> 
> We need both.
> 
> This allows us to tell where and when we went wrong.  Take a process
> that's been running for a month.  After 5 days it did something random
> to keep it from being checkpointed.  You're going to have forgotten all
> about it 25 days later.  This gives us an opportunity to spit into dmesg
> or just plain log it.  It also gives the app some ability to reflect and
> see what its uncheckpointable attributes are.  

Hmm.  In that case, rather than refuse checkpoint, I prefer that we make
this a footnote in the /proc/$$/checkpointable output.

-serge

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC][PATCH 3/5] check files for checkpointability
  2009-02-24  1:10       ` Serge E. Hallyn
@ 2009-02-24  1:20         ` Dave Hansen
  2009-02-24 19:43           ` Serge E. Hallyn
  0 siblings, 1 reply; 14+ messages in thread
From: Dave Hansen @ 2009-02-24  1:20 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: containers, Ingo Molnar, Alexey Dobriyan,
	linux-kernel@vger.kernel.org

On Mon, 2009-02-23 at 19:10 -0600, Serge E. Hallyn wrote:
> > This allows us to tell where and when we went wrong.  Take a process
> > that's been running for a month.  After 5 days it did something random
> > to keep it from being checkpointed.  You're going to have forgotten all
> > about it 25 days later.  This gives us an opportunity to spit into dmesg
> > or just plain log it.  It also gives the app some ability to reflect and
> > see what its uncheckpointable attributes are.  
> 
> Hmm.  In that case, rather than refuse checkpoint, I prefer that we make
> this a footnote in the /proc/$$/checkpointable output.

Yeah, that's cool.  If we were smart, we'd also get hooked into some of
the ftrace output so that we have a real chance of logging these things
and being able to go look at something about them down the line.

-- Dave


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC][PATCH 3/5] check files for checkpointability
  2009-02-24  1:20         ` Dave Hansen
@ 2009-02-24 19:43           ` Serge E. Hallyn
  2009-02-24 19:47             ` Dave Hansen
  0 siblings, 1 reply; 14+ messages in thread
From: Serge E. Hallyn @ 2009-02-24 19:43 UTC (permalink / raw)
  To: Dave Hansen
  Cc: containers, Ingo Molnar, Alexey Dobriyan,
	linux-kernel@vger.kernel.org

Quoting Dave Hansen (dave@linux.vnet.ibm.com):
> On Mon, 2009-02-23 at 19:10 -0600, Serge E. Hallyn wrote:
> > > This allows us to tell where and when we went wrong.  Take a process
> > > that's been running for a month.  After 5 days it did something random
> > > to keep it from being checkpointed.  You're going to have forgotten all
> > > about it 25 days later.  This gives us an opportunity to spit into dmesg
> > > or just plain log it.  It also gives the app some ability to reflect and
> > > see what its uncheckpointable attributes are.  
> > 
> > Hmm.  In that case, rather than refuse checkpoint, I prefer that we make
> > this a footnote in the /proc/$$/checkpointable output.
> 
> Yeah, that's cool.  If we were smart, we'd also get hooked into some of
> the ftrace output so that we have a real chance of logging these things
> and being able to go look at something about them down the line.

How exactly woudl that work?  Would that work as a *replacement* for
filling up the logs as I was recommending?  If so I'm all for it...

-serge

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC][PATCH 3/5] check files for checkpointability
  2009-02-24 19:43           ` Serge E. Hallyn
@ 2009-02-24 19:47             ` Dave Hansen
  0 siblings, 0 replies; 14+ messages in thread
From: Dave Hansen @ 2009-02-24 19:47 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: containers, Ingo Molnar, Alexey Dobriyan,
	linux-kernel@vger.kernel.org

On Tue, 2009-02-24 at 13:43 -0600, Serge E. Hallyn wrote:
> Quoting Dave Hansen (dave@linux.vnet.ibm.com):
> > On Mon, 2009-02-23 at 19:10 -0600, Serge E. Hallyn wrote:
> > > > This allows us to tell where and when we went wrong.  Take a process
> > > > that's been running for a month.  After 5 days it did something random
> > > > to keep it from being checkpointed.  You're going to have forgotten all
> > > > about it 25 days later.  This gives us an opportunity to spit into dmesg
> > > > or just plain log it.  It also gives the app some ability to reflect and
> > > > see what its uncheckpointable attributes are.  
> > > 
> > > Hmm.  In that case, rather than refuse checkpoint, I prefer that we make
> > > this a footnote in the /proc/$$/checkpointable output.
> > 
> > Yeah, that's cool.  If we were smart, we'd also get hooked into some of
> > the ftrace output so that we have a real chance of logging these things
> > and being able to go look at something about them down the line.
> 
> How exactly woudl that work?  Would that work as a *replacement* for
> filling up the logs as I was recommending?  If so I'm all for it...

I think it would be a replacement for dmesg, at least.  

-- Dave


^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2009-02-24 19:47 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-02-19 18:20 [RFC][PATCH 1/5] create fs flag to mark c/r supported fs's Dave Hansen
2009-02-19 18:20 ` [RFC][PATCH 2/5] file c/r: expose functions to query fs support Dave Hansen
2009-02-19 18:20 ` [RFC][PATCH 3/5] check files for checkpointability Dave Hansen
2009-02-23 23:49   ` Serge E. Hallyn
2009-02-24  0:30     ` Dave Hansen
2009-02-24  1:10       ` Serge E. Hallyn
2009-02-24  1:20         ` Dave Hansen
2009-02-24 19:43           ` Serge E. Hallyn
2009-02-24 19:47             ` Dave Hansen
2009-02-19 18:20 ` [RFC][PATCH 4/5] breakout fdinfo sprintf() into its own function Dave Hansen
2009-02-19 18:20 ` [RFC][PATCH 5/5] add c/r info to fdinfo Dave Hansen
2009-02-19 18:23 ` [RFC][PATCH 1/5] create fs flag to mark c/r supported fs's Dave Hansen
2009-02-19 19:00 ` Christoph Hellwig
2009-02-19 19:24   ` Dave Hansen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).