From: Dave Hansen <dave-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
To: Linus Torvalds <torvalds-3NddpPZAyC0@public.gmane.org>
Cc: containers
<containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org>,
"linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org"
<linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>,
Dave Hansen
<dave-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>,
linux-mm <linux-mm-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org>,
Thomas Gleixner <tglx-hfZtesqFncYOwBW4kG4KsQ@public.gmane.org>,
Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org>
Subject: [PATCH 9/9] Restore open file descriprtors
Date: Thu, 16 Oct 2008 11:14:27 -0700 [thread overview]
Message-ID: <20081016181427.1E5C5472@kernel> (raw)
In-Reply-To: <20081016181414.934C4FCC@kernel>
From: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
Restore open file descriptors: for each FD read 'struct cr_hdr_fd_ent'
and look up objref in the hash table; if not found (first occurrence), read
in 'struct cr_hdr_fd_data', create a new FD and register in the hash.
Otherwise attach the file pointer from the hash as an FD.
This patch only handles basic FDs - regular files, directories and also
symbolic links.
Signed-off-by: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
Acked-by: Serge Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
Signed-off-by: Dave Hansen <dave-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
linux-2.6.git-dave/checkpoint/Makefile | 2
linux-2.6.git-dave/checkpoint/restart.c | 4
linux-2.6.git-dave/checkpoint/rstr_file.c | 246 ++++++++++++++++++++++++++
linux-2.6.git-dave/include/linux/checkpoint.h | 1
4 files changed, 252 insertions(+), 1 deletion(-)
diff -puN checkpoint/Makefile~v6_PATCH_9_9_Restore_open_file_descriprtors checkpoint/Makefile
--- linux-2.6.git/checkpoint/Makefile~v6_PATCH_9_9_Restore_open_file_descriprtors 2008-10-16 10:53:39.000000000 -0700
+++ linux-2.6.git-dave/checkpoint/Makefile 2008-10-16 10:53:39.000000000 -0700
@@ -3,4 +3,4 @@
#
obj-$(CONFIG_CHECKPOINT_RESTART) += sys.o checkpoint.o restart.o objhash.o \
- ckpt_mem.o rstr_mem.o ckpt_file.o
+ ckpt_mem.o rstr_mem.o ckpt_file.o rstr_file.o
diff -puN checkpoint/restart.c~v6_PATCH_9_9_Restore_open_file_descriprtors checkpoint/restart.c
--- linux-2.6.git/checkpoint/restart.c~v6_PATCH_9_9_Restore_open_file_descriprtors 2008-10-16 10:53:39.000000000 -0700
+++ linux-2.6.git-dave/checkpoint/restart.c 2008-10-16 10:53:39.000000000 -0700
@@ -219,6 +219,10 @@ static int cr_read_task(struct cr_ctx *c
cr_debug("memory: ret %d\n", ret);
if (ret < 0)
goto out;
+ ret = cr_read_files(ctx);
+ cr_debug("files: ret %d\n", ret);
+ if (ret < 0)
+ goto out;
ret = cr_read_thread(ctx);
cr_debug("thread: ret %d\n", ret);
if (ret < 0)
diff -puN /dev/null checkpoint/rstr_file.c
--- /dev/null 2008-09-02 09:40:19.000000000 -0700
+++ linux-2.6.git-dave/checkpoint/rstr_file.c 2008-10-16 10:53:39.000000000 -0700
@@ -0,0 +1,246 @@
+/*
+ * Restart: restore open file descriptors
+ *
+ * Copyright (C) 2008 Oren Laadan
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of the Linux
+ * distribution for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/fsnotify.h>
+#include <linux/syscalls.h>
+#include <linux/checkpoint.h>
+#include <linux/checkpoint_hdr.h>
+
+#include "checkpoint_file.h"
+
+/*
+ * cr_close_all_fds - close the descriptors listed by cr_scan_fds() for @files
+ *
+ * Returns 0 on success, or the negative value returned by cr_scan_fds().
+ * The results of the individual sys_close() calls are not checked.
+ */
+static int cr_close_all_fds(struct files_struct *files)
+{
+	int *fds;
+	int count, idx;
+
+	count = cr_scan_fds(files, &fds);
+	if (count < 0)
+		return count;
+
+	/* walk the array from its last entry down to the first */
+	for (idx = count - 1; idx >= 0; idx--)
+		sys_close(fds[idx]);
+
+	kfree(fds);
+	return 0;
+}
+
+/**
+ * cr_attach_file - bind a lonely file pointer to a newly allocated descriptor
+ * @file: file pointer that is not yet attached to a descriptor
+ *
+ * Returns the new descriptor, or the negative value returned by
+ * get_unused_fd_flags(), in which case @file is not touched.
+ */
+static int cr_attach_file(struct file *file)
+{
+	int fd;
+
+	fd = get_unused_fd_flags(0);
+	if (fd < 0)
+		return fd;
+
+	fsnotify_open(file->f_path.dentry);
+	fd_install(fd, file);
+	return fd;
+}
+
+/**
+ * cr_attach_get_file - attach (and get) lonely file ptr to a file descriptor
+ * @file: lonely file pointer
+ *
+ * Same as cr_attach_file(), except that an additional reference is taken on
+ * @file for the new descriptor, so the caller's own reference stays valid.
+ *
+ * Returns the new descriptor, or the negative value returned by
+ * get_unused_fd_flags(), in which case no reference is taken.
+ */
+static int cr_attach_get_file(struct file *file)
+{
+	int fd = get_unused_fd_flags(0);
+
+	if (fd >= 0) {
+		/*
+		 * Take the reference before fd_install(): fd_install() hands a
+		 * reference over to the fd table, and once the descriptor is
+		 * visible there a task sharing the table may close it and drop
+		 * that reference before we would otherwise have acquired it.
+		 */
+		get_file(file);
+		fsnotify_open(file->f_path.dentry);
+		fd_install(fd, file);
+	}
+	return fd;
+}
+
+#define CR_SETFL_MASK (O_APPEND|O_NONBLOCK|O_NDELAY|FASYNC|O_DIRECT|O_NOATIME)
+
+/**
+ * cr_read_fd_data - restore the state of a given file pointer
+ * @ctx: checkpoint context
+ * @files: files_struct pointer (not used by this function)
+ * @parent: value cr_read_obj_type() must return for the record; also the
+ *	objref under which the new file is registered in the hash table
+ *
+ * Reads a 'struct cr_hdr_fd_data', opens the file with cr_read_open_fname(),
+ * attaches it to a new descriptor, registers it in the hash table, and then
+ * restores the CR_SETFL_MASK subset of f_flags and the file position.
+ *
+ * Returns the new descriptor on success or a negative error code:
+ * the error from cr_read_obj_type(), -EINVAL when the record's parent does
+ * not match @parent or fd_type is not handled, or the error from any of the
+ * later steps.
+ *
+ * NOTE(review): on failures after cr_attach_file() succeeded the descriptor
+ * stays installed; confirm that the caller tears the task down in that case.
+ */
+static int
+cr_read_fd_data(struct cr_ctx *ctx, struct files_struct *files, int parent)
+{
+	struct cr_hdr_fd_data *hh = cr_hbuf_get(ctx, sizeof(*hh));
+	struct file *file;
+	loff_t pos;
+	int rparent, ret;
+	int fd = 0;	/* pacify gcc warning */
+
+	rparent = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_FD_DATA);
+	cr_debug("rparent %d parent %d flags %#x mode %#x how %d\n",
+		 rparent, parent, hh->f_flags, hh->f_mode, hh->fd_type);
+	if (rparent < 0) {
+		/* propagate the read error itself, not the caller's @parent */
+		ret = rparent;
+		goto out;
+	}
+
+	ret = -EINVAL;
+
+	if (rparent != parent)
+		goto out;
+
+	/* FIX: more sanity checks on f_flags, f_mode etc */
+
+	switch (hh->fd_type) {
+	case CR_FD_FILE:
+	case CR_FD_DIR:
+	case CR_FD_LINK:
+		file = cr_read_open_fname(ctx, hh->f_flags, hh->f_mode);
+		break;
+	default:
+		goto out;
+	}
+
+	if (IS_ERR(file)) {
+		ret = PTR_ERR(file);
+		goto out;
+	}
+
+	/* FIX: need to restore uid, gid, owner etc */
+
+	fd = cr_attach_file(file);	/* no need to cleanup 'file' below */
+	if (fd < 0) {
+		filp_close(file, NULL);
+		ret = fd;
+		goto out;
+	}
+
+	/* register new <objref, file> tuple in hash table */
+	ret = cr_obj_add_ref(ctx, (void *) file, parent, CR_OBJ_FILE, 0);
+	if (ret < 0)
+		goto out;
+	ret = sys_fcntl(fd, F_SETFL, hh->f_flags & CR_SETFL_MASK);
+	if (ret < 0)
+		goto out;
+
+	/*
+	 * Keep the result in a loff_t: a valid offset above INT_MAX must not
+	 * be mistaken for an error after truncation to int.
+	 */
+	pos = vfs_llseek(file, hh->f_pos, SEEK_SET);
+	if (pos < 0 && pos != -ESPIPE) {
+		/* only the error on non-seekable files is ignored */
+		ret = pos;
+		goto out;
+	}
+
+	ret = 0;
+ out:
+	cr_hbuf_put(ctx, sizeof(*hh));
+	return ret < 0 ? ret : fd;
+}
+
+/**
+ * cr_read_fd_ent - restore the state of a given file descriptor
+ * @ctx: checkpoint context
+ * @files: files_struct pointer
+ * @parent: parent objref
+ *
+ * Reads one 'struct cr_hdr_fd_ent' and looks its objref up in the hash
+ * table. A file that is already registered is attached to a fresh
+ * descriptor; otherwise cr_read_fd_data() restores the file as well. The
+ * resulting descriptor is then moved to the recorded fd number and the
+ * close-on-exec flag is set when the record asks for it.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int
+cr_read_fd_ent(struct cr_ctx *ctx, struct files_struct *files, int parent)
+{
+	struct cr_hdr_fd_ent *hh = cr_hbuf_get(ctx, sizeof(*hh));
+	struct file *filp;
+	int got, rparent;
+	int err;
+
+	rparent = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_FD_ENT);
+	cr_debug("rparent %d parent %d ref %d fd %d c.o.e %d\n",
+		 rparent, parent, hh->objref, hh->fd, hh->close_on_exec);
+	err = rparent;
+	if (rparent < 0)
+		goto out;
+
+	err = -EINVAL;
+	if (rparent != parent || hh->objref <= 0)
+		goto out;
+
+	filp = cr_obj_get_by_ref(ctx, hh->objref, CR_OBJ_FILE);
+	if (IS_ERR(filp)) {
+		err = PTR_ERR(filp);
+		goto out;
+	}
+
+	if (!filp) {
+		/* first use of this objref: restore the file and register it */
+		got = cr_read_fd_data(ctx, files, hh->objref);
+	} else {
+		/* file already in the hash table: just attach it again */
+		got = cr_attach_get_file(filp);
+	}
+
+	err = got;
+	if (got < 0)
+		goto out;
+
+	cr_debug("newfd got %d wanted %d\n", got, hh->fd);
+
+	/* move the descriptor to the recorded number when they differ */
+	if (got != hh->fd) {
+		err = sys_dup2(got, hh->fd);
+		if (err < 0)
+			goto out;
+		sys_close(got);
+	}
+
+	if (hh->close_on_exec)
+		set_close_on_exec(hh->fd, 1);
+
+	err = 0;
+ out:
+	cr_hbuf_put(ctx, sizeof(*hh));
+	return err;
+}
+
+/**
+ * cr_read_files - restore the open file descriptors of the current task
+ * @ctx: checkpoint context
+ *
+ * Reads a 'struct cr_hdr_files', closes the descriptors currently open in
+ * current->files and then restores hh->nfds descriptors, one
+ * cr_read_fd_ent() call each.
+ *
+ * Returns 0 on success or a negative error code: the error from
+ * cr_read_obj_type(), -EINVAL for a negative objref or nfds, -EMFILE when
+ * nfds exceeds sysctl_nr_open, or the first error reported while closing
+ * or restoring descriptors.
+ */
+int cr_read_files(struct cr_ctx *ctx)
+{
+	struct cr_hdr_files *hh = cr_hbuf_get(ctx, sizeof(*hh));
+	struct files_struct *files = current->files;
+	int i, parent, ret;
+
+	parent = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_FILES);
+	if (parent < 0) {
+		ret = parent;
+		goto out;
+	}
+
+	ret = -EINVAL;
+#if 0	/* activate when containers are used */
+	if (parent != task_pid_vnr(current))
+		goto out;
+#endif
+	cr_debug("objref %d nfds %d\n", hh->objref, hh->nfds);
+	if (hh->objref < 0 || hh->nfds < 0)
+		goto out;
+
+	if (hh->nfds > sysctl_nr_open) {
+		ret = -EMFILE;
+		goto out;
+	}
+
+	/* point of no return -- close all file descriptors */
+	ret = cr_close_all_fds(files);
+	if (ret < 0)
+		goto out;
+
+	for (i = 0; i < hh->nfds; i++) {
+		ret = cr_read_fd_ent(ctx, files, hh->objref);
+		/* bail out with the error; do not let it be reset to 0 below */
+		if (ret < 0)
+			goto out;
+	}
+
+	ret = 0;
+ out:
+	cr_hbuf_put(ctx, sizeof(*hh));
+	return ret;
+}
diff -puN include/linux/checkpoint.h~v6_PATCH_9_9_Restore_open_file_descriprtors include/linux/checkpoint.h
--- linux-2.6.git/include/linux/checkpoint.h~v6_PATCH_9_9_Restore_open_file_descriprtors 2008-10-16 10:53:39.000000000 -0700
+++ linux-2.6.git-dave/include/linux/checkpoint.h 2008-10-16 10:53:39.000000000 -0700
@@ -85,6 +85,7 @@ extern int cr_write_files(struct cr_ctx
extern int do_restart(struct cr_ctx *ctx);
extern int cr_read_mm(struct cr_ctx *ctx);
+extern int cr_read_files(struct cr_ctx *ctx);
/* there are from fs/read_write.c, not exported otherwise in a header */
extern loff_t file_pos_read(struct file *file);
_
WARNING: multiple messages have this Message-ID (diff)
From: Dave Hansen <dave@linux.vnet.ibm.com>
To: Linus Torvalds <torvalds@osdl.org>
Cc: Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@elte.hu>,
linux-mm <linux-mm@kvack.org>,
containers <containers@lists.linux-foundation.org>,
"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
"Serge E. Hallyn" <serue@us.ibm.com>,
Oren Laadan <orenl@cs.columbia.edu>,
Dave Hansen <dave@linux.vnet.ibm.com>
Subject: [PATCH 9/9] Restore open file descriprtors
Date: Thu, 16 Oct 2008 11:14:27 -0700 [thread overview]
Message-ID: <20081016181427.1E5C5472@kernel> (raw)
In-Reply-To: <20081016181414.934C4FCC@kernel>
From: Oren Laadan <orenl@cs.columbia.edu>
Restore open file descriptors: for each FD read 'struct cr_hdr_fd_ent'
and look up objref in the hash table; if not found (first occurrence), read
in 'struct cr_hdr_fd_data', create a new FD and register in the hash.
Otherwise attach the file pointer from the hash as an FD.
This patch only handles basic FDs - regular files, directories and also
symbolic links.
Signed-off-by: Oren Laadan <orenl@cs.columbia.edu>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
---
linux-2.6.git-dave/checkpoint/Makefile | 2
linux-2.6.git-dave/checkpoint/restart.c | 4
linux-2.6.git-dave/checkpoint/rstr_file.c | 246 ++++++++++++++++++++++++++
linux-2.6.git-dave/include/linux/checkpoint.h | 1
4 files changed, 252 insertions(+), 1 deletion(-)
diff -puN checkpoint/Makefile~v6_PATCH_9_9_Restore_open_file_descriprtors checkpoint/Makefile
--- linux-2.6.git/checkpoint/Makefile~v6_PATCH_9_9_Restore_open_file_descriprtors 2008-10-16 10:53:39.000000000 -0700
+++ linux-2.6.git-dave/checkpoint/Makefile 2008-10-16 10:53:39.000000000 -0700
@@ -3,4 +3,4 @@
#
obj-$(CONFIG_CHECKPOINT_RESTART) += sys.o checkpoint.o restart.o objhash.o \
- ckpt_mem.o rstr_mem.o ckpt_file.o
+ ckpt_mem.o rstr_mem.o ckpt_file.o rstr_file.o
diff -puN checkpoint/restart.c~v6_PATCH_9_9_Restore_open_file_descriprtors checkpoint/restart.c
--- linux-2.6.git/checkpoint/restart.c~v6_PATCH_9_9_Restore_open_file_descriprtors 2008-10-16 10:53:39.000000000 -0700
+++ linux-2.6.git-dave/checkpoint/restart.c 2008-10-16 10:53:39.000000000 -0700
@@ -219,6 +219,10 @@ static int cr_read_task(struct cr_ctx *c
cr_debug("memory: ret %d\n", ret);
if (ret < 0)
goto out;
+ ret = cr_read_files(ctx);
+ cr_debug("files: ret %d\n", ret);
+ if (ret < 0)
+ goto out;
ret = cr_read_thread(ctx);
cr_debug("thread: ret %d\n", ret);
if (ret < 0)
diff -puN /dev/null checkpoint/rstr_file.c
--- /dev/null 2008-09-02 09:40:19.000000000 -0700
+++ linux-2.6.git-dave/checkpoint/rstr_file.c 2008-10-16 10:53:39.000000000 -0700
@@ -0,0 +1,246 @@
+/*
+ * Restart: restore open file descriptors
+ *
+ * Copyright (C) 2008 Oren Laadan
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of the Linux
+ * distribution for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/fsnotify.h>
+#include <linux/syscalls.h>
+#include <linux/checkpoint.h>
+#include <linux/checkpoint_hdr.h>
+
+#include "checkpoint_file.h"
+
+/*
+ * cr_close_all_fds - close the descriptors listed by cr_scan_fds() for @files
+ *
+ * Returns 0 on success, or the negative value returned by cr_scan_fds().
+ * The results of the individual sys_close() calls are not checked.
+ */
+static int cr_close_all_fds(struct files_struct *files)
+{
+	int *fds;
+	int count, idx;
+
+	count = cr_scan_fds(files, &fds);
+	if (count < 0)
+		return count;
+
+	/* walk the array from its last entry down to the first */
+	for (idx = count - 1; idx >= 0; idx--)
+		sys_close(fds[idx]);
+
+	kfree(fds);
+	return 0;
+}
+
+/**
+ * cr_attach_file - bind a lonely file pointer to a newly allocated descriptor
+ * @file: file pointer that is not yet attached to a descriptor
+ *
+ * Returns the new descriptor, or the negative value returned by
+ * get_unused_fd_flags(), in which case @file is not touched.
+ */
+static int cr_attach_file(struct file *file)
+{
+	int fd;
+
+	fd = get_unused_fd_flags(0);
+	if (fd < 0)
+		return fd;
+
+	fsnotify_open(file->f_path.dentry);
+	fd_install(fd, file);
+	return fd;
+}
+
+/**
+ * cr_attach_get_file - attach (and get) lonely file ptr to a file descriptor
+ * @file: lonely file pointer
+ *
+ * Same as cr_attach_file(), except that an additional reference is taken on
+ * @file for the new descriptor, so the caller's own reference stays valid.
+ *
+ * Returns the new descriptor, or the negative value returned by
+ * get_unused_fd_flags(), in which case no reference is taken.
+ */
+static int cr_attach_get_file(struct file *file)
+{
+	int fd = get_unused_fd_flags(0);
+
+	if (fd >= 0) {
+		/*
+		 * Take the reference before fd_install(): fd_install() hands a
+		 * reference over to the fd table, and once the descriptor is
+		 * visible there a task sharing the table may close it and drop
+		 * that reference before we would otherwise have acquired it.
+		 */
+		get_file(file);
+		fsnotify_open(file->f_path.dentry);
+		fd_install(fd, file);
+	}
+	return fd;
+}
+
+#define CR_SETFL_MASK (O_APPEND|O_NONBLOCK|O_NDELAY|FASYNC|O_DIRECT|O_NOATIME)
+
+/**
+ * cr_read_fd_data - restore the state of a given file pointer
+ * @ctx: checkpoint context
+ * @files: files_struct pointer (not used by this function)
+ * @parent: value cr_read_obj_type() must return for the record; also the
+ *	objref under which the new file is registered in the hash table
+ *
+ * Reads a 'struct cr_hdr_fd_data', opens the file with cr_read_open_fname(),
+ * attaches it to a new descriptor, registers it in the hash table, and then
+ * restores the CR_SETFL_MASK subset of f_flags and the file position.
+ *
+ * Returns the new descriptor on success or a negative error code:
+ * the error from cr_read_obj_type(), -EINVAL when the record's parent does
+ * not match @parent or fd_type is not handled, or the error from any of the
+ * later steps.
+ *
+ * NOTE(review): on failures after cr_attach_file() succeeded the descriptor
+ * stays installed; confirm that the caller tears the task down in that case.
+ */
+static int
+cr_read_fd_data(struct cr_ctx *ctx, struct files_struct *files, int parent)
+{
+	struct cr_hdr_fd_data *hh = cr_hbuf_get(ctx, sizeof(*hh));
+	struct file *file;
+	loff_t pos;
+	int rparent, ret;
+	int fd = 0;	/* pacify gcc warning */
+
+	rparent = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_FD_DATA);
+	cr_debug("rparent %d parent %d flags %#x mode %#x how %d\n",
+		 rparent, parent, hh->f_flags, hh->f_mode, hh->fd_type);
+	if (rparent < 0) {
+		/* propagate the read error itself, not the caller's @parent */
+		ret = rparent;
+		goto out;
+	}
+
+	ret = -EINVAL;
+
+	if (rparent != parent)
+		goto out;
+
+	/* FIX: more sanity checks on f_flags, f_mode etc */
+
+	switch (hh->fd_type) {
+	case CR_FD_FILE:
+	case CR_FD_DIR:
+	case CR_FD_LINK:
+		file = cr_read_open_fname(ctx, hh->f_flags, hh->f_mode);
+		break;
+	default:
+		goto out;
+	}
+
+	if (IS_ERR(file)) {
+		ret = PTR_ERR(file);
+		goto out;
+	}
+
+	/* FIX: need to restore uid, gid, owner etc */
+
+	fd = cr_attach_file(file);	/* no need to cleanup 'file' below */
+	if (fd < 0) {
+		filp_close(file, NULL);
+		ret = fd;
+		goto out;
+	}
+
+	/* register new <objref, file> tuple in hash table */
+	ret = cr_obj_add_ref(ctx, (void *) file, parent, CR_OBJ_FILE, 0);
+	if (ret < 0)
+		goto out;
+	ret = sys_fcntl(fd, F_SETFL, hh->f_flags & CR_SETFL_MASK);
+	if (ret < 0)
+		goto out;
+
+	/*
+	 * Keep the result in a loff_t: a valid offset above INT_MAX must not
+	 * be mistaken for an error after truncation to int.
+	 */
+	pos = vfs_llseek(file, hh->f_pos, SEEK_SET);
+	if (pos < 0 && pos != -ESPIPE) {
+		/* only the error on non-seekable files is ignored */
+		ret = pos;
+		goto out;
+	}
+
+	ret = 0;
+ out:
+	cr_hbuf_put(ctx, sizeof(*hh));
+	return ret < 0 ? ret : fd;
+}
+
+/**
+ * cr_read_fd_ent - restore the state of a given file descriptor
+ * @ctx: checkpoint context
+ * @files: files_struct pointer
+ * @parent: parent objref
+ *
+ * Reads one 'struct cr_hdr_fd_ent' and looks its objref up in the hash
+ * table. A file that is already registered is attached to a fresh
+ * descriptor; otherwise cr_read_fd_data() restores the file as well. The
+ * resulting descriptor is then moved to the recorded fd number and the
+ * close-on-exec flag is set when the record asks for it.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int
+cr_read_fd_ent(struct cr_ctx *ctx, struct files_struct *files, int parent)
+{
+	struct cr_hdr_fd_ent *hh = cr_hbuf_get(ctx, sizeof(*hh));
+	struct file *filp;
+	int got, rparent;
+	int err;
+
+	rparent = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_FD_ENT);
+	cr_debug("rparent %d parent %d ref %d fd %d c.o.e %d\n",
+		 rparent, parent, hh->objref, hh->fd, hh->close_on_exec);
+	err = rparent;
+	if (rparent < 0)
+		goto out;
+
+	err = -EINVAL;
+	if (rparent != parent || hh->objref <= 0)
+		goto out;
+
+	filp = cr_obj_get_by_ref(ctx, hh->objref, CR_OBJ_FILE);
+	if (IS_ERR(filp)) {
+		err = PTR_ERR(filp);
+		goto out;
+	}
+
+	if (!filp) {
+		/* first use of this objref: restore the file and register it */
+		got = cr_read_fd_data(ctx, files, hh->objref);
+	} else {
+		/* file already in the hash table: just attach it again */
+		got = cr_attach_get_file(filp);
+	}
+
+	err = got;
+	if (got < 0)
+		goto out;
+
+	cr_debug("newfd got %d wanted %d\n", got, hh->fd);
+
+	/* move the descriptor to the recorded number when they differ */
+	if (got != hh->fd) {
+		err = sys_dup2(got, hh->fd);
+		if (err < 0)
+			goto out;
+		sys_close(got);
+	}
+
+	if (hh->close_on_exec)
+		set_close_on_exec(hh->fd, 1);
+
+	err = 0;
+ out:
+	cr_hbuf_put(ctx, sizeof(*hh));
+	return err;
+}
+
+/**
+ * cr_read_files - restore the open file descriptors of the current task
+ * @ctx: checkpoint context
+ *
+ * Reads a 'struct cr_hdr_files', closes the descriptors currently open in
+ * current->files and then restores hh->nfds descriptors, one
+ * cr_read_fd_ent() call each.
+ *
+ * Returns 0 on success or a negative error code: the error from
+ * cr_read_obj_type(), -EINVAL for a negative objref or nfds, -EMFILE when
+ * nfds exceeds sysctl_nr_open, or the first error reported while closing
+ * or restoring descriptors.
+ */
+int cr_read_files(struct cr_ctx *ctx)
+{
+	struct cr_hdr_files *hh = cr_hbuf_get(ctx, sizeof(*hh));
+	struct files_struct *files = current->files;
+	int i, parent, ret;
+
+	parent = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_FILES);
+	if (parent < 0) {
+		ret = parent;
+		goto out;
+	}
+
+	ret = -EINVAL;
+#if 0	/* activate when containers are used */
+	if (parent != task_pid_vnr(current))
+		goto out;
+#endif
+	cr_debug("objref %d nfds %d\n", hh->objref, hh->nfds);
+	if (hh->objref < 0 || hh->nfds < 0)
+		goto out;
+
+	if (hh->nfds > sysctl_nr_open) {
+		ret = -EMFILE;
+		goto out;
+	}
+
+	/* point of no return -- close all file descriptors */
+	ret = cr_close_all_fds(files);
+	if (ret < 0)
+		goto out;
+
+	for (i = 0; i < hh->nfds; i++) {
+		ret = cr_read_fd_ent(ctx, files, hh->objref);
+		/* bail out with the error; do not let it be reset to 0 below */
+		if (ret < 0)
+			goto out;
+	}
+
+	ret = 0;
+ out:
+	cr_hbuf_put(ctx, sizeof(*hh));
+	return ret;
+}
diff -puN include/linux/checkpoint.h~v6_PATCH_9_9_Restore_open_file_descriprtors include/linux/checkpoint.h
--- linux-2.6.git/include/linux/checkpoint.h~v6_PATCH_9_9_Restore_open_file_descriprtors 2008-10-16 10:53:39.000000000 -0700
+++ linux-2.6.git-dave/include/linux/checkpoint.h 2008-10-16 10:53:39.000000000 -0700
@@ -85,6 +85,7 @@ extern int cr_write_files(struct cr_ctx
extern int do_restart(struct cr_ctx *ctx);
extern int cr_read_mm(struct cr_ctx *ctx);
+extern int cr_read_files(struct cr_ctx *ctx);
/* there are from fs/read_write.c, not exported otherwise in a header */
extern loff_t file_pos_read(struct file *file);
_
WARNING: multiple messages have this Message-ID (diff)
From: Dave Hansen <dave@linux.vnet.ibm.com>
To: Linus Torvalds <torvalds@osdl.org>
Cc: Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@elte.hu>,
linux-mm <linux-mm@kvack.org>,
containers <containers@lists.linux-foundation.org>,
"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
"Serge E. Hallyn" <serue@us.ibm.com>,
Oren Laadan <orenl@cs.columbia.edu>,
Dave Hansen <dave@linux.vnet.ibm.com>
Subject: [PATCH 9/9] Restore open file descriprtors
Date: Thu, 16 Oct 2008 11:14:27 -0700 [thread overview]
Message-ID: <20081016181427.1E5C5472@kernel> (raw)
In-Reply-To: <20081016181414.934C4FCC@kernel>
From: Oren Laadan <orenl@cs.columbia.edu>
Restore open file descriptors: for each FD read 'struct cr_hdr_fd_ent'
and look up objref in the hash table; if not found (first occurrence), read
in 'struct cr_hdr_fd_data', create a new FD and register in the hash.
Otherwise attach the file pointer from the hash as an FD.
This patch only handles basic FDs - regular files, directories and also
symbolic links.
Signed-off-by: Oren Laadan <orenl@cs.columbia.edu>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
---
linux-2.6.git-dave/checkpoint/Makefile | 2
linux-2.6.git-dave/checkpoint/restart.c | 4
linux-2.6.git-dave/checkpoint/rstr_file.c | 246 ++++++++++++++++++++++++++
linux-2.6.git-dave/include/linux/checkpoint.h | 1
4 files changed, 252 insertions(+), 1 deletion(-)
diff -puN checkpoint/Makefile~v6_PATCH_9_9_Restore_open_file_descriprtors checkpoint/Makefile
--- linux-2.6.git/checkpoint/Makefile~v6_PATCH_9_9_Restore_open_file_descriprtors 2008-10-16 10:53:39.000000000 -0700
+++ linux-2.6.git-dave/checkpoint/Makefile 2008-10-16 10:53:39.000000000 -0700
@@ -3,4 +3,4 @@
#
obj-$(CONFIG_CHECKPOINT_RESTART) += sys.o checkpoint.o restart.o objhash.o \
- ckpt_mem.o rstr_mem.o ckpt_file.o
+ ckpt_mem.o rstr_mem.o ckpt_file.o rstr_file.o
diff -puN checkpoint/restart.c~v6_PATCH_9_9_Restore_open_file_descriprtors checkpoint/restart.c
--- linux-2.6.git/checkpoint/restart.c~v6_PATCH_9_9_Restore_open_file_descriprtors 2008-10-16 10:53:39.000000000 -0700
+++ linux-2.6.git-dave/checkpoint/restart.c 2008-10-16 10:53:39.000000000 -0700
@@ -219,6 +219,10 @@ static int cr_read_task(struct cr_ctx *c
cr_debug("memory: ret %d\n", ret);
if (ret < 0)
goto out;
+ ret = cr_read_files(ctx);
+ cr_debug("files: ret %d\n", ret);
+ if (ret < 0)
+ goto out;
ret = cr_read_thread(ctx);
cr_debug("thread: ret %d\n", ret);
if (ret < 0)
diff -puN /dev/null checkpoint/rstr_file.c
--- /dev/null 2008-09-02 09:40:19.000000000 -0700
+++ linux-2.6.git-dave/checkpoint/rstr_file.c 2008-10-16 10:53:39.000000000 -0700
@@ -0,0 +1,246 @@
+/*
+ * Restart: restore open file descriptors
+ *
+ * Copyright (C) 2008 Oren Laadan
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of the Linux
+ * distribution for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/fsnotify.h>
+#include <linux/syscalls.h>
+#include <linux/checkpoint.h>
+#include <linux/checkpoint_hdr.h>
+
+#include "checkpoint_file.h"
+
+/*
+ * cr_close_all_fds - close the descriptors listed by cr_scan_fds() for @files
+ *
+ * Returns 0 on success, or the negative value returned by cr_scan_fds().
+ * The results of the individual sys_close() calls are not checked.
+ */
+static int cr_close_all_fds(struct files_struct *files)
+{
+	int *fds;
+	int count, idx;
+
+	count = cr_scan_fds(files, &fds);
+	if (count < 0)
+		return count;
+
+	/* walk the array from its last entry down to the first */
+	for (idx = count - 1; idx >= 0; idx--)
+		sys_close(fds[idx]);
+
+	kfree(fds);
+	return 0;
+}
+
+/**
+ * cr_attach_file - bind a lonely file pointer to a newly allocated descriptor
+ * @file: file pointer that is not yet attached to a descriptor
+ *
+ * Returns the new descriptor, or the negative value returned by
+ * get_unused_fd_flags(), in which case @file is not touched.
+ */
+static int cr_attach_file(struct file *file)
+{
+	int fd;
+
+	fd = get_unused_fd_flags(0);
+	if (fd < 0)
+		return fd;
+
+	fsnotify_open(file->f_path.dentry);
+	fd_install(fd, file);
+	return fd;
+}
+
+/**
+ * cr_attach_get_file - attach (and get) lonely file ptr to a file descriptor
+ * @file: lonely file pointer
+ *
+ * Same as cr_attach_file(), except that an additional reference is taken on
+ * @file for the new descriptor, so the caller's own reference stays valid.
+ *
+ * Returns the new descriptor, or the negative value returned by
+ * get_unused_fd_flags(), in which case no reference is taken.
+ */
+static int cr_attach_get_file(struct file *file)
+{
+	int fd = get_unused_fd_flags(0);
+
+	if (fd >= 0) {
+		/*
+		 * Take the reference before fd_install(): fd_install() hands a
+		 * reference over to the fd table, and once the descriptor is
+		 * visible there a task sharing the table may close it and drop
+		 * that reference before we would otherwise have acquired it.
+		 */
+		get_file(file);
+		fsnotify_open(file->f_path.dentry);
+		fd_install(fd, file);
+	}
+	return fd;
+}
+
+#define CR_SETFL_MASK (O_APPEND|O_NONBLOCK|O_NDELAY|FASYNC|O_DIRECT|O_NOATIME)
+
+/**
+ * cr_read_fd_data - restore the state of a given file pointer
+ * @ctx: checkpoint context
+ * @files: files_struct pointer (not used by this function)
+ * @parent: value cr_read_obj_type() must return for the record; also the
+ *	objref under which the new file is registered in the hash table
+ *
+ * Reads a 'struct cr_hdr_fd_data', opens the file with cr_read_open_fname(),
+ * attaches it to a new descriptor, registers it in the hash table, and then
+ * restores the CR_SETFL_MASK subset of f_flags and the file position.
+ *
+ * Returns the new descriptor on success or a negative error code:
+ * the error from cr_read_obj_type(), -EINVAL when the record's parent does
+ * not match @parent or fd_type is not handled, or the error from any of the
+ * later steps.
+ *
+ * NOTE(review): on failures after cr_attach_file() succeeded the descriptor
+ * stays installed; confirm that the caller tears the task down in that case.
+ */
+static int
+cr_read_fd_data(struct cr_ctx *ctx, struct files_struct *files, int parent)
+{
+	struct cr_hdr_fd_data *hh = cr_hbuf_get(ctx, sizeof(*hh));
+	struct file *file;
+	loff_t pos;
+	int rparent, ret;
+	int fd = 0;	/* pacify gcc warning */
+
+	rparent = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_FD_DATA);
+	cr_debug("rparent %d parent %d flags %#x mode %#x how %d\n",
+		 rparent, parent, hh->f_flags, hh->f_mode, hh->fd_type);
+	if (rparent < 0) {
+		/* propagate the read error itself, not the caller's @parent */
+		ret = rparent;
+		goto out;
+	}
+
+	ret = -EINVAL;
+
+	if (rparent != parent)
+		goto out;
+
+	/* FIX: more sanity checks on f_flags, f_mode etc */
+
+	switch (hh->fd_type) {
+	case CR_FD_FILE:
+	case CR_FD_DIR:
+	case CR_FD_LINK:
+		file = cr_read_open_fname(ctx, hh->f_flags, hh->f_mode);
+		break;
+	default:
+		goto out;
+	}
+
+	if (IS_ERR(file)) {
+		ret = PTR_ERR(file);
+		goto out;
+	}
+
+	/* FIX: need to restore uid, gid, owner etc */
+
+	fd = cr_attach_file(file);	/* no need to cleanup 'file' below */
+	if (fd < 0) {
+		filp_close(file, NULL);
+		ret = fd;
+		goto out;
+	}
+
+	/* register new <objref, file> tuple in hash table */
+	ret = cr_obj_add_ref(ctx, (void *) file, parent, CR_OBJ_FILE, 0);
+	if (ret < 0)
+		goto out;
+	ret = sys_fcntl(fd, F_SETFL, hh->f_flags & CR_SETFL_MASK);
+	if (ret < 0)
+		goto out;
+
+	/*
+	 * Keep the result in a loff_t: a valid offset above INT_MAX must not
+	 * be mistaken for an error after truncation to int.
+	 */
+	pos = vfs_llseek(file, hh->f_pos, SEEK_SET);
+	if (pos < 0 && pos != -ESPIPE) {
+		/* only the error on non-seekable files is ignored */
+		ret = pos;
+		goto out;
+	}
+
+	ret = 0;
+ out:
+	cr_hbuf_put(ctx, sizeof(*hh));
+	return ret < 0 ? ret : fd;
+}
+
+/**
+ * cr_read_fd_ent - restore the state of a given file descriptor
+ * @ctx: checkpoint context
+ * @files: files_struct pointer
+ * @parent: parent objref
+ *
+ * Reads one 'struct cr_hdr_fd_ent' and looks its objref up in the hash
+ * table. A file that is already registered is attached to a fresh
+ * descriptor; otherwise cr_read_fd_data() restores the file as well. The
+ * resulting descriptor is then moved to the recorded fd number and the
+ * close-on-exec flag is set when the record asks for it.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int
+cr_read_fd_ent(struct cr_ctx *ctx, struct files_struct *files, int parent)
+{
+	struct cr_hdr_fd_ent *hh = cr_hbuf_get(ctx, sizeof(*hh));
+	struct file *filp;
+	int got, rparent;
+	int err;
+
+	rparent = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_FD_ENT);
+	cr_debug("rparent %d parent %d ref %d fd %d c.o.e %d\n",
+		 rparent, parent, hh->objref, hh->fd, hh->close_on_exec);
+	err = rparent;
+	if (rparent < 0)
+		goto out;
+
+	err = -EINVAL;
+	if (rparent != parent || hh->objref <= 0)
+		goto out;
+
+	filp = cr_obj_get_by_ref(ctx, hh->objref, CR_OBJ_FILE);
+	if (IS_ERR(filp)) {
+		err = PTR_ERR(filp);
+		goto out;
+	}
+
+	if (!filp) {
+		/* first use of this objref: restore the file and register it */
+		got = cr_read_fd_data(ctx, files, hh->objref);
+	} else {
+		/* file already in the hash table: just attach it again */
+		got = cr_attach_get_file(filp);
+	}
+
+	err = got;
+	if (got < 0)
+		goto out;
+
+	cr_debug("newfd got %d wanted %d\n", got, hh->fd);
+
+	/* move the descriptor to the recorded number when they differ */
+	if (got != hh->fd) {
+		err = sys_dup2(got, hh->fd);
+		if (err < 0)
+			goto out;
+		sys_close(got);
+	}
+
+	if (hh->close_on_exec)
+		set_close_on_exec(hh->fd, 1);
+
+	err = 0;
+ out:
+	cr_hbuf_put(ctx, sizeof(*hh));
+	return err;
+}
+
+/**
+ * cr_read_files - restore the open file descriptors of the current task
+ * @ctx: checkpoint context
+ *
+ * Reads a 'struct cr_hdr_files', closes the descriptors currently open in
+ * current->files and then restores hh->nfds descriptors, one
+ * cr_read_fd_ent() call each.
+ *
+ * Returns 0 on success or a negative error code: the error from
+ * cr_read_obj_type(), -EINVAL for a negative objref or nfds, -EMFILE when
+ * nfds exceeds sysctl_nr_open, or the first error reported while closing
+ * or restoring descriptors.
+ */
+int cr_read_files(struct cr_ctx *ctx)
+{
+	struct cr_hdr_files *hh = cr_hbuf_get(ctx, sizeof(*hh));
+	struct files_struct *files = current->files;
+	int i, parent, ret;
+
+	parent = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_FILES);
+	if (parent < 0) {
+		ret = parent;
+		goto out;
+	}
+
+	ret = -EINVAL;
+#if 0	/* activate when containers are used */
+	if (parent != task_pid_vnr(current))
+		goto out;
+#endif
+	cr_debug("objref %d nfds %d\n", hh->objref, hh->nfds);
+	if (hh->objref < 0 || hh->nfds < 0)
+		goto out;
+
+	if (hh->nfds > sysctl_nr_open) {
+		ret = -EMFILE;
+		goto out;
+	}
+
+	/* point of no return -- close all file descriptors */
+	ret = cr_close_all_fds(files);
+	if (ret < 0)
+		goto out;
+
+	for (i = 0; i < hh->nfds; i++) {
+		ret = cr_read_fd_ent(ctx, files, hh->objref);
+		/* bail out with the error; do not let it be reset to 0 below */
+		if (ret < 0)
+			goto out;
+	}
+
+	ret = 0;
+ out:
+	cr_hbuf_put(ctx, sizeof(*hh));
+	return ret;
+}
diff -puN include/linux/checkpoint.h~v6_PATCH_9_9_Restore_open_file_descriprtors include/linux/checkpoint.h
--- linux-2.6.git/include/linux/checkpoint.h~v6_PATCH_9_9_Restore_open_file_descriprtors 2008-10-16 10:53:39.000000000 -0700
+++ linux-2.6.git-dave/include/linux/checkpoint.h 2008-10-16 10:53:39.000000000 -0700
@@ -85,6 +85,7 @@ extern int cr_write_files(struct cr_ctx
extern int do_restart(struct cr_ctx *ctx);
extern int cr_read_mm(struct cr_ctx *ctx);
+extern int cr_read_files(struct cr_ctx *ctx);
/* there are from fs/read_write.c, not exported otherwise in a header */
extern loff_t file_pos_read(struct file *file);
_
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2008-10-16 18:14 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-10-16 18:14 [PATCH 0/9] Kernel-based checkpoint/restart Dave Hansen
2008-10-16 18:14 ` Dave Hansen
2008-10-16 18:14 ` [PATCH 1/9] Create syscalls: sys_checkpoint, sys_restart Dave Hansen
2008-10-16 18:14 ` Dave Hansen
2008-10-16 18:14 ` Dave Hansen
2008-10-17 20:01 ` Michael Kerrisk
2008-10-17 20:01 ` Michael Kerrisk
2008-10-17 20:01 ` Michael Kerrisk
2008-10-17 20:01 ` Michael Kerrisk
2008-10-16 18:14 ` [PATCH 2/9] General infrastructure for checkpoint restart Dave Hansen
2008-10-16 18:14 ` Dave Hansen
2008-10-16 18:14 ` Dave Hansen
2008-10-16 18:14 ` [PATCH 3/9] x86 support for checkpoint/restart Dave Hansen
2008-10-16 18:14 ` Dave Hansen
2008-10-16 18:14 ` Dave Hansen
2008-10-16 18:14 ` [PATCH 4/9] Dump memory address space Dave Hansen
2008-10-16 18:14 ` Dave Hansen
2008-10-16 18:14 ` Dave Hansen
2008-10-17 8:41 ` Nadia Derbey
2008-10-17 8:41 ` Nadia Derbey
2008-10-17 8:41 ` Nadia Derbey
2008-10-16 18:14 ` [PATCH 5/9] Restore " Dave Hansen
2008-10-16 18:14 ` Dave Hansen
2008-10-16 18:14 ` Dave Hansen
2008-10-17 8:44 ` Nadia Derbey
2008-10-17 8:44 ` Nadia Derbey
2008-10-17 8:44 ` Nadia Derbey
[not found] ` <1224233070.2634.114.camel-okVqAf2pJUFftS2PfwdgCzWMkbuR3peG@public.gmane.org>
2008-10-17 15:39 ` Dave Hansen
2008-10-17 16:15 ` Linus Torvalds
2008-10-17 16:15 ` Linus Torvalds
2008-10-17 16:15 ` Linus Torvalds
2008-10-17 15:39 ` Dave Hansen
2008-10-17 15:39 ` Dave Hansen
2008-10-16 18:14 ` [PATCH 6/9] Checkpoint/restart: initial documentation Dave Hansen
2008-10-16 18:14 ` Dave Hansen
2008-10-16 18:14 ` Dave Hansen
2008-10-16 18:14 ` [PATCH 7/9] Infrastructure for shared objects Dave Hansen
2008-10-16 18:14 ` Dave Hansen
2008-10-16 18:14 ` Dave Hansen
2008-10-16 18:14 ` [PATCH 8/9] Dump open file descriptors Dave Hansen
2008-10-16 18:14 ` Dave Hansen
2008-10-16 18:14 ` Dave Hansen
2008-10-16 18:14 ` Dave Hansen [this message]
2008-10-16 18:14 ` [PATCH 9/9] Restore open file descriprtors Dave Hansen
2008-10-16 18:14 ` Dave Hansen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20081016181427.1E5C5472@kernel \
--to=dave-23vcf4htsmix0ybbhkvfkdbpr1lh4cv8@public.gmane.org \
--cc=containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org \
--cc=linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=linux-mm-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org \
--cc=mingo-X9Un+BFzKDI@public.gmane.org \
--cc=tglx-hfZtesqFncYOwBW4kG4KsQ@public.gmane.org \
--cc=torvalds-3NddpPZAyC0@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.