* [RFC][cr][PATCH 1/6] Move file_lock macros into linux/fs.h
2010-05-05 5:30 [RFC][PATCH 0/6][cr]: Checkpoint/restart file locks and leases Sukadev Bhattiprolu
@ 2010-05-05 5:30 ` Sukadev Bhattiprolu
2010-05-05 5:31 ` [RFC][cr][PATCH 2/6] Checkpoint file-locks Sukadev Bhattiprolu
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Sukadev Bhattiprolu @ 2010-05-05 5:30 UTC (permalink / raw)
To: Oren Laadan; +Cc: Containers, linux-fsdevel, serue, matthltc, sukadev
>From 3c9fd11b0bdd12ba7b3f78e7fc810f1d83ca6986 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Thu, 29 Apr 2010 23:33:39 -0700
Subject: [RFC][cr][PATCH 1/6] Move file_lock macros into linux/fs.h
Move IS_POSIX(), IS_FLOCK(), IS_LEASE() and 'for_each_lock()' into
include/linux/fs.h since these are also needed to checkpoint/restart
file-locks.
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
---
fs/locks.c | 7 -------
include/linux/fs.h | 7 +++++++
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/fs/locks.c b/fs/locks.c
index 9cd859e..da53795 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -130,16 +130,9 @@
#include <asm/uaccess.h>
-#define IS_POSIX(fl) (fl->fl_flags & FL_POSIX)
-#define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK)
-#define IS_LEASE(fl) (fl->fl_flags & FL_LEASE)
-
int leases_enable = 1;
int lease_break_time = 45;
-#define for_each_lock(inode, lockp) \
- for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next)
-
static LIST_HEAD(file_lock_list);
static LIST_HEAD(blocked_list);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ee725ff..909a535 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1088,6 +1088,13 @@ struct file_lock {
} fl_u;
};
+/*
+ * Lock-class predicates on a struct file_lock pointer.  The argument is
+ * parenthesized because these macros are now visible to every user of
+ * <linux/fs.h>, not just fs/locks.c, and may be handed arbitrary
+ * expressions.
+ */
+#define IS_POSIX(fl)	((fl)->fl_flags & FL_POSIX)
+#define IS_FLOCK(fl)	((fl)->fl_flags & FL_FLOCK)
+#define IS_LEASE(fl)	((fl)->fl_flags & FL_LEASE)
+
+/*
+ * Walk the ->i_flock list of an inode.  lockp is a struct file_lock **
+ * cursor; *lockp is the lock currently being visited.  lockp is evaluated
+ * several times, so pass a plain variable.
+ */
+#define for_each_lock(inode, lockp) \
+	for ((lockp) = &(inode)->i_flock; *(lockp) != NULL; \
+	     (lockp) = &(*(lockp))->fl_next)
+
/* The following constant reflects the upper bound of the file/locking space */
#ifndef OFFSET_MAX
#define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1)))
--
1.6.0.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [RFC][cr][PATCH 2/6] Checkpoint file-locks
2010-05-05 5:30 [RFC][PATCH 0/6][cr]: Checkpoint/restart file locks and leases Sukadev Bhattiprolu
2010-05-05 5:30 ` [RFC][cr][PATCH 1/6] Move file_lock macros into linux/fs.h Sukadev Bhattiprolu
@ 2010-05-05 5:31 ` Sukadev Bhattiprolu
2010-05-05 5:31 ` [RFC][cr][PATCH 3/6] Define flock_set() Sukadev Bhattiprolu
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Sukadev Bhattiprolu @ 2010-05-05 5:31 UTC (permalink / raw)
To: Oren Laadan; +Cc: Containers, linux-fsdevel, serue, matthltc, sukadev
>From 46f8a088d15eda817f537cbb17574253a3af4e8c Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Thu, 29 Apr 2010 23:43:46 -0700
Subject: [RFC][cr][PATCH 2/6] Checkpoint file-locks
While checkpointing each file-descriptor, find all the locks on the
file and save information about the lock in the checkpoint-image.
A follow-on patch will use this information to restore the file-locks.
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
---
fs/checkpoint.c | 98 ++++++++++++++++++++++++++++++++++-----
include/linux/checkpoint_hdr.h | 10 ++++
2 files changed, 95 insertions(+), 13 deletions(-)
diff --git a/fs/checkpoint.c b/fs/checkpoint.c
index e036a7a..180302e 100644
--- a/fs/checkpoint.c
+++ b/fs/checkpoint.c
@@ -26,6 +26,7 @@
#include <linux/checkpoint.h>
#include <linux/eventpoll.h>
#include <linux/eventfd.h>
+#include <linux/smp_lock.h>
#include <net/sock.h>
/**************************************************************************
@@ -249,8 +250,86 @@ static int checkpoint_file(struct ckpt_ctx *ctx, void *ptr)
return ret;
}
+/*
+ * Write one CKPT_HDR_FILE_LOCK record describing @lock to the checkpoint
+ * image: its range (fl_start/fl_end), fl_type and fl_flags.
+ *
+ * @file is not used by this function; @fd only appears in the debug
+ * message.  Returns -ENOMEM if the record header cannot be obtained,
+ * otherwise the result of ckpt_write_obj().
+ */
+static int checkpoint_one_file_lock(struct ckpt_ctx *ctx, struct file *file,
+ int fd, struct file_lock *lock)
+{
+ int rc;
+ struct ckpt_hdr_file_lock *h;
+
+ h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_FILE_LOCK);
+ if (!h)
+ return -ENOMEM;
+
+ h->fl_start = lock->fl_start;
+ h->fl_end = lock->fl_end;
+ h->fl_type = lock->fl_type;
+ h->fl_flags = lock->fl_flags;
+
+ rc = ckpt_write_obj(ctx, &h->h);
+
+ /* The header is released whether or not the write succeeded. */
+ ckpt_hdr_put(ctx, h);
+
+ /* Log from @lock rather than @h: the header was released just above. */
+ ckpt_debug("Lock [%lld, %lld, %d, 0x%x] fd %d, rc %d\n", lock->fl_start,
+ lock->fl_end, lock->fl_type, lock->fl_flags, fd, rc);
+
+ return rc;
+}
+
+/*
+ * Checkpoint the locks that @files holds on the inode behind @file.
+ *
+ * Walks the inode's lock list under lock_kernel(), skipping every lock
+ * whose fl_owner is not @files.  Each remaining POSIX lock is written with
+ * checkpoint_one_file_lock(); any other remaining lock type fails the
+ * checkpoint with -EBADF.  The list is terminated in the image by a dummy
+ * record with fl_start == -1 and fl_flags == FL_POSIX.
+ *
+ * Returns a negative errno on failure, otherwise the result of writing
+ * the terminating record.
+ */
+int
+checkpoint_file_locks(struct ckpt_ctx *ctx, struct files_struct *files,
+ struct file *file, int fd)
+{
+ int rc;
+ struct inode *inode;
+ struct file_lock **lockpp;
+ struct file_lock *lockp;
+ struct file_lock last_lock;
+
+ /*
+ * NOTE(review): presumably the BKL is what keeps ->i_flock stable during
+ * the walk below - confirm, since writing a record may sleep.
+ */
+ lock_kernel();
+ inode = file->f_path.dentry->d_inode;
+ for_each_lock(inode, lockpp) {
+ lockp = *lockpp;
+ ckpt_debug("Found lock [%lld, %lld, %d, 0x%x]\n",
+ lockp->fl_start, lockp->fl_end,
+ lockp->fl_type, lockp->fl_flags);
+
+ /* Not ours: this lock is owned by a different files_struct. */
+ if (lockp->fl_owner != files)
+ continue;
+
+ if (IS_POSIX(lockp)) {
+ rc = checkpoint_one_file_lock(ctx, file, fd, lockp);
+ if (rc < 0) {
+ ckpt_err(ctx, rc, "%(T)fd %d, checkpoint "
+ "lock failed\n", fd);
+ goto out;
+ }
+ } else {
+ /* Only POSIX locks are handled; refuse the checkpoint. */
+ rc = -EBADF;
+ ckpt_err(ctx, rc, "%(T)fd %d has unsupported file "
+ "lock type, flags 0x%x\n", fd,
+ lockp->fl_flags);
+ goto out;
+ }
+ }
+
+ /*
+ * Checkpoint a dummy file-lock to mark the end of file-locks
+ * for this fd.
+ */
+ memset(&last_lock, 0, sizeof(struct file_lock));
+ last_lock.fl_start = -1;
+ last_lock.fl_flags = FL_POSIX;
+ rc = checkpoint_one_file_lock(ctx, file, fd, &last_lock);
+ if (rc < 0)
+ ckpt_err(ctx, rc, "%(T)fd %d, checkpoint last-lock failed\n",
+ fd);
+out:
+ unlock_kernel();
+ return rc;
+}
+
/**
- * ckpt_write_file_desc - dump the state of a given file descriptor
+ * checkpoint_file_desc - dump the state of a given file descriptor
* @ctx: checkpoint context
* @files: files_struct pointer
* @fd: file descriptor
@@ -282,18 +361,6 @@ static int checkpoint_file_desc(struct ckpt_ctx *ctx,
}
rcu_read_unlock();
- ret = find_locks_with_owner(file, files);
- /*
- * find_locks_with_owner() returns an error when there
- * are no locks found, so we *want* it to return an error
- * code. Its success means we have to fail the checkpoint.
- */
- if (!ret) {
- ret = -EBADF;
- ckpt_err(ctx, ret, "%(T)fd %d has file lock or lease\n", fd);
- goto out;
- }
-
/* sanity check (although this shouldn't happen) */
ret = -EBADF;
if (!file) {
@@ -328,6 +395,11 @@ static int checkpoint_file_desc(struct ckpt_ctx *ctx,
h->fd_close_on_exec = coe;
ret = ckpt_write_obj(ctx, &h->h);
+ if (ret < 0)
+ goto out;
+
+ ret = checkpoint_file_locks(ctx, files, file, fd);
+
out:
ckpt_hdr_put(ctx, h);
if (file)
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index 790214f..d2a0fcd 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -144,6 +144,8 @@ enum {
#define CKPT_HDR_TTY_LDISC CKPT_HDR_TTY_LDISC
CKPT_HDR_EPOLL_ITEMS, /* must be after file-table */
#define CKPT_HDR_EPOLL_ITEMS CKPT_HDR_EPOLL_ITEMS
+ CKPT_HDR_FILE_LOCK,
+#define CKPT_HDR_FILE_LOCK CKPT_HDR_FILE_LOCK
CKPT_HDR_MM = 401,
#define CKPT_HDR_MM CKPT_HDR_MM
@@ -576,6 +578,14 @@ struct ckpt_hdr_file_generic {
struct ckpt_hdr_file common;
} __attribute__((aligned(8)));
+/*
+ * One file-lock record in the checkpoint image.  A record with
+ * fl_start == -1 and FL_POSIX set in fl_flags terminates the list of
+ * lock records written for a file descriptor.
+ *
+ * Aligned to 8 bytes like the other image headers in this file so that
+ * the record size does not depend on the architecture's alignment of
+ * 64-bit members.
+ */
+struct ckpt_hdr_file_lock {
+	struct ckpt_hdr h;
+	loff_t fl_start;	/* copied from struct file_lock ->fl_start */
+	loff_t fl_end;		/* copied from struct file_lock ->fl_end */
+	__u8 fl_type;		/* copied from struct file_lock ->fl_type */
+	__u8 fl_flags;		/* copied from struct file_lock ->fl_flags */
+} __attribute__((aligned(8)));
+
struct ckpt_hdr_file_pipe {
struct ckpt_hdr_file common;
__s32 pipe_objref;
--
1.6.0.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [RFC][cr][PATCH 3/6] Define flock_set()
2010-05-05 5:30 [RFC][PATCH 0/6][cr]: Checkpoint/restart file locks and leases Sukadev Bhattiprolu
2010-05-05 5:30 ` [RFC][cr][PATCH 1/6] Move file_lock macros into linux/fs.h Sukadev Bhattiprolu
2010-05-05 5:31 ` [RFC][cr][PATCH 2/6] Checkpoint file-locks Sukadev Bhattiprolu
@ 2010-05-05 5:31 ` Sukadev Bhattiprolu
2010-05-05 5:31 ` [RFC][cr][PATCH 4/6] Restore file-locks Sukadev Bhattiprolu
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Sukadev Bhattiprolu @ 2010-05-05 5:31 UTC (permalink / raw)
To: Oren Laadan; +Cc: Containers, linux-fsdevel, serue, matthltc, sukadev
>From e773dc3d5e3d4ead9db7ddbd85b58a20ec95afc0 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Fri, 30 Apr 2010 11:03:28 -0700
Subject: [RFC][cr][PATCH 3/6] Define flock_set()
Extract core functionality of fcntl_setlk() into a separate function,
flock_set(). flock_set() can be also used when restarting a checkpointed
application and restoring its file-locks.
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
---
fs/locks.c | 44 +++++++++++++++++++++++++++-----------------
include/linux/fs.h | 1 +
2 files changed, 28 insertions(+), 17 deletions(-)
diff --git a/fs/locks.c b/fs/locks.c
index da53795..6c6ced4 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1758,14 +1758,10 @@ static int do_lock_file_wait(struct file *filp, unsigned int cmd,
return error;
}
-/* Apply the lock described by l to an open file descriptor.
- * This implements both the F_SETLK and F_SETLKW commands of fcntl().
- */
-int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
- struct flock __user *l)
+int flock_set(unsigned int fd, struct file *filp, unsigned int cmd,
+ struct flock *flock)
{
struct file_lock *file_lock = locks_alloc_lock();
- struct flock flock;
struct inode *inode;
struct file *f;
int error;
@@ -1773,13 +1769,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
if (file_lock == NULL)
return -ENOLCK;
- /*
- * This might block, so we do it before checking the inode.
- */
- error = -EFAULT;
- if (copy_from_user(&flock, l, sizeof(flock)))
- goto out;
-
inode = filp->f_path.dentry->d_inode;
/* Don't allow mandatory locks on files that may be memory mapped
@@ -1791,7 +1780,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
}
again:
- error = flock_to_posix_lock(filp, file_lock, &flock);
+ error = flock_to_posix_lock(filp, file_lock, flock);
if (error)
goto out;
if (cmd == F_SETLKW) {
@@ -1799,7 +1788,7 @@ again:
}
error = -EBADF;
- switch (flock.l_type) {
+ switch (flock->l_type) {
case F_RDLCK:
if (!(filp->f_mode & FMODE_READ))
goto out;
@@ -1829,8 +1818,8 @@ again:
spin_lock(¤t->files->file_lock);
f = fcheck(fd);
spin_unlock(¤t->files->file_lock);
- if (!error && f != filp && flock.l_type != F_UNLCK) {
- flock.l_type = F_UNLCK;
+ if (!error && f != filp && flock->l_type != F_UNLCK) {
+ flock->l_type = F_UNLCK;
goto again;
}
@@ -1839,6 +1828,27 @@ out:
return error;
}
+/*
+ * fcntl_setlk - back end for the F_SETLK and F_SETLKW commands of fcntl().
+ *
+ * Copies the lock description at @l in from user space and applies it to
+ * the open file through flock_set().  Returns -EFAULT if @l cannot be
+ * read, otherwise whatever flock_set() returns.
+ */
+int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
+		struct flock __user *l)
+{
+	struct flock kflock;
+
+	/*
+	 * copy_from_user() might block, so get it out of the way here,
+	 * before flock_set() starts checking the inode.
+	 */
+	if (copy_from_user(&kflock, l, sizeof(kflock)))
+		return -EFAULT;
+
+	return flock_set(fd, filp, cmd, &kflock);
+}
+
+
#if BITS_PER_LONG == 32
/* Report the first existing lock that would conflict with l.
* This implements the F_GETLK command of fcntl().
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 909a535..5e9ea17 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1112,6 +1112,7 @@ extern void send_sigio(struct fown_struct *fown, int fd, int band);
extern int fcntl_getlk(struct file *, struct flock __user *);
extern int fcntl_setlk(unsigned int, struct file *, unsigned int,
struct flock __user *);
+extern int flock_set(unsigned int, struct file *, unsigned int, struct flock *);
#if BITS_PER_LONG == 32
extern int fcntl_getlk64(struct file *, struct flock64 __user *);
--
1.6.0.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [RFC][cr][PATCH 4/6] Restore file-locks
2010-05-05 5:30 [RFC][PATCH 0/6][cr]: Checkpoint/restart file locks and leases Sukadev Bhattiprolu
` (2 preceding siblings ...)
2010-05-05 5:31 ` [RFC][cr][PATCH 3/6] Define flock_set() Sukadev Bhattiprolu
@ 2010-05-05 5:31 ` Sukadev Bhattiprolu
2010-05-05 5:32 ` [RFC][cr][PATCH 5/6] Define do_setlease() Sukadev Bhattiprolu
2010-05-05 5:32 ` [RFC][cr][PATCH 6/6] Checkpoint/restart file leases Sukadev Bhattiprolu
5 siblings, 0 replies; 7+ messages in thread
From: Sukadev Bhattiprolu @ 2010-05-05 5:31 UTC (permalink / raw)
To: Oren Laadan; +Cc: Containers, linux-fsdevel, serue, matthltc, sukadev
>From 58da2b18ee4389fdb1c0602d098ccde2dd3bf194 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Sun, 2 May 2010 23:10:18 -0700
Subject: [RFC][cr][PATCH 4/6] Restore file-locks
Restore POSIX file-locks of an application from its checkpoint image.
Read the saved file-locks from the checkpoint image and for each POSIX
lock, call flock_set() to set the lock on the file.
As pointed out by Matt Helsley, no special handling is necessary for a
process P2 in the checkpointed container that is blocked on a lock, L1
held by another process P1, since processes in the restarted container
begin execution only after all processes have been restored. If the blocked
process P2 is restored first, it will prepare to return an
-ERESTARTSYS from the fcntl() system call, but wait for P1 to be
restored. When P1 is restored, it will re-acquire the lock L1 before P1
and P2 begin actual execution. This ensures that even if P2 is scheduled
to run before P1, P2 will go back to waiting for the lock L1.
TODO:
Checkpoint/restart 64-bit file-locks (set by fcntl_getlk64() and
fcntl_setlk64()).
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
---
fs/checkpoint.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++----
1 files changed, 89 insertions(+), 8 deletions(-)
diff --git a/fs/checkpoint.c b/fs/checkpoint.c
index 180302e..625ccb9 100644
--- a/fs/checkpoint.c
+++ b/fs/checkpoint.c
@@ -269,9 +269,6 @@ static int checkpoint_one_file_lock(struct ckpt_ctx *ctx, struct file *file,
ckpt_hdr_put(ctx, h);
- ckpt_debug("Lock [%lld, %lld, %d, 0x%x] fd %d, rc %d\n", lock->fl_start,
- lock->fl_end, lock->fl_type, lock->fl_flags, fd, rc);
-
return rc;
}
@@ -289,9 +286,9 @@ checkpoint_file_locks(struct ckpt_ctx *ctx, struct files_struct *files,
inode = file->f_path.dentry->d_inode;
for_each_lock(inode, lockpp) {
lockp = *lockpp;
- ckpt_debug("Found lock [%lld, %lld, %d, 0x%x]\n",
- lockp->fl_start, lockp->fl_end,
- lockp->fl_type, lockp->fl_flags);
+ ckpt_debug("Lock [%lld, %lld, %d, 0x%x], fd %d\n",
+ lockp->fl_start, lockp->fl_end,
+ lockp->fl_type, lockp->fl_flags, fd);
if (lockp->fl_owner != files)
continue;
@@ -831,6 +828,86 @@ static struct restore_file_ops restore_file_ops[] = {
},
};
+/*
+ * Convert the checkpointed lock record @h into the struct flock @fl that
+ * flock_set() takes.
+ *
+ * Returns 0 on success, or -EINVAL if @h does not carry a plain posix
+ * lock type (F_RDLCK, F_WRLCK or F_UNLCK).
+ */
+static int
+ckpt_hdr_file_lock_to_flock(struct ckpt_hdr_file_lock *h, struct flock *fl)
+{
+	/*
+	 * We checkpoint the 'raw' fl_type which in case of leases includes
+	 * the F_INPROGRESS flag. But for posix-locks, the fl_type should
+	 * be simple.
+	 */
+	switch (h->fl_type) {
+	case F_RDLCK:
+	case F_WRLCK:
+	case F_UNLCK:
+		break;
+	default:
+		ckpt_debug("Bad posix lock type 0x%x ?\n", h->fl_type);
+		return -EINVAL;
+	}
+
+	memset(fl, 0, sizeof(*fl));
+	fl->l_type = h->fl_type;
+	fl->l_whence = SEEK_SET;
+	fl->l_start = h->fl_start;
+
+	/*
+	 * ->fl_end is the last byte covered by the lock (inclusive), so the
+	 * length is end - start + 1; plain "end - start" would shrink every
+	 * lock by one byte and turn a one-byte lock into l_len == 0, which
+	 * means "to end of file".  A lock that really does extend to
+	 * OFFSET_MAX is expressed as l_len == 0.  This is the inverse of the
+	 * conversion flock_to_posix_lock() applies when the lock is set.
+	 */
+	if (h->fl_end == OFFSET_MAX)
+		fl->l_len = 0;
+	else
+		fl->l_len = h->fl_end - h->fl_start + 1;
+
+	/* TODO: Init ->l_sysid, l_pid fields */
+
+	return 0;
+}
+
+/*
+ * Read the file-lock records that follow a file descriptor in the
+ * checkpoint image and re-acquire each posix lock on @file through @fd.
+ *
+ * Records are consumed until the dummy terminator (FL_POSIX set and
+ * fl_start == -1) is found.  Every record header obtained from
+ * ckpt_read_obj_type() is released with ckpt_hdr_put() before the next
+ * one is read or the function returns.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int restore_file_locks(struct ckpt_ctx *ctx, struct file *file, int fd)
+{
+	int ret;
+	int done;
+	struct flock fl;
+	struct ckpt_hdr_file_lock *h;
+
+	ret = 0;
+	done = 0;
+	while (!ret && !done) {
+
+		h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_FILE_LOCK);
+		if (IS_ERR(h))
+			return PTR_ERR(h);
+
+		ckpt_debug("Lock [%lld, %lld, %d, 0x%x]\n", h->fl_start,
+				h->fl_end, (int)h->fl_type, h->fl_flags);
+
+		if ((h->fl_flags & FL_POSIX) && (h->fl_start == (loff_t)-1)) {
+			/*
+			 * If we found a dummy-lock, then the fd has no more
+			 * file-locks
+			 */
+			ckpt_debug("Found last lock for fd\n");
+			done = 1;
+		} else if (h->fl_flags & FL_POSIX) {
+			ret = ckpt_hdr_file_lock_to_flock(h, &fl);
+			if (ret < 0) {
+				ckpt_err(ctx, ret, "%(T) Unexpected flock\n");
+			} else {
+				/*
+				 * Use F_SETLK because we should not have to
+				 * wait for the lock. If another process holds
+				 * the lock, it indicates that filesystem-state
+				 * is not consistent with what it was at
+				 * checkpoint. In which case we better fail.
+				 */
+				ret = flock_set(fd, file, F_SETLK, &fl);
+				if (ret)
+					ckpt_err(ctx, ret, "flock_set(): %d\n",
+							(int)h->fl_type);
+			}
+		} else {
+			/*
+			 * Must be negative: the caller only treats ret < 0
+			 * as a failure.
+			 */
+			ret = -EINVAL;
+			ckpt_err(ctx, ret, "%(T) Unexpected fl_flags 0x%x\n",
+					h->fl_flags);
+		}
+
+		/* Done with this record on every path, success or error. */
+		ckpt_hdr_put(ctx, h);
+	}
+	return ret;
+}
+
static void *restore_file(struct ckpt_ctx *ctx)
{
struct restore_file_ops *ops;
@@ -862,7 +939,7 @@ static void *restore_file(struct ckpt_ctx *ctx)
}
/**
- * ckpt_read_file_desc - restore the state of a given file descriptor
+ * restore_file_desc - restore the state of a given file descriptor
* @ctx: checkpoint context
*
* Restores the state of a file descriptor; looks up the objref (in the
@@ -908,7 +985,11 @@ static int restore_file_desc(struct ckpt_ctx *ctx)
}
set_close_on_exec(h->fd_descriptor, h->fd_close_on_exec);
- ret = 0;
+ ret = restore_file_locks(ctx, file, h->fd_descriptor);
+ if (ret < 0) {
+ ckpt_err(ctx, ret, "Error restoring locks on fd %d\n",
+ h->fd_descriptor);
+ }
out:
ckpt_hdr_put(ctx, h);
return ret;
--
1.6.0.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [RFC][cr][PATCH 5/6] Define do_setlease()
2010-05-05 5:30 [RFC][PATCH 0/6][cr]: Checkpoint/restart file locks and leases Sukadev Bhattiprolu
` (3 preceding siblings ...)
2010-05-05 5:31 ` [RFC][cr][PATCH 4/6] Restore file-locks Sukadev Bhattiprolu
@ 2010-05-05 5:32 ` Sukadev Bhattiprolu
2010-05-05 5:32 ` [RFC][cr][PATCH 6/6] Checkpoint/restart file leases Sukadev Bhattiprolu
5 siblings, 0 replies; 7+ messages in thread
From: Sukadev Bhattiprolu @ 2010-05-05 5:32 UTC (permalink / raw)
To: Oren Laadan; +Cc: Containers, linux-fsdevel, serue, matthltc, sukadev
>From 3ee14213853e8d72fb8b9791bc1441533952347e Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Tue, 4 May 2010 10:59:09 -0700
Subject: [RFC][cr][PATCH 5/6] Define do_setlease()
Move the core functionality of fcntl_setlease() into a new function,
do_setlease(). do_setlease() is same as fcntl_setlease() except that
it takes an extra 'rem_lease' parameter. do_setlease() will be used
in a follow-on patch to checkpoint/restart file-leases.
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
---
fs/locks.c | 28 +++++++++++++++++-----------
include/linux/fs.h | 1 +
2 files changed, 18 insertions(+), 11 deletions(-)
diff --git a/fs/locks.c b/fs/locks.c
index 6c6ced4..053ac5f 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1471,17 +1471,7 @@ int vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
}
EXPORT_SYMBOL_GPL(vfs_setlease);
-/**
- * fcntl_setlease - sets a lease on an open file
- * @fd: open file descriptor
- * @filp: file pointer
- * @arg: type of lease to obtain
- *
- * Call this fcntl to establish a lease on the file.
- * Note that you also need to call %F_SETSIG to
- * receive a signal when the lease is broken.
- */
-int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
+int do_setlease(unsigned int fd, struct file *filp, long arg, int rem_lease)
{
struct file_lock fl, *flp = &fl;
struct inode *inode = filp->f_path.dentry->d_inode;
@@ -1508,12 +1498,28 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
}
error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
+
out_unlock:
unlock_kernel();
return error;
}
/**
+ * fcntl_setlease - sets a lease on an open file
+ * @fd: open file descriptor
+ * @filp: file pointer
+ * @arg: type of lease to obtain
+ *
+ * Call this fcntl to establish a lease on the file.
+ * Note that you also need to call %F_SETSIG to
+ * receive a signal when the lease is broken.
+ *
+ * This is a wrapper that calls do_setlease() with a rem_lease of 0 and
+ * returns its result.
+ */
+int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
+{
+ return do_setlease(fd, filp, arg, 0);
+}
+
+/**
* flock_lock_file_wait - Apply a FLOCK-style lock to a file
* @filp: The file to apply the lock to
* @fl: The lock to be applied
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5e9ea17..137f244 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1120,6 +1120,7 @@ extern int fcntl_setlk64(unsigned int, struct file *, unsigned int,
struct flock64 __user *);
#endif
+extern int do_setlease(unsigned int fd, struct file *filp, long arg, int rem_lease);
extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
extern int fcntl_getlease(struct file *filp);
--
1.6.0.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [RFC][cr][PATCH 6/6] Checkpoint/restart file leases
2010-05-05 5:30 [RFC][PATCH 0/6][cr]: Checkpoint/restart file locks and leases Sukadev Bhattiprolu
` (4 preceding siblings ...)
2010-05-05 5:32 ` [RFC][cr][PATCH 5/6] Define do_setlease() Sukadev Bhattiprolu
@ 2010-05-05 5:32 ` Sukadev Bhattiprolu
5 siblings, 0 replies; 7+ messages in thread
From: Sukadev Bhattiprolu @ 2010-05-05 5:32 UTC (permalink / raw)
To: Oren Laadan; +Cc: Containers, linux-fsdevel, serue, matthltc, sukadev
>From 909cd31ddd56d6858d56cd23b1bb5d8925e8bc87 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Tue, 4 May 2010 10:51:52 -0700
Subject: [RFC][cr][PATCH 6/6] Checkpoint/restart file leases
Build upon the C/R of file-locks to C/R file-leases. C/R of a lease that is
not being broken is almost identical to C/R of file-locks. i.e save the type
of lease for the file in the checkpoint image and when restarting, restore
the lease by calling do_setlease().
C/R of file-lease gets complicated (I think), if a process is checkpointed
when its lease was being revoked. i.e if P1 has a F_WRLCK lease on file F1
and P2 opens F1 for write, P2's open is blocked for lease_break_time (45 secs).
P1's lease is revoked (i.e set to F_UNLCK) and P1 is notified via a SIGIO to
flush any dirty data.
This brings up two issues:
First, if P1 is checkpointed during this lease_break_time, we need to remember
that P1 originally had an F_WRLCK lease which is now revoked to F_UNLCK.
Checkpointing the "current lease type" would wrongly save the lease-type as
F_UNLCK.
Secondly, if P1 was checkpointed 40 seconds into the lease_break_time,(i.e.
it had 5 seconds remaining in the lease), we want to ensure that after restart,
P1 gets at least 5 more seconds in the lease (no ?). (i.e P1 could be in
its SIGIO handler when it was checkpointed and may be about to start a new
write(). If P1 does not get its 5 seconds and P2's open and a read()
complete, we would have data corruption).
This patch addresses the first issue above by adding file_lock->fl_type_prev
field. When a lease is downgraded/revoked, the original lease type is saved
in ->fl_type_prev and is also checkpointed. When the process P1 is restarted,
the kernel temporarily restores the original (F_WRLCK) lease. When process
P2 is restarted, the open() would fail with -ERESTARTSYS and the open() would
be repeated. This open() would initiate the lease-break protocol again on P1.
To address the second issue above, this patch saves the remaining-lease in
the checkpoint image, but does not (yet) use this value. The plan is to use
this remaining-lease period when P1/P2 are restarted so that P2 is blocked
only for the remaining-lease rather than entire lease_break_time. I want to
check if there are better ways to address this.
TODO:
When the lease-break protocol is repeated:
- P1 gets a second SIGIO. We could add a flag to file_lock
to remember that we have already sent the SIGIO.
- P1 gets a full 'lease_break_time' again (i.e P2 will block
for 45-seconds again even though it had already blocked for
40 seconds before checkpoint).
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
---
fs/checkpoint.c | 36 ++++++++++++++++++++++++++++++++----
fs/locks.c | 40 ++++++++++++++++++++++++++++++++++++++++
include/linux/checkpoint_hdr.h | 2 ++
include/linux/fs.h | 1 +
4 files changed, 75 insertions(+), 4 deletions(-)
diff --git a/fs/checkpoint.c b/fs/checkpoint.c
index 625ccb9..9bb3fa6 100644
--- a/fs/checkpoint.c
+++ b/fs/checkpoint.c
@@ -262,9 +262,16 @@ static int checkpoint_one_file_lock(struct ckpt_ctx *ctx, struct file *file,
h->fl_start = lock->fl_start;
h->fl_end = lock->fl_end;
- h->fl_type = lock->fl_type;
h->fl_flags = lock->fl_flags;
+ /* For now, checkpoint even F_INPROGRESS (if set) too. Maybe useful
+ * for debug */
+ h->fl_type = lock->fl_type;
+ h->fl_type_prev = lock->fl_type_prev;
+
+ if (h->fl_type & F_INPROGRESS && (lock->fl_break_time > jiffies))
+ h->fl_rem_lease = (lock->fl_break_time - jiffies) / HZ;
+
rc = ckpt_write_obj(ctx, &h->h);
ckpt_hdr_put(ctx, h);
@@ -293,7 +300,7 @@ checkpoint_file_locks(struct ckpt_ctx *ctx, struct files_struct *files,
if (lockp->fl_owner != files)
continue;
- if (IS_POSIX(lockp)) {
+ if (IS_POSIX(lockp) || IS_LEASE(lockp)) {
rc = checkpoint_one_file_lock(ctx, file, fd, lockp);
if (rc < 0) {
ckpt_err(ctx, rc, "%(T)fd %d, checkpoint "
@@ -369,12 +376,22 @@ static int checkpoint_file_desc(struct ckpt_ctx *ctx,
* TODO: Implement c/r of fowner and f_sigio. Should be
* trivial, but for now we just refuse its checkpoint
*/
+#if 0
+ /* We have not implemented C/R of f_setown()/f_getown() yet, but
+ * setting a file-lease also sets the owner of the file that will
+ * receive the SIGIO when the lease is broken.
+ *
+ * Disable this check for this version of patchset to test C/R of
+ * file leases. To be bisect-safe, we may need to C/R file-owner
+ * before file-leases.
+ */
pid = f_getown(file);
if (pid) {
ret = -EBUSY;
ckpt_err(ctx, ret, "%(T)fd %d has an owner (%d)\n", fd);
goto out;
}
+#endif
/*
* if seen first time, this will add 'file' to the objhash, keep
@@ -870,8 +887,10 @@ static int restore_file_locks(struct ckpt_ctx *ctx, struct file *file, int fd)
if (IS_ERR(h))
return PTR_ERR(h);
- ckpt_debug("Lock [%lld, %lld, %d, 0x%x]\n", h->fl_start,
- h->fl_end, (int)h->fl_type, h->fl_flags);
+ ckpt_debug("Lock [%lld, %lld, %d, 0x%x], rem-lease %lus, "
+ "fl-type-prev %d\n", h->fl_start, h->fl_end,
+ (int)h->fl_type, h->fl_flags, h->fl_rem_lease,
+ h->fl_type_prev);
/*
* If we found a dummy-lock, then the fd has no more
@@ -899,6 +918,15 @@ static int restore_file_locks(struct ckpt_ctx *ctx, struct file *file, int fd)
if (ret)
ckpt_err(ctx, ret, "flock_set(): %d\n",
(int)h->fl_type);
+ } else if (h->fl_flags & FL_LEASE) {
+ int type;
+
+ type = h->fl_type;
+ if (h->fl_type & F_INPROGRESS)
+ type = h->fl_type_prev;
+ ret = do_setlease(fd, file, type, h->fl_rem_lease);
+ if (ret)
+ ckpt_err(ctx, ret, "do_setlease(): %d\n", type);
} else {
ret = EINVAL;
ckpt_err(ctx, ret, "%(T) Unexpected fl_flags 0x%x\n",
diff --git a/fs/locks.c b/fs/locks.c
index 053ac5f..38bf95f 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -184,6 +184,8 @@ void locks_init_lock(struct file_lock *fl)
fl->fl_file = NULL;
fl->fl_flags = 0;
fl->fl_type = 0;
+ fl->fl_type_prev = 0;
+ fl->fl_break_time = 0UL;
fl->fl_start = fl->fl_end = 0;
fl->fl_ops = NULL;
fl->fl_lmops = NULL;
@@ -291,6 +293,13 @@ static int assign_type(struct file_lock *fl, int type)
case F_WRLCK:
case F_UNLCK:
fl->fl_type = type;
+ /*
+ * Clear fl_type_prev since we now have a new lease-type.
+ * That way, break_lease() will know to save the new lease-type
+ * in case of a checkpoint. (non-lease file-locks don't use
+ * ->fl_type_prev).
+ */
+ fl->fl_type_prev = 0;
break;
default:
return -EINVAL;
@@ -1211,6 +1220,16 @@ int __break_lease(struct inode *inode, unsigned int mode)
goto out;
}
+ /*
+ * TODO: Checkpoint/restart. Suppose lease_break_time was 45 seconds and
+ * we were checkpointed when we had 35 seconds remaining in our
+ * lease. When we are restarted, should we get only 35 seconds
+ * of the lease and not the full lease_break_time ?
+ *
+ * We checkpoint ->fl_break_time in the hope that we can use it
+ * to calculate the remaining lease, but for now, give the
+ * restarted process the full 'lease_break_time'.
+ */
break_time = 0;
if (lease_break_time > 0) {
break_time = jiffies + lease_break_time * HZ;
@@ -1220,8 +1239,29 @@ int __break_lease(struct inode *inode, unsigned int mode)
for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) {
if (fl->fl_type != future) {
+ /*
+ * CHECK:
+ *
+ * If fl_type_prev is already set, we could be in a
+ * recursive checkpoint-restart i.e we were checkpointed
+ * once when our lease was being broken. We were then
+ * restarted from the checkpoint and checkpointed
+ * again before the restored lease expired. In this
+ * case, we want to restore the lease to the original
+ * type. So don't overwrite fl_type_prev if it's already
+ * set.
+ */
+ if (!fl->fl_type_prev)
+ fl->fl_type_prev = fl->fl_type;
fl->fl_type = future;
fl->fl_break_time = break_time;
+
+ /*
+ * TODO: ->fl_break() sends the SIGIO to lease-holder.
+ * If lease-holder was checkpointed/restarted and
+ * this is a restarted lease, we should not
+ * re-send the SIGIO ?
+ */
/* lease must have lmops break callback */
fl->fl_lmops->fl_break(fl);
}
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index d2a0fcd..e9752ba 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -583,7 +583,9 @@ struct ckpt_hdr_file_lock {
loff_t fl_start;
loff_t fl_end;
__u8 fl_type;
+ __u8 fl_type_prev;
__u8 fl_flags;
+ unsigned long fl_rem_lease;
};
struct ckpt_hdr_file_pipe {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 137f244..c1d623c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1066,6 +1066,7 @@ struct file_lock {
fl_owner_t fl_owner;
unsigned char fl_flags;
unsigned char fl_type;
+ unsigned char fl_type_prev;
unsigned int fl_pid;
struct pid *fl_nspid;
wait_queue_head_t fl_wait;
--
1.6.0.4
^ permalink raw reply related [flat|nested] 7+ messages in thread