From: Jeff Layton <jlayton@redhat.com>
To: viro@ZenIV.linux.org.uk
Cc: linux-fsdevel@vger.kernel.org, bfields@fieldses.org
Subject: [PATCH v7 14/17] locks: add new fcntl cmd values for handling file private locks
Date: Wed, 19 Mar 2014 16:45:58 -0400 [thread overview]
Message-ID: <1395261961-10855-15-git-send-email-jlayton@redhat.com> (raw)
In-Reply-To: <1395261961-10855-1-git-send-email-jlayton@redhat.com>
Due to some unfortunate history, POSIX locks have very strange and
unhelpful semantics. The thing that usually catches people by surprise
is that they are dropped whenever the process closes any file descriptor
associated with the inode.
This is extremely problematic for people developing file servers that
need to implement byte-range locks. Developers often need a "lock
management" facility to ensure that file descriptors are not closed
until all of the locks associated with the inode are finished.
Additionally, "classic" POSIX locks are owned by the process. Locks
taken between threads within the same process won't conflict with one
another, which renders them useless for synchronization between threads.
This patchset adds a new type of lock that attempts to address these
issues. These locks conflict with classic POSIX read/write locks, but
have semantics that are more like BSD locks with respect to inheritance
and behavior on close.
This is implemented primarily by changing how fl_owner field is set for
these locks. Instead of having them owned by the files_struct of the
process, they are instead owned by the filp on which they were acquired.
Thus, they are inherited across fork() and are only released when the
last reference to a filp is put.
These new semantics prevent them from being merged with classic POSIX
locks, even if they are acquired by the same process. These locks will
also conflict with classic POSIX locks even if they are acquired by
the same process or on the same file descriptor.
The new locks are managed using a new set of cmd values to the fcntl()
syscall. The initial implementation of this converts these values to
"classic" cmd values at a fairly high level, and the details are not
exposed to the underlying filesystem. We may eventually want to push
this handing out to the lower filesystem code but for now I don't
see any need for it.
Also, note that with this implementation the new cmd values are only
available via fcntl64() on 32-bit arches. There's little need to
add support for legacy apps on a new interface like this.
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
arch/arm/kernel/sys_oabi-compat.c | 3 +++
fs/compat.c | 35 +++++++++++++++++++++----
fs/fcntl.c | 35 +++++++++++++++++--------
fs/locks.c | 54 ++++++++++++++++++++++++++++++++++++---
include/uapi/asm-generic/fcntl.h | 16 ++++++++++++
security/selinux/hooks.c | 3 +++
6 files changed, 126 insertions(+), 20 deletions(-)
diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
index 3e94811690ce..702bd329d9d0 100644
--- a/arch/arm/kernel/sys_oabi-compat.c
+++ b/arch/arm/kernel/sys_oabi-compat.c
@@ -203,6 +203,9 @@ asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd,
int ret;
switch (cmd) {
+ case F_GETLKP:
+ case F_SETLKP:
+ case F_SETLKPW:
case F_GETLK64:
case F_SETLK64:
case F_SETLKW64:
diff --git a/fs/compat.c b/fs/compat.c
index 6af20de2c1a3..f340dcf11f68 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -399,12 +399,28 @@ static int put_compat_flock64(struct flock *kfl, struct compat_flock64 __user *u
}
#endif
+static unsigned int
+convert_fcntl_cmd(unsigned int cmd)
+{
+ switch (cmd) {
+ case F_GETLK64:
+ return F_GETLK;
+ case F_SETLK64:
+ return F_SETLK;
+ case F_SETLKW64:
+ return F_SETLKW;
+ }
+
+ return cmd;
+}
+
asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
unsigned long arg)
{
mm_segment_t old_fs;
struct flock f;
long ret;
+ unsigned int conv_cmd;
switch (cmd) {
case F_GETLK:
@@ -441,16 +457,18 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
case F_GETLK64:
case F_SETLK64:
case F_SETLKW64:
+ case F_GETLKP:
+ case F_SETLKP:
+ case F_SETLKPW:
ret = get_compat_flock64(&f, compat_ptr(arg));
if (ret != 0)
break;
old_fs = get_fs();
set_fs(KERNEL_DS);
- ret = sys_fcntl(fd, (cmd == F_GETLK64) ? F_GETLK :
- ((cmd == F_SETLK64) ? F_SETLK : F_SETLKW),
- (unsigned long)&f);
+ conv_cmd = convert_fcntl_cmd(cmd);
+ ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f);
set_fs(old_fs);
- if (cmd == F_GETLK64 && ret == 0) {
+ if ((conv_cmd == F_GETLK || conv_cmd == F_GETLKP) && ret == 0) {
/* need to return lock information - see above for commentary */
if (f.l_start > COMPAT_LOFF_T_MAX)
ret = -EOVERFLOW;
@@ -471,8 +489,15 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd,
unsigned long arg)
{
- if ((cmd == F_GETLK64) || (cmd == F_SETLK64) || (cmd == F_SETLKW64))
+ switch (cmd) {
+ case F_GETLK64:
+ case F_SETLK64:
+ case F_SETLKW64:
+ case F_GETLKP:
+ case F_SETLKP:
+ case F_SETLKPW:
return -EINVAL;
+ }
return compat_sys_fcntl64(fd, cmd, arg);
}
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 7ef7f2d2b608..9ead1596399a 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -272,9 +272,19 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
case F_SETFL:
err = setfl(fd, filp, arg);
break;
+#if BITS_PER_LONG != 32
+ /* 32-bit arches must use fcntl64() */
+ case F_GETLKP:
+#endif
case F_GETLK:
err = fcntl_getlk(filp, cmd, (struct flock __user *) arg);
break;
+#if BITS_PER_LONG != 32
+ /* 32-bit arches must use fcntl64() */
+ case F_SETLKP:
+ case F_SETLKPW:
+#endif
+ /* Fallthrough */
case F_SETLK:
case F_SETLKW:
err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
@@ -388,17 +398,20 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
goto out1;
switch (cmd) {
- case F_GETLK64:
- err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg);
- break;
- case F_SETLK64:
- case F_SETLKW64:
- err = fcntl_setlk64(fd, f.file, cmd,
- (struct flock64 __user *) arg);
- break;
- default:
- err = do_fcntl(fd, cmd, arg, f.file);
- break;
+ case F_GETLK64:
+ case F_GETLKP:
+ err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg);
+ break;
+ case F_SETLK64:
+ case F_SETLKW64:
+ case F_SETLKP:
+ case F_SETLKPW:
+ err = fcntl_setlk64(fd, f.file, cmd,
+ (struct flock64 __user *) arg);
+ break;
+ default:
+ err = do_fcntl(fd, cmd, arg, f.file);
+ break;
}
out1:
fdput(f);
diff --git a/fs/locks.c b/fs/locks.c
index 8c5bc07c360f..ebcc9968ddae 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1930,6 +1930,12 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
if (error)
goto out;
+ if (cmd == F_GETLKP) {
+ cmd = F_GETLK;
+ file_lock.fl_flags |= FL_FILE_PVT;
+ file_lock.fl_owner = (fl_owner_t)filp;
+ }
+
error = vfs_test_lock(filp, &file_lock);
if (error)
goto out;
@@ -2049,10 +2055,26 @@ again:
error = flock_to_posix_lock(filp, file_lock, &flock);
if (error)
goto out;
- if (cmd == F_SETLKW) {
+
+ /*
+ * If the cmd is requesting file-private locks, then set the
+ * FL_FILE_PVT flag and override the owner.
+ */
+ switch (cmd) {
+ case F_SETLKP:
+ cmd = F_SETLK;
+ file_lock->fl_flags |= FL_FILE_PVT;
+ file_lock->fl_owner = (fl_owner_t)filp;
+ break;
+ case F_SETLKPW:
+ cmd = F_SETLKW;
+ file_lock->fl_flags |= FL_FILE_PVT;
+ file_lock->fl_owner = (fl_owner_t)filp;
+ /* Fallthrough */
+ case F_SETLKW:
file_lock->fl_flags |= FL_SLEEP;
}
-
+
error = do_lock_file_wait(filp, cmd, file_lock);
/*
@@ -2098,6 +2120,12 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
if (error)
goto out;
+ if (cmd == F_GETLKP) {
+ cmd = F_GETLK64;
+ file_lock.fl_flags |= FL_FILE_PVT;
+ file_lock.fl_owner = (fl_owner_t)filp;
+ }
+
error = vfs_test_lock(filp, &file_lock);
if (error)
goto out;
@@ -2150,10 +2178,26 @@ again:
error = flock64_to_posix_lock(filp, file_lock, &flock);
if (error)
goto out;
- if (cmd == F_SETLKW64) {
+
+ /*
+ * If the cmd is requesting file-private locks, then set the
+ * FL_FILE_PVT flag and override the owner.
+ */
+ switch (cmd) {
+ case F_SETLKP:
+ cmd = F_SETLK64;
+ file_lock->fl_flags |= FL_FILE_PVT;
+ file_lock->fl_owner = (fl_owner_t)filp;
+ break;
+ case F_SETLKPW:
+ cmd = F_SETLKW64;
+ file_lock->fl_flags |= FL_FILE_PVT;
+ file_lock->fl_owner = (fl_owner_t)filp;
+ /* Fallthrough */
+ case F_SETLKW64:
file_lock->fl_flags |= FL_SLEEP;
}
-
+
error = do_lock_file_wait(filp, cmd, file_lock);
/*
@@ -2221,6 +2265,8 @@ void locks_remove_file(struct file *filp)
if (!inode->i_flock)
return;
+ locks_remove_posix(filp, (fl_owner_t)filp);
+
if (filp->f_op->flock) {
struct file_lock fl = {
.fl_pid = current->tgid,
diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h
index 36025f77c6ed..a9b13f8b3595 100644
--- a/include/uapi/asm-generic/fcntl.h
+++ b/include/uapi/asm-generic/fcntl.h
@@ -132,6 +132,22 @@
#define F_GETOWNER_UIDS 17
#endif
+/*
+ * fd "private" POSIX locks.
+ *
+ * Usually POSIX locks held by a process are released on *any* close and are
+ * not inherited across a fork().
+ *
+ * These cmd values will set locks that conflict with normal POSIX locks, but
+ * are "owned" by the opened file, not the process. This means that they are
+ * inherited across fork() like BSD (flock) locks, and they are only released
+ * automatically when the last reference to the the open file against which
+ * they were acquired is put.
+ */
+#define F_GETLKP 36
+#define F_SETLKP 37
+#define F_SETLKPW 38
+
#define F_OWNER_TID 0
#define F_OWNER_PID 1
#define F_OWNER_PGRP 2
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 4b34847208cc..3aa876374883 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3302,6 +3302,9 @@ static int selinux_file_fcntl(struct file *file, unsigned int cmd,
case F_GETLK:
case F_SETLK:
case F_SETLKW:
+ case F_GETLKP:
+ case F_SETLKP:
+ case F_SETLKPW:
#if BITS_PER_LONG == 32
case F_GETLK64:
case F_SETLK64:
--
1.8.5.3
next prev parent reply other threads:[~2014-03-19 20:46 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-03-19 20:45 [PATCH v7 00/17] locks: fixes for 3.15 and file-private lock support Jeff Layton
2014-03-19 20:45 ` [PATCH v7 01/17] locks: close potential race between setlease and open Jeff Layton
2014-03-19 20:45 ` [PATCH v7 02/17] locks: clean up comment typo Jeff Layton
2014-03-23 19:54 ` J. Bruce Fields
2014-03-19 20:45 ` [PATCH v7 03/17] locks: remove "inline" qualifier from fl_link manipulation functions Jeff Layton
2014-03-23 19:55 ` J. Bruce Fields
2014-03-19 20:45 ` [PATCH v7 04/17] locks: add __acquires and __releases annotations to locks_start and locks_stop Jeff Layton
2014-03-23 19:55 ` J. Bruce Fields
2014-03-19 20:45 ` [PATCH v7 05/17] locks: eliminate BUG() call when there's an unexpected lock on file close Jeff Layton
2014-03-23 20:01 ` J. Bruce Fields
2014-03-19 20:45 ` [PATCH v7 06/17] locks: fix posix lock range overflow handling Jeff Layton
2014-03-19 20:45 ` [PATCH v7 07/17] locks: consolidate checks for compatible filp->f_mode values in setlk handlers Jeff Layton
2014-03-23 20:08 ` J. Bruce Fields
2014-03-19 20:45 ` [PATCH v7 08/17] locks: rename locks_remove_flock to locks_remove_file Jeff Layton
2014-03-23 22:58 ` J. Bruce Fields
2014-03-19 20:45 ` [PATCH v7 09/17] MAINTAINERS: add Bruce and myself to list of maintainers for file locking code Jeff Layton
2014-03-19 20:45 ` [PATCH v7 10/17] locks: make /proc/locks show IS_FILE_PVT locks with a P suffix Jeff Layton
2014-03-25 0:20 ` J. Bruce Fields
2014-03-25 0:57 ` Jeffrey Layton
2014-03-25 4:18 ` J. Bruce Fields
2014-03-29 14:18 ` Jeff Layton
2014-03-29 19:05 ` J. Bruce Fields
2014-03-19 20:45 ` [PATCH v7 11/17] locks: report l_pid as -1 for FL_FILE_PVT locks Jeff Layton
2014-03-25 0:30 ` J. Bruce Fields
2014-03-19 20:45 ` [PATCH v7 12/17] locks: pass the cmd value to fcntl_getlk/getlk64 Jeff Layton
2014-03-19 20:45 ` [PATCH v7 13/17] locks: skip deadlock detection on FL_FILE_PVT locks Jeff Layton
2014-03-28 21:43 ` J. Bruce Fields
2014-03-19 20:45 ` Jeff Layton [this message]
2014-03-19 20:45 ` [PATCH v7 15/17] locks: require that flock->l_pid be set to 0 for file-private locks Jeff Layton
2014-03-19 20:46 ` [PATCH v7 16/17] locks: fix locks_mandatory_locked to respect " Jeff Layton
2014-03-19 20:46 ` [PATCH v7 17/17] locks: make locks_mandatory_area check for " Jeff Layton
2014-03-28 2:15 ` [PATCH v7 00/17] locks: fixes for 3.15 and file-private lock support J. Bruce Fields
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1395261961-10855-15-git-send-email-jlayton@redhat.com \
--to=jlayton@redhat.com \
--cc=bfields@fieldses.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=viro@ZenIV.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).