* [RFC][PATCH 01/20] prepare for write access checks: collapse if()
2006-06-16 23:12 [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Dave Hansen
@ 2006-06-16 23:12 ` Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 02/20] r/o bind mount prepwork: move open_namei()'s vfs_create() Dave Hansen
` (19 subsequent siblings)
20 siblings, 0 replies; 35+ messages in thread
From: Dave Hansen @ 2006-06-16 23:12 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, herbert, viro, Dave Hansen
We're shortly going to be adding a bunch more permission
checks in these functions. That requires adding either a
bunch of new if() conditions, or some gotos. This patch
collapses existing if()s and uses gotos instead to
prepare for the upcoming changes.
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/namei.c | 93 +++++++++++++++++++++++++++-------------------------
lxc-dave/fs/open.c | 22 +++++++-----
2 files changed, 64 insertions(+), 51 deletions(-)
diff -puN fs/namei.c~C-prepwork-collapse-ifs fs/namei.c
--- lxc/fs/namei.c~C-prepwork-collapse-ifs 2006-06-16 15:57:59.000000000 -0700
+++ lxc-dave/fs/namei.c 2006-06-16 15:57:59.000000000 -0700
@@ -1889,30 +1889,32 @@ asmlinkage long sys_mkdirat(int dfd, con
{
int error = 0;
char * tmp;
+ struct dentry *dentry;
+ struct nameidata nd;
tmp = getname(pathname);
error = PTR_ERR(tmp);
- if (!IS_ERR(tmp)) {
- struct dentry *dentry;
- struct nameidata nd;
+ if (IS_ERR(tmp))
+ goto out_err;
- error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
- if (error)
- goto out;
- dentry = lookup_create(&nd, 1);
- error = PTR_ERR(dentry);
- if (!IS_ERR(dentry)) {
- if (!IS_POSIXACL(nd.dentry->d_inode))
- mode &= ~current->fs->umask;
- error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
- dput(dentry);
- }
- mutex_unlock(&nd.dentry->d_inode->i_mutex);
- path_release(&nd);
-out:
- putname(tmp);
- }
+ error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
+ if (error)
+ goto out;
+ dentry = lookup_create(&nd, 1);
+ error = PTR_ERR(dentry);
+ if (IS_ERR(dentry))
+ goto out_unlock;
+ if (!IS_POSIXACL(nd.dentry->d_inode))
+ mode &= ~current->fs->umask;
+ error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
+ dput(dentry);
+out_unlock:
+ mutex_unlock(&nd.dentry->d_inode->i_mutex);
+ path_release(&nd);
+out:
+ putname(tmp);
+out_err:
return error;
}
@@ -2011,10 +2013,11 @@ static long do_rmdir(int dfd, const char
mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
dentry = lookup_hash(&nd);
error = PTR_ERR(dentry);
- if (!IS_ERR(dentry)) {
- error = vfs_rmdir(nd.dentry->d_inode, dentry);
- dput(dentry);
- }
+ if (IS_ERR(dentry))
+ goto exit2;
+ error = vfs_rmdir(nd.dentry->d_inode, dentry);
+ dput(dentry);
+exit2:
mutex_unlock(&nd.dentry->d_inode->i_mutex);
exit1:
path_release(&nd);
@@ -2154,30 +2157,33 @@ asmlinkage long sys_symlinkat(const char
int error = 0;
char * from;
char * to;
+ struct dentry *dentry;
+ struct nameidata nd;
from = getname(oldname);
if(IS_ERR(from))
return PTR_ERR(from);
to = getname(newname);
error = PTR_ERR(to);
- if (!IS_ERR(to)) {
- struct dentry *dentry;
- struct nameidata nd;
+ if (IS_ERR(to))
+ goto out_putname;
- error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd);
- if (error)
- goto out;
- dentry = lookup_create(&nd, 0);
- error = PTR_ERR(dentry);
- if (!IS_ERR(dentry)) {
- error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO);
- dput(dentry);
- }
- mutex_unlock(&nd.dentry->d_inode->i_mutex);
- path_release(&nd);
+ error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd);
+ if (error)
+ goto out;
+ dentry = lookup_create(&nd, 0);
+ error = PTR_ERR(dentry);
+ if (IS_ERR(dentry))
+ goto out_unlock;
+
+ error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO);
+ dput(dentry);
+out_unlock:
+ mutex_unlock(&nd.dentry->d_inode->i_mutex);
+ path_release(&nd);
out:
- putname(to);
- }
+ putname(to);
+out_putname:
putname(from);
return error;
}
@@ -2261,10 +2267,11 @@ asmlinkage long sys_linkat(int olddfd, c
goto out_release;
new_dentry = lookup_create(&nd, 0);
error = PTR_ERR(new_dentry);
- if (!IS_ERR(new_dentry)) {
- error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
- dput(new_dentry);
- }
+ if (IS_ERR(new_dentry))
+ goto out_unlock;
+ error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
+ dput(new_dentry);
+out_unlock:
mutex_unlock(&nd.dentry->d_inode->i_mutex);
out_release:
path_release(&nd);
diff -puN fs/open.c~C-prepwork-collapse-ifs fs/open.c
--- lxc/fs/open.c~C-prepwork-collapse-ifs 2006-06-16 15:57:59.000000000 -0700
+++ lxc-dave/fs/open.c 2006-06-16 15:57:59.000000000 -0700
@@ -520,15 +520,21 @@ asmlinkage long sys_faccessat(int dfd, c
current->cap_effective = current->cap_permitted;
res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
- if (!res) {
- res = vfs_permission(&nd, mode);
- /* SuS v2 requires we report a read only fs too */
- if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
- && !special_file(nd.dentry->d_inode->i_mode))
- res = -EROFS;
- path_release(&nd);
- }
+ if (res)
+ goto out;
+
+ res = vfs_permission(&nd, mode);
+ /* SuS v2 requires we report a read only fs too */
+ if(res || !(mode & S_IWOTH) ||
+ special_file(nd.dentry->d_inode->i_mode))
+ goto out_path_release;
+ if(IS_RDONLY(nd.dentry->d_inode))
+ res = -EROFS;
+
+out_path_release:
+ path_release(&nd);
+out:
current->fsuid = old_fsuid;
current->fsgid = old_fsgid;
current->cap_effective = old_cap;
_
^ permalink raw reply [flat|nested] 35+ messages in thread
* [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2)
@ 2006-06-16 23:12 Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 01/20] prepare for write access checks: collapse if() Dave Hansen
` (20 more replies)
0 siblings, 21 replies; 35+ messages in thread
From: Dave Hansen @ 2006-06-16 23:12 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, herbert, viro, Dave Hansen
The following series implements read-only bind mounts. This feature
allows a read-only view into a read-write filesystem. In the process
of doing that, it also provides infrastructure for keeping track of
the number of writers to any given mount. New in this version is that
if that number is non-zero, remounts from r/w to r/o are not allowed.
This set does not take the previously tried approach of pushing down
the vfsmount structure deeply into call paths, such that it might be
checked in functions like permission(), may_create() and may_open().
Instead, it does checks near the entry points in the kernel, bumping
a reference count in the vfsmount structure. I've also eliminated
the use of the MNT_RDONLY flag. It was redundant since we have the
reference count.
This set also makes no attempt to keep the return codes for these
r/o bind mounts the same as for a real r/o filesystem or device.
It would require significantly more code and be quite a bit more
invasive. Unless there is a very strong reason to do so, I believe
it isn't worth the trouble.
One note: the previous patches all worked this way:
mount --bind -o ro /source /dest
These patches have changed that behavior. It now requires two steps:
mount --bind /source /dest
mount -o remount,ro /dest
If this set goes OK, I think it is time for a trip through -mm.
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
^ permalink raw reply [flat|nested] 35+ messages in thread
* [RFC][PATCH 02/20] r/o bind mount prepwork: move open_namei()'s vfs_create()
2006-06-16 23:12 [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 01/20] prepare for write access checks: collapse if() Dave Hansen
@ 2006-06-16 23:12 ` Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 03/20] Add vfsmount writer count Dave Hansen
` (18 subsequent siblings)
20 siblings, 0 replies; 35+ messages in thread
From: Dave Hansen @ 2006-06-16 23:12 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, herbert, viro, Dave Hansen
The code around vfs_create() in open_namei() is getting a
bit too complex. Right now, there is at least the reference
count on the dentry, and the i_mutex to worry about. Soon,
we'll also have mnt_writecount.
So, break the vfs_create() call out of open_namei(), and
into a helper function. This duplicates the call to
may_open(), but that isn't such a bad thing since the
arguments (acc_mode and flag) were being heavily massaged
anyway.
Later in the series, we'll add the mnt_writecount handling
around this new function call.
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/namei.c | 30 ++++++++++++++++++++----------
1 files changed, 20 insertions(+), 10 deletions(-)
diff -puN fs/namei.c~C-prepwork-cleanup-open_namei fs/namei.c
--- lxc/fs/namei.c~C-prepwork-cleanup-open_namei 2006-06-16 15:58:00.000000000 -0700
+++ lxc-dave/fs/namei.c 2006-06-16 15:58:00.000000000 -0700
@@ -1560,6 +1560,24 @@ int may_open(struct nameidata *nd, int a
return 0;
}
+static int open_namei_create(struct nameidata *nd, struct path *path,
+ int flag, int mode)
+{
+ int error;
+ struct dentry *dir = nd->dentry;
+
+ if (!IS_POSIXACL(dir->d_inode))
+ mode &= ~current->fs->umask;
+ error = vfs_create(dir->d_inode, path->dentry, mode, nd);
+ mutex_unlock(&dir->d_inode->i_mutex);
+ dput(nd->dentry);
+ nd->dentry = path->dentry;
+ if (error)
+ return error;
+ /* Don't check for write permission, don't truncate */
+ return may_open(nd, 0, flag & ~O_TRUNC);
+}
+
/*
* open_namei()
*
@@ -1641,18 +1659,10 @@ do_last:
/* Negative dentry, just create the file */
if (!path.dentry->d_inode) {
- if (!IS_POSIXACL(dir->d_inode))
- mode &= ~current->fs->umask;
- error = vfs_create(dir->d_inode, path.dentry, mode, nd);
- mutex_unlock(&dir->d_inode->i_mutex);
- dput(nd->dentry);
- nd->dentry = path.dentry;
+ error = open_namei_create(nd, &path, flag, mode);
if (error)
goto exit;
- /* Don't check for write permission, don't truncate */
- acc_mode = 0;
- flag &= ~O_TRUNC;
- goto ok;
+ return 0;
}
/*
_
^ permalink raw reply [flat|nested] 35+ messages in thread
* [RFC][PATCH 03/20] Add vfsmount writer count
2006-06-16 23:12 [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 01/20] prepare for write access checks: collapse if() Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 02/20] r/o bind mount prepwork: move open_namei()'s vfs_create() Dave Hansen
@ 2006-06-16 23:12 ` Dave Hansen
2006-06-18 18:33 ` Al Viro
2006-06-16 23:12 ` [RFC][PATCH 04/20] elevate mnt writers for callers of vfs_mkdir() Dave Hansen
` (17 subsequent siblings)
20 siblings, 1 reply; 35+ messages in thread
From: Dave Hansen @ 2006-06-16 23:12 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, herbert, viro, Dave Hansen
This allows a vfsmount to keep track of how many instances
of files open for write there are at a given time.
A mount can refuse to allow writers. This can be because
it is a read-only bind mount, or for other functionality
in the future. A mount can also now refuse to make a
transition from r/w to r/o whenever it has currently active
writers.
Note that this version of the patch does not use an explicit
mount flag. It is redundant along with the information in
the reference count.
The WARN_ON()s can go away before this is merged into mainline
provided it has had some time in -mm or equivalent.
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/namespace.c | 1
lxc-dave/include/linux/mount.h | 49 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 50 insertions(+)
diff -puN fs/namespace.c~C-add-vfsmount-writer_count fs/namespace.c
--- lxc/fs/namespace.c~C-add-vfsmount-writer_count 2006-06-16 15:58:01.000000000 -0700
+++ lxc-dave/fs/namespace.c 2006-06-16 15:58:01.000000000 -0700
@@ -66,6 +66,7 @@ struct vfsmount *alloc_vfsmnt(const char
if (mnt) {
memset(mnt, 0, sizeof(struct vfsmount));
atomic_set(&mnt->mnt_count, 1);
+ atomic_set(&mnt->mnt_writers, 1);
INIT_LIST_HEAD(&mnt->mnt_hash);
INIT_LIST_HEAD(&mnt->mnt_child);
INIT_LIST_HEAD(&mnt->mnt_mounts);
diff -puN include/linux/mount.h~C-add-vfsmount-writer_count include/linux/mount.h
--- lxc/include/linux/mount.h~C-add-vfsmount-writer_count 2006-06-16 15:58:01.000000000 -0700
+++ lxc-dave/include/linux/mount.h 2006-06-16 15:58:01.000000000 -0700
@@ -12,6 +12,8 @@
#define _LINUX_MOUNT_H
#ifdef __KERNEL__
+#include <linux/err.h>
+#include <linux/fs.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/spinlock.h>
@@ -38,6 +40,10 @@ struct vfsmount {
struct list_head mnt_mounts; /* list of children, anchored here */
struct list_head mnt_child; /* and going through their mnt_child */
atomic_t mnt_count;
+ atomic_t mnt_writers; /* 0 - mount is r/o
+ * >0 - mount is r/w, there are
+ * mnt_writers-1 writers
+ */
int mnt_flags;
int mnt_expiry_mark; /* true if marked for expiry */
char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */
@@ -58,6 +64,49 @@ static inline struct vfsmount *mntget(st
return mnt;
}
+/*
+ * Must be called under write lock on mnt->mnt_sb->s_umount,
+ * this prevents concurrent decrements which could make the
+ * value -1, and test in mnt_want_write() wrongly succeed
+ */
+static inline int mnt_make_readonly(struct vfsmount *mnt)
+{
+ if (!atomic_dec_and_test(&mnt->mnt_writers)) {
+ atomic_inc(&mnt->mnt_writers);
+ return -EBUSY;
+ }
+ return 0;
+}
+
+/*
+ * This can race with itself, so it must be serialized.
+ * It must also be called with mnt->mnt_writers == 0
+ */
+static inline void __mnt_make_writable(struct vfsmount *mnt)
+{
+ WARN_ON(atomic_read(&mnt->mnt_writers));
+ atomic_inc(&mnt->mnt_writers);
+}
+
+static inline int __mnt_is_readonly(struct vfsmount *mnt)
+{
+ return (atomic_read(&mnt->mnt_writers) == 0);
+}
+
+static inline int mnt_want_write(struct vfsmount *mnt)
+{
+ int ret = 0;
+ if (!atomic_add_unless(&mnt->mnt_writers, 1, 0))
+ ret = -EROFS;
+ return ret;
+}
+
+static inline void mnt_drop_write(struct vfsmount *mnt)
+{
+ atomic_dec(&mnt->mnt_writers);
+ WARN_ON(atomic_read(&mnt->mnt_writers) < 1);
+}
+
extern void mntput_no_expire(struct vfsmount *mnt);
extern void mnt_pin(struct vfsmount *mnt);
extern void mnt_unpin(struct vfsmount *mnt);
_
^ permalink raw reply [flat|nested] 35+ messages in thread
* [RFC][PATCH 04/20] elevate mnt writers for callers of vfs_mkdir()
2006-06-16 23:12 [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Dave Hansen
` (2 preceding siblings ...)
2006-06-16 23:12 ` [RFC][PATCH 03/20] Add vfsmount writer count Dave Hansen
@ 2006-06-16 23:12 ` Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 05/20] elevate write count during entire ncp_ioctl() Dave Hansen
` (16 subsequent siblings)
20 siblings, 0 replies; 35+ messages in thread
From: Dave Hansen @ 2006-06-16 23:12 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, herbert, viro, Dave Hansen
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/namei.c | 5 +++++
lxc-dave/fs/nfsd/nfs4recover.c | 4 ++++
2 files changed, 9 insertions(+)
diff -puN fs/namei.c~C-elevate-writers-vfs_mkdir fs/namei.c
--- lxc/fs/namei.c~C-elevate-writers-vfs_mkdir 2006-06-16 15:58:01.000000000 -0700
+++ lxc-dave/fs/namei.c 2006-06-16 15:58:01.000000000 -0700
@@ -1917,7 +1917,12 @@ asmlinkage long sys_mkdirat(int dfd, con
if (!IS_POSIXACL(nd.dentry->d_inode))
mode &= ~current->fs->umask;
+ error = mnt_want_write(nd.mnt);
+ if (error)
+ goto out_dput;
error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
+ mnt_drop_write(nd.mnt);
+out_dput:
dput(dentry);
out_unlock:
mutex_unlock(&nd.dentry->d_inode->i_mutex);
diff -puN fs/nfsd/nfs4recover.c~C-elevate-writers-vfs_mkdir fs/nfsd/nfs4recover.c
--- lxc/fs/nfsd/nfs4recover.c~C-elevate-writers-vfs_mkdir 2006-06-16 15:58:01.000000000 -0700
+++ lxc-dave/fs/nfsd/nfs4recover.c 2006-06-16 15:58:01.000000000 -0700
@@ -155,7 +155,11 @@ nfsd4_create_clid_dir(struct nfs4_client
dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n");
goto out_put;
}
+ status = mnt_want_write(rec_dir.mnt);
+ if (status)
+ goto out_put;
status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU);
+ mnt_drop_write(rec_dir.mnt);
out_put:
dput(dentry);
out_unlock:
_
^ permalink raw reply [flat|nested] 35+ messages in thread
* [RFC][PATCH 05/20] elevate write count during entire ncp_ioctl()
2006-06-16 23:12 [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Dave Hansen
` (3 preceding siblings ...)
2006-06-16 23:12 ` [RFC][PATCH 04/20] elevate mnt writers for callers of vfs_mkdir() Dave Hansen
@ 2006-06-16 23:12 ` Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 07/20] elevate mount count for extended attributes Dave Hansen
` (15 subsequent siblings)
20 siblings, 0 replies; 35+ messages in thread
From: Dave Hansen @ 2006-06-16 23:12 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, herbert, viro, Dave Hansen
Some ioctls need write access, but others don't. Make a helper
function to decide when write access is needed, and take it.
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/ncpfs/ioctl.c | 55 +++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 54 insertions(+), 1 deletion(-)
diff -puN fs/ncpfs/ioctl.c~C-elevate-writers-file_permission-callers fs/ncpfs/ioctl.c
--- lxc/fs/ncpfs/ioctl.c~C-elevate-writers-file_permission-callers 2006-06-16 15:58:02.000000000 -0700
+++ lxc-dave/fs/ncpfs/ioctl.c 2006-06-16 15:58:02.000000000 -0700
@@ -16,6 +16,7 @@
#include <linux/ioctl.h>
#include <linux/time.h>
#include <linux/mm.h>
+#include <linux/mount.h>
#include <linux/highuid.h>
#include <linux/vmalloc.h>
@@ -183,7 +184,7 @@ ncp_get_charsets(struct ncp_server* serv
}
#endif /* CONFIG_NCPFS_NLS */
-int ncp_ioctl(struct inode *inode, struct file *filp,
+static int __ncp_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg)
{
struct ncp_server *server = NCP_SERVER(inode);
@@ -654,3 +655,55 @@ outrel:
/* #endif */
return -EINVAL;
}
+
+static int ncp_ioctl_need_write(unsigned int cmd)
+{
+ switch (cmd) {
+ case NCP_IOC_GET_FS_INFO:
+ case NCP_IOC_GET_FS_INFO_V2:
+ case NCP_IOC_NCPREQUEST:
+ case NCP_IOC_SETDENTRYTTL:
+ case NCP_IOC_SIGN_INIT:
+ case NCP_IOC_LOCKUNLOCK:
+ case NCP_IOC_SET_SIGN_WANTED:
+ return 1;
+ case NCP_IOC_GETOBJECTNAME:
+ case NCP_IOC_SETOBJECTNAME:
+ case NCP_IOC_GETPRIVATEDATA:
+ case NCP_IOC_SETPRIVATEDATA:
+ case NCP_IOC_SETCHARSETS:
+ case NCP_IOC_GETCHARSETS:
+ case NCP_IOC_CONN_LOGGED_IN:
+ case NCP_IOC_GETDENTRYTTL:
+ case NCP_IOC_GETMOUNTUID2:
+ case NCP_IOC_SIGN_WANTED:
+ case NCP_IOC_GETROOT:
+ case NCP_IOC_SETROOT:
+ return 0;
+ default:
+ /* unkown IOCTL command, assume write */
+ WARN_ON(1);
+ }
+ return 1;
+}
+
+int ncp_ioctl(struct inode *inode, struct file *filp,
+ unsigned int cmd, unsigned long arg)
+{
+ int ret;
+
+ if (ncp_ioctl_need_write(cmd)) {
+ /*
+ * inside the ioctl(), any failures which
+ * are because of file_permission() are
+ * -EACCESS, so it seems consistent to keep
+ * that here.
+ */
+ if (mnt_want_write(filp->f_vfsmnt))
+ return -EACCES;
+ }
+ ret = __ncp_ioctl(inode, filp, cmd, arg);
+ if (ncp_ioctl_need_write(cmd))
+ mnt_drop_write(filp->f_vfsmnt);
+ return ret;
+}
_
^ permalink raw reply [flat|nested] 35+ messages in thread
* [RFC][PATCH 07/20] elevate mount count for extended attributes
2006-06-16 23:12 [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Dave Hansen
` (4 preceding siblings ...)
2006-06-16 23:12 ` [RFC][PATCH 05/20] elevate write count during entire ncp_ioctl() Dave Hansen
@ 2006-06-16 23:12 ` Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 06/20] sys_symlinkat() elevate write count around vfs_symlink() Dave Hansen
` (14 subsequent siblings)
20 siblings, 0 replies; 35+ messages in thread
From: Dave Hansen @ 2006-06-16 23:12 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, herbert, viro, Dave Hansen
This basically audits the callers of xattr_permission(), which
calls permission() and can perform writes to the filesystem.
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/nfsd/nfs4proc.c | 7 ++++++-
lxc-dave/fs/xattr.c | 14 ++++++++++++++
2 files changed, 20 insertions(+), 1 deletion(-)
diff -puN fs/nfsd/nfs4proc.c~C-xattr fs/nfsd/nfs4proc.c
--- lxc/fs/nfsd/nfs4proc.c~C-xattr 2006-06-16 15:58:03.000000000 -0700
+++ lxc-dave/fs/nfsd/nfs4proc.c 2006-06-16 15:58:03.000000000 -0700
@@ -604,13 +604,18 @@ nfsd4_setattr(struct svc_rqst *rqstp, st
return status;
}
}
+ status = mnt_want_write(current_fh->fh_export->ex_mnt);
+ if (status)
+ return status;
status = nfs_ok;
if (setattr->sa_acl != NULL)
status = nfsd4_set_nfs4_acl(rqstp, current_fh, setattr->sa_acl);
if (status)
- return status;
+ goto out;
status = nfsd_setattr(rqstp, current_fh, &setattr->sa_iattr,
0, (time_t)0);
+out:
+ mnt_drop_write(current_fh->fh_export->ex_mnt);
return status;
}
diff -puN fs/xattr.c~C-xattr fs/xattr.c
--- lxc/fs/xattr.c~C-xattr 2006-06-16 15:58:03.000000000 -0700
+++ lxc-dave/fs/xattr.c 2006-06-16 15:58:03.000000000 -0700
@@ -12,6 +12,7 @@
#include <linux/smp_lock.h>
#include <linux/file.h>
#include <linux/xattr.h>
+#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/syscalls.h>
@@ -210,7 +211,11 @@ sys_setxattr(char __user *path, char __u
error = user_path_walk(path, &nd);
if (error)
return error;
+ error = mnt_want_write(nd.mnt);
+ if (error)
+ return error;
error = setxattr(nd.dentry, name, value, size, flags);
+ mnt_drop_write(nd.mnt);
path_release(&nd);
return error;
}
@@ -225,7 +230,11 @@ sys_lsetxattr(char __user *path, char __
error = user_path_walk_link(path, &nd);
if (error)
return error;
+ error = mnt_want_write(nd.mnt);
+ if (error)
+ return error;
error = setxattr(nd.dentry, name, value, size, flags);
+ mnt_drop_write(nd.mnt);
path_release(&nd);
return error;
}
@@ -241,9 +250,14 @@ sys_fsetxattr(int fd, char __user *name,
f = fget(fd);
if (!f)
return error;
+ error = mnt_want_write(f->f_vfsmnt);
+ if (error)
+ goto out_fput;
dentry = f->f_dentry;
audit_inode(NULL, dentry->d_inode, 0);
error = setxattr(dentry, name, value, size, flags);
+ mnt_drop_write(f->f_vfsmnt);
+out_fput:
fput(f);
return error;
}
_
^ permalink raw reply [flat|nested] 35+ messages in thread
* [RFC][PATCH 06/20] sys_symlinkat() elevate write count around vfs_symlink()
2006-06-16 23:12 [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Dave Hansen
` (5 preceding siblings ...)
2006-06-16 23:12 ` [RFC][PATCH 07/20] elevate mount count for extended attributes Dave Hansen
@ 2006-06-16 23:12 ` Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 08/20] sys_linkat(): elevate write count around vfs_link() Dave Hansen
` (13 subsequent siblings)
20 siblings, 0 replies; 35+ messages in thread
From: Dave Hansen @ 2006-06-16 23:12 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, herbert, viro, Dave Hansen
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/namei.c | 5 +++++
1 files changed, 5 insertions(+)
diff -puN fs/namei.c~C-elevate-writers-vfs_symlink-part3 fs/namei.c
--- lxc/fs/namei.c~C-elevate-writers-vfs_symlink-part3 2006-06-16 15:58:02.000000000 -0700
+++ lxc-dave/fs/namei.c 2006-06-16 15:58:02.000000000 -0700
@@ -2191,7 +2191,12 @@ asmlinkage long sys_symlinkat(const char
if (IS_ERR(dentry))
goto out_unlock;
+ error = mnt_want_write(nd.mnt);
+ if (error)
+ goto out_dput;
error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO);
+ mnt_drop_write(nd.mnt);
+out_dput:
dput(dentry);
out_unlock:
mutex_unlock(&nd.dentry->d_inode->i_mutex);
_
^ permalink raw reply [flat|nested] 35+ messages in thread
* [RFC][PATCH 08/20] sys_linkat(): elevate write count around vfs_link()
2006-06-16 23:12 [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Dave Hansen
` (6 preceding siblings ...)
2006-06-16 23:12 ` [RFC][PATCH 06/20] sys_symlinkat() elevate write count around vfs_symlink() Dave Hansen
@ 2006-06-16 23:12 ` Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 09/20] mount_is_safe(): add comment Dave Hansen
` (12 subsequent siblings)
20 siblings, 0 replies; 35+ messages in thread
From: Dave Hansen @ 2006-06-16 23:12 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, herbert, viro, Dave Hansen
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/namei.c | 5 +++++
1 files changed, 5 insertions(+)
diff -puN fs/namei.c~C-elevate-writers-vfs_link-part1 fs/namei.c
--- lxc/fs/namei.c~C-elevate-writers-vfs_link-part1 2006-06-16 15:58:04.000000000 -0700
+++ lxc-dave/fs/namei.c 2006-06-16 15:58:04.000000000 -0700
@@ -2289,7 +2289,12 @@ asmlinkage long sys_linkat(int olddfd, c
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
goto out_unlock;
+ error = mnt_want_write(nd.mnt);
+ if (error)
+ goto out_dput;
error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
+ mnt_drop_write(nd.mnt);
+out_dput:
dput(new_dentry);
out_unlock:
mutex_unlock(&nd.dentry->d_inode->i_mutex);
_
^ permalink raw reply [flat|nested] 35+ messages in thread
* [RFC][PATCH 09/20] mount_is_safe(): add comment
2006-06-16 23:12 [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Dave Hansen
` (7 preceding siblings ...)
2006-06-16 23:12 ` [RFC][PATCH 08/20] sys_linkat(): elevate write count around vfs_link() Dave Hansen
@ 2006-06-16 23:12 ` Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 10/20] unix_find_other() elevate write count for touch_atime() Dave Hansen
` (11 subsequent siblings)
20 siblings, 0 replies; 35+ messages in thread
From: Dave Hansen @ 2006-06-16 23:12 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, herbert, viro, Dave Hansen
This area of code is currently #ifdef'd out, so add a comment
for the time when it is actually used.
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/namespace.c | 4 ++++
1 files changed, 4 insertions(+)
diff -puN fs/namespace.c~C-elevate-writers-opens-part5 fs/namespace.c
--- lxc/fs/namespace.c~C-elevate-writers-opens-part5 2006-06-16 15:58:04.000000000 -0700
+++ lxc-dave/fs/namespace.c 2006-06-16 15:58:04.000000000 -0700
@@ -679,6 +679,10 @@ static int mount_is_safe(struct nameidat
if (current->uid != nd->dentry->d_inode->i_uid)
return -EPERM;
}
+ /*
+ * We will eventually check for the mnt->writer_count here,
+ * but since the code is not used now, skip it - Dave Hansen
+ */
if (vfs_permission(nd, MAY_WRITE))
return -EPERM;
return 0;
_
^ permalink raw reply [flat|nested] 35+ messages in thread
* [RFC][PATCH 10/20] unix_find_other() elevate write count for touch_atime()
2006-06-16 23:12 [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Dave Hansen
` (8 preceding siblings ...)
2006-06-16 23:12 ` [RFC][PATCH 09/20] mount_is_safe(): add comment Dave Hansen
@ 2006-06-16 23:12 ` Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 11/20] elevate write count over calls to vfs_rename() Dave Hansen
` (10 subsequent siblings)
20 siblings, 0 replies; 35+ messages in thread
From: Dave Hansen @ 2006-06-16 23:12 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, herbert, viro, Dave Hansen
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/net/unix/af_unix.c | 16 ++++++++++++----
1 files changed, 12 insertions(+), 4 deletions(-)
diff -puN net/unix/af_unix.c~C-elevate-writers-opens-part4 net/unix/af_unix.c
--- lxc/net/unix/af_unix.c~C-elevate-writers-opens-part4 2006-06-16 15:58:05.000000000 -0700
+++ lxc-dave/net/unix/af_unix.c 2006-06-16 15:58:05.000000000 -0700
@@ -676,21 +676,27 @@ static struct sock *unix_find_other(stru
err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
if (err)
goto fail;
+
+ err = mnt_want_write(nd.mnt);
+ if (err)
+ goto put_path_fail;
+
err = vfs_permission(&nd, MAY_WRITE);
if (err)
- goto put_fail;
+ goto mnt_drop_write_fail;
err = -ECONNREFUSED;
if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
- goto put_fail;
+ goto mnt_drop_write_fail;
u=unix_find_socket_byinode(nd.dentry->d_inode);
if (!u)
- goto put_fail;
+ goto mnt_drop_write_fail;
if (u->sk_type == type)
touch_atime(nd.mnt, nd.dentry);
path_release(&nd);
+ mnt_drop_write(nd.mnt);
err=-EPROTOTYPE;
if (u->sk_type != type) {
@@ -710,7 +716,9 @@ static struct sock *unix_find_other(stru
}
return u;
-put_fail:
+mnt_drop_write_fail:
+ mnt_drop_write(nd.mnt);
+put_path_fail:
path_release(&nd);
fail:
*error=err;
_
^ permalink raw reply [flat|nested] 35+ messages in thread
* [RFC][PATCH 11/20] elevate write count over calls to vfs_rename()
2006-06-16 23:12 [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Dave Hansen
` (9 preceding siblings ...)
2006-06-16 23:12 ` [RFC][PATCH 10/20] unix_find_other() elevate write count for touch_atime() Dave Hansen
@ 2006-06-16 23:12 ` Dave Hansen
2006-06-18 18:23 ` Al Viro
2006-06-16 23:12 ` [RFC][PATCH 12/20] tricky: elevate write count files are open()ed Dave Hansen
` (9 subsequent siblings)
20 siblings, 1 reply; 35+ messages in thread
From: Dave Hansen @ 2006-06-16 23:12 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, herbert, viro, Dave Hansen
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/namei.c | 4 ++++
lxc-dave/fs/nfsd/vfs.c | 12 +++++++++++-
2 files changed, 15 insertions(+), 1 deletion(-)
diff -puN fs/namei.c~C-elevate-writers-vfs_rename-part1 fs/namei.c
--- lxc/fs/namei.c~C-elevate-writers-vfs_rename-part1 2006-06-16 15:58:06.000000000 -0700
+++ lxc-dave/fs/namei.c 2006-06-16 15:58:06.000000000 -0700
@@ -2520,8 +2520,12 @@ static int do_rename(int olddfd, const c
if (new_dentry == trap)
goto exit5;
+ error = mnt_want_write(oldnd.mnt);
+ if (error)
+ goto exit5;
error = vfs_rename(old_dir->d_inode, old_dentry,
new_dir->d_inode, new_dentry);
+ mnt_drop_write(oldnd.mnt);
exit5:
dput(new_dentry);
exit4:
diff -puN fs/nfsd/vfs.c~C-elevate-writers-vfs_rename-part1 fs/nfsd/vfs.c
--- lxc/fs/nfsd/vfs.c~C-elevate-writers-vfs_rename-part1 2006-06-16 15:58:06.000000000 -0700
+++ lxc-dave/fs/nfsd/vfs.c 2006-06-16 15:58:06.000000000 -0700
@@ -1597,13 +1597,23 @@ nfsd_rename(struct svc_rqst *rqstp, stru
err = -EPERM;
} else
#endif
+ err = mnt_want_write(ffhp->fh_export->ex_mnt);
+ if (err)
+ goto out_dput_new;
+
+ err = mnt_want_write(tfhp->fh_export->ex_mnt);
+ if (err)
+ goto out_mnt_drop_write_old;
+
err = vfs_rename(fdir, odentry, tdir, ndentry);
if (!err && EX_ISSYNC(tfhp->fh_export)) {
err = nfsd_sync_dir(tdentry);
if (!err)
err = nfsd_sync_dir(fdentry);
}
-
+ mnt_drop_write(tfhp->fh_export->ex_mnt);
+ out_mnt_drop_write_old:
+ mnt_drop_write(ffhp->fh_export->ex_mnt);
out_dput_new:
dput(ndentry);
out_dput_old:
_
^ permalink raw reply [flat|nested] 35+ messages in thread
* [RFC][PATCH 12/20] tricky: elevate write count files are open()ed
2006-06-16 23:12 [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Dave Hansen
` (10 preceding siblings ...)
2006-06-16 23:12 ` [RFC][PATCH 11/20] elevate write count over calls to vfs_rename() Dave Hansen
@ 2006-06-16 23:12 ` Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 13/20] elevate writer count for do_sys_truncate() Dave Hansen
` (8 subsequent siblings)
20 siblings, 0 replies; 35+ messages in thread
From: Dave Hansen @ 2006-06-16 23:12 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, herbert, viro, Dave Hansen
This is the first really tricky patch in the series. It
elevates the writer count on a mount each time a
non-special file is opened for write.
This is not completely apparent in the patch because the
two if() conditions in may_open() above the
mnt_want_write() call are, combined, equivalent to
special_file().
There is also an elevated count around the vfs_create()
call in open_namei(). The count needs to be kept elevated
all the way into the may_open() call. Otherwise, when the
write is dropped, a ro->rw transisition could occur. This
would lead to having rw access on the newly created file,
while the vfsmount is ro. That is bad.
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/file_table.c | 5 ++++-
lxc-dave/fs/namei.c | 22 ++++++++++++++++++----
lxc-dave/ipc/mqueue.c | 3 +++
3 files changed, 25 insertions(+), 5 deletions(-)
diff -puN fs/file_table.c~C-elevate-writers-opens-part1 fs/file_table.c
--- lxc/fs/file_table.c~C-elevate-writers-opens-part1 2006-06-16 15:58:06.000000000 -0700
+++ lxc-dave/fs/file_table.c 2006-06-16 15:58:06.000000000 -0700
@@ -180,8 +180,11 @@ void fastcall __fput(struct file *file)
if (unlikely(inode->i_cdev != NULL))
cdev_put(inode->i_cdev);
fops_put(file->f_op);
- if (file->f_mode & FMODE_WRITE)
+ if (file->f_mode & FMODE_WRITE) {
put_write_access(inode);
+ if(!special_file(inode->i_mode))
+ mnt_drop_write(mnt);
+ }
file_kill(file);
file->f_dentry = NULL;
file->f_vfsmnt = NULL;
diff -puN fs/namei.c~C-elevate-writers-opens-part1 fs/namei.c
--- lxc/fs/namei.c~C-elevate-writers-opens-part1 2006-06-16 15:58:06.000000000 -0700
+++ lxc-dave/fs/namei.c 2006-06-16 15:58:06.000000000 -0700
@@ -1512,8 +1512,17 @@ int may_open(struct nameidata *nd, int a
return -EACCES;
flag &= ~O_TRUNC;
- } else if (IS_RDONLY(inode) && (flag & FMODE_WRITE))
- return -EROFS;
+ } else if (flag & FMODE_WRITE) {
+ /*
+ * effectively: !special_file()
+ * balanced by __fput()
+ */
+ error = mnt_want_write(nd->mnt);
+ if (error)
+ return error;
+ if (IS_RDONLY(inode))
+ return -EROFS;
+ }
/*
* An append-only file must be opened in append mode for writing.
*/
@@ -1652,14 +1661,17 @@ do_last:
}
if (IS_ERR(nd->intent.open.file)) {
- mutex_unlock(&dir->d_inode->i_mutex);
error = PTR_ERR(nd->intent.open.file);
- goto exit_dput;
+ goto exit_mutex_unlock;
}
/* Negative dentry, just create the file */
if (!path.dentry->d_inode) {
+ error = mnt_want_write(nd->mnt);
+ if (error)
+ goto exit_mutex_unlock;
error = open_namei_create(nd, &path, flag, mode);
+ mnt_drop_write(nd->mnt);
if (error)
goto exit;
return 0;
@@ -1695,6 +1707,8 @@ ok:
goto exit;
return 0;
+exit_mutex_unlock:
+ mutex_unlock(&dir->d_inode->i_mutex);
exit_dput:
dput_path(&path, nd);
exit:
diff -puN ipc/mqueue.c~C-elevate-writers-opens-part1 ipc/mqueue.c
--- lxc/ipc/mqueue.c~C-elevate-writers-opens-part1 2006-06-16 15:58:06.000000000 -0700
+++ lxc-dave/ipc/mqueue.c 2006-06-16 15:58:06.000000000 -0700
@@ -679,6 +679,9 @@ asmlinkage long sys_mq_open(const char _
goto out;
filp = do_open(dentry, oflag);
} else {
+ error = mnt_want_write(mqueue_mnt);
+ if (error)
+ goto out;
filp = do_create(mqueue_mnt->mnt_root, dentry,
oflag, mode, u_attr);
}
diff -L fs/namei.c. -puN /dev/null /dev/null
_
^ permalink raw reply [flat|nested] 35+ messages in thread
* [RFC][PATCH 13/20] elevate writer count for do_sys_truncate()
2006-06-16 23:12 [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Dave Hansen
` (11 preceding siblings ...)
2006-06-16 23:12 ` [RFC][PATCH 12/20] tricky: elevate write count files are open()ed Dave Hansen
@ 2006-06-16 23:12 ` Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 14/20] elevate write count for do_utimes() Dave Hansen
` (7 subsequent siblings)
20 siblings, 0 replies; 35+ messages in thread
From: Dave Hansen @ 2006-06-16 23:12 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, herbert, viro, Dave Hansen
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/open.c | 16 +++++++++++-----
1 files changed, 11 insertions(+), 5 deletions(-)
diff -puN fs/open.c~C-elevate-writers-opens-part2-do_sys_truncate fs/open.c
--- lxc/fs/open.c~C-elevate-writers-opens-part2-do_sys_truncate 2006-06-16 15:58:07.000000000 -0700
+++ lxc-dave/fs/open.c 2006-06-16 15:58:07.000000000 -0700
@@ -244,28 +244,32 @@ static long do_sys_truncate(const char _
if (!S_ISREG(inode->i_mode))
goto dput_and_out;
- error = vfs_permission(&nd, MAY_WRITE);
+ error = mnt_want_write(nd.mnt);
if (error)
goto dput_and_out;
+ error = vfs_permission(&nd, MAY_WRITE);
+ if (error)
+ goto mnt_drop_write_and_out;
+
error = -EROFS;
if (IS_RDONLY(inode))
- goto dput_and_out;
+ goto mnt_drop_write_and_out;
error = -EPERM;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
- goto dput_and_out;
+ goto mnt_drop_write_and_out;
/*
* Make sure that there are no leases.
*/
error = break_lease(inode, FMODE_WRITE);
if (error)
- goto dput_and_out;
+ goto mnt_drop_write_and_out;
error = get_write_access(inode);
if (error)
- goto dput_and_out;
+ goto mnt_drop_write_and_out;
error = locks_verify_truncate(inode, NULL, length);
if (!error) {
@@ -274,6 +278,8 @@ static long do_sys_truncate(const char _
}
put_write_access(inode);
+mnt_drop_write_and_out:
+ mnt_drop_write(nd.mnt);
dput_and_out:
path_release(&nd);
out:
_
^ permalink raw reply [flat|nested] 35+ messages in thread
* [RFC][PATCH 15/20] elevate write count for do_sys_utime() and touch_atime()
2006-06-16 23:12 [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Dave Hansen
` (13 preceding siblings ...)
2006-06-16 23:12 ` [RFC][PATCH 14/20] elevate write count for do_utimes() Dave Hansen
@ 2006-06-16 23:12 ` Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 16/20] sys_mknodat(): elevate write count for vfs_mknod/create() Dave Hansen
` (5 subsequent siblings)
20 siblings, 0 replies; 35+ messages in thread
From: Dave Hansen @ 2006-06-16 23:12 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, herbert, viro, Dave Hansen
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/inode.c | 9 +++++++--
lxc-dave/fs/open.c | 16 +++++++++++-----
2 files changed, 18 insertions(+), 7 deletions(-)
diff -puN fs/inode.c~C-elevate-writers-opens-part2-do_sys_utime fs/inode.c
--- lxc/fs/inode.c~C-elevate-writers-opens-part2-do_sys_utime 2006-06-16 15:58:08.000000000 -0700
+++ lxc-dave/fs/inode.c 2006-06-16 15:58:08.000000000 -0700
@@ -1188,10 +1188,13 @@ void touch_atime(struct vfsmount *mnt, s
if (IS_RDONLY(inode))
return;
+ if (!mnt_want_write(mnt))
+ return;
+
if ((inode->i_flags & S_NOATIME) ||
(inode->i_sb->s_flags & MS_NOATIME) ||
((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)))
- return;
+ goto out;
/*
* We may have a NULL vfsmount when coming from NFSD
@@ -1199,13 +1202,15 @@ void touch_atime(struct vfsmount *mnt, s
if (mnt &&
((mnt->mnt_flags & MNT_NOATIME) ||
((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))))
- return;
+ goto out;
now = current_fs_time(inode->i_sb);
if (!timespec_equal(&inode->i_atime, &now)) {
inode->i_atime = now;
mark_inode_dirty_sync(inode);
}
+out:
+ mnt_drop_write(mnt);
}
EXPORT_SYMBOL(touch_atime);
diff -puN fs/open.c~C-elevate-writers-opens-part2-do_sys_utime fs/open.c
--- lxc/fs/open.c~C-elevate-writers-opens-part2-do_sys_utime 2006-06-16 15:58:08.000000000 -0700
+++ lxc-dave/fs/open.c 2006-06-16 15:58:08.000000000 -0700
@@ -384,16 +384,20 @@ asmlinkage long sys_utime(char __user *
goto out;
inode = nd.dentry->d_inode;
+ error = mnt_want_write(nd.mnt);
+ if (error)
+ goto dput_and_out;
+
error = -EROFS;
if (IS_RDONLY(inode))
- goto dput_and_out;
+ goto mnt_drop_write_and_out;
/* Don't worry, the checks are done in inode_change_ok() */
newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
if (times) {
error = -EPERM;
if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
- goto dput_and_out;
+ goto mnt_drop_write_and_out;
error = get_user(newattrs.ia_atime.tv_sec, ×->actime);
newattrs.ia_atime.tv_nsec = 0;
@@ -401,21 +405,23 @@ asmlinkage long sys_utime(char __user *
error = get_user(newattrs.ia_mtime.tv_sec, ×->modtime);
newattrs.ia_mtime.tv_nsec = 0;
if (error)
- goto dput_and_out;
+ goto mnt_drop_write_and_out;
newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
} else {
error = -EACCES;
if (IS_IMMUTABLE(inode))
- goto dput_and_out;
+ goto mnt_drop_write_and_out;
if (current->fsuid != inode->i_uid &&
(error = vfs_permission(&nd, MAY_WRITE)) != 0)
- goto dput_and_out;
+ goto mnt_drop_write_and_out;
}
mutex_lock(&inode->i_mutex);
error = notify_change(nd.dentry, &newattrs);
mutex_unlock(&inode->i_mutex);
+mnt_drop_write_and_out:
+ mnt_drop_write(nd.mnt);
dput_and_out:
path_release(&nd);
out:
_
^ permalink raw reply [flat|nested] 35+ messages in thread
* [RFC][PATCH 14/20] elevate write count for do_utimes()
2006-06-16 23:12 [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Dave Hansen
` (12 preceding siblings ...)
2006-06-16 23:12 ` [RFC][PATCH 13/20] elevate writer count for do_sys_truncate() Dave Hansen
@ 2006-06-16 23:12 ` Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 15/20] elevate write count for do_sys_utime() and touch_atime() Dave Hansen
` (6 subsequent siblings)
20 siblings, 0 replies; 35+ messages in thread
From: Dave Hansen @ 2006-06-16 23:12 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, herbert, viro, Dave Hansen
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/open.c | 13 +++++++++----
1 files changed, 9 insertions(+), 4 deletions(-)
diff -puN fs/open.c~C-elevate-writers-opens-part2-do_utimes fs/open.c
--- lxc/fs/open.c~C-elevate-writers-opens-part2-do_utimes 2006-06-16 15:58:08.000000000 -0700
+++ lxc-dave/fs/open.c 2006-06-16 15:58:08.000000000 -0700
@@ -441,16 +441,19 @@ long do_utimes(int dfd, char __user *fil
goto out;
inode = nd.dentry->d_inode;
+ error = mnt_want_write(nd.mnt);
+ if (error)
+ goto dput_and_out;
error = -EROFS;
if (IS_RDONLY(inode))
- goto dput_and_out;
+ goto mnt_drop_write_and_out;
/* Don't worry, the checks are done in inode_change_ok() */
newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
if (times) {
error = -EPERM;
if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
- goto dput_and_out;
+ goto mnt_drop_write_and_out;
newattrs.ia_atime.tv_sec = times[0].tv_sec;
newattrs.ia_atime.tv_nsec = times[0].tv_usec * 1000;
@@ -460,15 +463,17 @@ long do_utimes(int dfd, char __user *fil
} else {
error = -EACCES;
if (IS_IMMUTABLE(inode))
- goto dput_and_out;
+ goto mnt_drop_write_and_out;
if (current->fsuid != inode->i_uid &&
(error = vfs_permission(&nd, MAY_WRITE)) != 0)
- goto dput_and_out;
+ goto mnt_drop_write_and_out;
}
mutex_lock(&inode->i_mutex);
error = notify_change(nd.dentry, &newattrs);
mutex_unlock(&inode->i_mutex);
+mnt_drop_write_and_out:
+ mnt_drop_write(nd.mnt);
dput_and_out:
path_release(&nd);
out:
_
^ permalink raw reply [flat|nested] 35+ messages in thread
* [RFC][PATCH 16/20] sys_mknodat(): elevate write count for vfs_mknod/create()
2006-06-16 23:12 [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Dave Hansen
` (14 preceding siblings ...)
2006-06-16 23:12 ` [RFC][PATCH 15/20] elevate write count for do_sys_utime() and touch_atime() Dave Hansen
@ 2006-06-16 23:12 ` Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 17/20] elevate mnt writers for vfs_unlink() callers Dave Hansen
` (4 subsequent siblings)
20 siblings, 0 replies; 35+ messages in thread
From: Dave Hansen @ 2006-06-16 23:12 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, herbert, viro, Dave Hansen
This takes care of all of the direct callers of vfs_mknod().
Since a few of these cases also handle normal file creation
as well, this also covers some calls to vfs_create().
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/namei.c | 12 ++++++++++++
lxc-dave/fs/nfsd/vfs.c | 4 ++++
lxc-dave/net/unix/af_unix.c | 4 ++++
3 files changed, 20 insertions(+)
diff -puN fs/namei.c~C-elevate-writers-vfs_mknod-try2 fs/namei.c
--- lxc/fs/namei.c~C-elevate-writers-vfs_mknod-try2 2006-06-16 15:58:09.000000000 -0700
+++ lxc-dave/fs/namei.c 2006-06-16 15:58:09.000000000 -0700
@@ -1857,14 +1857,26 @@ asmlinkage long sys_mknodat(int dfd, con
if (!IS_ERR(dentry)) {
switch (mode & S_IFMT) {
case 0: case S_IFREG:
+ error = mnt_want_write(nd.mnt);
+ if (error)
+ break;
error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd);
+ mnt_drop_write(nd.mnt);
break;
case S_IFCHR: case S_IFBLK:
+ error = mnt_want_write(nd.mnt);
+ if (error)
+ break;
error = vfs_mknod(nd.dentry->d_inode,dentry,mode,
new_decode_dev(dev));
+ mnt_drop_write(nd.mnt);
break;
case S_IFIFO: case S_IFSOCK:
+ error = mnt_want_write(nd.mnt);
+ if (error)
+ break;
error = vfs_mknod(nd.dentry->d_inode,dentry,mode,0);
+ mnt_drop_write(nd.mnt);
break;
case S_IFDIR:
error = -EPERM;
diff -puN fs/nfsd/vfs.c~C-elevate-writers-vfs_mknod-try2 fs/nfsd/vfs.c
--- lxc/fs/nfsd/vfs.c~C-elevate-writers-vfs_mknod-try2 2006-06-16 15:58:09.000000000 -0700
+++ lxc-dave/fs/nfsd/vfs.c 2006-06-16 15:58:09.000000000 -0700
@@ -1153,6 +1153,9 @@ nfsd_create(struct svc_rqst *rqstp, stru
/*
* Get the dir op function pointer.
*/
+ err = mnt_want_write(fhp->fh_export->ex_mnt);
+ if (err)
+ goto out_nfserr;
err = nfserr_perm;
switch (type) {
case S_IFREG:
@@ -1171,6 +1174,7 @@ nfsd_create(struct svc_rqst *rqstp, stru
printk("nfsd: bad file type %o in nfsd_create\n", type);
err = -EINVAL;
}
+ mnt_drop_write(fhp->fh_export->ex_mnt);
if (err < 0)
goto out_nfserr;
diff -puN net/unix/af_unix.c~C-elevate-writers-vfs_mknod-try2 net/unix/af_unix.c
--- lxc/net/unix/af_unix.c~C-elevate-writers-vfs_mknod-try2 2006-06-16 15:58:09.000000000 -0700
+++ lxc-dave/net/unix/af_unix.c 2006-06-16 15:58:09.000000000 -0700
@@ -789,7 +789,11 @@ static int unix_bind(struct socket *sock
*/
mode = S_IFSOCK |
(SOCK_INODE(sock)->i_mode & ~current->fs->umask);
+ err = mnt_want_write(nd.mnt);
+ if (err)
+ goto out_mknod_dput;
err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
+ mnt_drop_write(nd.mnt);
if (err)
goto out_mknod_dput;
mutex_unlock(&nd.dentry->d_inode->i_mutex);
_
^ permalink raw reply [flat|nested] 35+ messages in thread
* [RFC][PATCH 17/20] elevate mnt writers for vfs_unlink() callers
2006-06-16 23:12 [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Dave Hansen
` (15 preceding siblings ...)
2006-06-16 23:12 ` [RFC][PATCH 16/20] sys_mknodat(): elevate write count for vfs_mknod/create() Dave Hansen
@ 2006-06-16 23:12 ` Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 19/20] elevate writer count for custom 'struct file' Dave Hansen
` (3 subsequent siblings)
20 siblings, 0 replies; 35+ messages in thread
From: Dave Hansen @ 2006-06-16 23:12 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, herbert, viro, Dave Hansen
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/namei.c | 4 ++++
lxc-dave/ipc/mqueue.c | 5 ++++-
2 files changed, 8 insertions(+), 1 deletion(-)
diff -puN fs/namei.c~C-elevate-writers-vfs_unlink fs/namei.c
--- lxc/fs/namei.c~C-elevate-writers-vfs_unlink 2006-06-16 15:58:10.000000000 -0700
+++ lxc-dave/fs/namei.c 2006-06-16 15:58:10.000000000 -0700
@@ -2136,7 +2136,11 @@ static long do_unlinkat(int dfd, const c
inode = dentry->d_inode;
if (inode)
atomic_inc(&inode->i_count);
+ error = mnt_want_write(nd.mnt);
+ if (error)
+ goto exit2;
error = vfs_unlink(nd.dentry->d_inode, dentry);
+ mnt_drop_write(nd.mnt);
exit2:
dput(dentry);
}
diff -puN ipc/mqueue.c~C-elevate-writers-vfs_unlink ipc/mqueue.c
--- lxc/ipc/mqueue.c~C-elevate-writers-vfs_unlink 2006-06-16 15:58:10.000000000 -0700
+++ lxc-dave/ipc/mqueue.c 2006-06-16 15:58:10.000000000 -0700
@@ -741,8 +741,11 @@ asmlinkage long sys_mq_unlink(const char
inode = dentry->d_inode;
if (inode)
atomic_inc(&inode->i_count);
-
+ err = mnt_want_write(mqueue_mnt);
+ if (err)
+ goto out_err;
err = vfs_unlink(dentry->d_parent->d_inode, dentry);
+ mnt_drop_write(mqueue_mnt);
out_err:
dput(dentry);
_
^ permalink raw reply [flat|nested] 35+ messages in thread
* [RFC][PATCH 19/20] elevate writer count for custom 'struct file'
2006-06-16 23:12 [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Dave Hansen
` (16 preceding siblings ...)
2006-06-16 23:12 ` [RFC][PATCH 17/20] elevate mnt writers for vfs_unlink() callers Dave Hansen
@ 2006-06-16 23:12 ` Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 18/20] do_rmdir(): elevate write count Dave Hansen
` (2 subsequent siblings)
20 siblings, 0 replies; 35+ messages in thread
From: Dave Hansen @ 2006-06-16 23:12 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, herbert, viro, Dave Hansen
Some filesystems forego the vfs and may_open() and create their
own 'struct file's. Any of these users which set the write flag
on the file will cause an extra mnt_drop_write() on __fput(),
thus dropping the reference count too low.
These users tend to have artifical in-kernel vfsmounts which
aren't really exposed to userspace and can't be remounted, but
this patch is included for completeness and so that the warnings
don't trip over these cases.
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/hugetlbfs/inode.c | 2 ++
lxc-dave/mm/shmem.c | 2 ++
lxc-dave/mm/tiny-shmem.c | 2 ++
lxc-dave/net/socket.c | 3 +++
4 files changed, 9 insertions(+)
diff -puN fs/hugetlbfs/inode.c~fix-__fput-rev2 fs/hugetlbfs/inode.c
--- lxc/fs/hugetlbfs/inode.c~fix-__fput-rev2 2006-06-16 15:58:11.000000000 -0700
+++ lxc-dave/fs/hugetlbfs/inode.c 2006-06-16 15:58:11.000000000 -0700
@@ -790,6 +790,8 @@ struct file *hugetlb_zero_setup(size_t s
file->f_mapping = inode->i_mapping;
file->f_op = &hugetlbfs_file_operations;
file->f_mode = FMODE_WRITE | FMODE_READ;
+ error = mnt_want_write(hugetlbfs_vfsmount);
+ WARN_ON(error);
return file;
out_inode:
diff -puN mm/shmem.c~fix-__fput-rev2 mm/shmem.c
--- lxc/mm/shmem.c~fix-__fput-rev2 2006-06-16 15:58:11.000000000 -0700
+++ lxc-dave/mm/shmem.c 2006-06-16 15:58:11.000000000 -0700
@@ -2324,6 +2324,8 @@ struct file *shmem_file_setup(char *name
file->f_mapping = inode->i_mapping;
file->f_op = &shmem_file_operations;
file->f_mode = FMODE_WRITE | FMODE_READ;
+ error = mnt_want_write(shm_mnt);
+ WARN_ON(error);
return file;
close_file:
diff -puN mm/tiny-shmem.c~fix-__fput-rev2 mm/tiny-shmem.c
--- lxc/mm/tiny-shmem.c~fix-__fput-rev2 2006-06-16 15:58:11.000000000 -0700
+++ lxc-dave/mm/tiny-shmem.c 2006-06-16 15:58:11.000000000 -0700
@@ -88,6 +88,8 @@ struct file *shmem_file_setup(char *name
file->f_mapping = inode->i_mapping;
file->f_op = &ramfs_file_operations;
file->f_mode = FMODE_WRITE | FMODE_READ;
+ error = mnt_want_write(shm_mnt);
+ WARN_ON(error);
/* notify everyone as to the change of file size */
error = do_truncate(dentry, size, 0, file);
diff -puN net/socket.c~fix-__fput-rev2 net/socket.c
--- lxc/net/socket.c~fix-__fput-rev2 2006-06-16 15:58:11.000000000 -0700
+++ lxc-dave/net/socket.c 2006-06-16 15:58:11.000000000 -0700
@@ -395,6 +395,7 @@ static int sock_attach_fd(struct socket
{
struct qstr this;
char name[32];
+ int error;
this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);
this.name = name;
@@ -415,6 +416,8 @@ static int sock_attach_fd(struct socket
file->f_flags = O_RDWR;
file->f_pos = 0;
file->private_data = sock;
+ error = mnt_want_write(sock_mnt);
+ WARN_ON(error);
return 0;
}
_
^ permalink raw reply [flat|nested] 35+ messages in thread
* [RFC][PATCH 18/20] do_rmdir(): elevate write count
2006-06-16 23:12 [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Dave Hansen
` (17 preceding siblings ...)
2006-06-16 23:12 ` [RFC][PATCH 19/20] elevate writer count for custom 'struct file' Dave Hansen
@ 2006-06-16 23:12 ` Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 20/20] honor r/w changes at do_remount() time Dave Hansen
2006-06-16 23:29 ` [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Grzegorz Kulewski
20 siblings, 0 replies; 35+ messages in thread
From: Dave Hansen @ 2006-06-16 23:12 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, herbert, viro, Dave Hansen
Elevate the write count during the vfs_rmdir() call.
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/namei.c | 5 +++++
1 files changed, 5 insertions(+)
diff -puN fs/namei.c~C-rmdir1 fs/namei.c
--- lxc/fs/namei.c~C-rmdir1 2006-06-16 15:58:10.000000000 -0700
+++ lxc-dave/fs/namei.c 2006-06-16 15:58:10.000000000 -0700
@@ -2056,7 +2056,12 @@ static long do_rmdir(int dfd, const char
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto exit2;
+ error = mnt_want_write(nd.mnt);
+ if (error)
+ goto exit3;
error = vfs_rmdir(nd.dentry->d_inode, dentry);
+ mnt_drop_write(nd.mnt);
+exit3:
dput(dentry);
exit2:
mutex_unlock(&nd.dentry->d_inode->i_mutex);
_
^ permalink raw reply [flat|nested] 35+ messages in thread
* [RFC][PATCH 20/20] honor r/w changes at do_remount() time
2006-06-16 23:12 [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Dave Hansen
` (18 preceding siblings ...)
2006-06-16 23:12 ` [RFC][PATCH 18/20] do_rmdir(): elevate write count Dave Hansen
@ 2006-06-16 23:12 ` Dave Hansen
2006-06-18 18:36 ` Al Viro
2006-06-16 23:29 ` [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Grzegorz Kulewski
20 siblings, 1 reply; 35+ messages in thread
From: Dave Hansen @ 2006-06-16 23:12 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, herbert, viro, Dave Hansen
Originally from: Herbert Poetzl <herbert@13thfloor.at>
This is the core of the read-only bind mount patch set.
Note that this does _not_ add a "ro" option directly to
the bind mount operation. If you require such a mount,
you must first do the bind, then follow it up with a
'mount -o remount,ro' operation.
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/namespace.c | 27 +++++++++++++++++++++++++--
lxc-dave/fs/open.c | 2 +-
lxc-dave/include/linux/mount.h | 1 -
3 files changed, 26 insertions(+), 4 deletions(-)
diff -puN fs/namespace.c~C-D8-actually-add-flags fs/namespace.c
--- lxc/fs/namespace.c~C-D8-actually-add-flags 2006-06-16 15:58:12.000000000 -0700
+++ lxc-dave/fs/namespace.c 2006-06-16 15:58:12.000000000 -0700
@@ -378,7 +378,10 @@ static int show_vfsmnt(struct seq_file *
seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
seq_putc(m, ' ');
mangle(m, mnt->mnt_sb->s_type->name);
- seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
+ if ((mnt->mnt_sb->s_flags & MS_RDONLY) || __mnt_is_readonly(mnt))
+ seq_puts(m, " ro");
+ else
+ seq_puts(m, " rw");
for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
if (mnt->mnt_sb->s_flags & fs_infop->flag)
seq_puts(m, fs_infop->str);
@@ -947,6 +950,23 @@ out:
return err;
}
+static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
+{
+ int error = 0;
+ int readonly_request = 0;
+
+ if (ms_flags & MS_RDONLY)
+ readonly_request = 1;
+ if (readonly_request == __mnt_is_readonly(mnt))
+ return 0;
+
+ if (readonly_request)
+ error = mnt_make_readonly(mnt);
+ else
+ __mnt_make_writable(mnt);
+ return error;
+}
+
/*
* change filesystem flags. dir should be a physical root of filesystem.
* If you've mounted a non-root directory somewhere and want to do remount
@@ -968,7 +988,10 @@ static int do_remount(struct nameidata *
return -EINVAL;
down_write(&sb->s_umount);
- err = do_remount_sb(sb, flags, data, 0);
+ if (flags & MS_BIND)
+ err = change_mount_flags(nd->mnt, flags);
+ else
+ err = do_remount_sb(sb, flags, data, 0);
if (!err)
nd->mnt->mnt_flags = mnt_flags;
up_write(&sb->s_umount);
diff -puN fs/open.c~C-D8-actually-add-flags fs/open.c
--- lxc/fs/open.c~C-D8-actually-add-flags 2006-06-16 15:58:12.000000000 -0700
+++ lxc-dave/fs/open.c 2006-06-16 15:58:12.000000000 -0700
@@ -546,7 +546,7 @@ asmlinkage long sys_faccessat(int dfd, c
special_file(nd.dentry->d_inode->i_mode))
goto out_path_release;
- if(IS_RDONLY(nd.dentry->d_inode))
+ if(__mnt_is_readonly(nd.mnt) || IS_RDONLY(nd.dentry->d_inode))
res = -EROFS;
out_path_release:
diff -puN include/linux/mount.h~C-D8-actually-add-flags include/linux/mount.h
--- lxc/include/linux/mount.h~C-D8-actually-add-flags 2006-06-16 15:58:12.000000000 -0700
+++ lxc-dave/include/linux/mount.h 2006-06-16 15:58:12.000000000 -0700
@@ -87,7 +87,6 @@ static inline void __mnt_make_writable(s
WARN_ON(atomic_read(&mnt->mnt_writers));
atomic_inc(&mnt->mnt_writers);
}
-
static inline int __mnt_is_readonly(struct vfsmount *mnt)
{
return (atomic_read(&mnt->mnt_writers) == 0);
_
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2)
2006-06-16 23:12 [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Dave Hansen
` (19 preceding siblings ...)
2006-06-16 23:12 ` [RFC][PATCH 20/20] honor r/w changes at do_remount() time Dave Hansen
@ 2006-06-16 23:29 ` Grzegorz Kulewski
2006-06-16 23:41 ` Dave Hansen
20 siblings, 1 reply; 35+ messages in thread
From: Grzegorz Kulewski @ 2006-06-16 23:29 UTC (permalink / raw)
To: Dave Hansen; +Cc: linux-kernel, linux-fsdevel, herbert, viro
On Fri, 16 Jun 2006, Dave Hansen wrote:
> The following series implements read-only bind mounts. This feature
> allows a read-only view into a read-write filesystem.
Thanks, I like this idea very much. I think I have found at least one good
use for it even before it is merged. :-)
But...
> One note: the previous patches all worked this way:
>
> mount --bind -o ro /source /dest
>
> These patches have changed that behavior. It now requires two steps:
>
> mount --bind /source /dest
> mount -o remount,ro /dest
Isn't this some kind of security risk (at least in my planned use)? I mean
- for a small fraction of second somebody seeing /dest can write
/source... No?
Thanks,
Grzegorz Kulewski
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2)
2006-06-16 23:29 ` [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Grzegorz Kulewski
@ 2006-06-16 23:41 ` Dave Hansen
2006-06-17 0:10 ` Grzegorz Kulewski
0 siblings, 1 reply; 35+ messages in thread
From: Dave Hansen @ 2006-06-16 23:41 UTC (permalink / raw)
To: Grzegorz Kulewski; +Cc: linux-kernel, linux-fsdevel, herbert, viro
On Sat, 2006-06-17 at 01:29 +0200, Grzegorz Kulewski wrote:
> Isn't this some kind of security risk (at least in my planned use)? I mean
> - for a small fraction of second somebody seeing /dest can write
> /source... No?
I assume you're talking about this kind of situation:
mount --bind /local/writable/dir /chroot/untrusted/area/
mount --o remount,ro /chroot/untrusted/area/
Where there is a window where someone in the chroot can write into the
local directory. Yes. There would be a race there.
However, you can also do the following, which will have no window. It
has a few more steps, but it is a much simpler thing to deal with in the
kernel. Believe me, it starts to get ugly when you try to handle
permission and flag changes at mount time. Keeping --bind'ing as *just*
a simple "copy the source mount" operation makes the implementation very
much simpler.
This has no r/w window in the chroot area:
mount --bind /local/writable/dir /tmp/area/
mount --o remount,ro /tmp/area/
mount --bind /tmp/area/ /chroot/untrusted/area/
umount /tmp/area/
-- Dave
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2)
2006-06-16 23:41 ` Dave Hansen
@ 2006-06-17 0:10 ` Grzegorz Kulewski
2006-06-17 3:35 ` Herbert Poetzl
0 siblings, 1 reply; 35+ messages in thread
From: Grzegorz Kulewski @ 2006-06-17 0:10 UTC (permalink / raw)
To: Dave Hansen; +Cc: linux-kernel, linux-fsdevel, herbert, viro
On Fri, 16 Jun 2006, Dave Hansen wrote:
> On Sat, 2006-06-17 at 01:29 +0200, Grzegorz Kulewski wrote:
>> Isn't this some kind of security risk (at least in my planned use)? I mean
>> - for a small fraction of second somebody seeing /dest can write
>> /source... No?
>
> I assume you're talking about this kind of situation:
>
> mount --bind /local/writable/dir /chroot/untrusted/area/
> mount --o remount,ro /chroot/untrusted/area/
Well, actually about some kind of VPS: openvz or something like that. But
yes, this is the same kind of scenario.
> This has no r/w window in the chroot area:
>
> mount --bind /local/writable/dir /tmp/area/
> mount --o remount,ro /tmp/area/
> mount --bind /tmp/area/ /chroot/untrusted/area/
> umount /tmp/area/
Well, it looks a little scarry and complicated at first. And probably
requires you to know that semantic of --bind lets you do the last unmount.
But if you are saying that this makes kernel smaller, faster and less
buggy then you are probably very right.
Thank you for your explanation,
Grzegorz Kulewski
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2)
2006-06-17 0:10 ` Grzegorz Kulewski
@ 2006-06-17 3:35 ` Herbert Poetzl
2006-06-17 9:36 ` Jan Engelhardt
0 siblings, 1 reply; 35+ messages in thread
From: Herbert Poetzl @ 2006-06-17 3:35 UTC (permalink / raw)
To: Grzegorz Kulewski; +Cc: Dave Hansen, linux-kernel, linux-fsdevel, viro
On Sat, Jun 17, 2006 at 02:10:17AM +0200, Grzegorz Kulewski wrote:
> On Fri, 16 Jun 2006, Dave Hansen wrote:
> >On Sat, 2006-06-17 at 01:29 +0200, Grzegorz Kulewski wrote:
> >>Isn't this some kind of security risk (at least in my planned use)? I mean
> >>- for a small fraction of second somebody seeing /dest can write
> >>/source... No?
> >
> >I assume you're talking about this kind of situation:
> >
> >mount --bind /local/writable/dir /chroot/untrusted/area/
> >mount --o remount,ro /chroot/untrusted/area/
>
> Well, actually about some kind of VPS: openvz or something like that.
> But yes, this is the same kind of scenario.
yes, Linux-VServer provides this kind of ro --bind mounts
without the race, as the the flags are passed on the actual
mount
> >This has no r/w window in the chroot area:
> >
> >mount --bind /local/writable/dir /tmp/area/
> >mount --o remount,ro /tmp/area/
> >mount --bind /tmp/area/ /chroot/untrusted/area/
> >umount /tmp/area/
>
> Well, it looks a little scarry and complicated at first. And probably
> requires you to know that semantic of --bind lets you do the last
> unmount. But if you are saying that this makes kernel smaller, faster
> and less buggy then you are probably very right.
well, it makes the kernel more consistant in it's behaviour,
because especially for --rbind mounts, the logic what is
changed where and when is not as well defined as one would
wish ...
btw, you could get the same result by simply doing:
mount --bind /local/writable/dir /tmp/area/
mount --o remount,ro /tmp/area/
mount --move /tmp/area/ /chroot/untrusted/area/
without the duplicate mount and the unmount
HTH,
Herbert
> Thank you for your explanation,
>
> Grzegorz Kulewski
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2)
2006-06-17 3:35 ` Herbert Poetzl
@ 2006-06-17 9:36 ` Jan Engelhardt
2006-06-17 13:29 ` Herbert Poetzl
0 siblings, 1 reply; 35+ messages in thread
From: Jan Engelhardt @ 2006-06-17 9:36 UTC (permalink / raw)
To: Herbert Poetzl
Cc: Grzegorz Kulewski, Dave Hansen, linux-kernel, linux-fsdevel, viro
>> >I assume you're talking about this kind of situation:
>> >
>> >mount --bind /local/writable/dir /chroot/untrusted/area/
>> >mount --o remount,ro /chroot/untrusted/area/
>>
>> Well, actually about some kind of VPS: openvz or something like that.
>> But yes, this is the same kind of scenario.
>
>yes, Linux-VServer provides this kind of ro --bind mounts
>without the race, as the the flags are passed on the actual
>mount
>
>> >This has no r/w window in the chroot area:
>> >
>> >mount --bind /local/writable/dir /tmp/area/
>> >mount --o remount,ro /tmp/area/
>> >mount --bind /tmp/area/ /chroot/untrusted/area/
>> >umount /tmp/area/
>>
>> Well, it looks a little scarry and complicated at first. And probably
>> requires you to know that semantic of --bind lets you do the last
>> unmount. But if you are saying that this makes kernel smaller, faster
>> and less buggy then you are probably very right.
>
>well, it makes the kernel more consistant in it's behaviour,
>because especially for --rbind mounts, the logic what is
>changed where and when is not as well defined as one would
>wish ...
>
>btw, you could get the same result by simply doing:
>
>mount --bind /local/writable/dir /tmp/area/
>mount --o remount,ro /tmp/area/
>mount --move /tmp/area/ /chroot/untrusted/area/
>
Just a nitpick, you all use "--o" ... :)
mount: option `--o' is ambiguous
Jan Engelhardt
--
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2)
2006-06-17 9:36 ` Jan Engelhardt
@ 2006-06-17 13:29 ` Herbert Poetzl
0 siblings, 0 replies; 35+ messages in thread
From: Herbert Poetzl @ 2006-06-17 13:29 UTC (permalink / raw)
To: Jan Engelhardt
Cc: Grzegorz Kulewski, Dave Hansen, linux-kernel, linux-fsdevel, viro
On Sat, Jun 17, 2006 at 11:36:35AM +0200, Jan Engelhardt wrote:
> >> >I assume you're talking about this kind of situation:
> >> >
> >> >mount --bind /local/writable/dir /chroot/untrusted/area/
> >> >mount --o remount,ro /chroot/untrusted/area/
> >>
> >> Well, actually about some kind of VPS: openvz or something like that.
> >> But yes, this is the same kind of scenario.
> >
> >yes, Linux-VServer provides this kind of ro --bind mounts
> >without the race, as the the flags are passed on the actual
> >mount
> >
> >> >This has no r/w window in the chroot area:
> >> >
> >> >mount --bind /local/writable/dir /tmp/area/
> >> >mount --o remount,ro /tmp/area/
> >> >mount --bind /tmp/area/ /chroot/untrusted/area/
> >> >umount /tmp/area/
> >>
> >> Well, it looks a little scarry and complicated at first. And probably
> >> requires you to know that semantic of --bind lets you do the last
> >> unmount. But if you are saying that this makes kernel smaller, faster
> >> and less buggy then you are probably very right.
> >
> >well, it makes the kernel more consistant in it's behaviour,
> >because especially for --rbind mounts, the logic what is
> >changed where and when is not as well defined as one would
> >wish ...
> >
> >btw, you could get the same result by simply doing:
> >
> >mount --bind /local/writable/dir /tmp/area/
> >mount --o remount,ro /tmp/area/
> >mount --move /tmp/area/ /chroot/untrusted/area/
> >
>
> Just a nitpick, you all use "--o" ... :)
> mount: option `--o' is ambiguous
sorry, copy paste error on my side :)
(probably for Dave too ...)
best,
Herbert
> Jan Engelhardt
> --
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [RFC][PATCH 11/20] elevate write count over calls to vfs_rename()
2006-06-16 23:12 ` [RFC][PATCH 11/20] elevate write count over calls to vfs_rename() Dave Hansen
@ 2006-06-18 18:23 ` Al Viro
2006-06-19 17:18 ` Dave Hansen
0 siblings, 1 reply; 35+ messages in thread
From: Al Viro @ 2006-06-18 18:23 UTC (permalink / raw)
To: Dave Hansen; +Cc: linux-kernel, linux-fsdevel, herbert, Trond Myklebust
On Fri, Jun 16, 2006 at 04:12:21PM -0700, Dave Hansen wrote:
> + err = mnt_want_write(ffhp->fh_export->ex_mnt);
> + if (err)
> + goto out_dput_new;
> +
> + err = mnt_want_write(tfhp->fh_export->ex_mnt);
> + if (err)
> + goto out_mnt_drop_write_old;
> +
> err = vfs_rename(fdir, odentry, tdir, ndentry);
> if (!err && EX_ISSYNC(tfhp->fh_export)) {
> err = nfsd_sync_dir(tdentry);
> if (!err)
> err = nfsd_sync_dir(fdentry);
> }
> -
> + mnt_drop_write(tfhp->fh_export->ex_mnt);
> + out_mnt_drop_write_old:
> + mnt_drop_write(ffhp->fh_export->ex_mnt);
Ahem...
a) nfsd_rename() should check that tfhp->fh_export->ex_mnt ==
ffhp->fh_export->ex_mnt (if not that tfhp->fh_export == ffhp->fh_export)
instead of comparing ->i_sb
b) that patch should do mnt_want_write() only once.
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [RFC][PATCH 03/20] Add vfsmount writer count
2006-06-16 23:12 ` [RFC][PATCH 03/20] Add vfsmount writer count Dave Hansen
@ 2006-06-18 18:33 ` Al Viro
2006-06-19 17:02 ` Dave Hansen
0 siblings, 1 reply; 35+ messages in thread
From: Al Viro @ 2006-06-18 18:33 UTC (permalink / raw)
To: Dave Hansen; +Cc: linux-kernel, linux-fsdevel, herbert
On Fri, Jun 16, 2006 at 04:12:15PM -0700, Dave Hansen wrote:
> +/*
> + * Must be called under write lock on mnt->mnt_sb->s_umount,
> + * this prevents concurrent decrements which could make the
> + * value -1, and test in mnt_want_write() wrongly succeed
> + */
> +static inline int mnt_make_readonly(struct vfsmount *mnt)
> +{
> + if (!atomic_dec_and_test(&mnt->mnt_writers)) {
> + atomic_inc(&mnt->mnt_writers);
> + return -EBUSY;
> + }
> + return 0;
> +}
Check in faccessat() could get screwed by that, if you just lose the
race with final writer going away. Then mnt_make_readonly() will
fail (as it should) and access(2) give -EROFS.
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [RFC][PATCH 20/20] honor r/w changes at do_remount() time
2006-06-16 23:12 ` [RFC][PATCH 20/20] honor r/w changes at do_remount() time Dave Hansen
@ 2006-06-18 18:36 ` Al Viro
2006-06-19 16:45 ` Dave Hansen
0 siblings, 1 reply; 35+ messages in thread
From: Al Viro @ 2006-06-18 18:36 UTC (permalink / raw)
To: Dave Hansen; +Cc: linux-kernel, linux-fsdevel, herbert
On Fri, Jun 16, 2006 at 04:12:28PM -0700, Dave Hansen wrote:
>
> Originally from: Herbert Poetzl <herbert@13thfloor.at>
>
> This is the core of the read-only bind mount patch set.
>
> Note that this does _not_ add a "ro" option directly to
> the bind mount operation. If you require such a mount,
> you must first do the bind, then follow it up with a
> 'mount -o remount,ro' operation.
Hrm... So you want r/o status of vfsmount completely independent from
that of superblock? I.e. allow writable vfsmount over r/o filesystem?
I realize that we have double checks, but...
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [RFC][PATCH 20/20] honor r/w changes at do_remount() time
2006-06-18 18:36 ` Al Viro
@ 2006-06-19 16:45 ` Dave Hansen
0 siblings, 0 replies; 35+ messages in thread
From: Dave Hansen @ 2006-06-19 16:45 UTC (permalink / raw)
To: Al Viro; +Cc: linux-kernel, linux-fsdevel, herbert
On Sun, 2006-06-18 at 19:36 +0100, Al Viro wrote:
> On Fri, Jun 16, 2006 at 04:12:28PM -0700, Dave Hansen wrote:
> >
> > Originally from: Herbert Poetzl <herbert@13thfloor.at>
> >
> > This is the core of the read-only bind mount patch set.
> >
> > Note that this does _not_ add a "ro" option directly to
> > the bind mount operation. If you require such a mount,
> > you must first do the bind, then follow it up with a
> > 'mount -o remount,ro' operation.
>
> Hrm... So you want r/o status of vfsmount completely independent from
> that of superblock? I.e. allow writable vfsmount over r/o filesystem?
> I realize that we have double checks, but...
I think it does make sense to keep them separate. I think of the
superblock flag is really there to describe the state of the filesystem.
Are we even _able_ to write to this thing now?
The vfsmount flag, on the other hand, spells out the intentions of the
person who _did_ the mount. Do I _want_ this to be writable?
Let's say that we eliminate the superblock r/o flag. There's a
filesystem with one regular vfsmount, one r/o bind vfsmount, and one r/w
bind vfsmount. It encounters an error. Not having a superblock flag,
we must go find each vfsmount and mark it r/o (this also enlarges the
window during which the f/s might be written).
Now, the administrator decides that the fs is OK, and remounts it r/w.
The vfsmounts should obviously regain their original permissions, any
other behavior is pretty screwy. To accomplish this, we need both a
"current write state" and a "previous write state", which probably means
a new flag in the vfsmount.
So, we've fanned out this "current state" information from the
superblock, increased the window of fs damage, and added some complexity
when we do the transitions between the states. I think I like the way
it is now. :)
-- Dave
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [RFC][PATCH 03/20] Add vfsmount writer count
2006-06-18 18:33 ` Al Viro
@ 2006-06-19 17:02 ` Dave Hansen
2006-06-20 21:20 ` Al Viro
0 siblings, 1 reply; 35+ messages in thread
From: Dave Hansen @ 2006-06-19 17:02 UTC (permalink / raw)
To: Al Viro; +Cc: linux-kernel, linux-fsdevel, herbert
On Sun, 2006-06-18 at 19:33 +0100, Al Viro wrote:
> On Fri, Jun 16, 2006 at 04:12:15PM -0700, Dave Hansen wrote:
> > +/*
> > + * Must be called under write lock on mnt->mnt_sb->s_umount,
> > + * this prevents concurrent decrements which could make the
> > + * value -1, and test in mnt_want_write() wrongly succeed
> > + */
> > +static inline int mnt_make_readonly(struct vfsmount *mnt)
> > +{
> > + if (!atomic_dec_and_test(&mnt->mnt_writers)) {
> > + atomic_inc(&mnt->mnt_writers);
> > + return -EBUSY;
> > + }
> > + return 0;
> > +}
>
> Check in faccessat() could get screwed by that, if you just lose the
> race with final writer going away. Then mnt_make_readonly() will
> fail (as it should) and access(2) give -EROFS.
Very true. How about this to fix it?
--- lxc/fs//open.c~C8.1-fix-faccesat 2006-06-19 09:59:41.000000000 -0700
+++ lxc-dave/fs//open.c 2006-06-19 10:01:25.000000000 -0700
@@ -546,8 +546,12 @@ asmlinkage long sys_faccessat(int dfd, c
special_file(nd.dentry->d_inode->i_mode))
goto out_path_release;
- if(__mnt_is_readonly(nd.mnt) || IS_RDONLY(nd.dentry->d_inode))
- res = -EROFS;
+ res = mnt_want_write(nd.mnt);
+ if (!res) {
+ mnt_drop_write(nd.mnt);
+ if(IS_RDONLY(nd.dentry->d_inode))
+ res = -EROFS;
+ }
out_path_release:
path_release(&nd);
-- Dave
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [RFC][PATCH 11/20] elevate write count over calls to vfs_rename()
2006-06-18 18:23 ` Al Viro
@ 2006-06-19 17:18 ` Dave Hansen
0 siblings, 0 replies; 35+ messages in thread
From: Dave Hansen @ 2006-06-19 17:18 UTC (permalink / raw)
To: Al Viro; +Cc: linux-kernel, linux-fsdevel, herbert, Trond Myklebust
On Sun, 2006-06-18 at 19:23 +0100, Al Viro wrote:
> a) nfsd_rename() should check that tfhp->fh_export->ex_mnt ==
> ffhp->fh_export->ex_mnt (if not that tfhp->fh_export == ffhp->fh_export)
> instead of comparing ->i_sb
> b) that patch should do mnt_want_write() only once.
Here's a replacement.
diff -puN fs/nfsd/vfs.c~C-elevate-writers-vfs_rename-part1 fs/nfsd/vfs.c
--- lxc/fs/nfsd/vfs.c~C-elevate-writers-vfs_rename-part1 2006-06-19 08:43:10.000000000 -0700
+++ lxc-dave/fs/nfsd/vfs.c 2006-06-19 10:09:19.000000000 -0700
@@ -1597,13 +1597,21 @@ nfsd_rename(struct svc_rqst *rqstp, stru
err = -EPERM;
} else
#endif
+ err = -EXDEV
+ if (ffhp->fh_export->ex_mnt != tfhp->fh_export->ex_mnt)
+ goto out_dput_new;
+
+ err = mnt_want_write(ffhp->fh_export->ex_mnt);
+ if (err)
+ goto out_dput_new;
+
err = vfs_rename(fdir, odentry, tdir, ndentry);
if (!err && EX_ISSYNC(tfhp->fh_export)) {
err = nfsd_sync_dir(tdentry);
if (!err)
err = nfsd_sync_dir(fdentry);
}
-
+ mnt_drop_write(ffhp->fh_export->ex_mnt);
out_dput_new:
dput(ndentry);
out_dput_old:
-- Dave
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [RFC][PATCH 03/20] Add vfsmount writer count
2006-06-19 17:02 ` Dave Hansen
@ 2006-06-20 21:20 ` Al Viro
2006-06-22 17:01 ` Dave Hansen
0 siblings, 1 reply; 35+ messages in thread
From: Al Viro @ 2006-06-20 21:20 UTC (permalink / raw)
To: Dave Hansen; +Cc: linux-kernel, linux-fsdevel, herbert
On Mon, Jun 19, 2006 at 10:02:16AM -0700, Dave Hansen wrote:
> Very true. How about this to fix it?
>
> --- lxc/fs//open.c~C8.1-fix-faccesat 2006-06-19 09:59:41.000000000 -0700
> +++ lxc-dave/fs//open.c 2006-06-19 10:01:25.000000000 -0700
> @@ -546,8 +546,12 @@ asmlinkage long sys_faccessat(int dfd, c
> special_file(nd.dentry->d_inode->i_mode))
> goto out_path_release;
>
> - if(__mnt_is_readonly(nd.mnt) || IS_RDONLY(nd.dentry->d_inode))
> - res = -EROFS;
> + res = mnt_want_write(nd.mnt);
> + if (!res) {
> + mnt_drop_write(nd.mnt);
> + if(IS_RDONLY(nd.dentry->d_inode))
> + res = -EROFS;
> + }
So access() can make remount r/o fail? Uh-oh...
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [RFC][PATCH 03/20] Add vfsmount writer count
2006-06-20 21:20 ` Al Viro
@ 2006-06-22 17:01 ` Dave Hansen
0 siblings, 0 replies; 35+ messages in thread
From: Dave Hansen @ 2006-06-22 17:01 UTC (permalink / raw)
To: Al Viro; +Cc: linux-kernel, linux-fsdevel, herbert
On Tue, 2006-06-20 at 22:20 +0100, Al Viro wrote:
> On Mon, Jun 19, 2006 at 10:02:16AM -0700, Dave Hansen wrote:
> > Very true. How about this to fix it?
> >
> > --- lxc/fs//open.c~C8.1-fix-faccesat 2006-06-19 09:59:41.000000000 -0700
> > +++ lxc-dave/fs//open.c 2006-06-19 10:01:25.000000000 -0700
> > @@ -546,8 +546,12 @@ asmlinkage long sys_faccessat(int dfd, c
> > special_file(nd.dentry->d_inode->i_mode))
> > goto out_path_release;
> >
> > - if(__mnt_is_readonly(nd.mnt) || IS_RDONLY(nd.dentry->d_inode))
> > - res = -EROFS;
> > + res = mnt_want_write(nd.mnt);
> > + if (!res) {
> > + mnt_drop_write(nd.mnt);
> > + if(IS_RDONLY(nd.dentry->d_inode))
> > + res = -EROFS;
> > + }
>
> So access() can make remount r/o fail? Uh-oh...
Good point. Thanks for catching that.
There are probably two solutions here: rework the atomics to always give
a consistent view, even during mnt_make_readonly() calls, or keep the
mnt_make_readonly() side from executing.
We're already protecting this mnt_make_readonly() from races with itself
with a write on the mnt->mnt_sb->s_umount semaphore. We should be able
to use a read lock on the same semaphore to exclude the
mnt_make_readonly() callers.
Thoughts?
diff -puN include/linux/mount.h~C-D8-actually-add-flags include/linux/mount.h
--- robind/include/linux/mount.h~C-D8-actually-add-flags 2006-06-20 14:11:15.000000000 -0700
+++ robind-dave/include/linux/mount.h 2006-06-20 16:01:11.000000000 -0700
@@ -91,6 +91,25 @@ static inline int __mnt_is_readonly(stru
return (atomic_read(&mnt->mnt_writers) == 0);
}
+/*
+ * This needs to get a consistent look at mnt_writers.
+ * Without the lock, it can race against mnt_make_readonly()
+ * and mistake a temporarily decremented mnt_writers
+ * for a real read-only mount.
+ *
+ * Note: this is never suitable if you need to perform any
+ * write *operations* on the mount, only as a snapshot.
+ */
+static inline int mnt_is_readonly(struct vfsmount *mnt)
+{
+ int ret;
+
+ down_read(&mnt->mnt_sb->s_umount);
+ ret = __mnt_is_readonly(mnt);
+ up_read(&mnt->mnt_sb->s_umount);
+ return ret;
+}
+
static inline int mnt_want_write(struct vfsmount *mnt)
{
int ret = 0;
--- lxc/fs/open.c~C8.1-fix-faccesat 2006-06-22 09:43:01.000000000 -0700
+++ lxc-dave/fs/open.c 2006-06-22 09:43:01.000000000 -0700
@@ -546,8 +546,12 @@ asmlinkage long sys_faccessat(int dfd, c
special_file(nd.dentry->d_inode->i_mode))
goto out_path_release;
- if(__mnt_is_readonly(nd.mnt) || IS_RDONLY(nd.dentry->d_inode))
+ if(mnt_is_readonly(nd.mnt) || IS_RDONLY(nd.dentry->d_inode))
res = -EROFS;
out_path_release:
path_release(&nd);
-- Dave
^ permalink raw reply [flat|nested] 35+ messages in thread
end of thread, other threads:[~2006-06-22 17:02 UTC | newest]
Thread overview: 35+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-06-16 23:12 [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 01/20] prepare for write access checks: collapse if() Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 02/20] r/o bind mount prepwork: move open_namei()'s vfs_create() Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 03/20] Add vfsmount writer count Dave Hansen
2006-06-18 18:33 ` Al Viro
2006-06-19 17:02 ` Dave Hansen
2006-06-20 21:20 ` Al Viro
2006-06-22 17:01 ` Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 04/20] elevate mnt writers for callers of vfs_mkdir() Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 05/20] elevate write count during entire ncp_ioctl() Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 07/20] elevate mount count for extended attributes Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 06/20] sys_symlinkat() elevate write count around vfs_symlink() Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 08/20] sys_linkat(): elevate write count around vfs_link() Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 09/20] mount_is_safe(): add comment Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 10/20] unix_find_other() elevate write count for touch_atime() Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 11/20] elevate write count over calls to vfs_rename() Dave Hansen
2006-06-18 18:23 ` Al Viro
2006-06-19 17:18 ` Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 12/20] tricky: elevate write count files are open()ed Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 13/20] elevate writer count for do_sys_truncate() Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 14/20] elevate write count for do_utimes() Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 15/20] elevate write count for do_sys_utime() and touch_atime() Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 16/20] sys_mknodat(): elevate write count for vfs_mknod/create() Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 17/20] elevate mnt writers for vfs_unlink() callers Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 19/20] elevate writer count for custom 'struct file' Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 18/20] do_rmdir(): elevate write count Dave Hansen
2006-06-16 23:12 ` [RFC][PATCH 20/20] honor r/w changes at do_remount() time Dave Hansen
2006-06-18 18:36 ` Al Viro
2006-06-19 16:45 ` Dave Hansen
2006-06-16 23:29 ` [RFC][PATCH 00/20] Mount writer count and read-only bind mounts (v2) Grzegorz Kulewski
2006-06-16 23:41 ` Dave Hansen
2006-06-17 0:10 ` Grzegorz Kulewski
2006-06-17 3:35 ` Herbert Poetzl
2006-06-17 9:36 ` Jan Engelhardt
2006-06-17 13:29 ` Herbert Poetzl
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).