* [patch 1/9] unprivileged mounts: add user mounts to the kernel
2008-01-08 11:35 [patch 0/9] mount ownership and unprivileged mount syscall (v6) Miklos Szeredi
@ 2008-01-08 11:35 ` Miklos Szeredi
2008-01-08 21:34 ` Pavel Machek
` (2 more replies)
2008-01-08 11:35 ` [patch 2/9] unprivileged mounts: allow unprivileged umount Miklos Szeredi
` (7 subsequent siblings)
8 siblings, 3 replies; 62+ messages in thread
From: Miklos Szeredi @ 2008-01-08 11:35 UTC (permalink / raw)
To: akpm, hch, serue, viro, ebiederm, kzak
Cc: linux-fsdevel, linux-kernel, containers, util-linux-ng
[-- Attachment #1: unprivileged-mounts-add-user-mounts-to-the-kernel.patch --]
[-- Type: text/plain, Size: 8559 bytes --]
From: Miklos Szeredi <mszeredi@suse.cz>
This patchset adds support for keeping mount ownership information in the
kernel, and allow unprivileged mount(2) and umount(2) in certain cases.
The mount owner has the following privileges:
- unmount the owned mount
- create a submount under the owned mount
The sysadmin can set the owner explicitly on mount and remount. When an
unprivileged user creates a mount, then the owner is automatically set to the
user.
The following use cases are envisioned:
1) Private namespace, with selected mounts owned by user. E.g.
/home/$USER is a good candidate for allowing unpriv mounts and unmounts
within.
2) Private namespace, with all mounts owned by user and having the "nosuid"
flag. User can mount and umount anywhere within the namespace, but suid
programs will not work.
3) Global namespace, with a designated directory, which is a mount owned by
the user. E.g. /mnt/users/$USER is set up so that it is bind mounted onto
itself, and set to be owned by $USER. The user can add/remove mounts only
under this directory.
The following extra security measures are taken for unprivileged mounts:
- usermounts are limited by a sysctl tunable
- force "nosuid,nodev" mount options on the created mount
For testing unprivileged mounts (and for other purposes) simple
mount/umount utilities are available from:
http://www.kernel.org/pub/linux/kernel/people/mszeredi/mmount/
After this series I'll be posting a preliminary patch for util-linux-ng,
to add the same functionality to mount(8) and umount(8).
This patch:
A new mount flag, MS_SETUSER is used to make a mount owned by a user. If this
flag is specified, then the owner will be set to the current fsuid and the
mount will be marked with the MNT_USER flag. On remount don't preserve
previous owner, and treat MS_SETUSER as for a new mount. The MS_SETUSER flag
is ignored on mount move.
The MNT_USER flag is not copied on any kind of mount cloning: namespace
creation, binding or propagation. For bind mounts the cloned mount(s) are set
to MNT_USER depending on the MS_SETUSER mount flag. In all the other cases
MNT_USER is always cleared.
For MNT_USER mounts a "user=UID" option is added to /proc/PID/mounts. This is
compatible with how mount ownership is stored in /etc/mtab.
The rationale for using MS_SETUSER and MNT_USER, to distinguish "user"
mounts from "non-user" or "legacy" mounts are follows:
a) Mount(2) and umount(2) on legacy mounts always need CAP_SYS_ADMIN
capability. As opposed to user mounts, which will only require,
that the mount owner matches the current fsuid. So a process
with fsuid=0 should not be able to mount/umount legacy mounts
without the CAP_SYS_ADMIN capability.
b) Legacy userspace programs may set fsuid to nonzero before calling
mount(2). In such an unlikely case, this patchset would cause
an unintended side effect of making the mount owned by the fsuid.
c) For legacy mounts, no "user=UID" option should be shown in
/proc/mounts for backwards compatibility.
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
Index: linux/fs/namespace.c
===================================================================
--- linux.orig/fs/namespace.c 2008-01-03 22:10:10.000000000 +0100
+++ linux/fs/namespace.c 2008-01-04 13:46:33.000000000 +0100
@@ -477,6 +477,13 @@ static struct vfsmount *skip_mnt_tree(st
return p;
}
+static void set_mnt_user(struct vfsmount *mnt)
+{
+ BUG_ON(mnt->mnt_flags & MNT_USER);
+ mnt->mnt_uid = current->fsuid;
+ mnt->mnt_flags |= MNT_USER;
+}
+
static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
int flag)
{
@@ -491,6 +498,11 @@ static struct vfsmount *clone_mnt(struct
mnt->mnt_mountpoint = mnt->mnt_root;
mnt->mnt_parent = mnt;
+ /* don't copy the MNT_USER flag */
+ mnt->mnt_flags &= ~MNT_USER;
+ if (flag & CL_SETUSER)
+ set_mnt_user(mnt);
+
if (flag & CL_SLAVE) {
list_add(&mnt->mnt_slave, &old->mnt_slave_list);
mnt->mnt_master = old;
@@ -644,6 +656,8 @@ static int show_vfsmnt(struct seq_file *
if (mnt->mnt_flags & fs_infop->flag)
seq_puts(m, fs_infop->str);
}
+ if (mnt->mnt_flags & MNT_USER)
+ seq_printf(m, ",user=%i", mnt->mnt_uid);
if (mnt->mnt_sb->s_op->show_options)
err = mnt->mnt_sb->s_op->show_options(m, mnt);
seq_puts(m, " 0 0\n");
@@ -1181,8 +1195,9 @@ static int do_change_type(struct nameida
/*
* do loopback mount.
*/
-static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
+static int do_loopback(struct nameidata *nd, char *old_name, int flags)
{
+ int clone_fl;
struct nameidata old_nd;
struct vfsmount *mnt = NULL;
int err = mount_is_safe(nd);
@@ -1202,11 +1217,12 @@ static int do_loopback(struct nameidata
if (!check_mnt(nd->path.mnt) || !check_mnt(old_nd.path.mnt))
goto out;
+ clone_fl = (flags & MS_SETUSER) ? CL_SETUSER : 0;
err = -ENOMEM;
- if (recurse)
- mnt = copy_tree(old_nd.path.mnt, old_nd.path.dentry, 0);
+ if (flags & MS_REC)
+ mnt = copy_tree(old_nd.path.mnt, old_nd.path.dentry, clone_fl);
else
- mnt = clone_mnt(old_nd.path.mnt, old_nd.path.dentry, 0);
+ mnt = clone_mnt(old_nd.path.mnt, old_nd.path.dentry, clone_fl);
if (!mnt)
goto out;
@@ -1268,8 +1284,11 @@ static int do_remount(struct nameidata *
err = change_mount_flags(nd->path.mnt, flags);
else
err = do_remount_sb(sb, flags, data, 0);
- if (!err)
+ if (!err) {
nd->path.mnt->mnt_flags = mnt_flags;
+ if (flags & MS_SETUSER)
+ set_mnt_user(nd->path.mnt);
+ }
up_write(&sb->s_umount);
if (!err)
security_sb_post_remount(nd->path.mnt, flags, data);
@@ -1378,10 +1397,13 @@ static int do_new_mount(struct nameidata
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- mnt = do_kern_mount(type, flags, name, data);
+ mnt = do_kern_mount(type, flags & ~MS_SETUSER, name, data);
if (IS_ERR(mnt))
return PTR_ERR(mnt);
+ if (flags & MS_SETUSER)
+ set_mnt_user(mnt);
+
return do_add_mount(mnt, nd, mnt_flags, NULL);
}
@@ -1413,7 +1435,8 @@ int do_add_mount(struct vfsmount *newmnt
if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
goto unlock;
- newmnt->mnt_flags = mnt_flags;
+ /* MNT_USER was set earlier */
+ newmnt->mnt_flags |= mnt_flags;
if ((err = graft_tree(newmnt, nd)))
goto unlock;
@@ -1735,7 +1758,7 @@ long do_mount(char *dev_name, char *dir_
retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
data_page);
else if (flags & MS_BIND)
- retval = do_loopback(&nd, dev_name, flags & MS_REC);
+ retval = do_loopback(&nd, dev_name, flags);
else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
retval = do_change_type(&nd, flags);
else if (flags & MS_MOVE)
Index: linux/fs/pnode.h
===================================================================
--- linux.orig/fs/pnode.h 2008-01-03 22:10:10.000000000 +0100
+++ linux/fs/pnode.h 2008-01-04 13:45:45.000000000 +0100
@@ -23,6 +23,7 @@
#define CL_MAKE_SHARED 0x08
#define CL_PROPAGATION 0x10
#define CL_PRIVATE 0x20
+#define CL_SETUSER 0x40
static inline void set_mnt_shared(struct vfsmount *mnt)
{
Index: linux/include/linux/fs.h
===================================================================
--- linux.orig/include/linux/fs.h 2008-01-03 22:10:10.000000000 +0100
+++ linux/include/linux/fs.h 2008-01-04 13:45:46.000000000 +0100
@@ -125,6 +125,7 @@ extern int dir_notify_enable;
#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */
#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */
#define MS_I_VERSION (1<<23) /* Update inode I_version field */
+#define MS_SETUSER (1<<24) /* set mnt_uid to current user */
#define MS_ACTIVE (1<<30)
#define MS_NOUSER (1<<31)
Index: linux/include/linux/mount.h
===================================================================
--- linux.orig/include/linux/mount.h 2008-01-03 22:10:10.000000000 +0100
+++ linux/include/linux/mount.h 2008-01-04 13:45:45.000000000 +0100
@@ -33,6 +33,7 @@ struct mnt_namespace;
#define MNT_SHRINKABLE 0x100
#define MNT_IMBALANCED_WRITE_COUNT 0x200 /* just for debugging */
+#define MNT_USER 0x400
#define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */
#define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */
@@ -69,6 +70,8 @@ struct vfsmount {
* are held, and all mnt_writer[]s on this mount have 0 as their ->count
*/
atomic_t __mnt_writers;
+
+ uid_t mnt_uid; /* owner of the mount */
};
static inline struct vfsmount *mntget(struct vfsmount *mnt)
--
^ permalink raw reply [flat|nested] 62+ messages in thread* Re: [patch 1/9] unprivileged mounts: add user mounts to the kernel
2008-01-08 11:35 ` [patch 1/9] unprivileged mounts: add user mounts to the kernel Miklos Szeredi
@ 2008-01-08 21:34 ` Pavel Machek
[not found] ` <20080108113619.213519920-sUDqSbJrdHQHWmgEVkV9KA@public.gmane.org>
2008-01-14 21:46 ` Serge E. Hallyn
2 siblings, 0 replies; 62+ messages in thread
From: Pavel Machek @ 2008-01-08 21:34 UTC (permalink / raw)
To: Miklos Szeredi
Cc: akpm, hch, serue, viro, ebiederm, kzak, linux-fsdevel,
linux-kernel, containers, util-linux-ng
On Tue 2008-01-08 12:35:03, Miklos Szeredi wrote:
> From: Miklos Szeredi <mszeredi@suse.cz>
>
> This patchset adds support for keeping mount ownership information in the
> kernel, and allow unprivileged mount(2) and umount(2) in certain cases.
>
> The mount owner has the following privileges:
>
> - unmount the owned mount
> - create a submount under the owned mount
- create unkillable processes
- block suspend/hibernation
?
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
^ permalink raw reply [flat|nested] 62+ messages in thread[parent not found: <20080108113619.213519920-sUDqSbJrdHQHWmgEVkV9KA@public.gmane.org>]
* Re: [patch 1/9] unprivileged mounts: add user mounts to the kernel
[not found] ` <20080108113619.213519920-sUDqSbJrdHQHWmgEVkV9KA@public.gmane.org>
@ 2008-01-08 21:47 ` Pavel Machek
0 siblings, 0 replies; 62+ messages in thread
From: Pavel Machek @ 2008-01-08 21:47 UTC (permalink / raw)
To: Miklos Szeredi
Cc: akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b, hch-wEGCiKHe2LqWVfeAwA7xHQ,
serue-r/Jw6+rmf7HQT0dZR+AlfA, viro-rfM+Q5joDG/XmaaqVzeoHQ,
ebiederm-aS9lmoZGLiVWk0Htik3J/w, kzak-H+wXaHxf7aLQT0dZR+AlfA,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
containers-qjLDD68F18O7TbgM5vRIOg,
util-linux-ng-u79uwXL29TY76Z2rM5mHXA
On Tue 2008-01-08 12:35:03, Miklos Szeredi wrote:
> From: Miklos Szeredi <mszeredi-AlSwsSmVLrQ@public.gmane.org>
>
> This patchset adds support for keeping mount ownership information in the
> kernel, and allow unprivileged mount(2) and umount(2) in certain cases.
>
> The mount owner has the following privileges:
>
> - unmount the owned mount
> - create a submount under the owned mount
- create traps for updatedb, etc?
Is there Doc* file somewhere describing dangers of allowing this?
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
^ permalink raw reply [flat|nested] 62+ messages in thread
* Re: [patch 1/9] unprivileged mounts: add user mounts to the kernel
2008-01-08 11:35 ` [patch 1/9] unprivileged mounts: add user mounts to the kernel Miklos Szeredi
2008-01-08 21:34 ` Pavel Machek
[not found] ` <20080108113619.213519920-sUDqSbJrdHQHWmgEVkV9KA@public.gmane.org>
@ 2008-01-14 21:46 ` Serge E. Hallyn
2 siblings, 0 replies; 62+ messages in thread
From: Serge E. Hallyn @ 2008-01-14 21:46 UTC (permalink / raw)
To: Miklos Szeredi
Cc: akpm, hch, serue, viro, ebiederm, kzak, linux-fsdevel,
linux-kernel, containers, util-linux-ng
Quoting Miklos Szeredi (miklos@szeredi.hu):
> From: Miklos Szeredi <mszeredi@suse.cz>
>
> This patchset adds support for keeping mount ownership information in the
> kernel, and allow unprivileged mount(2) and umount(2) in certain cases.
>
> The mount owner has the following privileges:
>
> - unmount the owned mount
> - create a submount under the owned mount
>
> The sysadmin can set the owner explicitly on mount and remount. When an
> unprivileged user creates a mount, then the owner is automatically set to the
> user.
>
> The following use cases are envisioned:
>
> 1) Private namespace, with selected mounts owned by user. E.g.
> /home/$USER is a good candidate for allowing unpriv mounts and unmounts
> within.
>
> 2) Private namespace, with all mounts owned by user and having the "nosuid"
> flag. User can mount and umount anywhere within the namespace, but suid
> programs will not work.
>
> 3) Global namespace, with a designated directory, which is a mount owned by
> the user. E.g. /mnt/users/$USER is set up so that it is bind mounted onto
> itself, and set to be owned by $USER. The user can add/remove mounts only
> under this directory.
>
> The following extra security measures are taken for unprivileged mounts:
>
> - usermounts are limited by a sysctl tunable
> - force "nosuid,nodev" mount options on the created mount
>
> For testing unprivileged mounts (and for other purposes) simple
> mount/umount utilities are available from:
>
> http://www.kernel.org/pub/linux/kernel/people/mszeredi/mmount/
>
> After this series I'll be posting a preliminary patch for util-linux-ng,
> to add the same functionality to mount(8) and umount(8).
>
> This patch:
>
> A new mount flag, MS_SETUSER is used to make a mount owned by a user. If this
> flag is specified, then the owner will be set to the current fsuid and the
> mount will be marked with the MNT_USER flag. On remount don't preserve
> previous owner, and treat MS_SETUSER as for a new mount. The MS_SETUSER flag
> is ignored on mount move.
>
> The MNT_USER flag is not copied on any kind of mount cloning: namespace
> creation, binding or propagation. For bind mounts the cloned mount(s) are set
> to MNT_USER depending on the MS_SETUSER mount flag. In all the other cases
> MNT_USER is always cleared.
>
> For MNT_USER mounts a "user=UID" option is added to /proc/PID/mounts. This is
> compatible with how mount ownership is stored in /etc/mtab.
>
> The rationale for using MS_SETUSER and MNT_USER, to distinguish "user"
> mounts from "non-user" or "legacy" mounts are follows:
>
> a) Mount(2) and umount(2) on legacy mounts always need CAP_SYS_ADMIN
> capability. As opposed to user mounts, which will only require,
> that the mount owner matches the current fsuid. So a process
> with fsuid=0 should not be able to mount/umount legacy mounts
> without the CAP_SYS_ADMIN capability.
>
> b) Legacy userspace programs may set fsuid to nonzero before calling
> mount(2). In such an unlikely case, this patchset would cause
> an unintended side effect of making the mount owned by the fsuid.
>
> c) For legacy mounts, no "user=UID" option should be shown in
> /proc/mounts for backwards compatibility.
>
> Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
This looks good to me.
Acked-by: Serge Hallyn <serue@us.ibm.com>
thanks,
-serge
> ---
>
> Index: linux/fs/namespace.c
> ===================================================================
> --- linux.orig/fs/namespace.c 2008-01-03 22:10:10.000000000 +0100
> +++ linux/fs/namespace.c 2008-01-04 13:46:33.000000000 +0100
> @@ -477,6 +477,13 @@ static struct vfsmount *skip_mnt_tree(st
> return p;
> }
>
> +static void set_mnt_user(struct vfsmount *mnt)
> +{
> + BUG_ON(mnt->mnt_flags & MNT_USER);
> + mnt->mnt_uid = current->fsuid;
> + mnt->mnt_flags |= MNT_USER;
> +}
> +
> static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
> int flag)
> {
> @@ -491,6 +498,11 @@ static struct vfsmount *clone_mnt(struct
> mnt->mnt_mountpoint = mnt->mnt_root;
> mnt->mnt_parent = mnt;
>
> + /* don't copy the MNT_USER flag */
> + mnt->mnt_flags &= ~MNT_USER;
> + if (flag & CL_SETUSER)
> + set_mnt_user(mnt);
> +
> if (flag & CL_SLAVE) {
> list_add(&mnt->mnt_slave, &old->mnt_slave_list);
> mnt->mnt_master = old;
> @@ -644,6 +656,8 @@ static int show_vfsmnt(struct seq_file *
> if (mnt->mnt_flags & fs_infop->flag)
> seq_puts(m, fs_infop->str);
> }
> + if (mnt->mnt_flags & MNT_USER)
> + seq_printf(m, ",user=%i", mnt->mnt_uid);
> if (mnt->mnt_sb->s_op->show_options)
> err = mnt->mnt_sb->s_op->show_options(m, mnt);
> seq_puts(m, " 0 0\n");
> @@ -1181,8 +1195,9 @@ static int do_change_type(struct nameida
> /*
> * do loopback mount.
> */
> -static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
> +static int do_loopback(struct nameidata *nd, char *old_name, int flags)
> {
> + int clone_fl;
> struct nameidata old_nd;
> struct vfsmount *mnt = NULL;
> int err = mount_is_safe(nd);
> @@ -1202,11 +1217,12 @@ static int do_loopback(struct nameidata
> if (!check_mnt(nd->path.mnt) || !check_mnt(old_nd.path.mnt))
> goto out;
>
> + clone_fl = (flags & MS_SETUSER) ? CL_SETUSER : 0;
> err = -ENOMEM;
> - if (recurse)
> - mnt = copy_tree(old_nd.path.mnt, old_nd.path.dentry, 0);
> + if (flags & MS_REC)
> + mnt = copy_tree(old_nd.path.mnt, old_nd.path.dentry, clone_fl);
> else
> - mnt = clone_mnt(old_nd.path.mnt, old_nd.path.dentry, 0);
> + mnt = clone_mnt(old_nd.path.mnt, old_nd.path.dentry, clone_fl);
>
> if (!mnt)
> goto out;
> @@ -1268,8 +1284,11 @@ static int do_remount(struct nameidata *
> err = change_mount_flags(nd->path.mnt, flags);
> else
> err = do_remount_sb(sb, flags, data, 0);
> - if (!err)
> + if (!err) {
> nd->path.mnt->mnt_flags = mnt_flags;
> + if (flags & MS_SETUSER)
> + set_mnt_user(nd->path.mnt);
> + }
> up_write(&sb->s_umount);
> if (!err)
> security_sb_post_remount(nd->path.mnt, flags, data);
> @@ -1378,10 +1397,13 @@ static int do_new_mount(struct nameidata
> if (!capable(CAP_SYS_ADMIN))
> return -EPERM;
>
> - mnt = do_kern_mount(type, flags, name, data);
> + mnt = do_kern_mount(type, flags & ~MS_SETUSER, name, data);
> if (IS_ERR(mnt))
> return PTR_ERR(mnt);
>
> + if (flags & MS_SETUSER)
> + set_mnt_user(mnt);
> +
> return do_add_mount(mnt, nd, mnt_flags, NULL);
> }
>
> @@ -1413,7 +1435,8 @@ int do_add_mount(struct vfsmount *newmnt
> if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
> goto unlock;
>
> - newmnt->mnt_flags = mnt_flags;
> + /* MNT_USER was set earlier */
> + newmnt->mnt_flags |= mnt_flags;
> if ((err = graft_tree(newmnt, nd)))
> goto unlock;
>
> @@ -1735,7 +1758,7 @@ long do_mount(char *dev_name, char *dir_
> retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
> data_page);
> else if (flags & MS_BIND)
> - retval = do_loopback(&nd, dev_name, flags & MS_REC);
> + retval = do_loopback(&nd, dev_name, flags);
> else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
> retval = do_change_type(&nd, flags);
> else if (flags & MS_MOVE)
> Index: linux/fs/pnode.h
> ===================================================================
> --- linux.orig/fs/pnode.h 2008-01-03 22:10:10.000000000 +0100
> +++ linux/fs/pnode.h 2008-01-04 13:45:45.000000000 +0100
> @@ -23,6 +23,7 @@
> #define CL_MAKE_SHARED 0x08
> #define CL_PROPAGATION 0x10
> #define CL_PRIVATE 0x20
> +#define CL_SETUSER 0x40
>
> static inline void set_mnt_shared(struct vfsmount *mnt)
> {
> Index: linux/include/linux/fs.h
> ===================================================================
> --- linux.orig/include/linux/fs.h 2008-01-03 22:10:10.000000000 +0100
> +++ linux/include/linux/fs.h 2008-01-04 13:45:46.000000000 +0100
> @@ -125,6 +125,7 @@ extern int dir_notify_enable;
> #define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */
> #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */
> #define MS_I_VERSION (1<<23) /* Update inode I_version field */
> +#define MS_SETUSER (1<<24) /* set mnt_uid to current user */
> #define MS_ACTIVE (1<<30)
> #define MS_NOUSER (1<<31)
>
> Index: linux/include/linux/mount.h
> ===================================================================
> --- linux.orig/include/linux/mount.h 2008-01-03 22:10:10.000000000 +0100
> +++ linux/include/linux/mount.h 2008-01-04 13:45:45.000000000 +0100
> @@ -33,6 +33,7 @@ struct mnt_namespace;
>
> #define MNT_SHRINKABLE 0x100
> #define MNT_IMBALANCED_WRITE_COUNT 0x200 /* just for debugging */
> +#define MNT_USER 0x400
>
> #define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */
> #define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */
> @@ -69,6 +70,8 @@ struct vfsmount {
> * are held, and all mnt_writer[]s on this mount have 0 as their ->count
> */
> atomic_t __mnt_writers;
> +
> + uid_t mnt_uid; /* owner of the mount */
> };
>
> static inline struct vfsmount *mntget(struct vfsmount *mnt)
>
> --
^ permalink raw reply [flat|nested] 62+ messages in thread
* [patch 2/9] unprivileged mounts: allow unprivileged umount
2008-01-08 11:35 [patch 0/9] mount ownership and unprivileged mount syscall (v6) Miklos Szeredi
2008-01-08 11:35 ` [patch 1/9] unprivileged mounts: add user mounts to the kernel Miklos Szeredi
@ 2008-01-08 11:35 ` Miklos Szeredi
[not found] ` <20080108113620.664824939-sUDqSbJrdHQHWmgEVkV9KA@public.gmane.org>
2008-01-08 11:35 ` [patch 3/9] unprivileged mounts: account user mounts Miklos Szeredi
` (6 subsequent siblings)
8 siblings, 1 reply; 62+ messages in thread
From: Miklos Szeredi @ 2008-01-08 11:35 UTC (permalink / raw)
To: akpm, hch, serue, viro, ebiederm, kzak
Cc: linux-fsdevel, linux-kernel, containers, util-linux-ng
[-- Attachment #1: unprivileged-mounts-allow-unprivileged-umount.patch --]
[-- Type: text/plain, Size: 1477 bytes --]
From: Miklos Szeredi <mszeredi@suse.cz>
The owner doesn't need sysadmin capabilities to call umount().
Similar behavior as umount(8) on mounts having "user=UID" option in /etc/mtab.
The difference is that umount also checks /etc/fstab, presumably to exclude
another mount on the same mountpoint.
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
Index: linux/fs/namespace.c
===================================================================
--- linux.orig/fs/namespace.c 2008-01-03 20:52:38.000000000 +0100
+++ linux/fs/namespace.c 2008-01-03 21:14:16.000000000 +0100
@@ -894,6 +894,27 @@ static int do_umount(struct vfsmount *mn
return retval;
}
+static bool is_mount_owner(struct vfsmount *mnt, uid_t uid)
+{
+ return (mnt->mnt_flags & MNT_USER) && mnt->mnt_uid == uid;
+}
+
+/*
+ * umount is permitted for
+ * - sysadmin
+ * - mount owner, if not forced umount
+ */
+static bool permit_umount(struct vfsmount *mnt, int flags)
+{
+ if (capable(CAP_SYS_ADMIN))
+ return true;
+
+ if (flags & MNT_FORCE)
+ return false;
+
+ return is_mount_owner(mnt, current->fsuid);
+}
+
/*
* Now umount can handle mount points as well as block devices.
* This is important for filesystems which use unnamed block devices.
@@ -917,7 +938,7 @@ asmlinkage long sys_umount(char __user *
goto dput_and_out;
retval = -EPERM;
- if (!capable(CAP_SYS_ADMIN))
+ if (!permit_umount(nd.path.mnt, flags))
goto dput_and_out;
retval = do_umount(nd.path.mnt, flags);
--
^ permalink raw reply [flat|nested] 62+ messages in thread* [patch 3/9] unprivileged mounts: account user mounts
2008-01-08 11:35 [patch 0/9] mount ownership and unprivileged mount syscall (v6) Miklos Szeredi
2008-01-08 11:35 ` [patch 1/9] unprivileged mounts: add user mounts to the kernel Miklos Szeredi
2008-01-08 11:35 ` [patch 2/9] unprivileged mounts: allow unprivileged umount Miklos Szeredi
@ 2008-01-08 11:35 ` Miklos Szeredi
[not found] ` <20080108113623.446872155-sUDqSbJrdHQHWmgEVkV9KA@public.gmane.org>
2008-01-14 21:53 ` Serge E. Hallyn
2008-01-08 11:35 ` [patch 4/9] unprivileged mounts: propagate error values from clone_mnt Miklos Szeredi
` (5 subsequent siblings)
8 siblings, 2 replies; 62+ messages in thread
From: Miklos Szeredi @ 2008-01-08 11:35 UTC (permalink / raw)
To: akpm, hch, serue, viro, ebiederm, kzak
Cc: linux-fsdevel, linux-kernel, containers, util-linux-ng
[-- Attachment #1: unprivileged-mounts-account-user-mounts.patch --]
[-- Type: text/plain, Size: 4213 bytes --]
From: Miklos Szeredi <mszeredi@suse.cz>
Add sysctl variables for accounting and limiting the number of user
mounts.
The maximum number of user mounts is set to 1024 by default. This
won't in itself enable user mounts, setting a mount to be owned by a
user is first needed
[akpm]
- don't use enumerated sysctls
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
Index: linux/Documentation/filesystems/proc.txt
===================================================================
--- linux.orig/Documentation/filesystems/proc.txt 2008-01-03 17:12:58.000000000 +0100
+++ linux/Documentation/filesystems/proc.txt 2008-01-03 21:15:35.000000000 +0100
@@ -1012,6 +1012,15 @@ reaches aio-max-nr then io_setup will fa
raising aio-max-nr does not result in the pre-allocation or re-sizing
of any kernel data structures.
+nr_user_mounts and max_user_mounts
+----------------------------------
+
+These represent the number of "user" mounts and the maximum number of
+"user" mounts respectively. User mounts may be created by
+unprivileged users. User mounts may also be created with sysadmin
+privileges on behalf of a user, in which case nr_user_mounts may
+exceed max_user_mounts.
+
2.2 /proc/sys/fs/binfmt_misc - Miscellaneous binary formats
-----------------------------------------------------------
Index: linux/fs/namespace.c
===================================================================
--- linux.orig/fs/namespace.c 2008-01-03 21:14:16.000000000 +0100
+++ linux/fs/namespace.c 2008-01-03 21:15:35.000000000 +0100
@@ -44,6 +44,9 @@ static struct list_head *mount_hashtable
static struct kmem_cache *mnt_cache __read_mostly;
static struct rw_semaphore namespace_sem;
+int nr_user_mounts;
+int max_user_mounts = 1024;
+
/* /sys/fs */
struct kobject *fs_kobj;
EXPORT_SYMBOL_GPL(fs_kobj);
@@ -477,11 +480,30 @@ static struct vfsmount *skip_mnt_tree(st
return p;
}
+static void dec_nr_user_mounts(void)
+{
+ spin_lock(&vfsmount_lock);
+ nr_user_mounts--;
+ spin_unlock(&vfsmount_lock);
+}
+
static void set_mnt_user(struct vfsmount *mnt)
{
BUG_ON(mnt->mnt_flags & MNT_USER);
mnt->mnt_uid = current->fsuid;
mnt->mnt_flags |= MNT_USER;
+ spin_lock(&vfsmount_lock);
+ nr_user_mounts++;
+ spin_unlock(&vfsmount_lock);
+}
+
+static void clear_mnt_user(struct vfsmount *mnt)
+{
+ if (mnt->mnt_flags & MNT_USER) {
+ mnt->mnt_uid = 0;
+ mnt->mnt_flags &= ~MNT_USER;
+ dec_nr_user_mounts();
+ }
}
static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
@@ -542,6 +564,7 @@ static inline void __mntput(struct vfsmo
*/
WARN_ON(atomic_read(&mnt->__mnt_writers));
dput(mnt->mnt_root);
+ clear_mnt_user(mnt);
free_vfsmnt(mnt);
deactivate_super(sb);
}
@@ -1306,6 +1329,7 @@ static int do_remount(struct nameidata *
else
err = do_remount_sb(sb, flags, data, 0);
if (!err) {
+ clear_mnt_user(nd->path.mnt);
nd->path.mnt->mnt_flags = mnt_flags;
if (flags & MS_SETUSER)
set_mnt_user(nd->path.mnt);
Index: linux/include/linux/fs.h
===================================================================
--- linux.orig/include/linux/fs.h 2008-01-03 20:52:38.000000000 +0100
+++ linux/include/linux/fs.h 2008-01-03 21:15:35.000000000 +0100
@@ -50,6 +50,9 @@ extern struct inodes_stat_t inodes_stat;
extern int leases_enable, lease_break_time;
+extern int nr_user_mounts;
+extern int max_user_mounts;
+
#ifdef CONFIG_DNOTIFY
extern int dir_notify_enable;
#endif
Index: linux/kernel/sysctl.c
===================================================================
--- linux.orig/kernel/sysctl.c 2008-01-03 17:13:22.000000000 +0100
+++ linux/kernel/sysctl.c 2008-01-03 21:15:35.000000000 +0100
@@ -1288,6 +1288,22 @@ static struct ctl_table fs_table[] = {
#endif
#endif
{
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "nr_user_mounts",
+ .data = &nr_user_mounts,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "max_user_mounts",
+ .data = &max_user_mounts,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
.ctl_name = KERN_SETUID_DUMPABLE,
.procname = "suid_dumpable",
.data = &suid_dumpable,
--
^ permalink raw reply [flat|nested] 62+ messages in thread[parent not found: <20080108113623.446872155-sUDqSbJrdHQHWmgEVkV9KA@public.gmane.org>]
* Re: [patch 3/9] unprivileged mounts: account user mounts
[not found] ` <20080108113623.446872155-sUDqSbJrdHQHWmgEVkV9KA@public.gmane.org>
@ 2008-01-08 18:18 ` Dave Hansen
2008-01-08 19:18 ` Miklos Szeredi
0 siblings, 1 reply; 62+ messages in thread
From: Dave Hansen @ 2008-01-08 18:18 UTC (permalink / raw)
To: Miklos Szeredi
Cc: akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b, hch-wEGCiKHe2LqWVfeAwA7xHQ,
serue-r/Jw6+rmf7HQT0dZR+AlfA, viro-rfM+Q5joDG/XmaaqVzeoHQ,
ebiederm-aS9lmoZGLiVWk0Htik3J/w, kzak-H+wXaHxf7aLQT0dZR+AlfA,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
containers-qjLDD68F18O7TbgM5vRIOg,
util-linux-ng-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA
On Tue, 2008-01-08 at 12:35 +0100, Miklos Szeredi wrote:
> plain text document attachment
> (unprivileged-mounts-account-user-mounts.patch)
> From: Miklos Szeredi <mszeredi-AlSwsSmVLrQ@public.gmane.org>
>
> Add sysctl variables for accounting and limiting the number of user
> mounts.
...
> +int nr_user_mounts;
> +int max_user_mounts = 1024;
Just from a containers point of view, I think this is something we'll
need to fix up in the near future if it stays in the current form.
Instead of having a global tracking, perhaps we could have a per-user
limit tracked in 'struct user'. The plans are to ensure that two
containers' users "dave" each have a different 'struct user', so that
seems to be a decent place to track it.
Also, is a read-only sysctl really the best way to get the number of
user mounts back out of the kernel? What would you use it for?
Do you need any special logic for setting 'max_user_mounts' in the case
where it gets set _below_ 'nr_user_mounts'?
> /* /sys/fs */
> struct kobject *fs_kobj;
> EXPORT_SYMBOL_GPL(fs_kobj);
> @@ -477,11 +480,30 @@ static struct vfsmount *skip_mnt_tree(st
> return p;
> }
>
> +static void dec_nr_user_mounts(void)
> +{
> + spin_lock(&vfsmount_lock);
> + nr_user_mounts--;
> + spin_unlock(&vfsmount_lock);
> +}
> +
> static void set_mnt_user(struct vfsmount *mnt)
> {
> BUG_ON(mnt->mnt_flags & MNT_USER);
> mnt->mnt_uid = current->fsuid;
> mnt->mnt_flags |= MNT_USER;
> + spin_lock(&vfsmount_lock);
> + nr_user_mounts++;
> + spin_unlock(&vfsmount_lock);
> +}
One little nitpick on the patch layout: It's a wee bit difficult to
audit how the set function is used vs the clear one when its users don't
come until the later patches. It might be worth introducing the users
here, too.
> +static void clear_mnt_user(struct vfsmount *mnt)
> +{
> + if (mnt->mnt_flags & MNT_USER) {
> + mnt->mnt_uid = 0;
> + mnt->mnt_flags &= ~MNT_USER;
> + dec_nr_user_mounts();
> + }
> }
>
> static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
> @@ -542,6 +564,7 @@ static inline void __mntput(struct vfsmo
> */
> WARN_ON(atomic_read(&mnt->__mnt_writers));
> dput(mnt->mnt_root);
> + clear_mnt_user(mnt);
> free_vfsmnt(mnt);
> deactivate_super(sb);
> }
> @@ -1306,6 +1329,7 @@ static int do_remount(struct nameidata *
> else
> err = do_remount_sb(sb, flags, data, 0);
> if (!err) {
> + clear_mnt_user(nd->path.mnt);
> nd->path.mnt->mnt_flags = mnt_flags;
> if (flags & MS_SETUSER)
> set_mnt_user(nd->path.mnt);
> Index: linux/include/linux/fs.h
> ===================================================================
> --- linux.orig/include/linux/fs.h 2008-01-03 20:52:38.000000000 +0100
> +++ linux/include/linux/fs.h 2008-01-03 21:15:35.000000000 +0100
> @@ -50,6 +50,9 @@ extern struct inodes_stat_t inodes_stat;
>
> extern int leases_enable, lease_break_time;
>
> +extern int nr_user_mounts;
> +extern int max_user_mounts;
> +
> #ifdef CONFIG_DNOTIFY
> extern int dir_notify_enable;
> #endif
> Index: linux/kernel/sysctl.c
> ===================================================================
> --- linux.orig/kernel/sysctl.c 2008-01-03 17:13:22.000000000 +0100
> +++ linux/kernel/sysctl.c 2008-01-03 21:15:35.000000000 +0100
> @@ -1288,6 +1288,22 @@ static struct ctl_table fs_table[] = {
> #endif
> #endif
> {
> + .ctl_name = CTL_UNNUMBERED,
> + .procname = "nr_user_mounts",
> + .data = &nr_user_mounts,
> + .maxlen = sizeof(int),
> + .mode = 0444,
> + .proc_handler = &proc_dointvec,
> + },
> + {
> + .ctl_name = CTL_UNNUMBERED,
> + .procname = "max_user_mounts",
> + .data = &max_user_mounts,
> + .maxlen = sizeof(int),
> + .mode = 0644,
> + .proc_handler = &proc_dointvec,
> + },
> + {
> .ctl_name = KERN_SETUID_DUMPABLE,
> .procname = "suid_dumpable",
> .data = &suid_dumpable,
>
> --
> _______________________________________________
> Containers mailing list
> Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
> https://lists.linux-foundation.org/mailman/listinfo/containers
-- Dave
^ permalink raw reply [flat|nested] 62+ messages in thread* Re: [patch 3/9] unprivileged mounts: account user mounts
2008-01-08 18:18 ` Dave Hansen
@ 2008-01-08 19:18 ` Miklos Szeredi
0 siblings, 0 replies; 62+ messages in thread
From: Miklos Szeredi @ 2008-01-08 19:18 UTC (permalink / raw)
To: haveblue-r/Jw6+rmf7HQT0dZR+AlfA
Cc: akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b, hch-wEGCiKHe2LqWVfeAwA7xHQ,
serue-r/Jw6+rmf7HQT0dZR+AlfA, viro-rfM+Q5joDG/XmaaqVzeoHQ,
ebiederm-aS9lmoZGLiVWk0Htik3J/w, kzak-H+wXaHxf7aLQT0dZR+AlfA,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
containers-qjLDD68F18O7TbgM5vRIOg,
util-linux-ng-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA
> On Tue, 2008-01-08 at 12:35 +0100, Miklos Szeredi wrote:
> > plain text document attachment
> > (unprivileged-mounts-account-user-mounts.patch)
> > From: Miklos Szeredi <mszeredi-AlSwsSmVLrQ@public.gmane.org>
> >
> > Add sysctl variables for accounting and limiting the number of user
> > mounts.
> ...
> > +int nr_user_mounts;
> > +int max_user_mounts = 1024;
>
> Just from a containers point of view, I think this is something we'll
> need to fix up in the near future if it stays in the current form.
>
> Instead of having a global tracking, perhaps we could have a per-user
> limit tracked in 'struct user'. The plans are to ensure that two
> containers' users "dave" each have a different 'struct user', so that
> seems to be a decent place to track it.
At one time there was a per-user accounting patch, but it was dropped,
because it was deemed an unnecessary additional compexity.
max_user_mounts is analogue to files_stat.max_files (which is a sysctl
tunable also), and it's purpose is really to make sure that a user is
not able to create an insane number of mounts, and not to acurately
limit normal usage.
So I'm not sure a per-container or per-user count is really needed.
> Also, is a read-only sysctl really the best way to get the number of
> user mounts back out of the kernel? What would you use it for?
Just to check, why I got that (EPERM or whatever) error for the mount
command.
> Do you need any special logic for setting 'max_user_mounts' in the case
> where it gets set _below_ 'nr_user_mounts'?
No, I don't think such corner cases really matter in this case.
> > /* /sys/fs */
> > struct kobject *fs_kobj;
> > EXPORT_SYMBOL_GPL(fs_kobj);
> > @@ -477,11 +480,30 @@ static struct vfsmount *skip_mnt_tree(st
> > return p;
> > }
> >
> > +static void dec_nr_user_mounts(void)
> > +{
> > + spin_lock(&vfsmount_lock);
> > + nr_user_mounts--;
> > + spin_unlock(&vfsmount_lock);
> > +}
> > +
> > static void set_mnt_user(struct vfsmount *mnt)
> > {
> > BUG_ON(mnt->mnt_flags & MNT_USER);
> > mnt->mnt_uid = current->fsuid;
> > mnt->mnt_flags |= MNT_USER;
> > + spin_lock(&vfsmount_lock);
> > + nr_user_mounts++;
> > + spin_unlock(&vfsmount_lock);
> > +}
>
> One little nitpick on the patch layout: It's a wee bit difficult to
> audit how the set function is used vs the clear one when its users don't
> come until the later patches. It might be worth introducing the users
> here, too.
Yeah, maybe some of the patches should be folded together. If a
resubmit is necessary I'll look into that.
Miklos
^ permalink raw reply [flat|nested] 62+ messages in thread
* Re: [patch 3/9] unprivileged mounts: account user mounts
2008-01-08 11:35 ` [patch 3/9] unprivileged mounts: account user mounts Miklos Szeredi
[not found] ` <20080108113623.446872155-sUDqSbJrdHQHWmgEVkV9KA@public.gmane.org>
@ 2008-01-14 21:53 ` Serge E. Hallyn
1 sibling, 0 replies; 62+ messages in thread
From: Serge E. Hallyn @ 2008-01-14 21:53 UTC (permalink / raw)
To: Miklos Szeredi
Cc: akpm, hch, serue, viro, ebiederm, kzak, linux-fsdevel,
linux-kernel, containers, util-linux-ng
Quoting Miklos Szeredi (miklos@szeredi.hu):
> From: Miklos Szeredi <mszeredi@suse.cz>
>
> Add sysctl variables for accounting and limiting the number of user
> mounts.
>
> The maximum number of user mounts is set to 1024 by default. This
> won't in itself enable user mounts, setting a mount to be owned by a
> user is first needed
>
> [akpm]
> - don't use enumerated sysctls
>
> Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Seems sane enough, given your responses to Dave.
Acked-by: Serge Hallyn <serue@us.ibm.com>
> ---
>
> Index: linux/Documentation/filesystems/proc.txt
> ===================================================================
> --- linux.orig/Documentation/filesystems/proc.txt 2008-01-03 17:12:58.000000000 +0100
> +++ linux/Documentation/filesystems/proc.txt 2008-01-03 21:15:35.000000000 +0100
> @@ -1012,6 +1012,15 @@ reaches aio-max-nr then io_setup will fa
> raising aio-max-nr does not result in the pre-allocation or re-sizing
> of any kernel data structures.
>
> +nr_user_mounts and max_user_mounts
> +----------------------------------
> +
> +These represent the number of "user" mounts and the maximum number of
> +"user" mounts respectively. User mounts may be created by
> +unprivileged users. User mounts may also be created with sysadmin
> +privileges on behalf of a user, in which case nr_user_mounts may
> +exceed max_user_mounts.
> +
> 2.2 /proc/sys/fs/binfmt_misc - Miscellaneous binary formats
> -----------------------------------------------------------
>
> Index: linux/fs/namespace.c
> ===================================================================
> --- linux.orig/fs/namespace.c 2008-01-03 21:14:16.000000000 +0100
> +++ linux/fs/namespace.c 2008-01-03 21:15:35.000000000 +0100
> @@ -44,6 +44,9 @@ static struct list_head *mount_hashtable
> static struct kmem_cache *mnt_cache __read_mostly;
> static struct rw_semaphore namespace_sem;
>
> +int nr_user_mounts;
> +int max_user_mounts = 1024;
> +
> /* /sys/fs */
> struct kobject *fs_kobj;
> EXPORT_SYMBOL_GPL(fs_kobj);
> @@ -477,11 +480,30 @@ static struct vfsmount *skip_mnt_tree(st
> return p;
> }
>
> +static void dec_nr_user_mounts(void)
> +{
> + spin_lock(&vfsmount_lock);
> + nr_user_mounts--;
> + spin_unlock(&vfsmount_lock);
> +}
> +
> static void set_mnt_user(struct vfsmount *mnt)
> {
> BUG_ON(mnt->mnt_flags & MNT_USER);
> mnt->mnt_uid = current->fsuid;
> mnt->mnt_flags |= MNT_USER;
> + spin_lock(&vfsmount_lock);
> + nr_user_mounts++;
> + spin_unlock(&vfsmount_lock);
> +}
> +
> +static void clear_mnt_user(struct vfsmount *mnt)
> +{
> + if (mnt->mnt_flags & MNT_USER) {
> + mnt->mnt_uid = 0;
> + mnt->mnt_flags &= ~MNT_USER;
> + dec_nr_user_mounts();
> + }
> }
>
> static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
> @@ -542,6 +564,7 @@ static inline void __mntput(struct vfsmo
> */
> WARN_ON(atomic_read(&mnt->__mnt_writers));
> dput(mnt->mnt_root);
> + clear_mnt_user(mnt);
> free_vfsmnt(mnt);
> deactivate_super(sb);
> }
> @@ -1306,6 +1329,7 @@ static int do_remount(struct nameidata *
> else
> err = do_remount_sb(sb, flags, data, 0);
> if (!err) {
> + clear_mnt_user(nd->path.mnt);
> nd->path.mnt->mnt_flags = mnt_flags;
> if (flags & MS_SETUSER)
> set_mnt_user(nd->path.mnt);
> Index: linux/include/linux/fs.h
> ===================================================================
> --- linux.orig/include/linux/fs.h 2008-01-03 20:52:38.000000000 +0100
> +++ linux/include/linux/fs.h 2008-01-03 21:15:35.000000000 +0100
> @@ -50,6 +50,9 @@ extern struct inodes_stat_t inodes_stat;
>
> extern int leases_enable, lease_break_time;
>
> +extern int nr_user_mounts;
> +extern int max_user_mounts;
> +
> #ifdef CONFIG_DNOTIFY
> extern int dir_notify_enable;
> #endif
> Index: linux/kernel/sysctl.c
> ===================================================================
> --- linux.orig/kernel/sysctl.c 2008-01-03 17:13:22.000000000 +0100
> +++ linux/kernel/sysctl.c 2008-01-03 21:15:35.000000000 +0100
> @@ -1288,6 +1288,22 @@ static struct ctl_table fs_table[] = {
> #endif
> #endif
> {
> + .ctl_name = CTL_UNNUMBERED,
> + .procname = "nr_user_mounts",
> + .data = &nr_user_mounts,
> + .maxlen = sizeof(int),
> + .mode = 0444,
> + .proc_handler = &proc_dointvec,
> + },
> + {
> + .ctl_name = CTL_UNNUMBERED,
> + .procname = "max_user_mounts",
> + .data = &max_user_mounts,
> + .maxlen = sizeof(int),
> + .mode = 0644,
> + .proc_handler = &proc_dointvec,
> + },
> + {
> .ctl_name = KERN_SETUID_DUMPABLE,
> .procname = "suid_dumpable",
> .data = &suid_dumpable,
>
> --
^ permalink raw reply [flat|nested] 62+ messages in thread
* [patch 4/9] unprivileged mounts: propagate error values from clone_mnt
2008-01-08 11:35 [patch 0/9] mount ownership and unprivileged mount syscall (v6) Miklos Szeredi
` (2 preceding siblings ...)
2008-01-08 11:35 ` [patch 3/9] unprivileged mounts: account user mounts Miklos Szeredi
@ 2008-01-08 11:35 ` Miklos Szeredi
[not found] ` <20080108113624.898035951-sUDqSbJrdHQHWmgEVkV9KA@public.gmane.org>
2008-01-08 11:35 ` [patch 5/9] unprivileged mounts: allow unprivileged bind mounts Miklos Szeredi
` (4 subsequent siblings)
8 siblings, 1 reply; 62+ messages in thread
From: Miklos Szeredi @ 2008-01-08 11:35 UTC (permalink / raw)
To: akpm, hch, serue, viro, ebiederm, kzak
Cc: linux-fsdevel, linux-kernel, containers, util-linux-ng
[-- Attachment #1: unprivileged-mounts-propagate-error-values-from-clone_mnt.patch --]
[-- Type: text/plain, Size: 5319 bytes --]
From: Miklos Szeredi <mszeredi@suse.cz>
Allow clone_mnt() to return errors other than ENOMEM. This will be used for
returning a different error value when the number of user mounts goes over the
limit.
Fix copy_tree() to return EPERM for unbindable mounts.
Don't propagate further from dup_mnt_ns() as that copy_tree() can only fail
with -ENOMEM.
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
Index: linux/fs/namespace.c
===================================================================
--- linux.orig/fs/namespace.c 2008-01-04 13:47:09.000000000 +0100
+++ linux/fs/namespace.c 2008-01-04 13:47:49.000000000 +0100
@@ -512,41 +512,42 @@ static struct vfsmount *clone_mnt(struct
struct super_block *sb = old->mnt_sb;
struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);
- if (mnt) {
- mnt->mnt_flags = old->mnt_flags;
- atomic_inc(&sb->s_active);
- mnt->mnt_sb = sb;
- mnt->mnt_root = dget(root);
- mnt->mnt_mountpoint = mnt->mnt_root;
- mnt->mnt_parent = mnt;
-
- /* don't copy the MNT_USER flag */
- mnt->mnt_flags &= ~MNT_USER;
- if (flag & CL_SETUSER)
- set_mnt_user(mnt);
-
- if (flag & CL_SLAVE) {
- list_add(&mnt->mnt_slave, &old->mnt_slave_list);
- mnt->mnt_master = old;
- CLEAR_MNT_SHARED(mnt);
- } else if (!(flag & CL_PRIVATE)) {
- if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old))
- list_add(&mnt->mnt_share, &old->mnt_share);
- if (IS_MNT_SLAVE(old))
- list_add(&mnt->mnt_slave, &old->mnt_slave);
- mnt->mnt_master = old->mnt_master;
- }
- if (flag & CL_MAKE_SHARED)
- set_mnt_shared(mnt);
+ if (!mnt)
+ return ERR_PTR(-ENOMEM);
- /* stick the duplicate mount on the same expiry list
- * as the original if that was on one */
- if (flag & CL_EXPIRE) {
- spin_lock(&vfsmount_lock);
- if (!list_empty(&old->mnt_expire))
- list_add(&mnt->mnt_expire, &old->mnt_expire);
- spin_unlock(&vfsmount_lock);
- }
+ mnt->mnt_flags = old->mnt_flags;
+ atomic_inc(&sb->s_active);
+ mnt->mnt_sb = sb;
+ mnt->mnt_root = dget(root);
+ mnt->mnt_mountpoint = mnt->mnt_root;
+ mnt->mnt_parent = mnt;
+
+ /* don't copy the MNT_USER flag */
+ mnt->mnt_flags &= ~MNT_USER;
+ if (flag & CL_SETUSER)
+ set_mnt_user(mnt);
+
+ if (flag & CL_SLAVE) {
+ list_add(&mnt->mnt_slave, &old->mnt_slave_list);
+ mnt->mnt_master = old;
+ CLEAR_MNT_SHARED(mnt);
+ } else if (!(flag & CL_PRIVATE)) {
+ if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old))
+ list_add(&mnt->mnt_share, &old->mnt_share);
+ if (IS_MNT_SLAVE(old))
+ list_add(&mnt->mnt_slave, &old->mnt_slave);
+ mnt->mnt_master = old->mnt_master;
+ }
+ if (flag & CL_MAKE_SHARED)
+ set_mnt_shared(mnt);
+
+ /* stick the duplicate mount on the same expiry list
+ * as the original if that was on one */
+ if (flag & CL_EXPIRE) {
+ spin_lock(&vfsmount_lock);
+ if (!list_empty(&old->mnt_expire))
+ list_add(&mnt->mnt_expire, &old->mnt_expire);
+ spin_unlock(&vfsmount_lock);
}
return mnt;
}
@@ -1021,11 +1022,11 @@ struct vfsmount *copy_tree(struct vfsmou
struct nameidata nd;
if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
- return NULL;
+ return ERR_PTR(-EPERM);
res = q = clone_mnt(mnt, dentry, flag);
- if (!q)
- goto Enomem;
+ if (IS_ERR(q))
+ goto error;
q->mnt_mountpoint = mnt->mnt_mountpoint;
p = mnt;
@@ -1046,8 +1047,8 @@ struct vfsmount *copy_tree(struct vfsmou
nd.path.mnt = q;
nd.path.dentry = p->mnt_mountpoint;
q = clone_mnt(p, p->mnt_root, flag);
- if (!q)
- goto Enomem;
+ if (IS_ERR(q))
+ goto error;
spin_lock(&vfsmount_lock);
list_add_tail(&q->mnt_list, &res->mnt_list);
attach_mnt(q, &nd);
@@ -1055,7 +1056,7 @@ struct vfsmount *copy_tree(struct vfsmou
}
}
return res;
-Enomem:
+ error:
if (res) {
LIST_HEAD(umount_list);
spin_lock(&vfsmount_lock);
@@ -1063,7 +1064,7 @@ Enomem:
spin_unlock(&vfsmount_lock);
release_mounts(&umount_list);
}
- return NULL;
+ return q;
}
struct vfsmount *collect_mounts(struct vfsmount *mnt, struct dentry *dentry)
@@ -1262,13 +1263,13 @@ static int do_loopback(struct nameidata
goto out;
clone_fl = (flags & MS_SETUSER) ? CL_SETUSER : 0;
- err = -ENOMEM;
if (flags & MS_REC)
mnt = copy_tree(old_nd.path.mnt, old_nd.path.dentry, clone_fl);
else
mnt = clone_mnt(old_nd.path.mnt, old_nd.path.dentry, clone_fl);
- if (!mnt)
+ err = PTR_ERR(mnt);
+ if (IS_ERR(mnt))
goto out;
err = graft_tree(mnt, nd);
@@ -1840,7 +1841,7 @@ static struct mnt_namespace *dup_mnt_ns(
/* First pass: copy the tree topology */
new_ns->root = copy_tree(mnt_ns->root, mnt_ns->root->mnt_root,
CL_COPY_ALL | CL_EXPIRE);
- if (!new_ns->root) {
+ if (IS_ERR(new_ns->root)) {
up_write(&namespace_sem);
kfree(new_ns);
return ERR_PTR(-ENOMEM);;
Index: linux/fs/pnode.c
===================================================================
--- linux.orig/fs/pnode.c 2008-01-04 13:45:46.000000000 +0100
+++ linux/fs/pnode.c 2008-01-04 13:47:49.000000000 +0100
@@ -189,8 +189,9 @@ int propagate_mnt(struct vfsmount *dest_
source = get_source(m, prev_dest_mnt, prev_src_mnt, &type);
- if (!(child = copy_tree(source, source->mnt_root, type))) {
- ret = -ENOMEM;
+ child = copy_tree(source, source->mnt_root, type);
+ if (IS_ERR(child)) {
+ ret = PTR_ERR(child);
list_splice(tree_list, tmp_list.prev);
goto out;
}
--
^ permalink raw reply [flat|nested] 62+ messages in thread* [patch 5/9] unprivileged mounts: allow unprivileged bind mounts
2008-01-08 11:35 [patch 0/9] mount ownership and unprivileged mount syscall (v6) Miklos Szeredi
` (3 preceding siblings ...)
2008-01-08 11:35 ` [patch 4/9] unprivileged mounts: propagate error values from clone_mnt Miklos Szeredi
@ 2008-01-08 11:35 ` Miklos Szeredi
2008-01-08 18:12 ` Dave Hansen
` (3 more replies)
2008-01-08 11:35 ` [patch 6/9] unprivileged mounts: allow unprivileged mounts Miklos Szeredi
` (3 subsequent siblings)
8 siblings, 4 replies; 62+ messages in thread
From: Miklos Szeredi @ 2008-01-08 11:35 UTC (permalink / raw)
To: akpm, hch, serue, viro, ebiederm, kzak
Cc: linux-fsdevel, linux-kernel, containers, util-linux-ng
[-- Attachment #1: unprivileged-mounts-allow-unprivileged-bind-mounts.patch --]
[-- Type: text/plain, Size: 4004 bytes --]
From: Miklos Szeredi <mszeredi@suse.cz>
Allow bind mounts to unprivileged users if the following conditions are met:
- mountpoint is not a symlink
- parent mount is owned by the user
- the number of user mounts is below the maximum
Unprivileged mounts imply MS_SETUSER, and will also have the "nosuid" and
"nodev" mount flags set.
In particular, if mounting process doesn't have CAP_SETUID capability,
then the "nosuid" flag will be added, and if it doesn't have CAP_MKNOD
capability, then the "nodev" flag will be added.
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
Index: linux/fs/namespace.c
===================================================================
--- linux.orig/fs/namespace.c 2008-01-04 13:47:49.000000000 +0100
+++ linux/fs/namespace.c 2008-01-04 13:48:01.000000000 +0100
@@ -487,11 +487,34 @@ static void dec_nr_user_mounts(void)
spin_unlock(&vfsmount_lock);
}
-static void set_mnt_user(struct vfsmount *mnt)
+static int reserve_user_mount(void)
+{
+ int err = 0;
+
+ spin_lock(&vfsmount_lock);
+ if (nr_user_mounts >= max_user_mounts && !capable(CAP_SYS_ADMIN))
+ err = -EPERM;
+ else
+ nr_user_mounts++;
+ spin_unlock(&vfsmount_lock);
+ return err;
+}
+
+static void __set_mnt_user(struct vfsmount *mnt)
{
BUG_ON(mnt->mnt_flags & MNT_USER);
mnt->mnt_uid = current->fsuid;
mnt->mnt_flags |= MNT_USER;
+
+ if (!capable(CAP_SETUID))
+ mnt->mnt_flags |= MNT_NOSUID;
+ if (!capable(CAP_MKNOD))
+ mnt->mnt_flags |= MNT_NODEV;
+}
+
+static void set_mnt_user(struct vfsmount *mnt)
+{
+ __set_mnt_user(mnt);
spin_lock(&vfsmount_lock);
nr_user_mounts++;
spin_unlock(&vfsmount_lock);
@@ -510,10 +533,16 @@ static struct vfsmount *clone_mnt(struct
int flag)
{
struct super_block *sb = old->mnt_sb;
- struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);
+ struct vfsmount *mnt;
+ if (flag & CL_SETUSER) {
+ int err = reserve_user_mount();
+ if (err)
+ return ERR_PTR(err);
+ }
+ mnt = alloc_vfsmnt(old->mnt_devname);
if (!mnt)
- return ERR_PTR(-ENOMEM);
+ goto alloc_failed;
mnt->mnt_flags = old->mnt_flags;
atomic_inc(&sb->s_active);
@@ -525,7 +554,7 @@ static struct vfsmount *clone_mnt(struct
/* don't copy the MNT_USER flag */
mnt->mnt_flags &= ~MNT_USER;
if (flag & CL_SETUSER)
- set_mnt_user(mnt);
+ __set_mnt_user(mnt);
if (flag & CL_SLAVE) {
list_add(&mnt->mnt_slave, &old->mnt_slave_list);
@@ -550,6 +579,11 @@ static struct vfsmount *clone_mnt(struct
spin_unlock(&vfsmount_lock);
}
return mnt;
+
+ alloc_failed:
+ if (flag & CL_SETUSER)
+ dec_nr_user_mounts();
+ return ERR_PTR(-ENOMEM);
}
static inline void __mntput(struct vfsmount *mnt)
@@ -986,22 +1020,26 @@ asmlinkage long sys_oldumount(char __use
#endif
-static int mount_is_safe(struct nameidata *nd)
+/*
+ * Conditions for unprivileged mounts are:
+ * - mountpoint is not a symlink
+ * - mountpoint is in a mount owned by the user
+ */
+static bool permit_mount(struct nameidata *nd, int *flags)
{
+ struct inode *inode = nd->path.dentry->d_inode;
+
if (capable(CAP_SYS_ADMIN))
- return 0;
- return -EPERM;
-#ifdef notyet
- if (S_ISLNK(nd->path.dentry->d_inode->i_mode))
- return -EPERM;
- if (nd->path.dentry->d_inode->i_mode & S_ISVTX) {
- if (current->uid != nd->path.dentry->d_inode->i_uid)
- return -EPERM;
- }
- if (vfs_permission(nd, MAY_WRITE))
- return -EPERM;
- return 0;
-#endif
+ return true;
+
+ if (S_ISLNK(inode->i_mode))
+ return false;
+
+ if (!is_mount_owner(nd->path.mnt, current->fsuid))
+ return false;
+
+ *flags |= MS_SETUSER;
+ return true;
}
static int lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
@@ -1245,9 +1283,10 @@ static int do_loopback(struct nameidata
int clone_fl;
struct nameidata old_nd;
struct vfsmount *mnt = NULL;
- int err = mount_is_safe(nd);
- if (err)
- return err;
+ int err;
+
+ if (!permit_mount(nd, &flags))
+ return -EPERM;
if (!old_name || !*old_name)
return -EINVAL;
err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
--
^ permalink raw reply [flat|nested] 62+ messages in thread* Re: [patch 5/9] unprivileged mounts: allow unprivileged bind mounts
2008-01-08 11:35 ` [patch 5/9] unprivileged mounts: allow unprivileged bind mounts Miklos Szeredi
@ 2008-01-08 18:12 ` Dave Hansen
2008-01-08 19:08 ` Miklos Szeredi
2008-01-08 18:26 ` Dave Hansen
` (2 subsequent siblings)
3 siblings, 1 reply; 62+ messages in thread
From: Dave Hansen @ 2008-01-08 18:12 UTC (permalink / raw)
To: Miklos Szeredi
Cc: akpm, hch, serue, viro, ebiederm, kzak, linux-fsdevel, containers,
util-linux-ng, linux-kernel
On Tue, 2008-01-08 at 12:35 +0100, Miklos Szeredi wrote:
> +static int reserve_user_mount(void)
> +{
> + int err = 0;
> +
> + spin_lock(&vfsmount_lock);
> + if (nr_user_mounts >= max_user_mounts && !capable(CAP_SYS_ADMIN))
> + err = -EPERM;
> + else
> + nr_user_mounts++;
> + spin_unlock(&vfsmount_lock);
> + return err;
> +}
Would -ENOSPC or -ENOMEM be a more descriptive error here?
-- Dave
^ permalink raw reply [flat|nested] 62+ messages in thread* Re: [patch 5/9] unprivileged mounts: allow unprivileged bind mounts
2008-01-08 18:12 ` Dave Hansen
@ 2008-01-08 19:08 ` Miklos Szeredi
[not found] ` <E1JCJoQ-0003g9-Nk-8f8m9JG5TPIdUIPVzhDTVZP2KDSNp7ea@public.gmane.org>
0 siblings, 1 reply; 62+ messages in thread
From: Miklos Szeredi @ 2008-01-08 19:08 UTC (permalink / raw)
To: haveblue
Cc: akpm, hch, serue, viro, ebiederm, kzak, linux-fsdevel, containers,
util-linux-ng, linux-kernel
> On Tue, 2008-01-08 at 12:35 +0100, Miklos Szeredi wrote:
> > +static int reserve_user_mount(void)
> > +{
> > + int err = 0;
> > +
> > + spin_lock(&vfsmount_lock);
> > + if (nr_user_mounts >= max_user_mounts && !capable(CAP_SYS_ADMIN))
> > + err = -EPERM;
> > + else
> > + nr_user_mounts++;
> > + spin_unlock(&vfsmount_lock);
> > + return err;
> > +}
>
> Would -ENOSPC or -ENOMEM be a more descriptive error here?
The logic behind EPERM, is that this failure is only for unprivileged
callers. ENOMEM is too specifically about OOM. It could be changed
to ENOSPC, ENFILE, EMFILE, or it could remain EPERM. What do others
think?
Miklos
^ permalink raw reply [flat|nested] 62+ messages in thread
* Re: [patch 5/9] unprivileged mounts: allow unprivileged bind mounts
2008-01-08 11:35 ` [patch 5/9] unprivileged mounts: allow unprivileged bind mounts Miklos Szeredi
2008-01-08 18:12 ` Dave Hansen
@ 2008-01-08 18:26 ` Dave Hansen
2008-01-08 19:21 ` Miklos Szeredi
[not found] ` <20080108113626.895583537-sUDqSbJrdHQHWmgEVkV9KA@public.gmane.org>
2008-01-14 22:42 ` Serge E. Hallyn
3 siblings, 1 reply; 62+ messages in thread
From: Dave Hansen @ 2008-01-08 18:26 UTC (permalink / raw)
To: Miklos Szeredi
Cc: akpm, hch, serue, viro, ebiederm, kzak, linux-fsdevel, containers,
util-linux-ng, linux-kernel
On Tue, 2008-01-08 at 12:35 +0100, Miklos Szeredi wrote:
> @@ -510,10 +533,16 @@ static struct vfsmount *clone_mnt(struct
> int flag)
> {
> struct super_block *sb = old->mnt_sb;
> - struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);
> + struct vfsmount *mnt;
>
> + if (flag & CL_SETUSER) {
> + int err = reserve_user_mount();
> + if (err)
> + return ERR_PTR(err);
> + }
> + mnt = alloc_vfsmnt(old->mnt_devname);
> if (!mnt)
> - return ERR_PTR(-ENOMEM);
> + goto alloc_failed;
>
> mnt->mnt_flags = old->mnt_flags;
> atomic_inc(&sb->s_active);
I think there's a little race here. We could have several users racing
to get to this point when nr_user_mounts==max_user_mounts-1. One user
wins the race and gets their mount reserved. The others get the error
out of reserve_user_mount(), and return.
But, the winner goes on to error out on some condition further down in
clone_mnt() and never actually instantiates the mount.
Do you think this is a problem?
I think just about the one solution is to block new mounters on a
sleepable lock until the race winner actually finishes their mount
operation.
-- Dave
^ permalink raw reply [flat|nested] 62+ messages in thread* Re: [patch 5/9] unprivileged mounts: allow unprivileged bind mounts
2008-01-08 18:26 ` Dave Hansen
@ 2008-01-08 19:21 ` Miklos Szeredi
0 siblings, 0 replies; 62+ messages in thread
From: Miklos Szeredi @ 2008-01-08 19:21 UTC (permalink / raw)
To: haveblue-r/Jw6+rmf7HQT0dZR+AlfA
Cc: miklos-sUDqSbJrdHQHWmgEVkV9KA,
akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b, hch-wEGCiKHe2LqWVfeAwA7xHQ,
serue-r/Jw6+rmf7HQT0dZR+AlfA, viro-rfM+Q5joDG/XmaaqVzeoHQ,
ebiederm-aS9lmoZGLiVWk0Htik3J/w, kzak-H+wXaHxf7aLQT0dZR+AlfA,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
containers-qjLDD68F18O7TbgM5vRIOg,
util-linux-ng-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA
> > @@ -510,10 +533,16 @@ static struct vfsmount *clone_mnt(struct
> > int flag)
> > {
> > struct super_block *sb = old->mnt_sb;
> > - struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);
> > + struct vfsmount *mnt;
> >
> > + if (flag & CL_SETUSER) {
> > + int err = reserve_user_mount();
> > + if (err)
> > + return ERR_PTR(err);
> > + }
> > + mnt = alloc_vfsmnt(old->mnt_devname);
> > if (!mnt)
> > - return ERR_PTR(-ENOMEM);
> > + goto alloc_failed;
> >
> > mnt->mnt_flags = old->mnt_flags;
> > atomic_inc(&sb->s_active);
>
> I think there's a little race here. We could have several users racing
> to get to this point when nr_user_mounts==max_user_mounts-1. One user
> wins the race and gets their mount reserved. The others get the error
> out of reserve_user_mount(), and return.
>
> But, the winner goes on to error out on some condition further down in
> clone_mnt() and never actually instantiates the mount.
>
> Do you think this is a problem?
For similar reasons as stated in the previous mail, I don't think this
matters. If nr_user_mounts is getting remotely close to
max_user_mounts, then something is wrong (or the max needs to be
raised anyway).
Thanks for the review, Dave!
Miklos
^ permalink raw reply [flat|nested] 62+ messages in thread
[parent not found: <20080108113626.895583537-sUDqSbJrdHQHWmgEVkV9KA@public.gmane.org>]
* Re: [patch 5/9] unprivileged mounts: allow unprivileged bind mounts
[not found] ` <20080108113626.895583537-sUDqSbJrdHQHWmgEVkV9KA@public.gmane.org>
@ 2008-01-10 4:47 ` Serge E. Hallyn
0 siblings, 0 replies; 62+ messages in thread
From: Serge E. Hallyn @ 2008-01-10 4:47 UTC (permalink / raw)
To: Miklos Szeredi
Cc: akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b, hch-wEGCiKHe2LqWVfeAwA7xHQ,
serue-r/Jw6+rmf7HQT0dZR+AlfA, viro-rfM+Q5joDG/XmaaqVzeoHQ,
ebiederm-aS9lmoZGLiVWk0Htik3J/w, kzak-H+wXaHxf7aLQT0dZR+AlfA,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
containers-qjLDD68F18O7TbgM5vRIOg,
util-linux-ng-u79uwXL29TY76Z2rM5mHXA
Quoting Miklos Szeredi (miklos-sUDqSbJrdHQHWmgEVkV9KA@public.gmane.org):
> From: Miklos Szeredi <mszeredi-AlSwsSmVLrQ@public.gmane.org>
>
> Allow bind mounts to unprivileged users if the following conditions are met:
>
> - mountpoint is not a symlink
> - parent mount is owned by the user
> - the number of user mounts is below the maximum
>
> Unprivileged mounts imply MS_SETUSER, and will also have the "nosuid" and
> "nodev" mount flags set.
>
> In particular, if mounting process doesn't have CAP_SETUID capability,
> then the "nosuid" flag will be added, and if it doesn't have CAP_MKNOD
> capability, then the "nodev" flag will be added.
That little part by itself is really needed in order to make the ability
to remove CAP_MKNOD from a process tree's bounding set meaningful.
Else instead of creating /dev/hda1, the user can just mount a filesystem
with hda1 existing on it. (Which is why I was surprised when one day
I found this code missing :)
But of course I'm a fan of the patchset altogether. I plan to review in
more detail early next week, but since I liked the previous submission I
don't see myself having any complaints, so I'm glad to see the reviews
by others.
thanks,
-serge
> Signed-off-by: Miklos Szeredi <mszeredi-AlSwsSmVLrQ@public.gmane.org>
> ---
>
> Index: linux/fs/namespace.c
> ===================================================================
> --- linux.orig/fs/namespace.c 2008-01-04 13:47:49.000000000 +0100
> +++ linux/fs/namespace.c 2008-01-04 13:48:01.000000000 +0100
> @@ -487,11 +487,34 @@ static void dec_nr_user_mounts(void)
> spin_unlock(&vfsmount_lock);
> }
>
> -static void set_mnt_user(struct vfsmount *mnt)
> +static int reserve_user_mount(void)
> +{
> + int err = 0;
> +
> + spin_lock(&vfsmount_lock);
> + if (nr_user_mounts >= max_user_mounts && !capable(CAP_SYS_ADMIN))
> + err = -EPERM;
> + else
> + nr_user_mounts++;
> + spin_unlock(&vfsmount_lock);
> + return err;
> +}
> +
> +static void __set_mnt_user(struct vfsmount *mnt)
> {
> BUG_ON(mnt->mnt_flags & MNT_USER);
> mnt->mnt_uid = current->fsuid;
> mnt->mnt_flags |= MNT_USER;
> +
> + if (!capable(CAP_SETUID))
> + mnt->mnt_flags |= MNT_NOSUID;
> + if (!capable(CAP_MKNOD))
> + mnt->mnt_flags |= MNT_NODEV;
> +}
> +
> +static void set_mnt_user(struct vfsmount *mnt)
> +{
> + __set_mnt_user(mnt);
> spin_lock(&vfsmount_lock);
> nr_user_mounts++;
> spin_unlock(&vfsmount_lock);
> @@ -510,10 +533,16 @@ static struct vfsmount *clone_mnt(struct
> int flag)
> {
> struct super_block *sb = old->mnt_sb;
> - struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);
> + struct vfsmount *mnt;
>
> + if (flag & CL_SETUSER) {
> + int err = reserve_user_mount();
> + if (err)
> + return ERR_PTR(err);
> + }
> + mnt = alloc_vfsmnt(old->mnt_devname);
> if (!mnt)
> - return ERR_PTR(-ENOMEM);
> + goto alloc_failed;
>
> mnt->mnt_flags = old->mnt_flags;
> atomic_inc(&sb->s_active);
> @@ -525,7 +554,7 @@ static struct vfsmount *clone_mnt(struct
> /* don't copy the MNT_USER flag */
> mnt->mnt_flags &= ~MNT_USER;
> if (flag & CL_SETUSER)
> - set_mnt_user(mnt);
> + __set_mnt_user(mnt);
>
> if (flag & CL_SLAVE) {
> list_add(&mnt->mnt_slave, &old->mnt_slave_list);
> @@ -550,6 +579,11 @@ static struct vfsmount *clone_mnt(struct
> spin_unlock(&vfsmount_lock);
> }
> return mnt;
> +
> + alloc_failed:
> + if (flag & CL_SETUSER)
> + dec_nr_user_mounts();
> + return ERR_PTR(-ENOMEM);
> }
>
> static inline void __mntput(struct vfsmount *mnt)
> @@ -986,22 +1020,26 @@ asmlinkage long sys_oldumount(char __use
>
> #endif
>
> -static int mount_is_safe(struct nameidata *nd)
> +/*
> + * Conditions for unprivileged mounts are:
> + * - mountpoint is not a symlink
> + * - mountpoint is in a mount owned by the user
> + */
> +static bool permit_mount(struct nameidata *nd, int *flags)
> {
> + struct inode *inode = nd->path.dentry->d_inode;
> +
> if (capable(CAP_SYS_ADMIN))
> - return 0;
> - return -EPERM;
> -#ifdef notyet
> - if (S_ISLNK(nd->path.dentry->d_inode->i_mode))
> - return -EPERM;
> - if (nd->path.dentry->d_inode->i_mode & S_ISVTX) {
> - if (current->uid != nd->path.dentry->d_inode->i_uid)
> - return -EPERM;
> - }
> - if (vfs_permission(nd, MAY_WRITE))
> - return -EPERM;
> - return 0;
> -#endif
> + return true;
> +
> + if (S_ISLNK(inode->i_mode))
> + return false;
> +
> + if (!is_mount_owner(nd->path.mnt, current->fsuid))
> + return false;
> +
> + *flags |= MS_SETUSER;
> + return true;
> }
>
> static int lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
> @@ -1245,9 +1283,10 @@ static int do_loopback(struct nameidata
> int clone_fl;
> struct nameidata old_nd;
> struct vfsmount *mnt = NULL;
> - int err = mount_is_safe(nd);
> - if (err)
> - return err;
> + int err;
> +
> + if (!permit_mount(nd, &flags))
> + return -EPERM;
> if (!old_name || !*old_name)
> return -EINVAL;
> err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
>
> --
^ permalink raw reply [flat|nested] 62+ messages in thread
* Re: [patch 5/9] unprivileged mounts: allow unprivileged bind mounts
2008-01-08 11:35 ` [patch 5/9] unprivileged mounts: allow unprivileged bind mounts Miklos Szeredi
` (2 preceding siblings ...)
[not found] ` <20080108113626.895583537-sUDqSbJrdHQHWmgEVkV9KA@public.gmane.org>
@ 2008-01-14 22:42 ` Serge E. Hallyn
3 siblings, 0 replies; 62+ messages in thread
From: Serge E. Hallyn @ 2008-01-14 22:42 UTC (permalink / raw)
To: Miklos Szeredi
Cc: akpm, hch, serue, viro, ebiederm, kzak, linux-fsdevel,
linux-kernel, containers, util-linux-ng
Quoting Miklos Szeredi (miklos@szeredi.hu):
> From: Miklos Szeredi <mszeredi@suse.cz>
>
> Allow bind mounts to unprivileged users if the following conditions are met:
>
> - mountpoint is not a symlink
> - parent mount is owned by the user
> - the number of user mounts is below the maximum
>
> Unprivileged mounts imply MS_SETUSER, and will also have the "nosuid" and
> "nodev" mount flags set.
>
> In particular, if mounting process doesn't have CAP_SETUID capability,
> then the "nosuid" flag will be added, and if it doesn't have CAP_MKNOD
> capability, then the "nodev" flag will be added.
>
> Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Acked-by: Serge Hallyn <serue@us.ibm.com>
> ---
>
> Index: linux/fs/namespace.c
> ===================================================================
> --- linux.orig/fs/namespace.c 2008-01-04 13:47:49.000000000 +0100
> +++ linux/fs/namespace.c 2008-01-04 13:48:01.000000000 +0100
> @@ -487,11 +487,34 @@ static void dec_nr_user_mounts(void)
> spin_unlock(&vfsmount_lock);
> }
>
> -static void set_mnt_user(struct vfsmount *mnt)
> +static int reserve_user_mount(void)
> +{
> + int err = 0;
> +
> + spin_lock(&vfsmount_lock);
> + if (nr_user_mounts >= max_user_mounts && !capable(CAP_SYS_ADMIN))
> + err = -EPERM;
> + else
> + nr_user_mounts++;
> + spin_unlock(&vfsmount_lock);
> + return err;
> +}
> +
> +static void __set_mnt_user(struct vfsmount *mnt)
> {
> BUG_ON(mnt->mnt_flags & MNT_USER);
> mnt->mnt_uid = current->fsuid;
> mnt->mnt_flags |= MNT_USER;
> +
> + if (!capable(CAP_SETUID))
> + mnt->mnt_flags |= MNT_NOSUID;
> + if (!capable(CAP_MKNOD))
> + mnt->mnt_flags |= MNT_NODEV;
> +}
> +
> +static void set_mnt_user(struct vfsmount *mnt)
> +{
> + __set_mnt_user(mnt);
> spin_lock(&vfsmount_lock);
> nr_user_mounts++;
> spin_unlock(&vfsmount_lock);
> @@ -510,10 +533,16 @@ static struct vfsmount *clone_mnt(struct
> int flag)
> {
> struct super_block *sb = old->mnt_sb;
> - struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);
> + struct vfsmount *mnt;
>
> + if (flag & CL_SETUSER) {
> + int err = reserve_user_mount();
> + if (err)
> + return ERR_PTR(err);
> + }
> + mnt = alloc_vfsmnt(old->mnt_devname);
> if (!mnt)
> - return ERR_PTR(-ENOMEM);
> + goto alloc_failed;
>
> mnt->mnt_flags = old->mnt_flags;
> atomic_inc(&sb->s_active);
> @@ -525,7 +554,7 @@ static struct vfsmount *clone_mnt(struct
> /* don't copy the MNT_USER flag */
> mnt->mnt_flags &= ~MNT_USER;
> if (flag & CL_SETUSER)
> - set_mnt_user(mnt);
> + __set_mnt_user(mnt);
>
> if (flag & CL_SLAVE) {
> list_add(&mnt->mnt_slave, &old->mnt_slave_list);
> @@ -550,6 +579,11 @@ static struct vfsmount *clone_mnt(struct
> spin_unlock(&vfsmount_lock);
> }
> return mnt;
> +
> + alloc_failed:
> + if (flag & CL_SETUSER)
> + dec_nr_user_mounts();
> + return ERR_PTR(-ENOMEM);
> }
>
> static inline void __mntput(struct vfsmount *mnt)
> @@ -986,22 +1020,26 @@ asmlinkage long sys_oldumount(char __use
>
> #endif
>
> -static int mount_is_safe(struct nameidata *nd)
> +/*
> + * Conditions for unprivileged mounts are:
> + * - mountpoint is not a symlink
> + * - mountpoint is in a mount owned by the user
> + */
> +static bool permit_mount(struct nameidata *nd, int *flags)
> {
> + struct inode *inode = nd->path.dentry->d_inode;
> +
> if (capable(CAP_SYS_ADMIN))
> - return 0;
> - return -EPERM;
> -#ifdef notyet
> - if (S_ISLNK(nd->path.dentry->d_inode->i_mode))
> - return -EPERM;
> - if (nd->path.dentry->d_inode->i_mode & S_ISVTX) {
> - if (current->uid != nd->path.dentry->d_inode->i_uid)
> - return -EPERM;
> - }
> - if (vfs_permission(nd, MAY_WRITE))
> - return -EPERM;
> - return 0;
> -#endif
> + return true;
> +
> + if (S_ISLNK(inode->i_mode))
> + return false;
> +
> + if (!is_mount_owner(nd->path.mnt, current->fsuid))
> + return false;
> +
> + *flags |= MS_SETUSER;
> + return true;
> }
>
> static int lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
> @@ -1245,9 +1283,10 @@ static int do_loopback(struct nameidata
> int clone_fl;
> struct nameidata old_nd;
> struct vfsmount *mnt = NULL;
> - int err = mount_is_safe(nd);
> - if (err)
> - return err;
> + int err;
> +
> + if (!permit_mount(nd, &flags))
> + return -EPERM;
> if (!old_name || !*old_name)
> return -EINVAL;
> err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
>
> --
^ permalink raw reply [flat|nested] 62+ messages in thread
* [patch 6/9] unprivileged mounts: allow unprivileged mounts
2008-01-08 11:35 [patch 0/9] mount ownership and unprivileged mount syscall (v6) Miklos Szeredi
` (4 preceding siblings ...)
2008-01-08 11:35 ` [patch 5/9] unprivileged mounts: allow unprivileged bind mounts Miklos Szeredi
@ 2008-01-08 11:35 ` Miklos Szeredi
2008-01-09 11:11 ` Karel Zak
2008-01-14 22:58 ` Serge E. Hallyn
2008-01-08 11:35 ` [patch 7/9] unprivileged mounts: allow unprivileged fuse mounts Miklos Szeredi
` (2 subsequent siblings)
8 siblings, 2 replies; 62+ messages in thread
From: Miklos Szeredi @ 2008-01-08 11:35 UTC (permalink / raw)
To: akpm, hch, serue, viro, ebiederm, kzak
Cc: linux-fsdevel, linux-kernel, containers, util-linux-ng
[-- Attachment #1: unprivileged-mounts-allow-unprivileged-mounts.patch --]
[-- Type: text/plain, Size: 5594 bytes --]
From: Miklos Szeredi <mszeredi@suse.cz>
Define a new fs flag FS_SAFE, which denotes, that unprivileged mounting of
this filesystem may not constitute a security problem.
Since most filesystems haven't been designed with unprivileged mounting in
mind, a thorough audit is needed before setting this flag.
For "safe" filesystems also allow unprivileged forced unmounting.
Move subtype handling from do_kern_mount() into do_new_mount(). All
other callers are kernel-internal and do not need subtype support.
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
Index: linux/fs/namespace.c
===================================================================
--- linux.orig/fs/namespace.c 2008-01-03 21:20:11.000000000 +0100
+++ linux/fs/namespace.c 2008-01-03 21:21:06.000000000 +0100
@@ -960,14 +960,16 @@ static bool is_mount_owner(struct vfsmou
/*
* umount is permitted for
* - sysadmin
- * - mount owner, if not forced umount
+ * - mount owner
+ * o if not forced umount,
+ * o if forced umount, and filesystem is "safe"
*/
static bool permit_umount(struct vfsmount *mnt, int flags)
{
if (capable(CAP_SYS_ADMIN))
return true;
- if (flags & MNT_FORCE)
+ if ((flags & MNT_FORCE) && !(mnt->mnt_sb->s_type->fs_flags & FS_SAFE))
return false;
return is_mount_owner(mnt, current->fsuid);
@@ -1025,13 +1027,17 @@ asmlinkage long sys_oldumount(char __use
* - mountpoint is not a symlink
* - mountpoint is in a mount owned by the user
*/
-static bool permit_mount(struct nameidata *nd, int *flags)
+static bool permit_mount(struct nameidata *nd, struct file_system_type *type,
+ int *flags)
{
struct inode *inode = nd->path.dentry->d_inode;
if (capable(CAP_SYS_ADMIN))
return true;
+ if (type && !(type->fs_flags & FS_SAFE))
+ return false;
+
if (S_ISLNK(inode->i_mode))
return false;
@@ -1285,7 +1291,7 @@ static int do_loopback(struct nameidata
struct vfsmount *mnt = NULL;
int err;
- if (!permit_mount(nd, &flags))
+ if (!permit_mount(nd, NULL, &flags))
return -EPERM;
if (!old_name || !*old_name)
return -EINVAL;
@@ -1466,30 +1472,76 @@ out:
return err;
}
+static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
+{
+ int err;
+ const char *subtype = strchr(fstype, '.');
+ if (subtype) {
+ subtype++;
+ err = -EINVAL;
+ if (!subtype[0])
+ goto err;
+ } else
+ subtype = "";
+
+ mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
+ err = -ENOMEM;
+ if (!mnt->mnt_sb->s_subtype)
+ goto err;
+ return mnt;
+
+ err:
+ mntput(mnt);
+ return ERR_PTR(err);
+}
+
/*
* create a new mount for userspace and request it to be added into the
* namespace's tree
*/
-static int do_new_mount(struct nameidata *nd, char *type, int flags,
+static int do_new_mount(struct nameidata *nd, char *fstype, int flags,
int mnt_flags, char *name, void *data)
{
+ int err;
struct vfsmount *mnt;
+ struct file_system_type *type;
- if (!type || !memchr(type, 0, PAGE_SIZE))
+ if (!fstype || !memchr(fstype, 0, PAGE_SIZE))
return -EINVAL;
- /* we need capabilities... */
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- mnt = do_kern_mount(type, flags & ~MS_SETUSER, name, data);
- if (IS_ERR(mnt))
+ type = get_fs_type(fstype);
+ if (!type)
+ return -ENODEV;
+
+ err = -EPERM;
+ if (!permit_mount(nd, type, &flags))
+ goto out_put_filesystem;
+
+ if (flags & MS_SETUSER) {
+ err = reserve_user_mount();
+ if (err)
+ goto out_put_filesystem;
+ }
+
+ mnt = vfs_kern_mount(type, flags & ~MS_SETUSER, name, data);
+ if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
+ !mnt->mnt_sb->s_subtype)
+ mnt = fs_set_subtype(mnt, fstype);
+ put_filesystem(type);
+ if (IS_ERR(mnt)) {
+ if (flags & MS_SETUSER)
+ dec_nr_user_mounts();
return PTR_ERR(mnt);
+ }
if (flags & MS_SETUSER)
- set_mnt_user(mnt);
+ __set_mnt_user(mnt);
return do_add_mount(mnt, nd, mnt_flags, NULL);
+
+ out_put_filesystem:
+ put_filesystem(type);
+ return err;
}
/*
@@ -1520,7 +1572,7 @@ int do_add_mount(struct vfsmount *newmnt
if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
goto unlock;
- /* MNT_USER was set earlier */
+ /* some flags may have been set earlier */
newmnt->mnt_flags |= mnt_flags;
if ((err = graft_tree(newmnt, nd)))
goto unlock;
Index: linux/include/linux/fs.h
===================================================================
--- linux.orig/include/linux/fs.h 2008-01-03 21:15:35.000000000 +0100
+++ linux/include/linux/fs.h 2008-01-03 21:21:06.000000000 +0100
@@ -96,6 +96,7 @@ extern int dir_notify_enable;
#define FS_REQUIRES_DEV 1
#define FS_BINARY_MOUNTDATA 2
#define FS_HAS_SUBTYPE 4
+#define FS_SAFE 8 /* Safe to mount by unprivileged users */
#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move()
* during rename() internally.
Index: linux/fs/super.c
===================================================================
--- linux.orig/fs/super.c 2008-01-02 21:42:10.000000000 +0100
+++ linux/fs/super.c 2008-01-03 21:21:06.000000000 +0100
@@ -906,29 +906,6 @@ out:
EXPORT_SYMBOL_GPL(vfs_kern_mount);
-static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
-{
- int err;
- const char *subtype = strchr(fstype, '.');
- if (subtype) {
- subtype++;
- err = -EINVAL;
- if (!subtype[0])
- goto err;
- } else
- subtype = "";
-
- mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
- err = -ENOMEM;
- if (!mnt->mnt_sb->s_subtype)
- goto err;
- return mnt;
-
- err:
- mntput(mnt);
- return ERR_PTR(err);
-}
^ permalink raw reply [flat|nested] 62+ messages in thread* Re: [patch 6/9] unprivileged mounts: allow unprivileged mounts
2008-01-08 11:35 ` [patch 6/9] unprivileged mounts: allow unprivileged mounts Miklos Szeredi
@ 2008-01-09 11:11 ` Karel Zak
[not found] ` <20080109111120.GI3926-CxBs/XhZ2BtHjqfyn1fVYA@public.gmane.org>
2008-01-14 22:58 ` Serge E. Hallyn
1 sibling, 1 reply; 62+ messages in thread
From: Karel Zak @ 2008-01-09 11:11 UTC (permalink / raw)
To: Miklos Szeredi
Cc: akpm, hch, serue, viro, ebiederm, linux-fsdevel, linux-kernel,
containers, util-linux-ng
On Tue, Jan 08, 2008 at 12:35:08PM +0100, Miklos Szeredi wrote:
> Define a new fs flag FS_SAFE, which denotes, that unprivileged mounting of
> this filesystem may not constitute a security problem.
>
> Since most filesystems haven't been designed with unprivileged mounting in
> mind, a thorough audit is needed before setting this flag.
>
> For "safe" filesystems also allow unprivileged forced unmounting.
What about to list "safe" filesystems anywhere in /proc/fs/ ? I think
it's very important information for admins.
Note, your patch for mount(8) is always trying to use unprivileged
mount(2) for non-root users. It's overkill when unprivileged mounts are
supported for bind mounts and fuse only. It would be nice to check
if FS is "safe" before switch to unprivileged mode.
The "safe" definition is also very subjective and it depends on your
level of paranoia. There should be a way (e.g. /proc) how control and
modify the list of "safe" filesystems. For example I have no problem
to mark cifs as "safe" for my home server.
Karel
--
Karel Zak <kzak@redhat.com>
^ permalink raw reply [flat|nested] 62+ messages in thread* Re: [patch 6/9] unprivileged mounts: allow unprivileged mounts
2008-01-08 11:35 ` [patch 6/9] unprivileged mounts: allow unprivileged mounts Miklos Szeredi
2008-01-09 11:11 ` Karel Zak
@ 2008-01-14 22:58 ` Serge E. Hallyn
1 sibling, 0 replies; 62+ messages in thread
From: Serge E. Hallyn @ 2008-01-14 22:58 UTC (permalink / raw)
To: Miklos Szeredi
Cc: akpm, hch, serue, viro, ebiederm, kzak, linux-fsdevel,
linux-kernel, containers, util-linux-ng
Quoting Miklos Szeredi (miklos@szeredi.hu):
> From: Miklos Szeredi <mszeredi@suse.cz>
>
> Define a new fs flag FS_SAFE, which denotes, that unprivileged mounting of
> this filesystem may not constitute a security problem.
>
> Since most filesystems haven't been designed with unprivileged mounting in
> mind, a thorough audit is needed before setting this flag.
>
> For "safe" filesystems also allow unprivileged forced unmounting.
>
> Move subtype handling from do_kern_mount() into do_new_mount(). All
> other callers are kernel-internal and do not need subtype support.
>
> Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
This patch itself doesn't assign FS_SAFE to any filesystems, so
presuming that there is such a thing as an fs safe for users to
mount, and/or users sign their systems away through a sysctl,
this patch in itself appears right.
Acked-by: Serge Hallyn <serue@us.ibm.com>
> ---
>
> Index: linux/fs/namespace.c
> ===================================================================
> --- linux.orig/fs/namespace.c 2008-01-03 21:20:11.000000000 +0100
> +++ linux/fs/namespace.c 2008-01-03 21:21:06.000000000 +0100
> @@ -960,14 +960,16 @@ static bool is_mount_owner(struct vfsmou
> /*
> * umount is permitted for
> * - sysadmin
> - * - mount owner, if not forced umount
> + * - mount owner
> + * o if not forced umount,
> + * o if forced umount, and filesystem is "safe"
> */
> static bool permit_umount(struct vfsmount *mnt, int flags)
> {
> if (capable(CAP_SYS_ADMIN))
> return true;
>
> - if (flags & MNT_FORCE)
> + if ((flags & MNT_FORCE) && !(mnt->mnt_sb->s_type->fs_flags & FS_SAFE))
> return false;
>
> return is_mount_owner(mnt, current->fsuid);
> @@ -1025,13 +1027,17 @@ asmlinkage long sys_oldumount(char __use
> * - mountpoint is not a symlink
> * - mountpoint is in a mount owned by the user
> */
> -static bool permit_mount(struct nameidata *nd, int *flags)
> +static bool permit_mount(struct nameidata *nd, struct file_system_type *type,
> + int *flags)
> {
> struct inode *inode = nd->path.dentry->d_inode;
>
> if (capable(CAP_SYS_ADMIN))
> return true;
>
> + if (type && !(type->fs_flags & FS_SAFE))
> + return false;
> +
> if (S_ISLNK(inode->i_mode))
> return false;
>
> @@ -1285,7 +1291,7 @@ static int do_loopback(struct nameidata
> struct vfsmount *mnt = NULL;
> int err;
>
> - if (!permit_mount(nd, &flags))
> + if (!permit_mount(nd, NULL, &flags))
> return -EPERM;
> if (!old_name || !*old_name)
> return -EINVAL;
> @@ -1466,30 +1472,76 @@ out:
> return err;
> }
>
> +static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
> +{
> + int err;
> + const char *subtype = strchr(fstype, '.');
> + if (subtype) {
> + subtype++;
> + err = -EINVAL;
> + if (!subtype[0])
> + goto err;
> + } else
> + subtype = "";
> +
> + mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
> + err = -ENOMEM;
> + if (!mnt->mnt_sb->s_subtype)
> + goto err;
> + return mnt;
> +
> + err:
> + mntput(mnt);
> + return ERR_PTR(err);
> +}
> +
> /*
> * create a new mount for userspace and request it to be added into the
> * namespace's tree
> */
> -static int do_new_mount(struct nameidata *nd, char *type, int flags,
> +static int do_new_mount(struct nameidata *nd, char *fstype, int flags,
> int mnt_flags, char *name, void *data)
> {
> + int err;
> struct vfsmount *mnt;
> + struct file_system_type *type;
>
> - if (!type || !memchr(type, 0, PAGE_SIZE))
> + if (!fstype || !memchr(fstype, 0, PAGE_SIZE))
> return -EINVAL;
>
> - /* we need capabilities... */
> - if (!capable(CAP_SYS_ADMIN))
> - return -EPERM;
> -
> - mnt = do_kern_mount(type, flags & ~MS_SETUSER, name, data);
> - if (IS_ERR(mnt))
> + type = get_fs_type(fstype);
> + if (!type)
> + return -ENODEV;
> +
> + err = -EPERM;
> + if (!permit_mount(nd, type, &flags))
> + goto out_put_filesystem;
> +
> + if (flags & MS_SETUSER) {
> + err = reserve_user_mount();
> + if (err)
> + goto out_put_filesystem;
> + }
> +
> + mnt = vfs_kern_mount(type, flags & ~MS_SETUSER, name, data);
> + if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
> + !mnt->mnt_sb->s_subtype)
> + mnt = fs_set_subtype(mnt, fstype);
> + put_filesystem(type);
> + if (IS_ERR(mnt)) {
> + if (flags & MS_SETUSER)
> + dec_nr_user_mounts();
> return PTR_ERR(mnt);
> + }
>
> if (flags & MS_SETUSER)
> - set_mnt_user(mnt);
> + __set_mnt_user(mnt);
>
> return do_add_mount(mnt, nd, mnt_flags, NULL);
> +
> + out_put_filesystem:
> + put_filesystem(type);
> + return err;
> }
>
> /*
> @@ -1520,7 +1572,7 @@ int do_add_mount(struct vfsmount *newmnt
> if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
> goto unlock;
>
> - /* MNT_USER was set earlier */
> + /* some flags may have been set earlier */
> newmnt->mnt_flags |= mnt_flags;
> if ((err = graft_tree(newmnt, nd)))
> goto unlock;
> Index: linux/include/linux/fs.h
> ===================================================================
> --- linux.orig/include/linux/fs.h 2008-01-03 21:15:35.000000000 +0100
> +++ linux/include/linux/fs.h 2008-01-03 21:21:06.000000000 +0100
> @@ -96,6 +96,7 @@ extern int dir_notify_enable;
> #define FS_REQUIRES_DEV 1
> #define FS_BINARY_MOUNTDATA 2
> #define FS_HAS_SUBTYPE 4
> +#define FS_SAFE 8 /* Safe to mount by unprivileged users */
> #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
> #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move()
> * during rename() internally.
> Index: linux/fs/super.c
> ===================================================================
> --- linux.orig/fs/super.c 2008-01-02 21:42:10.000000000 +0100
> +++ linux/fs/super.c 2008-01-03 21:21:06.000000000 +0100
> @@ -906,29 +906,6 @@ out:
>
> EXPORT_SYMBOL_GPL(vfs_kern_mount);
>
> -static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
> -{
> - int err;
> - const char *subtype = strchr(fstype, '.');
> - if (subtype) {
> - subtype++;
> - err = -EINVAL;
> - if (!subtype[0])
> - goto err;
> - } else
> - subtype = "";
> -
> - mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
> - err = -ENOMEM;
> - if (!mnt->mnt_sb->s_subtype)
> - goto err;
> - return mnt;
> -
> - err:
> - mntput(mnt);
> - return ERR_PTR(err);
> -}
> -
> struct vfsmount *
> do_kern_mount(const char *fstype, int flags, const char *name, void *data)
> {
> @@ -937,9 +914,6 @@ do_kern_mount(const char *fstype, int fl
> if (!type)
> return ERR_PTR(-ENODEV);
> mnt = vfs_kern_mount(type, flags, name, data);
> - if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
> - !mnt->mnt_sb->s_subtype)
> - mnt = fs_set_subtype(mnt, fstype);
> put_filesystem(type);
> return mnt;
> }
>
> --
^ permalink raw reply [flat|nested] 62+ messages in thread
* [patch 7/9] unprivileged mounts: allow unprivileged fuse mounts
2008-01-08 11:35 [patch 0/9] mount ownership and unprivileged mount syscall (v6) Miklos Szeredi
` (5 preceding siblings ...)
2008-01-08 11:35 ` [patch 6/9] unprivileged mounts: allow unprivileged mounts Miklos Szeredi
@ 2008-01-08 11:35 ` Miklos Szeredi
2008-01-08 21:46 ` Pavel Machek
[not found] ` <20080108113630.861045063-sUDqSbJrdHQHWmgEVkV9KA@public.gmane.org>
2008-01-08 11:35 ` [patch 8/9] unprivileged mounts: propagation: inherit owner from parent Miklos Szeredi
2008-01-08 11:35 ` [patch 9/9] unprivileged mounts: add "no submounts" flag Miklos Szeredi
8 siblings, 2 replies; 62+ messages in thread
From: Miklos Szeredi @ 2008-01-08 11:35 UTC (permalink / raw)
To: akpm, hch, serue, viro, ebiederm, kzak
Cc: linux-fsdevel, linux-kernel, containers, util-linux-ng
[-- Attachment #1: unprivileged-mounts-allow-unprivileged-fuse-mounts.patch --]
[-- Type: text/plain, Size: 2598 bytes --]
From: Miklos Szeredi <mszeredi@suse.cz>
Use FS_SAFE for "fuse" fs type, but not for "fuseblk".
FUSE was designed from the beginning to be safe for unprivileged users. This
has also been verified in practice over many years. In addition unprivileged
mounts require the parent mount to be owned by the user, which is more strict
than the current userspace policy.
This will enable future installations to remove the suid-root fusermount
utility.
Don't require the "user_id=" and "group_id=" options for unprivileged mounts,
but if they are present, verify them for sanity.
Disallow the "allow_other" option for unprivileged mounts.
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
Index: linux/fs/fuse/inode.c
===================================================================
--- linux.orig/fs/fuse/inode.c 2008-01-03 17:13:13.000000000 +0100
+++ linux/fs/fuse/inode.c 2008-01-03 21:28:01.000000000 +0100
@@ -357,6 +357,19 @@ static int parse_fuse_opt(char *opt, str
d->max_read = ~0;
d->blksize = 512;
+ /*
+ * For unprivileged mounts use current uid/gid. Still allow
+ * "user_id" and "group_id" options for compatibility, but
+ * only if they match these values.
+ */
+ if (!capable(CAP_SYS_ADMIN)) {
+ d->user_id = current->uid;
+ d->user_id_present = 1;
+ d->group_id = current->gid;
+ d->group_id_present = 1;
+
+ }
+
while ((p = strsep(&opt, ",")) != NULL) {
int token;
int value;
@@ -385,6 +398,8 @@ static int parse_fuse_opt(char *opt, str
case OPT_USER_ID:
if (match_int(&args[0], &value))
return 0;
+ if (d->user_id_present && d->user_id != value)
+ return 0;
d->user_id = value;
d->user_id_present = 1;
break;
@@ -392,6 +407,8 @@ static int parse_fuse_opt(char *opt, str
case OPT_GROUP_ID:
if (match_int(&args[0], &value))
return 0;
+ if (d->group_id_present && d->group_id != value)
+ return 0;
d->group_id = value;
d->group_id_present = 1;
break;
@@ -596,6 +613,10 @@ static int fuse_fill_super(struct super_
if (!parse_fuse_opt((char *) data, &d, is_bdev))
return -EINVAL;
+ /* This is a privileged option */
+ if ((d.flags & FUSE_ALLOW_OTHER) && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
if (is_bdev) {
#ifdef CONFIG_BLOCK
if (!sb_set_blocksize(sb, d.blksize))
@@ -696,9 +717,9 @@ static int fuse_get_sb(struct file_syste
static struct file_system_type fuse_fs_type = {
.owner = THIS_MODULE,
.name = "fuse",
- .fs_flags = FS_HAS_SUBTYPE,
.get_sb = fuse_get_sb,
.kill_sb = kill_anon_super,
+ .fs_flags = FS_HAS_SUBTYPE | FS_SAFE,
};
#ifdef CONFIG_BLOCK
--
^ permalink raw reply [flat|nested] 62+ messages in thread* Re: [patch 7/9] unprivileged mounts: allow unprivileged fuse mounts
2008-01-08 11:35 ` [patch 7/9] unprivileged mounts: allow unprivileged fuse mounts Miklos Szeredi
@ 2008-01-08 21:46 ` Pavel Machek
[not found] ` <20080108214625.GE5050-+ZI9xUNit7I@public.gmane.org>
[not found] ` <20080108113630.861045063-sUDqSbJrdHQHWmgEVkV9KA@public.gmane.org>
1 sibling, 1 reply; 62+ messages in thread
From: Pavel Machek @ 2008-01-08 21:46 UTC (permalink / raw)
To: Miklos Szeredi
Cc: akpm, hch, serue, viro, ebiederm, kzak, linux-fsdevel,
linux-kernel, containers, util-linux-ng
On Tue 2008-01-08 12:35:09, Miklos Szeredi wrote:
> From: Miklos Szeredi <mszeredi@suse.cz>
>
> Use FS_SAFE for "fuse" fs type, but not for "fuseblk".
>
> FUSE was designed from the beginning to be safe for unprivileged users. This
> has also been verified in practice over many years. In addition unprivileged
Eh? So 'kill -9 no longer works' and 'suspend no longer works' is not
considered important enough to even mention?
'updatedb no longer works' is not a problem?
Are you ready to offer shell account for bugtraq people to see how
long it survives?
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
^ permalink raw reply [flat|nested] 62+ messages in thread[parent not found: <20080108113630.861045063-sUDqSbJrdHQHWmgEVkV9KA@public.gmane.org>]
* Re: [patch 7/9] unprivileged mounts: allow unprivileged fuse mounts
[not found] ` <20080108113630.861045063-sUDqSbJrdHQHWmgEVkV9KA@public.gmane.org>
@ 2008-01-14 23:24 ` Serge E. Hallyn
2008-01-15 10:29 ` Miklos Szeredi
0 siblings, 1 reply; 62+ messages in thread
From: Serge E. Hallyn @ 2008-01-14 23:24 UTC (permalink / raw)
To: Miklos Szeredi
Cc: akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b, hch-wEGCiKHe2LqWVfeAwA7xHQ,
serue-r/Jw6+rmf7HQT0dZR+AlfA, viro-rfM+Q5joDG/XmaaqVzeoHQ,
ebiederm-aS9lmoZGLiVWk0Htik3J/w, kzak-H+wXaHxf7aLQT0dZR+AlfA,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
containers-qjLDD68F18O7TbgM5vRIOg,
util-linux-ng-u79uwXL29TY76Z2rM5mHXA
Quoting Miklos Szeredi (miklos-sUDqSbJrdHQHWmgEVkV9KA@public.gmane.org):
> From: Miklos Szeredi <mszeredi-AlSwsSmVLrQ@public.gmane.org>
>
> Use FS_SAFE for "fuse" fs type, but not for "fuseblk".
>
> FUSE was designed from the beginning to be safe for unprivileged users. This
> has also been verified in practice over many years. In addition unprivileged
> mounts require the parent mount to be owned by the user, which is more strict
> than the current userspace policy.
>
> This will enable future installations to remove the suid-root fusermount
> utility.
>
> Don't require the "user_id=" and "group_id=" options for unprivileged mounts,
> but if they are present, verify them for sanity.
>
> Disallow the "allow_other" option for unprivileged mounts.
>
> Signed-off-by: Miklos Szeredi <mszeredi-AlSwsSmVLrQ@public.gmane.org>
Sounds like a sysctl to enable FS_SAFE for fuse will make this patch
acceptable to everyone?
> ---
>
> Index: linux/fs/fuse/inode.c
> ===================================================================
> --- linux.orig/fs/fuse/inode.c 2008-01-03 17:13:13.000000000 +0100
> +++ linux/fs/fuse/inode.c 2008-01-03 21:28:01.000000000 +0100
> @@ -357,6 +357,19 @@ static int parse_fuse_opt(char *opt, str
> d->max_read = ~0;
> d->blksize = 512;
>
> + /*
> + * For unprivileged mounts use current uid/gid. Still allow
> + * "user_id" and "group_id" options for compatibility, but
> + * only if they match these values.
> + */
> + if (!capable(CAP_SYS_ADMIN)) {
> + d->user_id = current->uid;
> + d->user_id_present = 1;
> + d->group_id = current->gid;
> + d->group_id_present = 1;
> +
> + }
> +
> while ((p = strsep(&opt, ",")) != NULL) {
> int token;
> int value;
> @@ -385,6 +398,8 @@ static int parse_fuse_opt(char *opt, str
> case OPT_USER_ID:
> if (match_int(&args[0], &value))
> return 0;
> + if (d->user_id_present && d->user_id != value)
> + return 0;
> d->user_id = value;
> d->user_id_present = 1;
> break;
> @@ -392,6 +407,8 @@ static int parse_fuse_opt(char *opt, str
> case OPT_GROUP_ID:
> if (match_int(&args[0], &value))
> return 0;
> + if (d->group_id_present && d->group_id != value)
> + return 0;
> d->group_id = value;
> d->group_id_present = 1;
> break;
> @@ -596,6 +613,10 @@ static int fuse_fill_super(struct super_
> if (!parse_fuse_opt((char *) data, &d, is_bdev))
> return -EINVAL;
>
> + /* This is a privileged option */
> + if ((d.flags & FUSE_ALLOW_OTHER) && !capable(CAP_SYS_ADMIN))
> + return -EPERM;
> +
> if (is_bdev) {
> #ifdef CONFIG_BLOCK
> if (!sb_set_blocksize(sb, d.blksize))
> @@ -696,9 +717,9 @@ static int fuse_get_sb(struct file_syste
> static struct file_system_type fuse_fs_type = {
> .owner = THIS_MODULE,
> .name = "fuse",
> - .fs_flags = FS_HAS_SUBTYPE,
> .get_sb = fuse_get_sb,
> .kill_sb = kill_anon_super,
> + .fs_flags = FS_HAS_SUBTYPE | FS_SAFE,
> };
>
> #ifdef CONFIG_BLOCK
>
> --
^ permalink raw reply [flat|nested] 62+ messages in thread* Re: [patch 7/9] unprivileged mounts: allow unprivileged fuse mounts
2008-01-14 23:24 ` Serge E. Hallyn
@ 2008-01-15 10:29 ` Miklos Szeredi
[not found] ` <E1JEj3N-0000vj-QA-8f8m9JG5TPIdUIPVzhDTVZP2KDSNp7ea@public.gmane.org>
0 siblings, 1 reply; 62+ messages in thread
From: Miklos Szeredi @ 2008-01-15 10:29 UTC (permalink / raw)
To: serue
Cc: miklos, akpm, hch, serue, viro, ebiederm, kzak, linux-fsdevel,
linux-kernel, containers, util-linux-ng
> Sounds like a sysctl to enable FS_SAFE for fuse will make this patch
> acceptable to everyone?
I think the most generic approach, is to be able to set "safeness" for
any fs type, not just fuse (Karel's suggestion).
E.g:
echo 1 > /proc/sys/fs/types/cifs/safe
This would also provide a way to query the FS_SAFE flag.
Miklos
^ permalink raw reply [flat|nested] 62+ messages in thread
* [patch 8/9] unprivileged mounts: propagation: inherit owner from parent
2008-01-08 11:35 [patch 0/9] mount ownership and unprivileged mount syscall (v6) Miklos Szeredi
` (6 preceding siblings ...)
2008-01-08 11:35 ` [patch 7/9] unprivileged mounts: allow unprivileged fuse mounts Miklos Szeredi
@ 2008-01-08 11:35 ` Miklos Szeredi
[not found] ` <20080108113632.895453887-sUDqSbJrdHQHWmgEVkV9KA@public.gmane.org>
2008-01-08 11:35 ` [patch 9/9] unprivileged mounts: add "no submounts" flag Miklos Szeredi
8 siblings, 1 reply; 62+ messages in thread
From: Miklos Szeredi @ 2008-01-08 11:35 UTC (permalink / raw)
To: akpm, hch, serue, viro, ebiederm, kzak
Cc: linux-fsdevel, linux-kernel, containers, util-linux-ng
[-- Attachment #1: unprivileged-mounts-propagation-inherit-owner-from-parent.patch --]
[-- Type: text/plain, Size: 7062 bytes --]
From: Miklos Szeredi <mszeredi@suse.cz>
On mount propagation, let the owner of the clone be inherited from the
parent into which it has been propagated. Also if the parent has the
"nosuid" flag, set this flag for the child as well.
This makes sense for example, when propagation is set up from the
initial namespace into a per-user namespace, where some or all of the
mounts may be owned by the user.
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
Index: linux/fs/namespace.c
===================================================================
--- linux.orig/fs/namespace.c 2008-01-04 13:48:14.000000000 +0100
+++ linux/fs/namespace.c 2008-01-04 13:49:52.000000000 +0100
@@ -500,10 +500,10 @@ static int reserve_user_mount(void)
return err;
}
-static void __set_mnt_user(struct vfsmount *mnt)
+static void __set_mnt_user(struct vfsmount *mnt, uid_t owner)
{
BUG_ON(mnt->mnt_flags & MNT_USER);
- mnt->mnt_uid = current->fsuid;
+ mnt->mnt_uid = owner;
mnt->mnt_flags |= MNT_USER;
if (!capable(CAP_SETUID))
@@ -514,7 +514,7 @@ static void __set_mnt_user(struct vfsmou
static void set_mnt_user(struct vfsmount *mnt)
{
- __set_mnt_user(mnt);
+ __set_mnt_user(mnt, current->fsuid);
spin_lock(&vfsmount_lock);
nr_user_mounts++;
spin_unlock(&vfsmount_lock);
@@ -530,7 +530,7 @@ static void clear_mnt_user(struct vfsmou
}
static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
- int flag)
+ int flag, uid_t owner)
{
struct super_block *sb = old->mnt_sb;
struct vfsmount *mnt;
@@ -554,7 +554,10 @@ static struct vfsmount *clone_mnt(struct
/* don't copy the MNT_USER flag */
mnt->mnt_flags &= ~MNT_USER;
if (flag & CL_SETUSER)
- __set_mnt_user(mnt);
+ __set_mnt_user(mnt, owner);
+
+ if (flag & CL_NOSUID)
+ mnt->mnt_flags |= MNT_NOSUID;
if (flag & CL_SLAVE) {
list_add(&mnt->mnt_slave, &old->mnt_slave_list);
@@ -1060,7 +1063,7 @@ static int lives_below_in_same_fs(struct
}
struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
- int flag)
+ int flag, uid_t owner)
{
struct vfsmount *res, *p, *q, *r, *s;
struct nameidata nd;
@@ -1068,7 +1071,7 @@ struct vfsmount *copy_tree(struct vfsmou
if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
return ERR_PTR(-EPERM);
- res = q = clone_mnt(mnt, dentry, flag);
+ res = q = clone_mnt(mnt, dentry, flag, owner);
if (IS_ERR(q))
goto error;
q->mnt_mountpoint = mnt->mnt_mountpoint;
@@ -1090,7 +1093,7 @@ struct vfsmount *copy_tree(struct vfsmou
p = s;
nd.path.mnt = q;
nd.path.dentry = p->mnt_mountpoint;
- q = clone_mnt(p, p->mnt_root, flag);
+ q = clone_mnt(p, p->mnt_root, flag, owner);
if (IS_ERR(q))
goto error;
spin_lock(&vfsmount_lock);
@@ -1115,7 +1118,7 @@ struct vfsmount *collect_mounts(struct v
{
struct vfsmount *tree;
down_read(&namespace_sem);
- tree = copy_tree(mnt, dentry, CL_COPY_ALL | CL_PRIVATE);
+ tree = copy_tree(mnt, dentry, CL_COPY_ALL | CL_PRIVATE, 0);
up_read(&namespace_sem);
return tree;
}
@@ -1286,7 +1289,8 @@ static int do_change_type(struct nameida
*/
static int do_loopback(struct nameidata *nd, char *old_name, int flags)
{
- int clone_fl;
+ int clone_fl = 0;
+ uid_t owner = 0;
struct nameidata old_nd;
struct vfsmount *mnt = NULL;
int err;
@@ -1307,11 +1311,17 @@ static int do_loopback(struct nameidata
if (!check_mnt(nd->path.mnt) || !check_mnt(old_nd.path.mnt))
goto out;
- clone_fl = (flags & MS_SETUSER) ? CL_SETUSER : 0;
+ if (flags & MS_SETUSER) {
+ clone_fl |= CL_SETUSER;
+ owner = current->fsuid;
+ }
+
if (flags & MS_REC)
- mnt = copy_tree(old_nd.path.mnt, old_nd.path.dentry, clone_fl);
+ mnt = copy_tree(old_nd.path.mnt, old_nd.path.dentry, clone_fl,
+ owner);
else
- mnt = clone_mnt(old_nd.path.mnt, old_nd.path.dentry, clone_fl);
+ mnt = clone_mnt(old_nd.path.mnt, old_nd.path.dentry, clone_fl,
+ owner);
err = PTR_ERR(mnt);
if (IS_ERR(mnt))
@@ -1535,7 +1545,7 @@ static int do_new_mount(struct nameidata
}
if (flags & MS_SETUSER)
- __set_mnt_user(mnt);
+ __set_mnt_user(mnt, current->fsuid);
return do_add_mount(mnt, nd, mnt_flags, NULL);
@@ -1931,7 +1941,7 @@ static struct mnt_namespace *dup_mnt_ns(
down_write(&namespace_sem);
/* First pass: copy the tree topology */
new_ns->root = copy_tree(mnt_ns->root, mnt_ns->root->mnt_root,
- CL_COPY_ALL | CL_EXPIRE);
+ CL_COPY_ALL | CL_EXPIRE, 0);
if (IS_ERR(new_ns->root)) {
up_write(&namespace_sem);
kfree(new_ns);
Index: linux/fs/pnode.c
===================================================================
--- linux.orig/fs/pnode.c 2008-01-04 13:47:49.000000000 +0100
+++ linux/fs/pnode.c 2008-01-04 13:49:12.000000000 +0100
@@ -181,15 +181,28 @@ int propagate_mnt(struct vfsmount *dest_
for (m = propagation_next(dest_mnt, dest_mnt); m;
m = propagation_next(m, dest_mnt)) {
- int type;
+ int clflags;
+ uid_t owner = 0;
struct vfsmount *source;
if (IS_MNT_NEW(m))
continue;
- source = get_source(m, prev_dest_mnt, prev_src_mnt, &type);
+ source = get_source(m, prev_dest_mnt, prev_src_mnt, &clflags);
- child = copy_tree(source, source->mnt_root, type);
+ if (m->mnt_flags & MNT_USER) {
+ clflags |= CL_SETUSER;
+ owner = m->mnt_uid;
+
+ /*
+ * If propagating into a user mount which doesn't
+ * allow suid, then make sure, the child(ren) won't
+ * allow suid either
+ */
+ if (m->mnt_flags & MNT_NOSUID)
+ clflags |= CL_NOSUID;
+ }
+ child = copy_tree(source, source->mnt_root, clflags, owner);
if (IS_ERR(child)) {
ret = PTR_ERR(child);
list_splice(tree_list, tmp_list.prev);
Index: linux/fs/pnode.h
===================================================================
--- linux.orig/fs/pnode.h 2008-01-04 13:45:45.000000000 +0100
+++ linux/fs/pnode.h 2008-01-04 13:49:12.000000000 +0100
@@ -24,6 +24,7 @@
#define CL_PROPAGATION 0x10
#define CL_PRIVATE 0x20
#define CL_SETUSER 0x40
+#define CL_NOSUID 0x80
static inline void set_mnt_shared(struct vfsmount *mnt)
{
@@ -36,4 +37,6 @@ int propagate_mnt(struct vfsmount *, str
struct list_head *);
int propagate_umount(struct list_head *);
int propagate_mount_busy(struct vfsmount *, int);
+struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int, uid_t);
+
#endif /* _LINUX_PNODE_H */
Index: linux/include/linux/fs.h
===================================================================
--- linux.orig/include/linux/fs.h 2008-01-04 13:48:14.000000000 +0100
+++ linux/include/linux/fs.h 2008-01-04 13:49:12.000000000 +0100
@@ -1492,7 +1492,6 @@ extern int may_umount(struct vfsmount *)
extern void umount_tree(struct vfsmount *, int, struct list_head *);
extern void release_mounts(struct list_head *);
extern long do_mount(char *, char *, char *, unsigned long, void *);
-extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
struct vfsmount *);
extern struct vfsmount *collect_mounts(struct vfsmount *, struct dentry *);
--
^ permalink raw reply [flat|nested] 62+ messages in thread* [patch 9/9] unprivileged mounts: add "no submounts" flag
2008-01-08 11:35 [patch 0/9] mount ownership and unprivileged mount syscall (v6) Miklos Szeredi
` (7 preceding siblings ...)
2008-01-08 11:35 ` [patch 8/9] unprivileged mounts: propagation: inherit owner from parent Miklos Szeredi
@ 2008-01-08 11:35 ` Miklos Szeredi
[not found] ` <20080108113634.382855604-sUDqSbJrdHQHWmgEVkV9KA@public.gmane.org>
8 siblings, 1 reply; 62+ messages in thread
From: Miklos Szeredi @ 2008-01-08 11:35 UTC (permalink / raw)
To: akpm, hch, serue, viro, ebiederm, kzak
Cc: linux-fsdevel, linux-kernel, containers, util-linux-ng
[-- Attachment #1: unprivileged-mounts-add-no-submounts-flag.patch --]
[-- Type: text/plain, Size: 2856 bytes --]
From: Miklos Szeredi <mszeredi@suse.cz>
Add a new mount flag "nomnt", which denies submounts for the owner.
This would be useful, if we want to support traditional /etc/fstab
based user mounts.
In this case mount(8) would still have to be suid-root, to check the
mountpoint against the user/users flag in /etc/fstab, but /etc/mtab
would no longer be mandatory for storing the actual owner of the
mount.
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
Index: linux/fs/namespace.c
===================================================================
--- linux.orig/fs/namespace.c 2008-01-04 13:49:52.000000000 +0100
+++ linux/fs/namespace.c 2008-01-04 13:50:28.000000000 +0100
@@ -694,6 +694,7 @@ static int show_vfsmnt(struct seq_file *
{ MNT_NOATIME, ",noatime" },
{ MNT_NODIRATIME, ",nodiratime" },
{ MNT_RELATIME, ",relatime" },
+ { MNT_NOMNT, ",nomnt" },
{ 0, NULL }
};
struct proc_fs_info *fs_infop;
@@ -1044,6 +1045,9 @@ static bool permit_mount(struct nameidat
if (S_ISLNK(inode->i_mode))
return false;
+ if (nd->path.mnt->mnt_flags & MNT_NOMNT)
+ return false;
+
if (!is_mount_owner(nd->path.mnt, current->fsuid))
return false;
@@ -1888,9 +1892,11 @@ long do_mount(char *dev_name, char *dir_
mnt_flags |= MNT_RELATIME;
if (flags & MS_RDONLY)
mnt_flags |= MNT_READONLY;
+ if (flags & MS_NOMNT)
+ mnt_flags |= MNT_NOMNT;
- flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
- MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT);
+ flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_NOATIME |
+ MS_NODIRATIME | MS_RELATIME | MS_KERNMOUNT | MS_NOMNT);
/* ... and get the mountpoint */
retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
Index: linux/include/linux/fs.h
===================================================================
--- linux.orig/include/linux/fs.h 2008-01-04 13:49:12.000000000 +0100
+++ linux/include/linux/fs.h 2008-01-04 13:49:58.000000000 +0100
@@ -130,6 +130,7 @@ extern int dir_notify_enable;
#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */
#define MS_I_VERSION (1<<23) /* Update inode I_version field */
#define MS_SETUSER (1<<24) /* set mnt_uid to current user */
+#define MS_NOMNT (1<<25) /* don't allow unprivileged submounts */
#define MS_ACTIVE (1<<30)
#define MS_NOUSER (1<<31)
Index: linux/include/linux/mount.h
===================================================================
--- linux.orig/include/linux/mount.h 2008-01-04 13:45:45.000000000 +0100
+++ linux/include/linux/mount.h 2008-01-04 13:49:58.000000000 +0100
@@ -30,6 +30,7 @@ struct mnt_namespace;
#define MNT_NODIRATIME 0x10
#define MNT_RELATIME 0x20
#define MNT_READONLY 0x40 /* does the user want this to be r/o? */
+#define MNT_NOMNT 0x80
#define MNT_SHRINKABLE 0x100
#define MNT_IMBALANCED_WRITE_COUNT 0x200 /* just for debugging */
--
^ permalink raw reply [flat|nested] 62+ messages in thread