* [PATCH review 2/6] vfs: Add setns support for the mount namespace
[not found] ` <1353322222-27060-1-git-send-email-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
@ 2012-11-19 10:50 ` Eric W. Biederman
2012-11-19 10:50 ` [PATCH review 3/6] vfs: Add a user namespace reference from struct mnt_namespace Eric W. Biederman
` (3 subsequent siblings)
4 siblings, 0 replies; 9+ messages in thread
From: Eric W. Biederman @ 2012-11-19 10:50 UTC (permalink / raw)
To: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA
Cc: Linux Containers, Eric W. Biederman,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, Al Viro
From: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
setns support for the mount namespace is a little tricky as an
arbitrary decision must be made about what to set fs->root and
fs->pwd to, as there is no expectation of a relationship between
the two mount namespaces. Therefore I arbitrarily find the root
mount point, and follow every mount on top of it to find the top
of the mount stack. Then I set fs->root and fs->pwd to that
location. The topmost root of the mount stack seems like a
reasonable place to be.
Bind mount support for the mount namespace inodes has the
possibility of creating circular dependencies between mount
namespaces. Circular dependencies can result in loops that
prevent mount namespaces from every being freed. I avoid
creating those circular dependencies by adding a sequence number
to the mount namespace and require all bind mounts be of a
younger mount namespace into an older mount namespace.
Add a helper function proc_ns_inode so it is possible to
detect when we are attempting to bind mound a namespace inode.
Acked-by: Serge Hallyn <serge.hallyn-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
Signed-off-by: Eric W. Biederman <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
fs/mount.h | 1 +
fs/namespace.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++
fs/proc/namespaces.c | 5 ++
include/linux/proc_fs.h | 7 +++
4 files changed, 108 insertions(+), 0 deletions(-)
diff --git a/fs/mount.h b/fs/mount.h
index 4f291f9..e9c37dd 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -6,6 +6,7 @@ struct mnt_namespace {
atomic_t count;
struct mount * root;
struct list_head list;
+ u64 seq; /* Sequence number to prevent loops */
wait_queue_head_t poll;
int event;
};
diff --git a/fs/namespace.c b/fs/namespace.c
index 2496062..d287e7e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -20,6 +20,7 @@
#include <linux/fs_struct.h> /* get_fs_root et.al. */
#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
#include <linux/uaccess.h>
+#include <linux/proc_fs.h>
#include "pnode.h"
#include "internal.h"
@@ -1308,6 +1309,26 @@ static int mount_is_safe(struct path *path)
#endif
}
+static bool mnt_ns_loop(struct path *path)
+{
+ /* Could bind mounting the mount namespace inode cause a
+ * mount namespace loop?
+ */
+ struct inode *inode = path->dentry->d_inode;
+ struct proc_inode *ei;
+ struct mnt_namespace *mnt_ns;
+
+ if (!proc_ns_inode(inode))
+ return false;
+
+ ei = PROC_I(inode);
+ if (ei->ns_ops != &mntns_operations)
+ return false;
+
+ mnt_ns = ei->ns;
+ return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
+}
+
struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
int flag)
{
@@ -1655,6 +1676,10 @@ static int do_loopback(struct path *path, const char *old_name,
if (err)
return err;
+ err = -EINVAL;
+ if (mnt_ns_loop(&old_path))
+ goto out;
+
err = lock_mount(path);
if (err)
goto out;
@@ -2261,6 +2286,15 @@ dput_out:
return retval;
}
+/*
+ * Assign a sequence number so we can detect when we attempt to bind
+ * mount a reference to an older mount namespace into the current
+ * mount namespace, preventing reference counting loops. A 64bit
+ * number incrementing at 10Ghz will take 12,427 years to wrap which
+ * is effectively never, so we can ignore the possibility.
+ */
+static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
+
static struct mnt_namespace *alloc_mnt_ns(void)
{
struct mnt_namespace *new_ns;
@@ -2268,6 +2302,7 @@ static struct mnt_namespace *alloc_mnt_ns(void)
new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
if (!new_ns)
return ERR_PTR(-ENOMEM);
+ new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
atomic_set(&new_ns->count, 1);
new_ns->root = NULL;
INIT_LIST_HEAD(&new_ns->list);
@@ -2681,3 +2716,63 @@ bool our_mnt(struct vfsmount *mnt)
{
return check_mnt(real_mount(mnt));
}
+
+static void *mntns_get(struct task_struct *task)
+{
+ struct mnt_namespace *ns = NULL;
+ struct nsproxy *nsproxy;
+
+ rcu_read_lock();
+ nsproxy = task_nsproxy(task);
+ if (nsproxy) {
+ ns = nsproxy->mnt_ns;
+ get_mnt_ns(ns);
+ }
+ rcu_read_unlock();
+
+ return ns;
+}
+
+static void mntns_put(void *ns)
+{
+ put_mnt_ns(ns);
+}
+
+static int mntns_install(struct nsproxy *nsproxy, void *ns)
+{
+ struct fs_struct *fs = current->fs;
+ struct mnt_namespace *mnt_ns = ns;
+ struct path root;
+
+ if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_CHROOT))
+ return -EINVAL;
+
+ if (fs->users != 1)
+ return -EINVAL;
+
+ get_mnt_ns(mnt_ns);
+ put_mnt_ns(nsproxy->mnt_ns);
+ nsproxy->mnt_ns = mnt_ns;
+
+ /* Find the root */
+ root.mnt = &mnt_ns->root->mnt;
+ root.dentry = mnt_ns->root->mnt.mnt_root;
+ path_get(&root);
+ while(d_mountpoint(root.dentry) && follow_down_one(&root))
+ ;
+
+ /* Update the pwd and root */
+ set_fs_pwd(fs, &root);
+ set_fs_root(fs, &root);
+
+ path_put(&root);
+ return 0;
+}
+
+const struct proc_ns_operations mntns_operations = {
+ .name = "mnt",
+ .type = CLONE_NEWNS,
+ .get = mntns_get,
+ .put = mntns_put,
+ .install = mntns_install,
+};
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index b178ed7..f91b1ed 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -24,6 +24,7 @@ static const struct proc_ns_operations *ns_entries[] = {
#ifdef CONFIG_IPC_NS
&ipcns_operations,
#endif
+ &mntns_operations,
};
static const struct file_operations ns_file_operations = {
@@ -198,3 +199,7 @@ out_invalid:
return ERR_PTR(-EINVAL);
}
+bool proc_ns_inode(struct inode *inode)
+{
+ return inode->i_fop == &ns_file_operations;
+}
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 3fd2e87..bf1df81 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -174,6 +174,7 @@ extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
struct proc_dir_entry *parent);
extern struct file *proc_ns_fget(int fd);
+extern bool proc_ns_inode(struct inode *inode);
#else
@@ -229,6 +230,11 @@ static inline struct file *proc_ns_fget(int fd)
return ERR_PTR(-EINVAL);
}
+static inline bool proc_ns_inode(struct inode *inode)
+{
+ return false;
+}
+
#endif /* CONFIG_PROC_FS */
#if !defined(CONFIG_PROC_KCORE)
@@ -251,6 +257,7 @@ struct proc_ns_operations {
extern const struct proc_ns_operations netns_operations;
extern const struct proc_ns_operations utsns_operations;
extern const struct proc_ns_operations ipcns_operations;
+extern const struct proc_ns_operations mntns_operations;
union proc_op {
int (*proc_get_link)(struct dentry *, struct path *);
--
1.7.5.4
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH review 3/6] vfs: Add a user namespace reference from struct mnt_namespace
[not found] ` <1353322222-27060-1-git-send-email-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
2012-11-19 10:50 ` [PATCH review 2/6] vfs: Add setns support for the mount namespace Eric W. Biederman
@ 2012-11-19 10:50 ` Eric W. Biederman
2012-11-19 10:50 ` [PATCH review 4/6] vfs: Only support slave subtrees across different user namespaces Eric W. Biederman
` (2 subsequent siblings)
4 siblings, 0 replies; 9+ messages in thread
From: Eric W. Biederman @ 2012-11-19 10:50 UTC (permalink / raw)
To: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA
Cc: Linux Containers, Eric W. Biederman,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, Al Viro
From: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
This will allow for support for unprivileged mounts in a new user namespace.
Acked-by: "Serge E. Hallyn" <serge-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org>
Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
fs/mount.h | 1 +
fs/namespace.c | 24 ++++++++++++++++--------
include/linux/mnt_namespace.h | 3 ++-
kernel/nsproxy.c | 2 +-
4 files changed, 20 insertions(+), 10 deletions(-)
diff --git a/fs/mount.h b/fs/mount.h
index e9c37dd..630fafc 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -6,6 +6,7 @@ struct mnt_namespace {
atomic_t count;
struct mount * root;
struct list_head list;
+ struct user_namespace *user_ns;
u64 seq; /* Sequence number to prevent loops */
wait_queue_head_t poll;
int event;
diff --git a/fs/namespace.c b/fs/namespace.c
index d287e7e..207c7ba 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -12,6 +12,7 @@
#include <linux/export.h>
#include <linux/capability.h>
#include <linux/mnt_namespace.h>
+#include <linux/user_namespace.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/idr.h>
@@ -2286,6 +2287,12 @@ dput_out:
return retval;
}
+static void free_mnt_ns(struct mnt_namespace *ns)
+{
+ put_user_ns(ns->user_ns);
+ kfree(ns);
+}
+
/*
* Assign a sequence number so we can detect when we attempt to bind
* mount a reference to an older mount namespace into the current
@@ -2295,7 +2302,7 @@ dput_out:
*/
static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
-static struct mnt_namespace *alloc_mnt_ns(void)
+static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
{
struct mnt_namespace *new_ns;
@@ -2308,6 +2315,7 @@ static struct mnt_namespace *alloc_mnt_ns(void)
INIT_LIST_HEAD(&new_ns->list);
init_waitqueue_head(&new_ns->poll);
new_ns->event = 0;
+ new_ns->user_ns = get_user_ns(user_ns);
return new_ns;
}
@@ -2316,7 +2324,7 @@ static struct mnt_namespace *alloc_mnt_ns(void)
* copied from the namespace of the passed in task structure.
*/
static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
- struct fs_struct *fs)
+ struct user_namespace *user_ns, struct fs_struct *fs)
{
struct mnt_namespace *new_ns;
struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
@@ -2324,7 +2332,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
struct mount *old = mnt_ns->root;
struct mount *new;
- new_ns = alloc_mnt_ns();
+ new_ns = alloc_mnt_ns(user_ns);
if (IS_ERR(new_ns))
return new_ns;
@@ -2333,7 +2341,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE);
if (IS_ERR(new)) {
up_write(&namespace_sem);
- kfree(new_ns);
+ free_mnt_ns(new_ns);
return ERR_CAST(new);
}
new_ns->root = new;
@@ -2374,7 +2382,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
}
struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
- struct fs_struct *new_fs)
+ struct user_namespace *user_ns, struct fs_struct *new_fs)
{
struct mnt_namespace *new_ns;
@@ -2384,7 +2392,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
if (!(flags & CLONE_NEWNS))
return ns;
- new_ns = dup_mnt_ns(ns, new_fs);
+ new_ns = dup_mnt_ns(ns, user_ns, new_fs);
put_mnt_ns(ns);
return new_ns;
@@ -2396,7 +2404,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
*/
static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
{
- struct mnt_namespace *new_ns = alloc_mnt_ns();
+ struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
if (!IS_ERR(new_ns)) {
struct mount *mnt = real_mount(m);
mnt->mnt_ns = new_ns;
@@ -2682,7 +2690,7 @@ void put_mnt_ns(struct mnt_namespace *ns)
br_write_unlock(&vfsmount_lock);
up_write(&namespace_sem);
release_mounts(&umount_list);
- kfree(ns);
+ free_mnt_ns(ns);
}
struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 5a8e390..12b2ab5 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -4,9 +4,10 @@
struct mnt_namespace;
struct fs_struct;
+struct user_namespace;
extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
- struct fs_struct *);
+ struct user_namespace *, struct fs_struct *);
extern void put_mnt_ns(struct mnt_namespace *ns);
extern const struct file_operations proc_mounts_operations;
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index b576f7f..5b6ce19 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -66,7 +66,7 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
if (!new_nsp)
return ERR_PTR(-ENOMEM);
- new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs);
+ new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, task_cred_xxx(tsk, user_ns), new_fs);
if (IS_ERR(new_nsp->mnt_ns)) {
err = PTR_ERR(new_nsp->mnt_ns);
goto out_ns;
--
1.7.5.4
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH review 4/6] vfs: Only support slave subtrees across different user namespaces
[not found] ` <1353322222-27060-1-git-send-email-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
2012-11-19 10:50 ` [PATCH review 2/6] vfs: Add setns support for the mount namespace Eric W. Biederman
2012-11-19 10:50 ` [PATCH review 3/6] vfs: Add a user namespace reference from struct mnt_namespace Eric W. Biederman
@ 2012-11-19 10:50 ` Eric W. Biederman
2012-11-19 10:50 ` [PATCH review 5/6] vfs: Allow unprivileged manipulation of the mount namespace Eric W. Biederman
2012-11-19 10:50 ` [PATCH review 6/6] userns: fix return value on mntns_install() failure Eric W. Biederman
4 siblings, 0 replies; 9+ messages in thread
From: Eric W. Biederman @ 2012-11-19 10:50 UTC (permalink / raw)
To: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA
Cc: Linux Containers, Eric W. Biederman,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, Al Viro
From: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
Sharing mount subtress with mount namespaces created by unprivileged
users allows unprivileged mounts created by unprivileged users to
propagate to mount namespaces controlled by privileged users.
Prevent nasty consequences by changing shared subtrees to slave
subtress when an unprivileged users creates a new mount namespace.
Acked-by: Serge Hallyn <serge.hallyn-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
fs/namespace.c | 11 ++++++++---
fs/pnode.h | 1 +
2 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index 207c7ba..4dfcaf0 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -786,7 +786,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
if (!mnt)
return ERR_PTR(-ENOMEM);
- if (flag & (CL_SLAVE | CL_PRIVATE))
+ if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
mnt->mnt_group_id = 0; /* not a peer of original */
else
mnt->mnt_group_id = old->mnt_group_id;
@@ -807,7 +807,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
br_write_unlock(&vfsmount_lock);
- if (flag & CL_SLAVE) {
+ if ((flag & CL_SLAVE) ||
+ ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
list_add(&mnt->mnt_slave, &old->mnt_slave_list);
mnt->mnt_master = old;
CLEAR_MNT_SHARED(mnt);
@@ -2331,6 +2332,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
struct mount *p, *q;
struct mount *old = mnt_ns->root;
struct mount *new;
+ int copy_flags;
new_ns = alloc_mnt_ns(user_ns);
if (IS_ERR(new_ns))
@@ -2338,7 +2340,10 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
down_write(&namespace_sem);
/* First pass: copy the tree topology */
- new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE);
+ copy_flags = CL_COPY_ALL | CL_EXPIRE;
+ if (user_ns != mnt_ns->user_ns)
+ copy_flags |= CL_SHARED_TO_SLAVE;
+ new = copy_tree(old, old->mnt.mnt_root, copy_flags);
if (IS_ERR(new)) {
up_write(&namespace_sem);
free_mnt_ns(new_ns);
diff --git a/fs/pnode.h b/fs/pnode.h
index 65c6097..19b853a3 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -22,6 +22,7 @@
#define CL_COPY_ALL 0x04
#define CL_MAKE_SHARED 0x08
#define CL_PRIVATE 0x10
+#define CL_SHARED_TO_SLAVE 0x20
static inline void set_mnt_shared(struct mount *mnt)
{
--
1.7.5.4
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH review 5/6] vfs: Allow unprivileged manipulation of the mount namespace.
[not found] ` <1353322222-27060-1-git-send-email-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
` (2 preceding siblings ...)
2012-11-19 10:50 ` [PATCH review 4/6] vfs: Only support slave subtrees across different user namespaces Eric W. Biederman
@ 2012-11-19 10:50 ` Eric W. Biederman
2012-11-19 10:50 ` [PATCH review 6/6] userns: fix return value on mntns_install() failure Eric W. Biederman
4 siblings, 0 replies; 9+ messages in thread
From: Eric W. Biederman @ 2012-11-19 10:50 UTC (permalink / raw)
To: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA
Cc: Linux Containers, Eric W. Biederman,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, Al Viro
From: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
- Add a filesystem flag to mark filesystems that are safe to mount as
an unprivileged user.
- Add a filesystem flag to mark filesystems that don't need MNT_NODEV
when mounted by an unprivileged user.
- Relax the permission checks to allow unprivileged users that have
CAP_SYS_ADMIN permissions in the user namespace referred to by the
current mount namespace to be allowed to mount, unmount, and move
filesystems.
Acked-by: "Serge E. Hallyn" <serge-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org>
Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
fs/namespace.c | 69 ++++++++++++++++++++++++++++++++-------------------
include/linux/fs.h | 2 +
2 files changed, 45 insertions(+), 26 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index 4dfcaf0..9ddc86f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1269,7 +1269,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
goto dput_and_out;
retval = -EPERM;
- if (!capable(CAP_SYS_ADMIN))
+ if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
goto dput_and_out;
retval = do_umount(mnt, flags);
@@ -1295,7 +1295,7 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
static int mount_is_safe(struct path *path)
{
- if (capable(CAP_SYS_ADMIN))
+ if (ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
return 0;
return -EPERM;
#ifdef notyet
@@ -1633,7 +1633,7 @@ static int do_change_type(struct path *path, int flag)
int type;
int err = 0;
- if (!capable(CAP_SYS_ADMIN))
+ if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
if (path->dentry != path->mnt->mnt_root)
@@ -1797,7 +1797,7 @@ static int do_move_mount(struct path *path, const char *old_name)
struct mount *p;
struct mount *old;
int err = 0;
- if (!capable(CAP_SYS_ADMIN))
+ if (!ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
if (!old_name || !*old_name)
return -EINVAL;
@@ -1884,21 +1884,6 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
return ERR_PTR(err);
}
-static struct vfsmount *
-do_kern_mount(const char *fstype, int flags, const char *name, void *data)
-{
- struct file_system_type *type = get_fs_type(fstype);
- struct vfsmount *mnt;
- if (!type)
- return ERR_PTR(-ENODEV);
- mnt = vfs_kern_mount(type, flags, name, data);
- if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
- !mnt->mnt_sb->s_subtype)
- mnt = fs_set_subtype(mnt, fstype);
- put_filesystem(type);
- return mnt;
-}
-
/*
* add a mount into a namespace's mount tree
*/
@@ -1944,20 +1929,46 @@ unlock:
* create a new mount for userspace and request it to be added into the
* namespace's tree
*/
-static int do_new_mount(struct path *path, const char *type, int flags,
+static int do_new_mount(struct path *path, const char *fstype, int flags,
int mnt_flags, const char *name, void *data)
{
+ struct file_system_type *type;
+ struct user_namespace *user_ns;
struct vfsmount *mnt;
int err;
- if (!type)
+ if (!fstype)
return -EINVAL;
/* we need capabilities... */
- if (!capable(CAP_SYS_ADMIN))
+ user_ns = real_mount(path->mnt)->mnt_ns->user_ns;
+ if (!ns_capable(user_ns, CAP_SYS_ADMIN))
return -EPERM;
- mnt = do_kern_mount(type, flags, name, data);
+ type = get_fs_type(fstype);
+ if (!type)
+ return -ENODEV;
+
+ if (user_ns != &init_user_ns) {
+ if (!(type->fs_flags & FS_USERNS_MOUNT)) {
+ put_filesystem(type);
+ return -EPERM;
+ }
+ /* Only in special cases allow devices from mounts
+ * created outside the initial user namespace.
+ */
+ if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
+ flags |= MS_NODEV;
+ mnt_flags |= MNT_NODEV;
+ }
+ }
+
+ mnt = vfs_kern_mount(type, flags, name, data);
+ if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
+ !mnt->mnt_sb->s_subtype)
+ mnt = fs_set_subtype(mnt, fstype);
+
+ put_filesystem(type);
if (IS_ERR(mnt))
return PTR_ERR(mnt);
@@ -2549,7 +2560,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
struct mount *new_mnt, *root_mnt;
int error;
- if (!capable(CAP_SYS_ADMIN))
+ if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
error = user_path_dir(new_root, &new);
@@ -2631,8 +2642,13 @@ static void __init init_mount_tree(void)
struct vfsmount *mnt;
struct mnt_namespace *ns;
struct path root;
+ struct file_system_type *type;
- mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
+ type = get_fs_type("rootfs");
+ if (!type)
+ panic("Can't find rootfs type");
+ mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
+ put_filesystem(type);
if (IS_ERR(mnt))
panic("Can't create rootfs");
@@ -2757,7 +2773,8 @@ static int mntns_install(struct nsproxy *nsproxy, void *ns)
struct mnt_namespace *mnt_ns = ns;
struct path root;
- if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_CHROOT))
+ if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
+ !nsown_capable(CAP_SYS_CHROOT))
return -EINVAL;
if (fs->users != 1)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b33cfc9..5037aa6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1812,6 +1812,8 @@ struct file_system_type {
#define FS_REQUIRES_DEV 1
#define FS_BINARY_MOUNTDATA 2
#define FS_HAS_SUBTYPE 4
+#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
+#define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */
#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
struct dentry *(*mount) (struct file_system_type *, int,
--
1.7.5.4
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH review 6/6] userns: fix return value on mntns_install() failure
[not found] ` <1353322222-27060-1-git-send-email-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
` (3 preceding siblings ...)
2012-11-19 10:50 ` [PATCH review 5/6] vfs: Allow unprivileged manipulation of the mount namespace Eric W. Biederman
@ 2012-11-19 10:50 ` Eric W. Biederman
4 siblings, 0 replies; 9+ messages in thread
From: Eric W. Biederman @ 2012-11-19 10:50 UTC (permalink / raw)
To: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA
Cc: Linux Containers, Eric W. Biederman,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, Al Viro
From: Zhao Hongjiang <zhaohongjiang-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
Change return value from -EINVAL to -EPERM when the permission check fails.
Signed-off-by: Zhao Hongjiang <zhaohongjiang-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
Signed-off-by: Eric W. Biederman <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
fs/namespace.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index 9ddc86f..cab78a7 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2775,7 +2775,7 @@ static int mntns_install(struct nsproxy *nsproxy, void *ns)
if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
!nsown_capable(CAP_SYS_CHROOT))
- return -EINVAL;
+ return -EPERM;
if (fs->users != 1)
return -EINVAL;
--
1.7.5.4
^ permalink raw reply related [flat|nested] 9+ messages in thread