- * [PATCH v2 01/28] user_namespace: introduce fsid mappings infrastructure
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 02/28] proc: add /proc/<pid>/fsuid_map Christian Brauner
                   ` (29 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
This introduces the infrastructure to setup fsid mappings which will be used in
later patches.
All new code depends on CONFIG_USER_NS_FSID=y. It currently defaults to "N".
If CONFIG_USER_NS_FSID is not set, no new code is added.
In this patch fsuid_m_show() and fsgid_m_show() are introduced. They are
identical to uid_m_show() and gid_m_show() until we introduce from_kfsuid() and
from_kfsgid() in a follow-up patch.
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
- Randy Dunlap <rdunlap@infradead.org>:
  - Fix typo in USER_NS_FSID kconfig documentation.
---
 include/linux/user_namespace.h |  10 +++
 init/Kconfig                   |  11 +++
 kernel/user.c                  |  22 ++++++
 kernel/user_namespace.c        | 122 +++++++++++++++++++++++++++++++++
 4 files changed, 165 insertions(+)
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 6ef1c7109fc4..e44742b0cf8a 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -56,6 +56,10 @@ enum ucount_type {
 struct user_namespace {
 	struct uid_gid_map	uid_map;
 	struct uid_gid_map	gid_map;
+#ifdef CONFIG_USER_NS_FSID
+	struct uid_gid_map	fsuid_map;
+	struct uid_gid_map	fsgid_map;
+#endif
 	struct uid_gid_map	projid_map;
 	atomic_t		count;
 	struct user_namespace	*parent;
@@ -127,6 +131,12 @@ struct seq_operations;
 extern const struct seq_operations proc_uid_seq_operations;
 extern const struct seq_operations proc_gid_seq_operations;
 extern const struct seq_operations proc_projid_seq_operations;
+#ifdef CONFIG_USER_NS_FSID
+extern const struct seq_operations proc_fsuid_seq_operations;
+extern const struct seq_operations proc_fsgid_seq_operations;
+extern ssize_t proc_fsuid_map_write(struct file *, const char __user *, size_t, loff_t *);
+extern ssize_t proc_fsgid_map_write(struct file *, const char __user *, size_t, loff_t *);
+#endif
 extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *);
 extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *);
 extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *);
diff --git a/init/Kconfig b/init/Kconfig
index cfee56c151f1..d4d0beeba48f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1111,6 +1111,17 @@ config USER_NS
 
 	  If unsure, say N.
 
+config USER_NS_FSID
+	bool "User namespace fsid mappings"
+	depends on USER_NS
+	default n
+	help
+	  This allows containers to alter their filesystem id mappings.
+	  With this containers with different id mappings can still share
+	  the same filesystem.
+
+	  If unsure, say N.
+
 config PID_NS
 	bool "PID Namespaces"
 	default y
diff --git a/kernel/user.c b/kernel/user.c
index 5235d7f49982..2ccaea9b810b 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -55,6 +55,28 @@ struct user_namespace init_user_ns = {
 			},
 		},
 	},
+#ifdef CONFIG_USER_NS_FSID
+	.fsuid_map = {
+		.nr_extents = 1,
+		{
+			.extent[0] = {
+				.first = 0,
+				.lower_first = 0,
+				.count = 4294967295U,
+			},
+		},
+	},
+	.fsgid_map = {
+		.nr_extents = 1,
+		{
+			.extent[0] = {
+				.first = 0,
+				.lower_first = 0,
+				.count = 4294967295U,
+			},
+		},
+	},
+#endif
 	.count = ATOMIC_INIT(3),
 	.owner = GLOBAL_ROOT_UID,
 	.group = GLOBAL_ROOT_GID,
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 8eadadc478f9..cbdf456f95f0 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -191,6 +191,16 @@ static void free_user_ns(struct work_struct *work)
 			kfree(ns->projid_map.forward);
 			kfree(ns->projid_map.reverse);
 		}
+#ifdef CONFIG_USER_NS_FSID
+		if (ns->fsgid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
+			kfree(ns->fsgid_map.forward);
+			kfree(ns->fsgid_map.reverse);
+		}
+		if (ns->fsuid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
+			kfree(ns->fsuid_map.forward);
+			kfree(ns->fsuid_map.reverse);
+		}
+#endif
 		retire_userns_sysctls(ns);
 		key_free_user_ns(ns);
 		ns_free_inum(&ns->ns);
@@ -637,6 +647,50 @@ static int projid_m_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
+#ifdef CONFIG_USER_NS_FSID
+static int fsuid_m_show(struct seq_file *seq, void *v)
+{
+	struct user_namespace *ns = seq->private;
+	struct uid_gid_extent *extent = v;
+	struct user_namespace *lower_ns;
+	uid_t lower;
+
+	lower_ns = seq_user_ns(seq);
+	if ((lower_ns == ns) && lower_ns->parent)
+		lower_ns = lower_ns->parent;
+
+	lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first));
+
+	seq_printf(seq, "%10u %10u %10u\n",
+		extent->first,
+		lower,
+		extent->count);
+
+	return 0;
+}
+
+static int fsgid_m_show(struct seq_file *seq, void *v)
+{
+	struct user_namespace *ns = seq->private;
+	struct uid_gid_extent *extent = v;
+	struct user_namespace *lower_ns;
+	gid_t lower;
+
+	lower_ns = seq_user_ns(seq);
+	if ((lower_ns == ns) && lower_ns->parent)
+		lower_ns = lower_ns->parent;
+
+	lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first));
+
+	seq_printf(seq, "%10u %10u %10u\n",
+		extent->first,
+		lower,
+		extent->count);
+
+	return 0;
+}
+#endif
+
 static void *m_start(struct seq_file *seq, loff_t *ppos,
 		     struct uid_gid_map *map)
 {
@@ -674,6 +728,22 @@ static void *projid_m_start(struct seq_file *seq, loff_t *ppos)
 	return m_start(seq, ppos, &ns->projid_map);
 }
 
+#ifdef CONFIG_USER_NS_FSID
+static void *fsuid_m_start(struct seq_file *seq, loff_t *ppos)
+{
+	struct user_namespace *ns = seq->private;
+
+	return m_start(seq, ppos, &ns->fsuid_map);
+}
+
+static void *fsgid_m_start(struct seq_file *seq, loff_t *ppos)
+{
+	struct user_namespace *ns = seq->private;
+
+	return m_start(seq, ppos, &ns->fsgid_map);
+}
+#endif
+
 static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	(*pos)++;
@@ -706,6 +776,22 @@ const struct seq_operations proc_projid_seq_operations = {
 	.show = projid_m_show,
 };
 
+#ifdef CONFIG_USER_NS_FSID
+const struct seq_operations proc_fsuid_seq_operations = {
+	.start = fsuid_m_start,
+	.stop = m_stop,
+	.next = m_next,
+	.show = fsuid_m_show,
+};
+
+const struct seq_operations proc_fsgid_seq_operations = {
+	.start = fsgid_m_start,
+	.stop = m_stop,
+	.next = m_next,
+	.show = fsgid_m_show,
+};
+#endif
+
 static bool mappings_overlap(struct uid_gid_map *new_map,
 			     struct uid_gid_extent *extent)
 {
@@ -1081,6 +1167,42 @@ ssize_t proc_projid_map_write(struct file *file, const char __user *buf,
 			 &ns->projid_map, &ns->parent->projid_map);
 }
 
+#ifdef CONFIG_USER_NS_FSID
+ssize_t proc_fsuid_map_write(struct file *file, const char __user *buf,
+			     size_t size, loff_t *ppos)
+{
+	struct seq_file *seq = file->private_data;
+	struct user_namespace *ns = seq->private;
+	struct user_namespace *seq_ns = seq_user_ns(seq);
+
+	if (!ns->parent)
+		return -EPERM;
+
+	if ((seq_ns != ns) && (seq_ns != ns->parent))
+		return -EPERM;
+
+	return map_write(file, buf, size, ppos, CAP_SETUID, &ns->fsuid_map,
+			 &ns->parent->fsuid_map);
+}
+
+ssize_t proc_fsgid_map_write(struct file *file, const char __user *buf,
+			     size_t size, loff_t *ppos)
+{
+	struct seq_file *seq = file->private_data;
+	struct user_namespace *ns = seq->private;
+	struct user_namespace *seq_ns = seq_user_ns(seq);
+
+	if (!ns->parent)
+		return -EPERM;
+
+	if ((seq_ns != ns) && (seq_ns != ns->parent))
+		return -EPERM;
+
+	return map_write(file, buf, size, ppos, CAP_SETGID, &ns->fsgid_map,
+			 &ns->parent->fsgid_map);
+}
+#endif
+
 static bool new_idmap_permitted(const struct file *file,
 				struct user_namespace *ns, int cap_setid,
 				struct uid_gid_map *new_map)
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 02/28] proc: add /proc/<pid>/fsuid_map
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 01/28] user_namespace: introduce fsid mappings infrastructure Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 03/28] proc: add /proc/<pid>/fsgid_map Christian Brauner
                   ` (28 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
The /proc/<pid>/fsuid_map file can be written to once to setup an fsuid mapping
for a user namespace. Writing to this file has the same restrictions as writing
to /proc/<pid>/fsuid_map:
root@e1-vm:/# cat /proc/13023/fsuid_map
         0     300000     100000
Fsid mappings have always been around. They are currently always identical to
the id mappings for a user namespace. This means, currently whenever an fsid
needs to be looked up the kernel will use the id mapping of the user namespace.
With the introduction of fsid mappings the kernel will now lookup fsids in the
fsid mappings of the user namespace. If no fsid mapping exists the kernel will
continue looking up fsids in the id mappings of the user namespace. Hence, if a
system supports fsid mappings through /proc/<pid>/fs*id_map and a container
runtime is not aware of fsid mappings it or does not use them it will it will
continue to work just as before.
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
unchanged
---
 fs/proc/base.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c7c64272b0fa..5fb28004663e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2970,6 +2970,13 @@ static int proc_projid_map_open(struct inode *inode, struct file *file)
 	return proc_id_map_open(inode, file, &proc_projid_seq_operations);
 }
 
+#ifdef CONFIG_USER_NS_FSID
+static int proc_fsuid_map_open(struct inode *inode, struct file *file)
+{
+	return proc_id_map_open(inode, file, &proc_fsuid_seq_operations);
+}
+#endif
+
 static const struct file_operations proc_uid_map_operations = {
 	.open		= proc_uid_map_open,
 	.write		= proc_uid_map_write,
@@ -2994,6 +3001,16 @@ static const struct file_operations proc_projid_map_operations = {
 	.release	= proc_id_map_release,
 };
 
+#ifdef CONFIG_USER_NS_FSID
+static const struct file_operations proc_fsuid_map_operations = {
+	.open		= proc_fsuid_map_open,
+	.write		= proc_fsuid_map_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= proc_id_map_release,
+};
+#endif
+
 static int proc_setgroups_open(struct inode *inode, struct file *file)
 {
 	struct user_namespace *ns = NULL;
@@ -3176,6 +3193,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 	ONE("io",	S_IRUSR, proc_tgid_io_accounting),
 #endif
 #ifdef CONFIG_USER_NS
+#ifdef CONFIG_USER_NS_FSID
+	REG("fsuid_map",  S_IRUGO|S_IWUSR, proc_fsuid_map_operations),
+#endif
 	REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
 	REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
 	REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 03/28] proc: add /proc/<pid>/fsgid_map
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 01/28] user_namespace: introduce fsid mappings infrastructure Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 02/28] proc: add /proc/<pid>/fsuid_map Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 04/28] fsuidgid: add fsid mapping helpers Christian Brauner
                   ` (27 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
The /proc/<pid>/fsgid_map file can be written to once to setup an fsgid mapping
for a user namespace. Writing to this file has the same restrictions as writing
to /proc/<pid>/fsgid_map.
root@e1-vm:/# cat /proc/13023/fsgid_map
         0     300000     100000
Fsid mappings have always been around. They are currently always identical to
the id mappings for a user namespace. This means, currently whenever an fsid
needs to be looked up the kernel will use the id mapping of the user namespace.
With the introduction of fsid mappings the kernel will now lookup fsids in the
fsid mappings of the user namespace. If no fsid mapping exists the kernel will
continue looking up fsids in the id mappings of the user namespace. Hence, if a
system supports fsid mappings through /proc/<pid>/fs*id_map and a container
runtime is not aware of fsid mappings it or does not use them it will it will
continue to work just as before.
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
unchanged
---
 fs/proc/base.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 5fb28004663e..1303cdd2e617 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2975,6 +2975,11 @@ static int proc_fsuid_map_open(struct inode *inode, struct file *file)
 {
 	return proc_id_map_open(inode, file, &proc_fsuid_seq_operations);
 }
+
+static int proc_fsgid_map_open(struct inode *inode, struct file *file)
+{
+	return proc_id_map_open(inode, file, &proc_fsgid_seq_operations);
+}
 #endif
 
 static const struct file_operations proc_uid_map_operations = {
@@ -3009,6 +3014,14 @@ static const struct file_operations proc_fsuid_map_operations = {
 	.llseek		= seq_lseek,
 	.release	= proc_id_map_release,
 };
+
+static const struct file_operations proc_fsgid_map_operations = {
+	.open		= proc_fsgid_map_open,
+	.write		= proc_fsgid_map_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= proc_id_map_release,
+};
 #endif
 
 static int proc_setgroups_open(struct inode *inode, struct file *file)
@@ -3195,6 +3208,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_USER_NS
 #ifdef CONFIG_USER_NS_FSID
 	REG("fsuid_map",  S_IRUGO|S_IWUSR, proc_fsuid_map_operations),
+	REG("fsgid_map",  S_IRUGO|S_IWUSR, proc_fsgid_map_operations),
 #endif
 	REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
 	REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 04/28] fsuidgid: add fsid mapping helpers
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (2 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 03/28] proc: add /proc/<pid>/fsgid_map Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 19:11   ` Jann Horn
  2020-02-14 18:35 ` [PATCH v2 05/28] proc: task_state(): use from_kfs{g,u}id_munged Christian Brauner
                   ` (26 subsequent siblings)
  30 siblings, 1 reply; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
This adds a set of helpers to translate between kfsuid/kfsgid and their
userspace fsuid/fsgid counter parts relative to a given user namespace.
- kuid_t make_kfsuid(struct user_namespace *from, uid_t fsuid)
  Maps a user-namespace fsuid pair into a kfsuid.
  If no fsuid mappings have been written it behaves identical to calling
  make_kuid(). This ensures backwards compatibility for workloads unaware
  or not in need of fsid mappings.
- kgid_t make_kfsgid(struct user_namespace *from, gid_t fsgid)
  Maps a user-namespace fsgid pair into a kfsgid.
  If no fsgid mappings have been written it behaves identical to calling
  make_kgid(). This ensures backwards compatibility for workloads unaware
  or not in need of fsid mappings.
- uid_t from_kfsuid(struct user_namespace *to, kuid_t fsuid)
  Creates a fsuid from a kfsuid user-namespace pair if possible.
  If no fsuid mappings have been written it behaves identical to calling
  from_kuid(). This ensures backwards compatibility for workloads unaware
  or not in need of fsid mappings.
- gid_t from_kfsgid(struct user_namespace *to, kgid_t fsgid)
  Creates a fsgid from a kfsgid user-namespace pair if possible.
  If no fsgid mappings have been written it behaves identical to calling
  make_kgid(). This ensures backwards compatibility for workloads unaware
  or not in need of fsid mappings.
- uid_t from_kfsuid_munged(struct user_namespace *to, kuid_t fsuid)
  Always creates a fsuid from a kfsuid user-namespace pair.
  If no fsuid mappings have been written it behaves identical to calling
  from_kuid(). This ensures backwards compatibility for workloads unaware
  or not in need of fsid mappings.
- gid_t from_kfsgid_munged(struct user_namespace *to, kgid_t fsgid)
  Always creates a fsgid from a kfsgid user-namespace pair if possible.
  If no fsgid mappings have been written it behaves identical to calling
  make_kgid(). This ensures backwards compatibility for workloads unaware
  or not in need of fsid mappings.
- bool kfsuid_has_mapping(struct user_namespace *ns, kuid_t uid)
  Check whether this kfsuid has a mapping in the provided user namespace.
  If no fsuid mappings have been written it behaves identical to calling
  from_kuid(). This ensures backwards compatibility for workloads unaware
  or not in need of fsid mappings.
- bool kfsgid_has_mapping(struct user_namespace *ns, kgid_t gid)
  Check whether this kfsgid has a mapping in the provided user namespace.
  If no fsgid mappings have been written it behaves identical to calling
  make_kgid(). This ensures backwards compatibility for workloads unaware
  or not in need of fsid mappings.
- kuid_t kfsuid_to_kuid(struct user_namespace *to, kuid_t kfsuid)
  Translate from a kfsuid into a kuid.
- kgid_t kfsgid_to_kgid(struct user_namespace *to, kgid_t kfsgid)
  Translate from a kfsgid into a kgid.
- kuid_t kuid_to_kfsuid(struct user_namespace *to, kuid_t kuid)
  Translate from a kuid into a kfsuid.
- kgid_t kgid_to_kfsuid(struct user_namespace *to, kgid_t kgid)
  Translate from a kgid into a kfsgid.
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
- Christian Brauner <christian.brauner@ubuntu.com>:
  - add kfsuid_to_kuid(), kfsgid_to_kgid(), kuid_to_kfsuid(), kgid_to_kfsgid()
---
 include/linux/fsuidgid.h | 122 +++++++++++++++++++++++++
 kernel/user_namespace.c  | 189 ++++++++++++++++++++++++++++++++++++---
 2 files changed, 298 insertions(+), 13 deletions(-)
 create mode 100644 include/linux/fsuidgid.h
diff --git a/include/linux/fsuidgid.h b/include/linux/fsuidgid.h
new file mode 100644
index 000000000000..46763591f4e6
--- /dev/null
+++ b/include/linux/fsuidgid.h
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_FSUIDGID_H
+#define _LINUX_FSUIDGID_H
+
+#include <linux/uidgid.h>
+
+#ifdef CONFIG_USER_NS_FSID
+
+extern kuid_t make_kfsuid(struct user_namespace *from, uid_t fsuid);
+extern kgid_t make_kfsgid(struct user_namespace *from, gid_t fsgid);
+extern uid_t from_kfsuid(struct user_namespace *to, kuid_t kfsuid);
+extern gid_t from_kfsgid(struct user_namespace *to, kgid_t kfsgid);
+extern uid_t from_kfsuid_munged(struct user_namespace *to, kuid_t kfsuid);
+extern gid_t from_kfsgid_munged(struct user_namespace *to, kgid_t kfsgid);
+
+static inline bool kfsuid_has_mapping(struct user_namespace *ns, kuid_t kfsuid)
+{
+	return from_kfsuid(ns, kfsuid) != (uid_t) -1;
+}
+
+static inline bool kfsgid_has_mapping(struct user_namespace *ns, kgid_t kfsgid)
+{
+	return from_kfsgid(ns, kfsgid) != (gid_t) -1;
+}
+
+static inline kuid_t kfsuid_to_kuid(struct user_namespace *to, kuid_t kfsuid)
+{
+	uid_t fsuid = from_kfsuid(to, kfsuid);
+	if (fsuid == (uid_t) -1)
+		return INVALID_UID;
+	return make_kuid(to, fsuid);
+}
+
+static inline kgid_t kfsgid_to_kgid(struct user_namespace *to, kgid_t kfsgid)
+{
+	gid_t fsgid = from_kfsgid(to, kfsgid);
+	if (fsgid == (gid_t) -1)
+		return INVALID_GID;
+	return make_kgid(to, fsgid);
+}
+
+static inline kuid_t kuid_to_kfsuid(struct user_namespace *to, kuid_t kuid)
+{
+	uid_t uid = from_kuid(to, kuid);
+	if (uid == (uid_t) -1)
+		return INVALID_UID;
+	return make_kfsuid(to, uid);
+}
+
+static inline kgid_t kgid_to_kfsgid(struct user_namespace *to, kgid_t kgid)
+{
+	gid_t gid = from_kgid(to, kgid);
+	if (gid == (gid_t) -1)
+		return INVALID_GID;
+	return make_kfsgid(to, gid);
+}
+
+#else
+
+static inline kuid_t make_kfsuid(struct user_namespace *from, uid_t fsuid)
+{
+	return make_kuid(from, fsuid);
+}
+
+static inline kgid_t make_kfsgid(struct user_namespace *from, gid_t fsgid)
+{
+	return make_kgid(from, fsgid);
+}
+
+static inline uid_t from_kfsuid(struct user_namespace *to, kuid_t kfsuid)
+{
+	return from_kuid(to, kfsuid);
+}
+
+static inline gid_t from_kfsgid(struct user_namespace *to, kgid_t kfsgid)
+{
+	return from_kgid(to, kfsgid);
+}
+
+static inline uid_t from_kfsuid_munged(struct user_namespace *to, kuid_t kfsuid)
+{
+	return from_kuid_munged(to, kfsuid);
+}
+
+static inline gid_t from_kfsgid_munged(struct user_namespace *to, kgid_t kfsgid)
+{
+	return from_kgid_munged(to, kfsgid);
+}
+
+static inline bool kfsuid_has_mapping(struct user_namespace *ns, kuid_t kfsuid)
+{
+	return kuid_has_mapping(ns, kfsuid);
+}
+
+static inline bool kfsgid_has_mapping(struct user_namespace *ns, kgid_t kfsgid)
+{
+	return kgid_has_mapping(ns, kfsgid);
+}
+
+static inline kuid_t kfsuid_to_kuid(struct user_namespace *to, kuid_t kfsuid)
+{
+	return kfsuid;
+}
+
+static inline kgid_t kfsgid_to_kgid(struct user_namespace *to, kgid_t kfsgid)
+{
+	return kfsgid;
+}
+
+static inline kuid_t kuid_to_kfsuid(struct user_namespace *to, kuid_t kuid)
+{
+	return kuid;
+}
+
+static inline kgid_t kgid_to_kfsgid(struct user_namespace *to, kgid_t kgid)
+{
+	return kgid;
+}
+
+#endif /* CONFIG_USER_NS_FSID */
+
+#endif /* _LINUX_FSUIDGID_H */
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index cbdf456f95f0..398be02de5c3 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -20,13 +20,14 @@
 #include <linux/fs_struct.h>
 #include <linux/bsearch.h>
 #include <linux/sort.h>
+#include <linux/fsuidgid.h>
 
 static struct kmem_cache *user_ns_cachep __read_mostly;
 static DEFINE_MUTEX(userns_state_mutex);
 
 static bool new_idmap_permitted(const struct file *file,
 				struct user_namespace *ns, int cap_setid,
-				struct uid_gid_map *map);
+				struct uid_gid_map *map, bool map_fsid);
 static void free_user_ns(struct work_struct *work);
 
 static struct ucounts *inc_user_namespaces(struct user_namespace *ns, kuid_t uid)
@@ -583,6 +584,166 @@ projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid)
 }
 EXPORT_SYMBOL(from_kprojid_munged);
 
+#ifdef CONFIG_USER_NS_FSID
+/**
+ *	make_kfsuid - Map a user-namespace fsuid pair into a kuid.
+ *	@ns:  User namespace that the fsuid is in
+ *	@fsuid: User identifier
+ *
+ *	Maps a user-namespace fsuid pair into a kernel internal kfsuid,
+ *	and returns that kfsuid.
+ *
+ *	When there is no mapping defined for the user-namespace kfsuid
+ *	pair INVALID_UID is returned.  Callers are expected to test
+ *	for and handle INVALID_UID being returned.  INVALID_UID
+ *	may be tested for using uid_valid().
+ */
+kuid_t make_kfsuid(struct user_namespace *ns, uid_t fsuid)
+{
+	unsigned extents = ns->fsuid_map.nr_extents;
+	smp_rmb();
+
+	/* Map the fsuid to a global kernel fsuid */
+	if (extents == 0)
+		return KUIDT_INIT(map_id_down(&ns->uid_map, fsuid));
+
+	return KUIDT_INIT(map_id_down(&ns->fsuid_map, fsuid));
+}
+EXPORT_SYMBOL(make_kfsuid);
+
+/**
+ *	from_kfsuid - Create a fsuid from a kfsuid user-namespace pair.
+ *	@targ: The user namespace we want a fsuid in.
+ *	@kfsuid: The kernel internal fsuid to start with.
+ *
+ *	Map @kfsuid into the user-namespace specified by @targ and
+ *	return the resulting fsuid.
+ *
+ *	There is always a mapping into the initial user_namespace.
+ *
+ *	If @kfsuid has no mapping in @targ (uid_t)-1 is returned.
+ */
+uid_t from_kfsuid(struct user_namespace *targ, kuid_t kfsuid)
+{
+	unsigned extents = targ->fsuid_map.nr_extents;
+	smp_rmb();
+
+	/* Map the fsuid from a global kernel fsuid */
+	if (extents == 0)
+		return map_id_up(&targ->uid_map, __kuid_val(kfsuid));
+
+	return map_id_up(&targ->fsuid_map, __kuid_val(kfsuid));
+}
+EXPORT_SYMBOL(from_kfsuid);
+
+/**
+ *	from_kfsuid_munged - Create a fsuid from a kfsuid user-namespace pair.
+ *	@targ: The user namespace we want a fsuid in.
+ *	@kfsuid: The kernel internal fsuid to start with.
+ *
+ *	Map @kfsuid into the user-namespace specified by @targ and
+ *	return the resulting fsuid.
+ *
+ *	There is always a mapping into the initial user_namespace.
+ *
+ *	Unlike from_kfsuid from_kfsuid_munged never fails and always
+ *	returns a valid fsuid.  This makes from_kfsuid_munged appropriate
+ *	for use in syscalls like stat and getuid where failing the
+ *	system call and failing to provide a valid fsuid are not an
+ *	options.
+ *
+ *	If @kfsuid has no mapping in @targ overflowuid is returned.
+ */
+uid_t from_kfsuid_munged(struct user_namespace *targ, kuid_t kfsuid)
+{
+	uid_t fsuid;
+	fsuid = from_kfsuid(targ, kfsuid);
+
+	if (fsuid == (uid_t) -1)
+		fsuid = overflowuid;
+	return fsuid;
+}
+EXPORT_SYMBOL(from_kfsuid_munged);
+
+/**
+ *	make_kfsgid - Map a user-namespace fsgid pair into a kfsgid.
+ *	@ns:  User namespace that the fsgid is in
+ *	@fsgid: User identifier
+ *
+ *	Maps a user-namespace fsgid pair into a kernel internal kfsgid,
+ *	and returns that kfsgid.
+ *
+ *	When there is no mapping defined for the user-namespace fsgid
+ *	pair INVALID_GID is returned.  Callers are expected to test
+ *	for and handle INVALID_GID being returned.  INVALID_GID
+ *	may be tested for using gid_valid().
+ */
+kgid_t make_kfsgid(struct user_namespace *ns, gid_t fsgid)
+{
+	unsigned extents = ns->fsgid_map.nr_extents;
+	smp_rmb();
+
+	/* Map the fsgid to a global kernel fsgid */
+	if (extents == 0)
+		return KGIDT_INIT(map_id_down(&ns->gid_map, fsgid));
+
+	return KGIDT_INIT(map_id_down(&ns->fsgid_map, fsgid));
+}
+EXPORT_SYMBOL(make_kfsgid);
+
+/**
+ *	from_kfsgid - Create a fsgid from a kfsgid user-namespace pair.
+ *	@targ: The user namespace we want a fsgid in.
+ *	@kfsgid: The kernel internal fsgid to start with.
+ *
+ *	Map @kfsgid into the user-namespace specified by @targ and
+ *	return the resulting fsgid.
+ *
+ *	There is always a mapping into the initial user_namespace.
+ *
+ *	If @kfsgid has no mapping in @targ (gid_t)-1 is returned.
+ */
+gid_t from_kfsgid(struct user_namespace *targ, kgid_t kfsgid)
+{
+	unsigned extents = targ->fsgid_map.nr_extents;
+	smp_rmb();
+
+	/* Map the fsgid from a global kernel fsgid */
+	if (extents == 0)
+		return map_id_up(&targ->gid_map, __kgid_val(kfsgid));
+
+	return map_id_up(&targ->fsgid_map, __kgid_val(kfsgid));
+}
+EXPORT_SYMBOL(from_kfsgid);
+
+/**
+ *	from_kfsgid_munged - Create a fsgid from a kfsgid user-namespace pair.
+ *	@targ: The user namespace we want a fsgid in.
+ *	@kfsgid: The kernel internal fsgid to start with.
+ *
+ *	Map @kfsgid into the user-namespace specified by @targ and
+ *	return the resulting fsgid.
+ *
+ *	There is always a mapping into the initial user_namespace.
+ *
+ *	Unlike from_kfsgid from_kfsgid_munged never fails and always
+ *	returns a valid fsgid.  This makes from_kfsgid_munged appropriate
+ *	for use in syscalls like stat and getgid where failing the
+ *	system call and failing to provide a valid fsgid are not options.
+ *
+ *	If @kfsgid has no mapping in @targ overflowgid is returned.
+ */
+gid_t from_kfsgid_munged(struct user_namespace *targ, kgid_t kfsgid)
+{
+	gid_t fsgid;
+	fsgid = from_kfsgid(targ, kfsgid);
+
+	if (fsgid == (gid_t) -1)
+		fsgid = overflowgid;
+	return fsgid;
+}
+EXPORT_SYMBOL(from_kfsgid_munged);
+#endif /* CONFIG_USER_NS_FSID */
 
 static int uid_m_show(struct seq_file *seq, void *v)
 {
@@ -659,7 +820,7 @@ static int fsuid_m_show(struct seq_file *seq, void *v)
 	if ((lower_ns == ns) && lower_ns->parent)
 		lower_ns = lower_ns->parent;
 
-	lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first));
+	lower = from_kfsuid(lower_ns, KUIDT_INIT(extent->lower_first));
 
 	seq_printf(seq, "%10u %10u %10u\n",
 		extent->first,
@@ -680,7 +841,7 @@ static int fsgid_m_show(struct seq_file *seq, void *v)
 	if ((lower_ns == ns) && lower_ns->parent)
 		lower_ns = lower_ns->parent;
 
-	lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first));
+	lower = from_kfsgid(lower_ns, KGIDT_INIT(extent->lower_first));
 
 	seq_printf(seq, "%10u %10u %10u\n",
 		extent->first,
@@ -931,7 +1092,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 			 size_t count, loff_t *ppos,
 			 int cap_setid,
 			 struct uid_gid_map *map,
-			 struct uid_gid_map *parent_map)
+			 struct uid_gid_map *parent_map, bool map_fsid)
 {
 	struct seq_file *seq = file->private_data;
 	struct user_namespace *ns = seq->private;
@@ -1051,7 +1212,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 
 	ret = -EPERM;
 	/* Validate the user is allowed to use user id's mapped to. */
-	if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
+	if (!new_idmap_permitted(file, ns, cap_setid, &new_map, map_fsid))
 		goto out;
 
 	ret = -EPERM;
@@ -1129,7 +1290,7 @@ ssize_t proc_uid_map_write(struct file *file, const char __user *buf,
 		return -EPERM;
 
 	return map_write(file, buf, size, ppos, CAP_SETUID,
-			 &ns->uid_map, &ns->parent->uid_map);
+			 &ns->uid_map, &ns->parent->uid_map, false);
 }
 
 ssize_t proc_gid_map_write(struct file *file, const char __user *buf,
@@ -1146,7 +1307,7 @@ ssize_t proc_gid_map_write(struct file *file, const char __user *buf,
 		return -EPERM;
 
 	return map_write(file, buf, size, ppos, CAP_SETGID,
-			 &ns->gid_map, &ns->parent->gid_map);
+			 &ns->gid_map, &ns->parent->gid_map, false);
 }
 
 ssize_t proc_projid_map_write(struct file *file, const char __user *buf,
@@ -1164,7 +1325,7 @@ ssize_t proc_projid_map_write(struct file *file, const char __user *buf,
 
 	/* Anyone can set any valid project id no capability needed */
 	return map_write(file, buf, size, ppos, -1,
-			 &ns->projid_map, &ns->parent->projid_map);
+			 &ns->projid_map, &ns->parent->projid_map, false);
 }
 
 #ifdef CONFIG_USER_NS_FSID
@@ -1182,7 +1343,7 @@ ssize_t proc_fsuid_map_write(struct file *file, const char __user *buf,
 		return -EPERM;
 
 	return map_write(file, buf, size, ppos, CAP_SETUID, &ns->fsuid_map,
-			 &ns->parent->fsuid_map);
+			 &ns->parent->fsuid_map, true);
 }
 
 ssize_t proc_fsgid_map_write(struct file *file, const char __user *buf,
@@ -1199,13 +1360,13 @@ ssize_t proc_fsgid_map_write(struct file *file, const char __user *buf,
 		return -EPERM;
 
 	return map_write(file, buf, size, ppos, CAP_SETGID, &ns->fsgid_map,
-			 &ns->parent->fsgid_map);
+			 &ns->parent->fsgid_map, true);
 }
 #endif
 
 static bool new_idmap_permitted(const struct file *file,
 				struct user_namespace *ns, int cap_setid,
-				struct uid_gid_map *new_map)
+				struct uid_gid_map *new_map, bool map_fsid)
 {
 	const struct cred *cred = file->f_cred;
 	/* Don't allow mappings that would allow anything that wouldn't
@@ -1215,11 +1376,13 @@ static bool new_idmap_permitted(const struct file *file,
 	    uid_eq(ns->owner, cred->euid)) {
 		u32 id = new_map->extent[0].lower_first;
 		if (cap_setid == CAP_SETUID) {
-			kuid_t uid = make_kuid(ns->parent, id);
+			kuid_t uid = map_fsid ? make_kfsuid(ns->parent, id) :
+						make_kuid(ns->parent, id);
 			if (uid_eq(uid, cred->euid))
 				return true;
 		} else if (cap_setid == CAP_SETGID) {
-			kgid_t gid = make_kgid(ns->parent, id);
+			kgid_t gid = map_fsid ? make_kfsgid(ns->parent, id) :
+						make_kgid(ns->parent, id);
 			if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
 			    gid_eq(gid, cred->egid))
 				return true;
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * Re: [PATCH v2 04/28] fsuidgid: add fsid mapping helpers
  2020-02-14 18:35 ` [PATCH v2 04/28] fsuidgid: add fsid mapping helpers Christian Brauner
@ 2020-02-14 19:11   ` Jann Horn
  2020-02-16 16:55     ` Christian Brauner
  0 siblings, 1 reply; 43+ messages in thread
From: Jann Horn @ 2020-02-14 19:11 UTC (permalink / raw)
  To: Christian Brauner
  Cc: Stéphane Graber, Eric W. Biederman, Aleksa Sarai,
	Stephen Barber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, kernel list, linux-fsdevel, Linux Containers,
	linux-security-module, Linux API
On Fri, Feb 14, 2020 at 7:37 PM Christian Brauner
<christian.brauner@ubuntu.com> wrote:
> This adds a set of helpers to translate between kfsuid/kfsgid and their
> userspace fsuid/fsgid counter parts relative to a given user namespace.
>
> - kuid_t make_kfsuid(struct user_namespace *from, uid_t fsuid)
>   Maps a user-namespace fsuid pair into a kfsuid.
>   If no fsuid mappings have been written it behaves identical to calling
>   make_kuid(). This ensures backwards compatibility for workloads unaware
>   or not in need of fsid mappings.
[...]
> +#ifdef CONFIG_USER_NS_FSID
> +/**
> + *     make_kfsuid - Map a user-namespace fsuid pair into a kuid.
> + *     @ns:  User namespace that the fsuid is in
> + *     @fsuid: User identifier
> + *
> + *     Maps a user-namespace fsuid pair into a kernel internal kfsuid,
> + *     and returns that kfsuid.
> + *
> + *     When there is no mapping defined for the user-namespace kfsuid
> + *     pair INVALID_UID is returned.  Callers are expected to test
> + *     for and handle INVALID_UID being returned.  INVALID_UID
> + *     may be tested for using uid_valid().
> + */
> +kuid_t make_kfsuid(struct user_namespace *ns, uid_t fsuid)
> +{
> +       unsigned extents = ns->fsuid_map.nr_extents;
> +       smp_rmb();
> +
> +       /* Map the fsuid to a global kernel fsuid */
> +       if (extents == 0)
> +               return KUIDT_INIT(map_id_down(&ns->uid_map, fsuid));
> +
> +       return KUIDT_INIT(map_id_down(&ns->fsuid_map, fsuid));
> +}
> +EXPORT_SYMBOL(make_kfsuid);
What effect is this fallback going to have for nested namespaces?
Let's say we have an outer namespace N1 with this uid_map:
    0 100000 65535
and with this fsuid_map:
    0 300000 65535
Now from in there, a process that is not aware of the existence of
fsuid mappings creates a new user namespace N2 with the following
uid_map:
    0 1000 1
At this point, if a process in N2 does chown("foo", 0, 0), is that
going to make "foo" owned by kuid 101000, which isn't even mapped in
N1?
> @@ -1215,11 +1376,13 @@ static bool new_idmap_permitted(const struct file *file,
>             uid_eq(ns->owner, cred->euid)) {
>                 u32 id = new_map->extent[0].lower_first;
>                 if (cap_setid == CAP_SETUID) {
> -                       kuid_t uid = make_kuid(ns->parent, id);
> +                       kuid_t uid = map_fsid ? make_kfsuid(ns->parent, id) :
> +                                               make_kuid(ns->parent, id);
>                         if (uid_eq(uid, cred->euid))
>                                 return true;
Let's say we have an outer user namespace N1 with this uid_map:
    0 1000 3000
and this fsuid_map:
    0 2000 3000
and in that namespace, a process is running as UID 1000 (which means
kernel-euid=2000, kernel-fsuid=3000). Now this process unshares its
user namespace and from this nested user namespace N2, tries to write
the following fsuid_map:
    0 1000 1
This should work, since the only ID it maps is the one the process had
in N1; but the code is AFAICS going to run as follows:
        if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
            uid_eq(ns->owner, cred->euid)) { // branch taken
                u32 id = new_map->extent[0].lower_first;
                if (cap_setid == CAP_SETUID) { // branch taken
                        // uid = make_kfsuid(ns->parent, 1000) = 3000
                        kuid_t uid = map_fsid ? make_kfsuid(ns->parent, id) :
                                                make_kuid(ns->parent, id);
                        // uid_eq(3000, 2000)
                        if (uid_eq(uid, cred->euid)) // not taken
                                return true;
                } else [...]
        }
Instead, I think what would succeed is this, which shouldn't be allowed:
    0 0 1
which AFAICS will evaluate as follows:
        if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
            uid_eq(ns->owner, cred->euid)) { // branch taken
                u32 id = new_map->extent[0].lower_first;
                if (cap_setid == CAP_SETUID) { // branch taken
                        // uid = make_kfsuid(ns->parent, 0) = 2000
                        kuid_t uid = map_fsid ? make_kfsuid(ns->parent, id) :
                                                make_kuid(ns->parent, id);
                        // uid_eq(2000, 2000)
                        if (uid_eq(uid, cred->euid)) // taken
                                return true;
                } else [...]
        }
>                 } else if (cap_setid == CAP_SETGID) {
> -                       kgid_t gid = make_kgid(ns->parent, id);
> +                       kgid_t gid = map_fsid ? make_kfsgid(ns->parent, id) :
> +                                               make_kgid(ns->parent, id);
>                         if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
>                             gid_eq(gid, cred->egid))
>                                 return true;
> --
> 2.25.0
>
^ permalink raw reply	[flat|nested] 43+ messages in thread
- * Re: [PATCH v2 04/28] fsuidgid: add fsid mapping helpers
  2020-02-14 19:11   ` Jann Horn
@ 2020-02-16 16:55     ` Christian Brauner
  0 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-16 16:55 UTC (permalink / raw)
  To: Jann Horn
  Cc: Stéphane Graber, Eric W. Biederman, Aleksa Sarai,
	Stephen Barber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, kernel list, linux-fsdevel, Linux Containers,
	linux-security-module, Linux API
On Fri, Feb 14, 2020 at 08:11:36PM +0100, Jann Horn wrote:
> On Fri, Feb 14, 2020 at 7:37 PM Christian Brauner
> <christian.brauner@ubuntu.com> wrote:
> > This adds a set of helpers to translate between kfsuid/kfsgid and their
> > userspace fsuid/fsgid counter parts relative to a given user namespace.
> >
> > - kuid_t make_kfsuid(struct user_namespace *from, uid_t fsuid)
> >   Maps a user-namespace fsuid pair into a kfsuid.
> >   If no fsuid mappings have been written it behaves identical to calling
> >   make_kuid(). This ensures backwards compatibility for workloads unaware
> >   or not in need of fsid mappings.
> [...]
> > +#ifdef CONFIG_USER_NS_FSID
> > +/**
> > + *     make_kfsuid - Map a user-namespace fsuid pair into a kuid.
> > + *     @ns:  User namespace that the fsuid is in
> > + *     @fsuid: User identifier
> > + *
> > + *     Maps a user-namespace fsuid pair into a kernel internal kfsuid,
> > + *     and returns that kfsuid.
> > + *
> > + *     When there is no mapping defined for the user-namespace kfsuid
> > + *     pair INVALID_UID is returned.  Callers are expected to test
> > + *     for and handle INVALID_UID being returned.  INVALID_UID
> > + *     may be tested for using uid_valid().
> > + */
> > +kuid_t make_kfsuid(struct user_namespace *ns, uid_t fsuid)
> > +{
> > +       unsigned extents = ns->fsuid_map.nr_extents;
> > +       smp_rmb();
> > +
> > +       /* Map the fsuid to a global kernel fsuid */
> > +       if (extents == 0)
> > +               return KUIDT_INIT(map_id_down(&ns->uid_map, fsuid));
> > +
> > +       return KUIDT_INIT(map_id_down(&ns->fsuid_map, fsuid));
> > +}
> > +EXPORT_SYMBOL(make_kfsuid);
> 
> What effect is this fallback going to have for nested namespaces?
> 
> Let's say we have an outer namespace N1 with this uid_map:
> 
>     0 100000 65535
> 
> and with this fsuid_map:
> 
>     0 300000 65535
> 
> Now from in there, a process that is not aware of the existence of
> fsuid mappings creates a new user namespace N2 with the following
> uid_map:
> 
>     0 1000 1
> 
> At this point, if a process in N2 does chown("foo", 0, 0), is that
> going to make "foo" owned by kuid 101000, which isn't even mapped in
> N1?
So Jann just made a clever suggestion that would solve this problem fsid
maps can only be written if the corresponding id mapping has been
written and fsid mappings will only have an effect once the
corresponding id mapping has been written. That sounds rather sane to
me.
Christian
^ permalink raw reply	[flat|nested] 43+ messages in thread
 
 
- * [PATCH v2 05/28] proc: task_state(): use from_kfs{g,u}id_munged
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (3 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 04/28] fsuidgid: add fsid mapping helpers Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 06/28] cred: add kfs{g,u}id Christian Brauner
                   ` (25 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
If fsid mappings have been written, this will cause proc to look at fsid
mappings for the user namespace. If no fsid mappings have been written the
behavior is as before.
Here is part of the output from /proc/<pid>/status from the initial user
namespace for systemd running in an unprivileged container as user namespace
root with id mapping 0 100000 100000 and fsid mapping 0 300000 100000:
Name:	systemd
Umask:	0000
State:	S (sleeping)
Tgid:	13023
Ngid:	0
Pid:	13023
PPid:	13008
TracerPid:	0
Uid:	100000	100000	100000	300000
Gid:	100000	100000	100000	300000
FDSize:	64
Groups:
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
unchanged
---
 fs/proc/array.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 5efaf3708ec6..d4a04f85a67e 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -91,6 +91,7 @@
 #include <linux/string_helpers.h>
 #include <linux/user_namespace.h>
 #include <linux/fs_struct.h>
+#include <linux/fsuidgid.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -193,11 +194,11 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 	seq_put_decimal_ull(m, "\nUid:\t", from_kuid_munged(user_ns, cred->uid));
 	seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->euid));
 	seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->suid));
-	seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->fsuid));
+	seq_put_decimal_ull(m, "\t", from_kfsuid_munged(user_ns, cred->fsuid));
 	seq_put_decimal_ull(m, "\nGid:\t", from_kgid_munged(user_ns, cred->gid));
 	seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->egid));
 	seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->sgid));
-	seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->fsgid));
+	seq_put_decimal_ull(m, "\t", from_kfsgid_munged(user_ns, cred->fsgid));
 	seq_put_decimal_ull(m, "\nFDSize:\t", max_fds);
 
 	seq_puts(m, "\nGroups:\t");
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 06/28] cred: add kfs{g,u}id
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (4 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 05/28] proc: task_state(): use from_kfs{g,u}id_munged Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 07/28] sys: __sys_setfsuid(): handle fsid mappings Christian Brauner
                   ` (24 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
After the introduction of fsid mappings we need to carefully handle
single-superblock filesystems that are visible in user namespaces. This
specifically concerns proc and sysfs. For those filesystems we want to continue
looking up fsid in the id mappings of the relevant user namespace. We can
either do this by dynamically translating between these fsids or we simply keep
them around with the other creds. The latter option is not just simpler but
also more performant since we don't need to do the translation from fsid
mappings into id mappings on the fly.
Link: https://lore.kernel.org/r/20200212145149.zohmc6d3x52bw6j6@wittgenstein
Cc: Jann Horn <jannh@google.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
patch added
---
 include/linux/cred.h | 4 ++++
 1 file changed, 4 insertions(+)
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 18639c069263..604914d3fd51 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -125,6 +125,8 @@ struct cred {
 	kgid_t		egid;		/* effective GID of the task */
 	kuid_t		fsuid;		/* UID for VFS ops */
 	kgid_t		fsgid;		/* GID for VFS ops */
+	kuid_t		kfsuid;		/* UID for VFS ops for userns visible filesystems */
+	kgid_t		kfsgid;		/* GID for VFS ops for userns visible filesystems */
 	unsigned	securebits;	/* SUID-less security management */
 	kernel_cap_t	cap_inheritable; /* caps our children can inherit */
 	kernel_cap_t	cap_permitted;	/* caps we're permitted */
@@ -384,6 +386,8 @@ static inline void put_cred(const struct cred *_cred)
 #define current_sgid()		(current_cred_xxx(sgid))
 #define current_fsuid() 	(current_cred_xxx(fsuid))
 #define current_fsgid() 	(current_cred_xxx(fsgid))
+#define current_kfsuid() 	(current_cred_xxx(kfsuid))
+#define current_kfsgid() 	(current_cred_xxx(kfsgid))
 #define current_cap()		(current_cred_xxx(cap_effective))
 #define current_user()		(current_cred_xxx(user))
 
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 07/28] sys: __sys_setfsuid(): handle fsid mappings
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (5 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 06/28] cred: add kfs{g,u}id Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 08/28] sys: __sys_setfsgid(): " Christian Brauner
                   ` (23 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
Switch setfsuid() to lookup fsids in the fsid mappings. If no fsid mappings are
setup the behavior is unchanged, i.e. fsids are looked up in the id mappings.
A caller can only setfs{g,u}id() to a given id if the id maps to a valid kid in
both the id and fsid maps of the caller's user namespace. This is always the
case when no id mappings and fsid mappings have been written. It is also always
the case when an id mapping has been written which includes the target id and
but no fsid mappings have been written. All non-fsid mapping aware workloads
will thus work just as before.
Requiring a valid mapping for the target id in both the id and fsid mappings of
the container simplifies permission checking for userns visible filesystems
such as proc.
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
- Christian Brauner <christian.brauner@ubuntu.com>:
  - Set unmapped fsid as well.
---
 kernel/sys.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/kernel/sys.c b/kernel/sys.c
index f9bc5c303e3f..13f790dbda71 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -59,6 +59,7 @@
 #include <linux/sched/cputime.h>
 #include <linux/rcupdate.h>
 #include <linux/uidgid.h>
+#include <linux/fsuidgid.h>
 #include <linux/cred.h>
 
 #include <linux/nospec.h>
@@ -799,15 +800,19 @@ long __sys_setfsuid(uid_t uid)
 	const struct cred *old;
 	struct cred *new;
 	uid_t old_fsuid;
-	kuid_t kuid;
+	kuid_t kuid, kfsuid;
 
 	old = current_cred();
-	old_fsuid = from_kuid_munged(old->user_ns, old->fsuid);
+	old_fsuid = from_kfsuid_munged(old->user_ns, old->fsuid);
 
-	kuid = make_kuid(old->user_ns, uid);
+	kuid = make_kfsuid(old->user_ns, uid);
 	if (!uid_valid(kuid))
 		return old_fsuid;
 
+	kfsuid = make_kuid(old->user_ns, uid);
+	if (!uid_valid(kfsuid))
+		return old_fsuid;
+
 	new = prepare_creds();
 	if (!new)
 		return old_fsuid;
@@ -817,6 +822,7 @@ long __sys_setfsuid(uid_t uid)
 	    ns_capable_setid(old->user_ns, CAP_SETUID)) {
 		if (!uid_eq(kuid, old->fsuid)) {
 			new->fsuid = kuid;
+			new->kfsuid = kfsuid;
 			if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
 				goto change_okay;
 		}
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 08/28] sys: __sys_setfsgid(): handle fsid mappings
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (6 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 07/28] sys: __sys_setfsuid(): handle fsid mappings Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 09/28] sys:__sys_setuid(): " Christian Brauner
                   ` (22 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
Switch setfsgid() to lookup fsids in the fsid mappings. If no fsid mappings are
setup the behavior is unchanged, i.e. fsids are looked up in the id mappings.
A caller can only setfs{g,u}id() to a given id if the id maps to a valid kid in
both the id and fsid maps of the caller's user namespace. This is always the
case when no id mappings and fsid mappings have been written. It is also always
the case when an id mapping has been written which includes the target id and
but no fsid mappings have been written. All non-fsid mapping aware workloads
will thus work just as before.
Requiring a valid mapping for the target id in both the id and fsid mappings of
the container simplifies permission checking for userns visible filesystems
such as proc.
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
- Christian Brauner <christian.brauner@ubuntu.com>:
  - Set unmapped fsid as well.
---
 kernel/sys.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/kernel/sys.c b/kernel/sys.c
index 13f790dbda71..864fa78f25a7 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -849,15 +849,19 @@ long __sys_setfsgid(gid_t gid)
 	const struct cred *old;
 	struct cred *new;
 	gid_t old_fsgid;
-	kgid_t kgid;
+	kgid_t kgid, kfsgid;
 
 	old = current_cred();
-	old_fsgid = from_kgid_munged(old->user_ns, old->fsgid);
+	old_fsgid = from_kfsgid_munged(old->user_ns, old->fsgid);
 
-	kgid = make_kgid(old->user_ns, gid);
+	kgid = make_kfsgid(old->user_ns, gid);
 	if (!gid_valid(kgid))
 		return old_fsgid;
 
+	kfsgid = make_kgid(old->user_ns, gid);
+	if (!gid_valid(kfsgid))
+		return old_fsgid;
+
 	new = prepare_creds();
 	if (!new)
 		return old_fsgid;
@@ -867,6 +871,7 @@ long __sys_setfsgid(gid_t gid)
 	    ns_capable(old->user_ns, CAP_SETGID)) {
 		if (!gid_eq(kgid, old->fsgid)) {
 			new->fsgid = kgid;
+			new->kfsgid = kfsgid;
 			goto change_okay;
 		}
 	}
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 09/28] sys:__sys_setuid(): handle fsid mappings
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (7 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 08/28] sys: __sys_setfsgid(): " Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 10/28] sys:__sys_setgid(): " Christian Brauner
                   ` (21 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
Switch setuid() to lookup fsids in the fsid mappings. If no fsid mappings are
setup the behavior is unchanged, i.e. fsids are looked up in the id mappings.
The kfsid to cleanly handle userns visible filesystem is set as before.
We require that a user must have a valid fsid mapping for the target id. This
is consistent with how the setid calls work today without fsid mappings.
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
- Christian Brauner <christian.brauner@ubuntu.com>:
  - set kfsid which is used when dealing with proc permission checking
---
 kernel/sys.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/kernel/sys.c b/kernel/sys.c
index 864fa78f25a7..a8eefd748327 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -574,11 +574,16 @@ long __sys_setuid(uid_t uid)
 	struct cred *new;
 	int retval;
 	kuid_t kuid;
+	kuid_t kfsuid;
 
 	kuid = make_kuid(ns, uid);
 	if (!uid_valid(kuid))
 		return -EINVAL;
 
+	kfsuid = make_kfsuid(ns, uid);
+	if (!uid_valid(kfsuid))
+		return -EINVAL;
+
 	new = prepare_creds();
 	if (!new)
 		return -ENOMEM;
@@ -596,7 +601,8 @@ long __sys_setuid(uid_t uid)
 		goto error;
 	}
 
-	new->fsuid = new->euid = kuid;
+	new->kfsuid = new->euid = kuid;
+	new->fsuid = kfsuid;
 
 	retval = security_task_fix_setuid(new, old, LSM_SETID_ID);
 	if (retval < 0)
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 10/28] sys:__sys_setgid(): handle fsid mappings
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (8 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 09/28] sys:__sys_setuid(): " Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 11/28] sys:__sys_setreuid(): " Christian Brauner
                   ` (20 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
Switch setgid() to lookup fsids in the fsid mappings. If no fsid mappings are
setup the behavior is unchanged, i.e. fsids are looked up in the id mappings.
The kfsid to cleanly handle userns visible filesystem is set as before.
We require that a user must have a valid fsid mapping for the target id. This
is consistent with how the setid calls work today without fsid mappings.
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
- Christian Brauner <christian.brauner@ubuntu.com>:
  - set kfsid which is used when dealing with proc permission checking
---
 kernel/sys.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/kernel/sys.c b/kernel/sys.c
index a8eefd748327..aa379fb5e93b 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -416,24 +416,31 @@ long __sys_setgid(gid_t gid)
 	const struct cred *old;
 	struct cred *new;
 	int retval;
-	kgid_t kgid;
+	kgid_t kgid, kfsgid;
 
 	kgid = make_kgid(ns, gid);
 	if (!gid_valid(kgid))
 		return -EINVAL;
 
+	kfsgid = make_kfsgid(ns, gid);
+	if (!gid_valid(kfsgid))
+		return -EINVAL;
+
 	new = prepare_creds();
 	if (!new)
 		return -ENOMEM;
 	old = current_cred();
 
 	retval = -EPERM;
-	if (ns_capable(old->user_ns, CAP_SETGID))
-		new->gid = new->egid = new->sgid = new->fsgid = kgid;
-	else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid))
-		new->egid = new->fsgid = kgid;
-	else
+	if (ns_capable(old->user_ns, CAP_SETGID)) {
+		new->gid = new->egid = new->sgid = new->kfsgid = kgid;
+		new->fsgid = kfsgid;
+	} else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid)) {
+		new->egid = new->kfsgid = kgid;
+		new->fsgid = kfsgid;
+	} else {
 		goto error;
+	}
 
 	return commit_creds(new);
 
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 11/28] sys:__sys_setreuid(): handle fsid mappings
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (9 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 10/28] sys:__sys_setgid(): " Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 12/28] sys:__sys_setregid(): " Christian Brauner
                   ` (19 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
Switch setreuid() to lookup fsids in the fsid mappings. If no fsid mappings are
setup the behavior is unchanged, i.e. fsids are looked up in the id mappings.
During setreuid() the kfsuid is set to the keuid corresponding the euid that is
requested by userspace. If the requested euid is -1 the kfsuid is reset to the
current keuid. For the latter case this means we need to lookup the
corresponding userspace euid corresponding to the current keuid in the id
mappings and translate this euid into the corresponding kfsuid in the fsid
mappings.
The kfsid to cleanly handle userns visible filesystem is set as before.
We require that a user must have a valid fsid mapping for the target id. This
is consistent with how the setid calls work today without fsid mappings.
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
- Christian Brauner <christian.brauner@ubuntu.com>:
  - set kfsid which is used when dealing with proc permission checking
---
 kernel/sys.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/kernel/sys.c b/kernel/sys.c
index aa379fb5e93b..4697e010bbd7 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -504,7 +504,7 @@ long __sys_setreuid(uid_t ruid, uid_t euid)
 	const struct cred *old;
 	struct cred *new;
 	int retval;
-	kuid_t kruid, keuid;
+	kuid_t kruid, keuid, kfsuid;
 
 	kruid = make_kuid(ns, ruid);
 	keuid = make_kuid(ns, euid);
@@ -535,6 +535,13 @@ long __sys_setreuid(uid_t ruid, uid_t euid)
 		    !uid_eq(old->suid, keuid) &&
 		    !ns_capable_setid(old->user_ns, CAP_SETUID))
 			goto error;
+		kfsuid = make_kfsuid(new->user_ns, euid);
+	} else {
+		kfsuid = kuid_to_kfsuid(new->user_ns, new->euid);
+	}
+	if (!uid_valid(kfsuid)) {
+		retval = -EINVAL;
+		goto error;
 	}
 
 	if (!uid_eq(new->uid, old->uid)) {
@@ -545,7 +552,8 @@ long __sys_setreuid(uid_t ruid, uid_t euid)
 	if (ruid != (uid_t) -1 ||
 	    (euid != (uid_t) -1 && !uid_eq(keuid, old->uid)))
 		new->suid = new->euid;
-	new->fsuid = new->euid;
+	new->kfsuid = new->euid;
+	new->fsuid = kfsuid;
 
 	retval = security_task_fix_setuid(new, old, LSM_SETID_RE);
 	if (retval < 0)
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 12/28] sys:__sys_setregid(): handle fsid mappings
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (10 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 11/28] sys:__sys_setreuid(): " Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 13/28] sys:__sys_setresuid(): " Christian Brauner
                   ` (18 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
Switch setregid() to lookup fsids in the fsid mappings. If no fsid mappings are
setup the behavior is unchanged, i.e. fsids are looked up in the id mappings.
During setregid() the kfsgid is set to the kegid corresponding the egid that is
requested by userspace. If the requested egid is -1 the kfsgid is reset to the
current kegid. For the latter case this means we need to lookup the
corresponding userspace egid corresponding to the current kegid in the id
mappings and translate this egid into the corresponding kfsgid in the fsid
mappings.
The kfsid to cleanly handle userns visible filesystem is set as before.
We require that a user must have a valid fsid mapping for the target id. This
is consistent with how the setid calls work today without fsid mappings.
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
- Christian Brauner <christian.brauner@ubuntu.com>:
  - set kfsid which is used when dealing with proc permission checking
---
 kernel/sys.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/kernel/sys.c b/kernel/sys.c
index 4697e010bbd7..22eea030d9e7 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -354,7 +354,7 @@ long __sys_setregid(gid_t rgid, gid_t egid)
 	const struct cred *old;
 	struct cred *new;
 	int retval;
-	kgid_t krgid, kegid;
+	kgid_t krgid, kegid, kfsgid;
 
 	krgid = make_kgid(ns, rgid);
 	kegid = make_kgid(ns, egid);
@@ -386,12 +386,20 @@ long __sys_setregid(gid_t rgid, gid_t egid)
 			new->egid = kegid;
 		else
 			goto error;
+		kfsgid = make_kfsgid(ns, egid);
+	} else {
+		kfsgid = kgid_to_kfsgid(new->user_ns, new->egid);
+	}
+	if (!gid_valid(kfsgid)) {
+		retval = -EINVAL;
+		goto error;
 	}
 
 	if (rgid != (gid_t) -1 ||
 	    (egid != (gid_t) -1 && !gid_eq(kegid, old->gid)))
 		new->sgid = new->egid;
-	new->fsgid = new->egid;
+	new->kfsgid = new->egid;
+	new->fsgid = kfsgid;
 
 	return commit_creds(new);
 
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 13/28] sys:__sys_setresuid(): handle fsid mappings
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (11 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 12/28] sys:__sys_setregid(): " Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 14/28] sys:__sys_setresgid(): " Christian Brauner
                   ` (17 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
Switch setresuid() to lookup fsids in the fsid mappings. If no fsid mappings
are setup the behavior is unchanged, i.e. fsids are looked up in the id
mappings.
During setresuid() the kfsuid is set to the keuid corresponding the euid that is
requested by userspace. If the requested euid is -1 the kfsuid is reset to the
current keuid. For the latter case this means we need to lookup the
corresponding userspace euid corresponding to the current keuid in the id
mappings and translate this euid into the corresponding kfsuid in the fsid
mappings.
The kfsid to cleanly handle userns visible filesystem is set as before.
We require that a user must have a valid fsid mapping for the target id. This
is consistent with how the setid calls work today without fsid mappings.
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
- Christian Brauner <christian.brauner@ubuntu.com>:
  - set kfsid which is used when dealing with proc permission checking
---
 kernel/sys.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/kernel/sys.c b/kernel/sys.c
index 22eea030d9e7..54e072145146 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -654,7 +654,7 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
 	const struct cred *old;
 	struct cred *new;
 	int retval;
-	kuid_t kruid, keuid, ksuid;
+	kuid_t kruid, keuid, ksuid, kfsuid;
 
 	kruid = make_kuid(ns, ruid);
 	keuid = make_kuid(ns, euid);
@@ -696,11 +696,21 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
 				goto error;
 		}
 	}
-	if (euid != (uid_t) -1)
+	if (euid != (uid_t) -1) {
 		new->euid = keuid;
+		kfsuid = make_kfsuid(ns, euid);
+	} else {
+		kfsuid = kuid_to_kfsuid(new->user_ns, new->euid);
+	}
+	if (!uid_valid(kfsuid)) {
+		return -EINVAL;
+		goto error;
+	}
+
 	if (suid != (uid_t) -1)
 		new->suid = ksuid;
-	new->fsuid = new->euid;
+	new->kfsuid = new->euid;
+	new->fsuid = kfsuid;
 
 	retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
 	if (retval < 0)
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 14/28] sys:__sys_setresgid(): handle fsid mappings
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (12 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 13/28] sys:__sys_setresuid(): " Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 15/28] fs: add is_userns_visible() helper Christian Brauner
                   ` (16 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
Switch setresgid() to lookup fsids in the fsid mappings. If no fsid mappings are
setup the behavior is unchanged, i.e. fsids are looked up in the id mappings.
During setresgid() the kfsgid is set to the kegid corresponding the egid that is
requested by userspace. If the requested egid is -1 the kfsgid is reset to the
current kegid. For the latter case this means we need to lookup the
corresponding userspace egid corresponding to the current kegid in the id
mappings and translate this egid into the corresponding kfsgid in the fsid
mappings.
The kfsid to cleanly handle userns visible filesystem is set as before.
We require that a user must have a valid fsid mapping for the target id. This
is consistent with how the setid calls work today without fsid mappings.
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
- Christian Brauner <christian.brauner@ubuntu.com>:
  - set kfsid which is used when dealing with proc permission checking
---
 kernel/sys.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/kernel/sys.c b/kernel/sys.c
index 54e072145146..78592deee2d8 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -756,7 +756,7 @@ long __sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
 	const struct cred *old;
 	struct cred *new;
 	int retval;
-	kgid_t krgid, kegid, ksgid;
+	kgid_t krgid, kegid, ksgid, kfsgid;
 
 	krgid = make_kgid(ns, rgid);
 	kegid = make_kgid(ns, egid);
@@ -789,11 +789,21 @@ long __sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
 
 	if (rgid != (gid_t) -1)
 		new->gid = krgid;
-	if (egid != (gid_t) -1)
+	if (egid != (gid_t) -1) {
 		new->egid = kegid;
+		kfsgid = make_kfsgid(ns, egid);
+	} else {
+		kfsgid = kgid_to_kfsgid(new->user_ns, new->egid);
+	}
+	if (!gid_valid(kfsgid)) {
+		retval = -EINVAL;
+		goto error;
+	}
+
 	if (sgid != (gid_t) -1)
 		new->sgid = ksgid;
-	new->fsgid = new->egid;
+	new->kfsgid = new->egid;
+	new->fsgid = kfsgid;
 
 	return commit_creds(new);
 
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 15/28] fs: add is_userns_visible() helper
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (13 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 14/28] sys:__sys_setresgid(): " Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 16/28] namei: may_{o_}create(): handle fsid mappings Christian Brauner
                   ` (15 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
Introduce a helper which makes it possible to detect fileystems whose
superblock is visible in multiple user namespace. This currently only
means proc and sys. Such filesystems usually have special semantics so their
behavior will not be changed with the introduction of fsid mappings.
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
unchanged
---
 include/linux/fs.h | 5 +++++
 1 file changed, 5 insertions(+)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3cd4fe6b845e..fdc8fb2d786b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3651,4 +3651,9 @@ static inline int inode_drain_writes(struct inode *inode)
 	return filemap_write_and_wait(inode->i_mapping);
 }
 
+static inline bool is_userns_visible(unsigned long iflags)
+{
+	return (iflags & SB_I_USERNS_VISIBLE);
+}
+
 #endif /* _LINUX_FS_H */
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 16/28] namei: may_{o_}create(): handle fsid mappings
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (14 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 15/28] fs: add is_userns_visible() helper Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 17/28] inode: inode_owner_or_capable(): " Christian Brauner
                   ` (14 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
Switch may_{o_}create() to lookup fsids in the fsid mappings. If no fsid
mappings are setup the behavior is unchanged, i.e. fsids are looked up in the
id mappings.
Filesystems that share a superblock in all user namespaces they are mounted in
will retain their old semantics even with the introduction of fsidmappings.
Cc: Jann Horn <jannh@google.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
- Jann Horn <jannh@google.com>:
  - Ensure that the correct fsid is used when dealing with userns visible
    filesystems like proc.
---
 fs/namei.c | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)
diff --git a/fs/namei.c b/fs/namei.c
index db6565c99825..c5b014000f13 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -39,6 +39,7 @@
 #include <linux/bitops.h>
 #include <linux/init_task.h>
 #include <linux/uaccess.h>
+#include <linux/fsuidgid.h>
 
 #include "internal.h"
 #include "mount.h"
@@ -287,6 +288,13 @@ static int check_acl(struct inode *inode, int mask)
 	return -EAGAIN;
 }
 
+static inline kuid_t get_current_fsuid(const struct inode *inode)
+{
+	if (is_userns_visible(inode->i_sb->s_iflags))
+		return current_kfsuid();
+	return current_fsuid();
+}
+
 /*
  * This does the basic permission checking
  */
@@ -294,7 +302,7 @@ static int acl_permission_check(struct inode *inode, int mask)
 {
 	unsigned int mode = inode->i_mode;
 
-	if (likely(uid_eq(current_fsuid(), inode->i_uid)))
+	if (likely(uid_eq(get_current_fsuid(inode), inode->i_uid)))
 		mode >>= 6;
 	else {
 		if (IS_POSIXACL(inode) && (mode & S_IRWXG)) {
@@ -980,7 +988,7 @@ static inline int may_follow_link(struct nameidata *nd)
 
 	/* Allowed if owner and follower match. */
 	inode = nd->link_inode;
-	if (uid_eq(current_cred()->fsuid, inode->i_uid))
+	if (uid_eq(get_current_fsuid(inode), inode->i_uid))
 		return 0;
 
 	/* Allowed if parent directory not sticky and world-writable. */
@@ -1097,7 +1105,7 @@ static int may_create_in_sticky(umode_t dir_mode, kuid_t dir_uid,
 	    (!sysctl_protected_regular && S_ISREG(inode->i_mode)) ||
 	    likely(!(dir_mode & S_ISVTX)) ||
 	    uid_eq(inode->i_uid, dir_uid) ||
-	    uid_eq(current_fsuid(), inode->i_uid))
+	    uid_eq(get_current_fsuid(inode), inode->i_uid))
 		return 0;
 
 	if (likely(dir_mode & 0002) ||
@@ -2832,7 +2840,7 @@ EXPORT_SYMBOL(kern_path_mountpoint);
 
 int __check_sticky(struct inode *dir, struct inode *inode)
 {
-	kuid_t fsuid = current_fsuid();
+	kuid_t fsuid = get_current_fsuid(inode);
 
 	if (uid_eq(inode->i_uid, fsuid))
 		return 0;
@@ -2902,6 +2910,20 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
 	return 0;
 }
 
+static bool fsid_has_mapping(struct user_namespace *ns, struct super_block *sb)
+{
+	if (is_userns_visible(sb->s_iflags)) {
+		if (!kuid_has_mapping(ns, current_kfsuid()) ||
+		    !kgid_has_mapping(ns, current_kfsgid()))
+			return false;
+	} else if (!kfsuid_has_mapping(ns, current_fsuid()) ||
+		   !kfsgid_has_mapping(ns, current_fsgid())) {
+		return false;
+	}
+
+	return true;
+}
+
 /*	Check whether we can create an object with dentry child in directory
  *  dir.
  *  1. We can't do it if child already exists (open has special treatment for
@@ -2920,8 +2942,7 @@ static inline int may_create(struct inode *dir, struct dentry *child)
 	if (IS_DEADDIR(dir))
 		return -ENOENT;
 	s_user_ns = dir->i_sb->s_user_ns;
-	if (!kuid_has_mapping(s_user_ns, current_fsuid()) ||
-	    !kgid_has_mapping(s_user_ns, current_fsgid()))
+	if (!fsid_has_mapping(s_user_ns, dir->i_sb))
 		return -EOVERFLOW;
 	return inode_permission(dir, MAY_WRITE | MAY_EXEC);
 }
@@ -3103,8 +3124,7 @@ static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t m
 		return error;
 
 	s_user_ns = dir->dentry->d_sb->s_user_ns;
-	if (!kuid_has_mapping(s_user_ns, current_fsuid()) ||
-	    !kgid_has_mapping(s_user_ns, current_fsgid()))
+	if (!fsid_has_mapping(s_user_ns, dir->dentry->d_sb))
 		return -EOVERFLOW;
 
 	error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC);
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 17/28] inode: inode_owner_or_capable(): handle fsid mappings
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (15 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 16/28] namei: may_{o_}create(): handle fsid mappings Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 18/28] capability: privileged_wrt_inode_uidgid(): " Christian Brauner
                   ` (13 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
Switch inode_owner_or_capable() to lookup fsids in the fsid mappings. If no
fsid mappings are setup the behavior is unchanged, i.e. fsids are looked up in
the id mappings.
Filesystems that share a superblock in all user namespaces they are mounted in
will retain their old semantics even with the introduction of fsidmappings.
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
unchanged
---
 fs/inode.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/fs/inode.c b/fs/inode.c
index 7d57068b6b7a..81d7a30b381d 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -21,6 +21,7 @@
 #include <linux/ratelimit.h>
 #include <linux/list_lru.h>
 #include <linux/iversion.h>
+#include <linux/fsuidgid.h>
 #include <trace/events/writeback.h>
 #include "internal.h"
 
@@ -2087,8 +2088,12 @@ bool inode_owner_or_capable(const struct inode *inode)
 		return true;
 
 	ns = current_user_ns();
-	if (kuid_has_mapping(ns, inode->i_uid) && ns_capable(ns, CAP_FOWNER))
+	if (is_userns_visible(inode->i_sb->s_iflags)) {
+		if (kuid_has_mapping(ns, inode->i_uid) && ns_capable(ns, CAP_FOWNER))
+			return true;
+	} else if (kfsuid_has_mapping(ns, inode->i_uid) && ns_capable(ns, CAP_FOWNER)) {
 		return true;
+	}
 	return false;
 }
 EXPORT_SYMBOL(inode_owner_or_capable);
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 18/28] capability: privileged_wrt_inode_uidgid(): handle fsid mappings
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (16 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 17/28] inode: inode_owner_or_capable(): " Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 19/28] stat: " Christian Brauner
                   ` (12 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
Switch privileged_wrt_inode_uidgid() to lookup fsids in the fsid mappings. If
no fsid mappings are setup the behavior is unchanged, i.e. fsids are looked up
in the id mappings.
Filesystems that share a superblock in all user namespaces they are mounted in
will retain their old semantics even with the introduction of fsidmappings.
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
unchanged
---
 kernel/capability.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/kernel/capability.c b/kernel/capability.c
index 1444f3954d75..2b0c1dc992e2 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -19,6 +19,8 @@
 #include <linux/pid_namespace.h>
 #include <linux/user_namespace.h>
 #include <linux/uaccess.h>
+#include <linux/fsuidgid.h>
+#include <linux/fs.h>
 
 /*
  * Leveraged for setting/resetting capabilities
@@ -486,8 +488,12 @@ EXPORT_SYMBOL(file_ns_capable);
  */
 bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode)
 {
-	return kuid_has_mapping(ns, inode->i_uid) &&
-		kgid_has_mapping(ns, inode->i_gid);
+	if (is_userns_visible(inode->i_sb->s_iflags))
+		return kuid_has_mapping(ns, inode->i_uid) &&
+		       kgid_has_mapping(ns, inode->i_gid);
+
+	return kfsuid_has_mapping(ns, inode->i_uid) &&
+	       kfsgid_has_mapping(ns, inode->i_gid);
 }
 
 /**
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 19/28] stat: handle fsid mappings
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (17 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 18/28] capability: privileged_wrt_inode_uidgid(): " Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 19:03   ` Tycho Andersen
  2020-02-14 18:35 ` [PATCH v2 20/28] open: " Christian Brauner
                   ` (11 subsequent siblings)
  30 siblings, 1 reply; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
Switch attribute functions looking up fsids to them up in the fsid mappings. If
no fsid mappings are setup the behavior is unchanged, i.e. fsids are looked up
in the id mappings.
Filesystems that share a superblock in all user namespaces they are mounted in
will retain their old semantics even with the introduction of fsidmappings.
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
unchanged
---
 fs/stat.c            | 48 +++++++++++++++++++++++++++++++++++---------
 include/linux/stat.h |  1 +
 2 files changed, 39 insertions(+), 10 deletions(-)
diff --git a/fs/stat.c b/fs/stat.c
index 030008796479..edd45678c4ed 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -10,6 +10,7 @@
 #include <linux/errno.h>
 #include <linux/file.h>
 #include <linux/highuid.h>
+#include <linux/fsuidgid.h>
 #include <linux/fs.h>
 #include <linux/namei.h>
 #include <linux/security.h>
@@ -79,6 +80,8 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
 	if (IS_AUTOMOUNT(inode))
 		stat->attributes |= STATX_ATTR_AUTOMOUNT;
 
+	stat->userns_visible = is_userns_visible(inode->i_sb->s_iflags);
+
 	if (inode->i_op->getattr)
 		return inode->i_op->getattr(path, stat, request_mask,
 					    query_flags);
@@ -239,8 +242,13 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta
 	tmp.st_nlink = stat->nlink;
 	if (tmp.st_nlink != stat->nlink)
 		return -EOVERFLOW;
-	SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
-	SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
+	if (stat->userns_visible) {
+		SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
+		SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
+	} else {
+		SET_UID(tmp.st_uid, from_kfsuid_munged(current_user_ns(), stat->uid));
+		SET_GID(tmp.st_gid, from_kfsgid_munged(current_user_ns(), stat->gid));
+	}
 	tmp.st_rdev = old_encode_dev(stat->rdev);
 #if BITS_PER_LONG == 32
 	if (stat->size > MAX_NON_LFS)
@@ -327,8 +335,13 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
 	tmp.st_nlink = stat->nlink;
 	if (tmp.st_nlink != stat->nlink)
 		return -EOVERFLOW;
-	SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
-	SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
+	if (stat->userns_visible) {
+		SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
+		SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
+	} else {
+		SET_UID(tmp.st_uid, from_kfsuid_munged(current_user_ns(), stat->uid));
+		SET_GID(tmp.st_gid, from_kfsgid_munged(current_user_ns(), stat->gid));
+	}
 	tmp.st_rdev = encode_dev(stat->rdev);
 	tmp.st_size = stat->size;
 	tmp.st_atime = stat->atime.tv_sec;
@@ -471,8 +484,13 @@ static long cp_new_stat64(struct kstat *stat, struct stat64 __user *statbuf)
 #endif
 	tmp.st_mode = stat->mode;
 	tmp.st_nlink = stat->nlink;
-	tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid);
-	tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid);
+	if (stat->userns_visible) {
+		tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid);
+		tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid);
+	} else {
+		tmp.st_uid, from_kfsuid_munged(current_user_ns(), stat->uid);
+		tmp.st_gid, from_kfsgid_munged(current_user_ns(), stat->gid);
+	}
 	tmp.st_atime = stat->atime.tv_sec;
 	tmp.st_atime_nsec = stat->atime.tv_nsec;
 	tmp.st_mtime = stat->mtime.tv_sec;
@@ -544,8 +562,13 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
 	tmp.stx_blksize = stat->blksize;
 	tmp.stx_attributes = stat->attributes;
 	tmp.stx_nlink = stat->nlink;
-	tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid);
-	tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid);
+	if (stat->userns_visible) {
+		tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid);
+		tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid);
+	} else {
+		tmp.stx_uid = from_kfsuid_munged(current_user_ns(), stat->uid);
+		tmp.stx_gid = from_kfsgid_munged(current_user_ns(), stat->gid);
+	}
 	tmp.stx_mode = stat->mode;
 	tmp.stx_ino = stat->ino;
 	tmp.stx_size = stat->size;
@@ -615,8 +638,13 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
 	tmp.st_nlink = stat->nlink;
 	if (tmp.st_nlink != stat->nlink)
 		return -EOVERFLOW;
-	SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
-	SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
+	if (stat->userns_visible) {
+		SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
+		SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
+	} else {
+		SET_UID(tmp.st_uid, from_kfsuid_munged(current_user_ns(), stat->uid));
+		SET_GID(tmp.st_gid, from_kfsgid_munged(current_user_ns(), stat->gid));
+	}
 	tmp.st_rdev = old_encode_dev(stat->rdev);
 	if ((u64) stat->size > MAX_NON_LFS)
 		return -EOVERFLOW;
diff --git a/include/linux/stat.h b/include/linux/stat.h
index 528c4baad091..e6d4ba73a970 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -47,6 +47,7 @@ struct kstat {
 	struct timespec64 ctime;
 	struct timespec64 btime;			/* File creation time */
 	u64		blocks;
+	bool		userns_visible;
 };
 
 #endif
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * Re: [PATCH v2 19/28] stat: handle fsid mappings
  2020-02-14 18:35 ` [PATCH v2 19/28] stat: " Christian Brauner
@ 2020-02-14 19:03   ` Tycho Andersen
  2020-02-16 14:12     ` Christian Brauner
  0 siblings, 1 reply; 43+ messages in thread
From: Tycho Andersen @ 2020-02-14 19:03 UTC (permalink / raw)
  To: Christian Brauner
  Cc: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn,
	Kees Cook, Jonathan Corbet, linux-kernel, containers, smbarber,
	Seth Forshee, linux-security-module, Alexander Viro, linux-api,
	linux-fsdevel, Alexey Dobriyan
On Fri, Feb 14, 2020 at 07:35:45PM +0100, Christian Brauner wrote:
> @@ -471,8 +484,13 @@ static long cp_new_stat64(struct kstat *stat, struct stat64 __user *statbuf)
>  #endif
>  	tmp.st_mode = stat->mode;
>  	tmp.st_nlink = stat->nlink;
> -	tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid);
> -	tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid);
> +	if (stat->userns_visible) {
> +		tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid);
> +		tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid);
> +	} else {
> +		tmp.st_uid, from_kfsuid_munged(current_user_ns(), stat->uid);
> +		tmp.st_gid, from_kfsgid_munged(current_user_ns(), stat->gid);
> +	}
I suppose this should be = ?
Tycho
^ permalink raw reply	[flat|nested] 43+ messages in thread
- * Re: [PATCH v2 19/28] stat: handle fsid mappings
  2020-02-14 19:03   ` Tycho Andersen
@ 2020-02-16 14:12     ` Christian Brauner
  0 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-16 14:12 UTC (permalink / raw)
  To: Tycho Andersen
  Cc: linux-security-module, Kees Cook, Jonathan Corbet,
	Alexey Dobriyan, linux-api, containers, Jann Horn, linux-kernel,
	smbarber, Seth Forshee, Eric W. Biederman, linux-fsdevel,
	Alexander Viro
On Fri, Feb 14, 2020 at 12:03:14PM -0700, Tycho Andersen wrote:
> On Fri, Feb 14, 2020 at 07:35:45PM +0100, Christian Brauner wrote:
> > @@ -471,8 +484,13 @@ static long cp_new_stat64(struct kstat *stat, struct stat64 __user *statbuf)
> >  #endif
> >  	tmp.st_mode = stat->mode;
> >  	tmp.st_nlink = stat->nlink;
> > -	tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid);
> > -	tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid);
> > +	if (stat->userns_visible) {
> > +		tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid);
> > +		tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid);
> > +	} else {
> > +		tmp.st_uid, from_kfsuid_munged(current_user_ns(), stat->uid);
> > +		tmp.st_gid, from_kfsgid_munged(current_user_ns(), stat->gid);
> > +	}
> 
> I suppose this should be = ?
Good catch. I thought I had eliminated all those by doing automated
conversion but apparently not. :)
Christian
^ permalink raw reply	[flat|nested] 43+ messages in thread
 
 
- * [PATCH v2 20/28] open: handle fsid mappings
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (18 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 19/28] stat: " Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 21/28] posix_acl: " Christian Brauner
                   ` (10 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
Let chown_common() lookup fsids in the fsid mappings. If no fsid mappings are
setup the behavior is unchanged, i.e. fsids are looked up in the id mappings.
do_faccessat() just needs to translate from real ids into fsids.
Filesystems that share a superblock in all user namespaces they are mounted in
will retain their old semantics even with the introduction of fsidmappings.
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
- Christian Brauner <christian.brauner@ubuntu.com>:
  - handle faccessat() too
---
 fs/open.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/fs/open.c b/fs/open.c
index 0788b3715731..4e092845728f 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -32,6 +32,7 @@
 #include <linux/ima.h>
 #include <linux/dnotify.h>
 #include <linux/compat.h>
+#include <linux/fsuidgid.h>
 
 #include "internal.h"
 
@@ -361,8 +362,10 @@ long do_faccessat(int dfd, const char __user *filename, int mode)
 	if (!override_cred)
 		return -ENOMEM;
 
-	override_cred->fsuid = override_cred->uid;
-	override_cred->fsgid = override_cred->gid;
+	override_cred->kfsuid = override_cred->uid;
+	override_cred->kfsgid = override_cred->gid;
+	override_cred->fsuid = kuid_to_kfsuid(override_cred->user_ns, override_cred->uid);
+	override_cred->fsgid = kgid_to_kfsgid(override_cred->user_ns, override_cred->gid);
 
 	if (!issecure(SECURE_NO_SETUID_FIXUP)) {
 		/* Clear the capabilities if we switch to a non-root user */
@@ -626,8 +629,13 @@ static int chown_common(const struct path *path, uid_t user, gid_t group)
 	kuid_t uid;
 	kgid_t gid;
 
-	uid = make_kuid(current_user_ns(), user);
-	gid = make_kgid(current_user_ns(), group);
+	if (is_userns_visible(inode->i_sb->s_iflags)) {
+		uid = make_kuid(current_user_ns(), user);
+		gid = make_kgid(current_user_ns(), group);
+	} else {
+		uid = make_kfsuid(current_user_ns(), user);
+		gid = make_kfsgid(current_user_ns(), group);
+	}
 
 retry_deleg:
 	newattrs.ia_valid =  ATTR_CTIME;
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 21/28] posix_acl: handle fsid mappings
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (19 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 20/28] open: " Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 22/28] attr: notify_change(): " Christian Brauner
                   ` (9 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
Switch posix_acls() to lookup fsids in the fsid mappings. If no fsid
mappings are setup the behavior is unchanged, i.e. fsids are looked up in the
id mappings.
Afaict, all filesystems that share a superblock in all user namespaces
currently do not support acls so this change should be safe to do
unconditionally.
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
unchanged
---
 fs/posix_acl.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 249672bf54fe..763bba24f380 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -22,6 +22,7 @@
 #include <linux/xattr.h>
 #include <linux/export.h>
 #include <linux/user_namespace.h>
+#include <linux/fsuidgid.h>
 
 static struct posix_acl **acl_by_type(struct inode *inode, int type)
 {
@@ -692,12 +693,12 @@ static void posix_acl_fix_xattr_userns(
 	for (end = entry + count; entry != end; entry++) {
 		switch(le16_to_cpu(entry->e_tag)) {
 		case ACL_USER:
-			uid = make_kuid(from, le32_to_cpu(entry->e_id));
-			entry->e_id = cpu_to_le32(from_kuid(to, uid));
+			uid = make_kfsuid(from, le32_to_cpu(entry->e_id));
+			entry->e_id = cpu_to_le32(from_kfsuid(to, uid));
 			break;
 		case ACL_GROUP:
-			gid = make_kgid(from, le32_to_cpu(entry->e_id));
-			entry->e_id = cpu_to_le32(from_kgid(to, gid));
+			gid = make_kfsgid(from, le32_to_cpu(entry->e_id));
+			entry->e_id = cpu_to_le32(from_kfsgid(to, gid));
 			break;
 		default:
 			break;
@@ -746,12 +747,12 @@ posix_acl_from_xattr(struct user_namespace *user_ns,
 		return ERR_PTR(-EINVAL);
 	if (count == 0)
 		return NULL;
-	
+
 	acl = posix_acl_alloc(count, GFP_NOFS);
 	if (!acl)
 		return ERR_PTR(-ENOMEM);
 	acl_e = acl->a_entries;
-	
+
 	for (end = entry + count; entry != end; acl_e++, entry++) {
 		acl_e->e_tag  = le16_to_cpu(entry->e_tag);
 		acl_e->e_perm = le16_to_cpu(entry->e_perm);
@@ -765,14 +766,14 @@ posix_acl_from_xattr(struct user_namespace *user_ns,
 
 			case ACL_USER:
 				acl_e->e_uid =
-					make_kuid(user_ns,
+					make_kfsuid(user_ns,
 						  le32_to_cpu(entry->e_id));
 				if (!uid_valid(acl_e->e_uid))
 					goto fail;
 				break;
 			case ACL_GROUP:
 				acl_e->e_gid =
-					make_kgid(user_ns,
+					make_kfsgid(user_ns,
 						  le32_to_cpu(entry->e_id));
 				if (!gid_valid(acl_e->e_gid))
 					goto fail;
@@ -817,11 +818,11 @@ posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl,
 		switch(acl_e->e_tag) {
 		case ACL_USER:
 			ext_entry->e_id =
-				cpu_to_le32(from_kuid(user_ns, acl_e->e_uid));
+				cpu_to_le32(from_kfsuid(user_ns, acl_e->e_uid));
 			break;
 		case ACL_GROUP:
 			ext_entry->e_id =
-				cpu_to_le32(from_kgid(user_ns, acl_e->e_gid));
+				cpu_to_le32(from_kfsgid(user_ns, acl_e->e_gid));
 			break;
 		default:
 			ext_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 22/28] attr: notify_change(): handle fsid mappings
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (20 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 21/28] posix_acl: " Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 23/28] commoncap: cap_bprm_set_creds(): " Christian Brauner
                   ` (8 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
Switch notify_change() to lookup fsids in the fsid mappings. If no fsid
mappings are setup the behavior is unchanged, i.e. fsids are looked up in the
id mappings.
Filesystems that share a superblock in all user namespaces they are mounted in
will retain their old semantics even with the introduction of fsidmappings.
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
unchanged
---
 fs/attr.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)
diff --git a/fs/attr.c b/fs/attr.c
index b4bbdbd4c8ca..b3fe9d9582d2 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -17,6 +17,8 @@
 #include <linux/security.h>
 #include <linux/evm.h>
 #include <linux/ima.h>
+#include <linux/fsuidgid.h>
+#include <linux/fs.h>
 
 static bool chown_ok(const struct inode *inode, kuid_t uid)
 {
@@ -310,12 +312,21 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
 	 * Verify that uid/gid changes are valid in the target
 	 * namespace of the superblock.
 	 */
-	if (ia_valid & ATTR_UID &&
-	    !kuid_has_mapping(inode->i_sb->s_user_ns, attr->ia_uid))
-		return -EOVERFLOW;
-	if (ia_valid & ATTR_GID &&
-	    !kgid_has_mapping(inode->i_sb->s_user_ns, attr->ia_gid))
-		return -EOVERFLOW;
+	if (is_userns_visible(inode->i_sb->s_iflags)) {
+		if (ia_valid & ATTR_UID &&
+		    !kuid_has_mapping(inode->i_sb->s_user_ns, attr->ia_uid))
+			return -EOVERFLOW;
+		if (ia_valid & ATTR_GID &&
+		    !kgid_has_mapping(inode->i_sb->s_user_ns, attr->ia_gid))
+			return -EOVERFLOW;
+	} else {
+		if (ia_valid & ATTR_UID &&
+		    !kfsuid_has_mapping(inode->i_sb->s_user_ns, attr->ia_uid))
+			return -EOVERFLOW;
+		if (ia_valid & ATTR_GID &&
+		    !kfsgid_has_mapping(inode->i_sb->s_user_ns, attr->ia_gid))
+			return -EOVERFLOW;
+	}
 
 	/* Don't allow modifications of files with invalid uids or
 	 * gids unless those uids & gids are being made valid.
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 23/28] commoncap: cap_bprm_set_creds(): handle fsid mappings
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (21 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 22/28] attr: notify_change(): " Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 24/28] commoncap: cap_task_fix_setuid(): " Christian Brauner
                   ` (7 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
During exec the kfsids are currently reset to the effective kids. To retain the
same semantics with the introduction of fsid mappings, we lookup the userspace
effective id in the id mappings and translate the effective id into the
corresponding kfsid in the fsidmapping. This means, the behavior is unchanged
when no fsid mappings are setup and the semantics stay the same even when fsid
mappings are setup.
Cc: Jann Horn <jannh@google.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
- Christian Brauner <christian.brauner@ubuntu.com>:
  - Reset kfsids used for userns visible filesystems such as proc too.
---
 security/commoncap.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/security/commoncap.c b/security/commoncap.c
index f4ee0ae106b2..9641695d8383 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -810,7 +810,10 @@ int cap_bprm_set_creds(struct linux_binprm *bprm)
 	struct cred *new = bprm->cred;
 	bool effective = false, has_fcap = false, is_setid;
 	int ret;
-	kuid_t root_uid;
+	kuid_t root_uid, kfsuid;
+	kgid_t kfsgid;
+	uid_t fsuid;
+	gid_t fsgid;
 
 	if (WARN_ON(!cap_ambient_invariant_ok(old)))
 		return -EPERM;
@@ -847,8 +850,15 @@ int cap_bprm_set_creds(struct linux_binprm *bprm)
 						   old->cap_permitted);
 	}
 
-	new->suid = new->fsuid = new->euid;
-	new->sgid = new->fsgid = new->egid;
+	fsuid = from_kuid_munged(new->user_ns, new->euid);
+	kfsuid = make_kfsuid(new->user_ns, fsuid);
+	new->suid = new->kfsuid = new->euid;
+	new->fsuid = kfsuid;
+
+	fsgid = from_kgid_munged(new->user_ns, new->egid);
+	kfsgid = make_kfsgid(new->user_ns, fsgid);
+	new->sgid = new->kfsgid = new->egid;
+	new->fsgid = kfsgid;
 
 	/* File caps or setid cancels ambient. */
 	if (has_fcap || is_setid)
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 24/28] commoncap: cap_task_fix_setuid(): handle fsid mappings
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (22 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 23/28] commoncap: cap_bprm_set_creds(): " Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 25/28] commoncap: handle fsid mappings with vfs caps Christian Brauner
                   ` (6 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
Switch cap_task_fix_setuid() to lookup fsids in the fsid mappings. If no fsid
mappings are setup the behavior is unchanged, i.e. fsids are looked up in the
id mappings.
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
unchanged
---
 security/commoncap.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/security/commoncap.c b/security/commoncap.c
index 9641695d8383..0581c6aa8bdc 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -24,6 +24,7 @@
 #include <linux/user_namespace.h>
 #include <linux/binfmts.h>
 #include <linux/personality.h>
+#include <linux/fsuidgid.h>
 
 /*
  * If a non-root user executes a setuid-root binary in
@@ -1061,7 +1062,7 @@ int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags)
 		 *          if not, we might be a bit too harsh here.
 		 */
 		if (!issecure(SECURE_NO_SETUID_FIXUP)) {
-			kuid_t root_uid = make_kuid(old->user_ns, 0);
+			kuid_t root_uid = make_kfsuid(old->user_ns, 0);
 			if (uid_eq(old->fsuid, root_uid) && !uid_eq(new->fsuid, root_uid))
 				new->cap_effective =
 					cap_drop_fs_set(new->cap_effective);
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 25/28] commoncap: handle fsid mappings with vfs caps
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (23 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 24/28] commoncap: cap_task_fix_setuid(): " Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 26/28] exec: bprm_fill_uid(): handle fsid mappings Christian Brauner
                   ` (5 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 security/commoncap.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/security/commoncap.c b/security/commoncap.c
index 0581c6aa8bdc..d2259dc0450b 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -328,7 +328,7 @@ static bool rootid_owns_currentns(kuid_t kroot)
 		return false;
 
 	for (ns = current_user_ns(); ; ns = ns->parent) {
-		if (from_kuid(ns, kroot) == 0)
+		if (from_kfsuid(ns, kroot) == 0)
 			return true;
 		if (ns == &init_user_ns)
 			break;
@@ -411,11 +411,11 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
 
 	nscap = (struct vfs_ns_cap_data *) tmpbuf;
 	root = le32_to_cpu(nscap->rootid);
-	kroot = make_kuid(fs_ns, root);
+	kroot = make_kfsuid(fs_ns, root);
 
-	/* If the root kuid maps to a valid uid in current ns, then return
+	/* If the root kfsuid maps to a valid uid in current ns, then return
 	 * this as a nscap. */
-	mappedroot = from_kuid(current_user_ns(), kroot);
+	mappedroot = from_kfsuid(current_user_ns(), kroot);
 	if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) {
 		if (alloc) {
 			*buffer = tmpbuf;
@@ -460,7 +460,7 @@ static kuid_t rootid_from_xattr(const void *value, size_t size,
 	if (size == XATTR_CAPS_SZ_3)
 		rootid = le32_to_cpu(nscap->rootid);
 
-	return make_kuid(task_ns, rootid);
+	return make_kfsuid(task_ns, rootid);
 }
 
 static bool validheader(size_t size, const struct vfs_cap_data *cap)
@@ -501,7 +501,7 @@ int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size)
 	if (!uid_valid(rootid))
 		return -EINVAL;
 
-	nsrootid = from_kuid(fs_ns, rootid);
+	nsrootid = from_kfsuid(fs_ns, rootid);
 	if (nsrootid == -1)
 		return -EINVAL;
 
@@ -600,7 +600,7 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data
 
 	cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps->magic_etc);
 
-	rootkuid = make_kuid(fs_ns, 0);
+	rootkuid = make_kfsuid(fs_ns, 0);
 	switch (magic_etc & VFS_CAP_REVISION_MASK) {
 	case VFS_CAP_REVISION_1:
 		if (size != XATTR_CAPS_SZ_1)
@@ -616,7 +616,7 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data
 		if (size != XATTR_CAPS_SZ_3)
 			return -EINVAL;
 		tocopy = VFS_CAP_U32_3;
-		rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid));
+		rootkuid = make_kfsuid(fs_ns, le32_to_cpu(nscaps->rootid));
 		break;
 
 	default:
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 26/28] exec: bprm_fill_uid(): handle fsid mappings
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (24 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 25/28] commoncap: handle fsid mappings with vfs caps Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 27/28] ptrace: adapt ptrace_may_access() to always uses unmapped fsids Christian Brauner
                   ` (4 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
Make sure that during suid/sgid binary execution we lookup the fsids in the
fsid mappings. If the kernel is compiled without fsid mappings or now fsid
mappings are setup the behavior is unchanged.
Assuming we have a binary in a given user namespace that is owned by 0:0 in the
given user namespace which appears as 300000:300000 on-disk in the initial user
namespace. Now assume we write an id mapping of 0 100000 100000 and an fsid
mapping for 0 300000 300000 in the user namespace. When we hit bprm_fill_uid()
during setid execution we will retrieve inode kuid=100000 and kgid=1000000. We
first check whether there's an fsid mapping for these kids. In our scenario we
find that they map to fsuid=0 and fsgid=0 in the user namespace. Now we
translate them into kids in the id mapping. In our example they translate to
kuid=100000 and kgid=100000 which means the file will ultimately run as uid=0
and gid=0 in the user namespace and as uid=100000, gid=100000 in the initial
user namespace.
Let's alter the example and assume that there is an fsid mapping of 0 300000
300000 set up but no id mapping has been setup for the user namespace. In this
the last step of translating into a valid kid pair in the id mappings will fail
and we will behave as before and ignore the sid bits.
Cc: Jann Horn <jannh@google.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
patch added
- Christian Brauner <christian.brauner@ubuntu.com>:
  - Make sure that bprm_fill_uid() handles fsid mappings.
---
 fs/exec.c | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)
diff --git a/fs/exec.c b/fs/exec.c
index db17be51b112..9e4a7e757cef 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -62,6 +62,7 @@
 #include <linux/oom.h>
 #include <linux/compat.h>
 #include <linux/vmalloc.h>
+#include <linux/fsuidgid.h>
 
 #include <linux/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1518,8 +1519,8 @@ static void bprm_fill_uid(struct linux_binprm *bprm)
 {
 	struct inode *inode;
 	unsigned int mode;
-	kuid_t uid;
-	kgid_t gid;
+	kuid_t uid, euid;
+	kgid_t gid, egid;
 
 	/*
 	 * Since this can be called multiple times (via prepare_binprm),
@@ -1551,18 +1552,30 @@ static void bprm_fill_uid(struct linux_binprm *bprm)
 	inode_unlock(inode);
 
 	/* We ignore suid/sgid if there are no mappings for them in the ns */
-	if (!kuid_has_mapping(bprm->cred->user_ns, uid) ||
-		 !kgid_has_mapping(bprm->cred->user_ns, gid))
+	if (!kfsuid_has_mapping(bprm->cred->user_ns, uid) ||
+		 !kfsgid_has_mapping(bprm->cred->user_ns, gid))
 		return;
 
+	if (mode & S_ISUID) {
+		euid = kfsuid_to_kuid(bprm->cred->user_ns, uid);
+		if (!uid_valid(euid))
+			return;
+	}
+
+	if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
+		egid = kfsgid_to_kgid(bprm->cred->user_ns, gid);
+		if (!gid_valid(egid))
+			return;
+	}
+
 	if (mode & S_ISUID) {
 		bprm->per_clear |= PER_CLEAR_ON_SETID;
-		bprm->cred->euid = uid;
+		bprm->cred->euid = euid;
 	}
 
 	if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
 		bprm->per_clear |= PER_CLEAR_ON_SETID;
-		bprm->cred->egid = gid;
+		bprm->cred->egid = egid;
 	}
 }
 
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 27/28] ptrace: adapt ptrace_may_access() to always uses unmapped fsids
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (25 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 26/28] exec: bprm_fill_uid(): handle fsid mappings Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-14 18:35 ` [PATCH v2 28/28] devpts: handle fsid mappings Christian Brauner
                   ` (3 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
ptrace_may_access() with PTRACE_MODE_FSCREDS is only used with proc and proc
wants to use the unmapped fsids.
Cc: Jann Horn <jannh@google.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
patch added
---
 kernel/ptrace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 43d6179508d6..3734713cc0dd 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -304,8 +304,8 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
 		return 0;
 	rcu_read_lock();
 	if (mode & PTRACE_MODE_FSCREDS) {
-		caller_uid = cred->fsuid;
-		caller_gid = cred->fsgid;
+		caller_uid = cred->kfsuid;
+		caller_gid = cred->kfsgid;
 	} else {
 		/*
 		 * Using the euid would make more sense here, but something
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * [PATCH v2 28/28] devpts: handle fsid mappings
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (26 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 27/28] ptrace: adapt ptrace_may_access() to always uses unmapped fsids Christian Brauner
@ 2020-02-14 18:35 ` Christian Brauner
  2020-02-16 15:55 ` [PATCH v2 00/28] user_namespace: introduce " Florian Weimer
                   ` (2 subsequent siblings)
  30 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
When a uid or gid mount option is specified with devpts have it lookup the
corresponding kfsids in the fsid mappings. If no fsid mappings are setup the
behavior is unchanged, i.e. fsids are looked up in the id mappings.
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
unchanged
---
 fs/devpts/inode.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 42e5a766d33c..139958892572 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -24,6 +24,7 @@
 #include <linux/parser.h>
 #include <linux/fsnotify.h>
 #include <linux/seq_file.h>
+#include <linux/fsuidgid.h>
 
 #define DEVPTS_DEFAULT_MODE 0600
 /*
@@ -277,7 +278,7 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
 		case Opt_uid:
 			if (match_int(&args[0], &option))
 				return -EINVAL;
-			uid = make_kuid(current_user_ns(), option);
+			uid = make_kfsuid(current_user_ns(), option);
 			if (!uid_valid(uid))
 				return -EINVAL;
 			opts->uid = uid;
@@ -286,7 +287,7 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
 		case Opt_gid:
 			if (match_int(&args[0], &option))
 				return -EINVAL;
-			gid = make_kgid(current_user_ns(), option);
+			gid = make_kfsgid(current_user_ns(), option);
 			if (!gid_valid(gid))
 				return -EINVAL;
 			opts->gid = gid;
@@ -410,7 +411,7 @@ static int devpts_show_options(struct seq_file *seq, struct dentry *root)
 			   from_kuid_munged(&init_user_ns, opts->uid));
 	if (opts->setgid)
 		seq_printf(seq, ",gid=%u",
-			   from_kgid_munged(&init_user_ns, opts->gid));
+			   from_kfsgid_munged(&init_user_ns, opts->gid));
 	seq_printf(seq, ",mode=%03o", opts->mode);
 	seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode);
 	if (opts->max < NR_UNIX98_PTY_MAX)
-- 
2.25.0
^ permalink raw reply related	[flat|nested] 43+ messages in thread
- * Re: [PATCH v2 00/28] user_namespace: introduce fsid mappings
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (27 preceding siblings ...)
  2020-02-14 18:35 ` [PATCH v2 28/28] devpts: handle fsid mappings Christian Brauner
@ 2020-02-16 15:55 ` Florian Weimer
  2020-02-16 16:40   ` Christian Brauner
  2020-02-17 21:06 ` James Bottomley
  2020-02-17 21:11 ` James Bottomley
  30 siblings, 1 reply; 43+ messages in thread
From: Florian Weimer @ 2020-02-16 15:55 UTC (permalink / raw)
  To: Christian Brauner
  Cc: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn,
	smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api
* Christian Brauner:
> With fsid mappings we can solve this by writing an id mapping of 0
> 100000 100000 and an fsid mapping of 0 300000 100000. On filesystem
> access the kernel will now lookup the mapping for 300000 in the fsid
> mapping tables of the user namespace. And since such a mapping exists,
> the corresponding files will have correct ownership.
I'm worried that this is a bit of a management nightmare because the
data about the mapping does not live within the file system (it's
externally determined, static, but crucial to the interpretation of
file system content).  I expect that many organizations have
centralized allocation of user IDs, but centralized allocation of the
static mapping does not appear feasible.
Have you considered a more complex design, where untranslated nested
user IDs are store in a file attribute (or something like that)?  This
way, any existing user ID infrastructure can be carried over largely
unchanged.
^ permalink raw reply	[flat|nested] 43+ messages in thread
- * Re: [PATCH v2 00/28] user_namespace: introduce fsid mappings
  2020-02-16 15:55 ` [PATCH v2 00/28] user_namespace: introduce " Florian Weimer
@ 2020-02-16 16:40   ` Christian Brauner
  0 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-16 16:40 UTC (permalink / raw)
  To: Florian Weimer
  Cc: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn,
	smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api
On Sun, Feb 16, 2020 at 04:55:49PM +0100, Florian Weimer wrote:
> * Christian Brauner:
> 
> > With fsid mappings we can solve this by writing an id mapping of 0
> > 100000 100000 and an fsid mapping of 0 300000 100000. On filesystem
> > access the kernel will now lookup the mapping for 300000 in the fsid
> > mapping tables of the user namespace. And since such a mapping exists,
> > the corresponding files will have correct ownership.
> 
> I'm worried that this is a bit of a management nightmare because the
> data about the mapping does not live within the file system (it's
> externally determined, static, but crucial to the interpretation of
> file system content).  I expect that many organizations have
Iiuc, that's already the case with user namespaces right now e.g. when
you have an on-disk mapping that doesn't match your user namespace
mapping.
> centralized allocation of user IDs, but centralized allocation of the
> static mapping does not appear feasible.
I thought we're working on this right now with the new nss
infrastructure to register id mappings aka the shadow discussion we've
been having.
> 
> Have you considered a more complex design, where untranslated nested
> user IDs are store in a file attribute (or something like that)?  This
That doesn't sound like it would be feasible especially in the nesting
case wrt. to performance.
Christian
^ permalink raw reply	[flat|nested] 43+ messages in thread 
 
- * Re: [PATCH v2 00/28] user_namespace: introduce fsid mappings
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (28 preceding siblings ...)
  2020-02-16 15:55 ` [PATCH v2 00/28] user_namespace: introduce " Florian Weimer
@ 2020-02-17 21:06 ` James Bottomley
  2020-02-17 21:20   ` Christian Brauner
  2020-02-17 21:11 ` James Bottomley
  30 siblings, 1 reply; 43+ messages in thread
From: James Bottomley @ 2020-02-17 21:06 UTC (permalink / raw)
  To: Christian Brauner, Stéphane Graber, Eric W. Biederman,
	Aleksa Sarai, Jann Horn
  Cc: Kees Cook, Jonathan Corbet, linux-kernel, containers, smbarber,
	Seth Forshee, linux-security-module, Alexander Viro, linux-api,
	linux-fsdevel, Alexey Dobriyan
On Fri, 2020-02-14 at 19:35 +0100, Christian Brauner wrote:
[...]
> People not as familiar with user namespaces might not be aware that
> fsid mappings already exist. Right now, fsid mappings are always
> identical to id mappings. Specifically, the kernel will lookup fsuids
> in the uid mappings and fsgids in the gid mappings of the relevant
> user namespace.
This isn't actually entirely true: today we have the superblock user
namespace, which can be used for fsid remapping on filesystems that
support it (currently f2fs and fuse).  Since this is a single shift,
how is it going to play with s_user_ns?  Do you have to understand the
superblock mapping to use this shift, or are we simply using this to
replace s_user_ns?
James
^ permalink raw reply	[flat|nested] 43+ messages in thread
- * Re: [PATCH v2 00/28] user_namespace: introduce fsid mappings
  2020-02-17 21:06 ` James Bottomley
@ 2020-02-17 21:20   ` Christian Brauner
  2020-02-17 22:35     ` James Bottomley
  0 siblings, 1 reply; 43+ messages in thread
From: Christian Brauner @ 2020-02-17 21:20 UTC (permalink / raw)
  To: James Bottomley
  Cc: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn,
	Kees Cook, Jonathan Corbet, linux-kernel, containers, smbarber,
	Seth Forshee, linux-security-module, Alexander Viro, linux-api,
	linux-fsdevel, Alexey Dobriyan
On Mon, Feb 17, 2020 at 01:06:08PM -0800, James Bottomley wrote:
> On Fri, 2020-02-14 at 19:35 +0100, Christian Brauner wrote:
> [...]
> > People not as familiar with user namespaces might not be aware that
> > fsid mappings already exist. Right now, fsid mappings are always
> > identical to id mappings. Specifically, the kernel will lookup fsuids
> > in the uid mappings and fsgids in the gid mappings of the relevant
> > user namespace.
> 
> This isn't actually entirely true: today we have the superblock user
> namespace, which can be used for fsid remapping on filesystems that
> support it (currently f2fs and fuse).  Since this is a single shift,
Note that this states "the relevant" user namespace not the caller's
user namespace. And the point is true even for such filesystems. fuse
does call make_kuid(fc->user_ns, attr->uid) and hence looks up the
mapping in the id mappings.. This would be replaced by make_kfsuid().
> how is it going to play with s_user_ns?  Do you have to understand the
> superblock mapping to use this shift, or are we simply using this to
> replace s_user_ns?
I'm not sure what you mean by understand the superblock mapping. The
case is not different from the devpts patch in this series.
Fuse needs to be changed to call make_kfsuid() since it is mountable
inside user namespaces at which point everthing just works.
^ permalink raw reply	[flat|nested] 43+ messages in thread 
- * Re: [PATCH v2 00/28] user_namespace: introduce fsid mappings
  2020-02-17 21:20   ` Christian Brauner
@ 2020-02-17 22:35     ` James Bottomley
  2020-02-17 23:05       ` Christian Brauner
  0 siblings, 1 reply; 43+ messages in thread
From: James Bottomley @ 2020-02-17 22:35 UTC (permalink / raw)
  To: Christian Brauner
  Cc: linux-security-module, Kees Cook, Jonathan Corbet,
	Alexey Dobriyan, linux-api, containers, Jann Horn, linux-kernel,
	smbarber, Seth Forshee, Eric W. Biederman, linux-fsdevel,
	Alexander Viro
On Mon, 2020-02-17 at 22:20 +0100, Christian Brauner wrote:
> On Mon, Feb 17, 2020 at 01:06:08PM -0800, James Bottomley wrote:
> > On Fri, 2020-02-14 at 19:35 +0100, Christian Brauner wrote:
> > [...]
> > > People not as familiar with user namespaces might not be aware
> > > that fsid mappings already exist. Right now, fsid mappings are
> > > always identical to id mappings. Specifically, the kernel will
> > > lookup fsuids in the uid mappings and fsgids in the gid mappings
> > > of the relevant user namespace.
> > 
> > This isn't actually entirely true: today we have the superblock
> > user namespace, which can be used for fsid remapping on filesystems
> > that support it (currently f2fs and fuse).  Since this is a single
> > shift,
> 
> Note that this states "the relevant" user namespace not the caller's
> user namespace. And the point is true even for such filesystems. fuse
> does call make_kuid(fc->user_ns, attr->uid) and hence looks up the
> mapping in the id mappings.. This would be replaced by make_kfsuid().
> 
> > how is it going to play with s_user_ns?  Do you have to understand
> > the superblock mapping to use this shift, or are we simply using
> > this to replace s_user_ns?
> 
> I'm not sure what you mean by understand the superblock mapping. The
> case is not different from the devpts patch in this series.
So since devpts wasn't originally a s_user_ns consumer, I assume you're
thinking that this patch series just replaces the whole of s_user_ns
for fuse and f2fs and we can remove it?
> Fuse needs to be changed to call make_kfsuid() since it is mountable
> inside user namespaces at which point everthing just works.
The fuse case is slightly more complicated because there are sound
reasons to run the daemon in a separate user namespace regardless of
where the end fuse mount is.
James
^ permalink raw reply	[flat|nested] 43+ messages in thread 
- * Re: [PATCH v2 00/28] user_namespace: introduce fsid mappings
  2020-02-17 22:35     ` James Bottomley
@ 2020-02-17 23:05       ` Christian Brauner
  0 siblings, 0 replies; 43+ messages in thread
From: Christian Brauner @ 2020-02-17 23:05 UTC (permalink / raw)
  To: James Bottomley
  Cc: linux-security-module, Kees Cook, Jonathan Corbet,
	Alexey Dobriyan, linux-api, containers, Jann Horn, linux-kernel,
	smbarber, Seth Forshee, Eric W. Biederman, linux-fsdevel,
	Alexander Viro
On Mon, Feb 17, 2020 at 02:35:38PM -0800, James Bottomley wrote:
> On Mon, 2020-02-17 at 22:20 +0100, Christian Brauner wrote:
> > On Mon, Feb 17, 2020 at 01:06:08PM -0800, James Bottomley wrote:
> > > On Fri, 2020-02-14 at 19:35 +0100, Christian Brauner wrote:
> > > [...]
> > > > People not as familiar with user namespaces might not be aware
> > > > that fsid mappings already exist. Right now, fsid mappings are
> > > > always identical to id mappings. Specifically, the kernel will
> > > > lookup fsuids in the uid mappings and fsgids in the gid mappings
> > > > of the relevant user namespace.
> > > 
> > > This isn't actually entirely true: today we have the superblock
> > > user namespace, which can be used for fsid remapping on filesystems
> > > that support it (currently f2fs and fuse).  Since this is a single
> > > shift,
> > 
> > Note that this states "the relevant" user namespace not the caller's
> > user namespace. And the point is true even for such filesystems. fuse
> > does call make_kuid(fc->user_ns, attr->uid) and hence looks up the
> > mapping in the id mappings.. This would be replaced by make_kfsuid().
> > 
> > > how is it going to play with s_user_ns?  Do you have to understand
> > > the superblock mapping to use this shift, or are we simply using
> > > this to replace s_user_ns?
> > 
> > I'm not sure what you mean by understand the superblock mapping. The
> > case is not different from the devpts patch in this series.
> 
> So since devpts wasn't originally a s_user_ns consumer, I assume you're
> thinking that this patch series just replaces the whole of s_user_ns
> for fuse and f2fs and we can remove it?
No, as I said it's just about replacing make_kuid() with make_kfsuid().
This doesn't change anything for all cases where id mappings equal fsid
mappings and if there are separate id mappings it will look at the fsid
mappings for the user namespace in struct fuse_conn.
> 
> > Fuse needs to be changed to call make_kfsuid() since it is mountable
> > inside user namespaces at which point everthing just works.
> 
> The fuse case is slightly more complicated because there are sound
> reasons to run the daemon in a separate user namespace regardless of
> where the end fuse mount is.
I'm curious how you're doing that today as it's usually
tricky to mount across mount namespaces? In any case, this patchset
doesn't change any of that fuse logic, so thing will keep working as
they do today.
^ permalink raw reply	[flat|nested] 43+ messages in thread 
 
 
 
- * Re: [PATCH v2 00/28] user_namespace: introduce fsid mappings
  2020-02-14 18:35 [PATCH v2 00/28] user_namespace: introduce fsid mappings Christian Brauner
                   ` (29 preceding siblings ...)
  2020-02-17 21:06 ` James Bottomley
@ 2020-02-17 21:11 ` James Bottomley
       [not found]   ` <CA+enf=vwd-dxzve87t7Mw1Z35RZqdLzVaKq=fZ4EGOpnES0f5w@mail.gmail.com>
  30 siblings, 1 reply; 43+ messages in thread
From: James Bottomley @ 2020-02-17 21:11 UTC (permalink / raw)
  To: Christian Brauner, Stéphane Graber, Eric W. Biederman,
	Aleksa Sarai, Jann Horn
  Cc: Kees Cook, Jonathan Corbet, linux-kernel, containers, smbarber,
	Seth Forshee, linux-security-module, Alexander Viro, linux-api,
	linux-fsdevel, Alexey Dobriyan
On Fri, 2020-02-14 at 19:35 +0100, Christian Brauner wrote:
[...]
> With this patch series we simply introduce the ability to create fsid
> mappings that are different from the id mappings of a user namespace.
> The whole feature set is placed under a config option that defaults
> to false.
> 
> In the usual case of running an unprivileged container we will have
> setup an id mapping, e.g. 0 100000 100000. The on-disk mapping will
> correspond to this id mapping, i.e. all files which we want to appear
> as 0:0 inside the user namespace will be chowned to 100000:100000 on
> the host. This works, because whenever the kernel needs to do a
> filesystem access it will lookup the corresponding uid and gid in the
> idmapping tables of the container.
> Now think about the case where we want to have an id mapping of 0
> 100000 100000 but an on-disk mapping of 0 300000 100000 which is
> needed to e.g. share a single on-disk mapping with multiple
> containers that all have different id mappings.
> This will be problematic. Whenever a filesystem access is requested,
> the kernel will now try to lookup a mapping for 300000 in the id
> mapping tables of the user namespace but since there is none the
> files will appear to be owned by the overflow id, i.e. usually
> 65534:65534 or nobody:nogroup.
> 
> With fsid mappings we can solve this by writing an id mapping of 0
> 100000 100000 and an fsid mapping of 0 300000 100000. On filesystem
> access the kernel will now lookup the mapping for 300000 in the fsid
> mapping tables of the user namespace. And since such a mapping
> exists, the corresponding files will have correct ownership.
How do we parametrise this new fsid shift for the unprivileged use
case?  For newuidmap/newgidmap, it's easy because each user gets a
dedicated range and everything "just works (tm)".  However, for the
fsid mapping, assuming some newfsuid/newfsgid tool to help, that tool
has to know not only your allocated uid/gid chunk, but also the offset
map of the image.  The former is easy, but the latter is going to vary
by the actual image ... well unless we standardise some accepted shift
for images and it simply becomes a known static offset.
James
^ permalink raw reply	[flat|nested] 43+ messages in thread