Linux Security Modules development
 help / color / mirror / Atom feed
* [PATCH v19 09/12] arch: Wire up landlock() syscall
From: Mickaël Salaün @ 2020-07-07 18:09 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mickaël Salaün, Al Viro, Andy Lutomirski, Anton Ivanov,
	Arnd Bergmann, Casey Schaufler, James Morris, Jann Horn,
	Jeff Dike, Jonathan Corbet, Kees Cook, Michael Kerrisk,
	Mickaël Salaün, Richard Weinberger, Serge E . Hallyn,
	Shuah Khan, Vincent Dagonneau, kernel-hardening, linux-api,
	linux-arch, linux-doc, linux-fsdevel, linux-kselftest,
	linux-security-module, x86
In-Reply-To: <20200707180955.53024-1-mic@digikod.net>

Wire up the landlock() system call for all architectures.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: James Morris <jmorris@namei.org>
Cc: Jann Horn <jannh@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Serge E. Hallyn <serge@hallyn.com>
---

Changes since v18:
* Increase the syscall number because of the new faccessat2(2).

Changes since v14:
* Add all architectures.

Changes since v13:
* New implementation.
---
 arch/alpha/kernel/syscalls/syscall.tbl      | 1 +
 arch/arm/tools/syscall.tbl                  | 1 +
 arch/arm64/include/asm/unistd.h             | 2 +-
 arch/arm64/include/asm/unistd32.h           | 2 ++
 arch/ia64/kernel/syscalls/syscall.tbl       | 1 +
 arch/m68k/kernel/syscalls/syscall.tbl       | 1 +
 arch/microblaze/kernel/syscalls/syscall.tbl | 1 +
 arch/mips/kernel/syscalls/syscall_n32.tbl   | 1 +
 arch/mips/kernel/syscalls/syscall_n64.tbl   | 1 +
 arch/mips/kernel/syscalls/syscall_o32.tbl   | 1 +
 arch/parisc/kernel/syscalls/syscall.tbl     | 1 +
 arch/powerpc/kernel/syscalls/syscall.tbl    | 1 +
 arch/s390/kernel/syscalls/syscall.tbl       | 1 +
 arch/sh/kernel/syscalls/syscall.tbl         | 1 +
 arch/sparc/kernel/syscalls/syscall.tbl      | 1 +
 arch/x86/entry/syscalls/syscall_32.tbl      | 1 +
 arch/x86/entry/syscalls/syscall_64.tbl      | 1 +
 arch/xtensa/kernel/syscalls/syscall.tbl     | 1 +
 include/uapi/asm-generic/unistd.h           | 4 +++-
 19 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 5ddd128d4b7a..f11e690e0419 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -478,3 +478,4 @@
 547	common	openat2				sys_openat2
 548	common	pidfd_getfd			sys_pidfd_getfd
 549	common	faccessat2			sys_faccessat2
+550	common	landlock			sys_landlock
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index d5cae5ffede0..1ebac2411785 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -452,3 +452,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	landlock			sys_landlock
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 3b859596840d..b3b2019f8d16 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -38,7 +38,7 @@
 #define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE + 5)
 #define __ARM_NR_COMPAT_END		(__ARM_NR_COMPAT_BASE + 0x800)
 
-#define __NR_compat_syscalls		440
+#define __NR_compat_syscalls		441
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 6d95d0c8bf2f..083f7994d924 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -885,6 +885,8 @@ __SYSCALL(__NR_openat2, sys_openat2)
 __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
 #define __NR_faccessat2 439
 __SYSCALL(__NR_faccessat2, sys_faccessat2)
+#define __NR_landlock 440
+__SYSCALL(__NR_landlock, sys_landlock)
 
 /*
  * Please add new compat syscalls above this comment and update
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 49e325b604b3..210692084aff 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -359,3 +359,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	landlock			sys_landlock
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index f71b1bbcc198..31ed52871053 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -438,3 +438,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	landlock			sys_landlock
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index edacc4561f2b..56a1d49884a5 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -444,3 +444,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	landlock			sys_landlock
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index f777141f5256..37457511334a 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -377,3 +377,4 @@
 437	n32	openat2				sys_openat2
 438	n32	pidfd_getfd			sys_pidfd_getfd
 439	n32	faccessat2			sys_faccessat2
+440	n32	landlock			sys_landlock
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index da8c76394e17..0c68b01f47d2 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -353,3 +353,4 @@
 437	n64	openat2				sys_openat2
 438	n64	pidfd_getfd			sys_pidfd_getfd
 439	n64	faccessat2			sys_faccessat2
+440	n64	landlock			sys_landlock
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 13280625d312..7e4ba2c41f8c 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -426,3 +426,4 @@
 437	o32	openat2				sys_openat2
 438	o32	pidfd_getfd			sys_pidfd_getfd
 439	o32	faccessat2			sys_faccessat2
+440	o32	landlock			sys_landlock
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 5a758fa6ec52..40d14e1480cb 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -436,3 +436,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	landlock			sys_landlock
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index f833a3190822..1615462665e1 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -528,3 +528,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	landlock			sys_landlock
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index bfdcb7633957..5f0835738e90 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -441,3 +441,4 @@
 437  common	openat2			sys_openat2			sys_openat2
 438  common	pidfd_getfd		sys_pidfd_getfd			sys_pidfd_getfd
 439  common	faccessat2		sys_faccessat2			sys_faccessat2
+440  common	landlock		sys_landlock			sys_landlock
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index acc35daa1b79..6e7414763f10 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -441,3 +441,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	landlock			sys_landlock
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 8004a276cb74..b9996e1f70f0 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -484,3 +484,4 @@
 437	common	openat2			sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	landlock			sys_landlock
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index d8f8a1a69ed1..1673c287f2e6 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -443,3 +443,4 @@
 437	i386	openat2			sys_openat2
 438	i386	pidfd_getfd		sys_pidfd_getfd
 439	i386	faccessat2		sys_faccessat2
+440	i386	landlock		sys_landlock
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 78847b32e137..b3bae66a2c7c 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -360,6 +360,7 @@
 437	common	openat2			sys_openat2
 438	common	pidfd_getfd		sys_pidfd_getfd
 439	common	faccessat2		sys_faccessat2
+440	common	landlock		sys_landlock
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 69d0d73876b3..ea0f0b186e8e 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -409,3 +409,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	landlock			sys_landlock
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index f4a01305d9a6..a63a411a74d5 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -857,9 +857,11 @@ __SYSCALL(__NR_openat2, sys_openat2)
 __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
 #define __NR_faccessat2 439
 __SYSCALL(__NR_faccessat2, sys_faccessat2)
+#define __NR_landlock 440
+__SYSCALL(__NR_landlock, sys_landlock)
 
 #undef __NR_syscalls
-#define __NR_syscalls 440
+#define __NR_syscalls 441
 
 /*
  * 32 bit systems traditionally used different
-- 
2.27.0


^ permalink raw reply related

* [PATCH v19 08/12] landlock: Add syscall implementation
From: Mickaël Salaün @ 2020-07-07 18:09 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mickaël Salaün, Al Viro, Andy Lutomirski, Anton Ivanov,
	Arnd Bergmann, Casey Schaufler, James Morris, Jann Horn,
	Jeff Dike, Jonathan Corbet, Kees Cook, Michael Kerrisk,
	Mickaël Salaün, Richard Weinberger, Serge E . Hallyn,
	Shuah Khan, Vincent Dagonneau, kernel-hardening, linux-api,
	linux-arch, linux-doc, linux-fsdevel, linux-kselftest,
	linux-security-module, x86
In-Reply-To: <20200707180955.53024-1-mic@digikod.net>

This system call, inspired from seccomp(2) and bpf(2), is designed to be
used by unprivileged processes to sandbox themselves.  It has the same
usage restrictions as seccomp(2): the caller must have the no_new_privs
attribute set or have CAP_SYS_ADMIN in the current user namespace.

Here are the motivations for this new syscall:
* A sandboxed process may not have access to file systems, including
  /dev, /sys or /proc, but it should still be able to add more
  restrictions to itself.
* Neither prctl(2) nor seccomp(2) (which was used in a previous version)
  fit well with the current definition of a Landlock security policy.
* It is quite easy to whitelist this syscall with seccomp-bpf to enable
  all processes to use it.  It is also easy to filter specific commands
  or options to restrict a process to a subset of Landlock features.

There is currently four commands:
* LANDLOCK_CMD_GET_FEATURES: Gets the supported features (required for
  backward and forward compatibility, and best-effort security).
* LANDLOCK_CMD_CREATE_RULESET: Creates a ruleset and returns its file
  descriptor.
* LANDLOCK_CMD_ADD_RULE: Adds a rule (e.g. file hierarchy access) to a
  ruleset, identified by the dedicated file descriptor.
* LANDLOCK_CMD_ENFORCE_RULESET: Enforces a ruleset on the current thread
  and its future children (similar to seccomp).

Each command has at least one option, which enables to define the
attribute types passed to the syscall.  All attribute types (structures)
are checked at build time to ensure that they don't contain holes and
that they are aligned the same way for each architecture.  The struct
landlock_attr_features contains __u32 options_* fields which is enough
to store 32-bits syscall arguments, and __u16 size_attr_* fields which
is enough for the maximal struct size (i.e. page size) passed through
the landlock syscall.  The other fields can have __u64 type for flags
and bitfields, and __s32 type for file descriptors.

See the user and kernel documentation for more details (provided by a
following commit): Documentation/security/landlock/

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: James Morris <jmorris@namei.org>
Cc: Jann Horn <jannh@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Serge E. Hallyn <serge@hallyn.com>
---

Changes since v18:
* Remove useless include.
* Remove LLATTR_SIZE() which was only used to shorten lines. Cf. commit
  bdc48fa11e46 ("checkpatch/coding-style: deprecate 80-column warning").

Changes since v17:
* Synchronize syscall declaration.
* Fix comment.

Changes since v16:
* Add a size_attr_features field to struct landlock_attr_features for
  self-introspection, and move the access_fs field to be more
  consistent.
* Replace __aligned_u64 types of attribute fields with __u16, __s32,
  __u32 and __u64, and check at build time that these structures does
  not contain hole and that they are aligned the same way (8-bits) on
  all architectures.  This shrinks the size of the userspace ABI, which
  may be appreciated especially for struct landlock_attr_features which
  could grow a lot in the future.  For instance, struct
  landlock_attr_features shrinks from 72 bytes to 32 bytes.  This change
  also enables to remove 64-bits to 32-bits conversion checks.
* Switch syscall attribute pointer and size arguments to follow similar
  syscall argument order (e.g. bpf, clone3, openat2).
* Set LANDLOCK_OPT_* types to 32-bits.
* Allow enforcement of empty ruleset, which enables deny-all policies.
* Fix documentation inconsistency.

Changes since v15:
* Do not add file descriptors referring to internal filesystems (e.g.
  nsfs) in a ruleset.
* Replace is_user_mountable() with in-place clean checks.
* Replace EBADR with EBADFD in get_ruleset_from_fd() and
  get_path_from_fd().
* Remove ruleset's show_fdinfo() for now.

Changes since v14:
* Remove the security_file_open() check in get_path_from_fd(): an
  opened FD should not be restricted here, and even less with this hook.
  As a result, it is now allowed to add a path to a ruleset even if the
  access to this path is not allowed (without O_PATH). This doesn't
  change the fact that enforcing a ruleset can't grant any right, only
  remove some rights.  The new layer levels add more consistent
  restrictions.
* Check minimal landlock_attr_* size/content. This fix the case when
  no data was provided and e.g., FD 0 was interpreted as ruleset_fd.
  Now this leads to a returned -EINVAL.
* Fix credential double-free error case.
* Complete struct landlock_attr_size with size_attr_enforce.
* Fix undefined reference to syscall when Landlock is not selected.
* Remove f.file->f_path.mnt check (suggested by Al Viro).
* Add build-time checks.
* Move ABI checks from fs.c .
* Constify variables.
* Fix spelling.
* Add comments.

Changes since v13:
* New implementation, replacing the dependency on seccomp(2) and bpf(2).
---
 include/linux/syscalls.h      |   3 +
 include/uapi/linux/landlock.h | 224 +++++++++++++++
 kernel/sys_ni.c               |   3 +
 security/landlock/Makefile    |   2 +-
 security/landlock/syscall.c   | 526 ++++++++++++++++++++++++++++++++++
 5 files changed, 757 insertions(+), 1 deletion(-)
 create mode 100644 security/landlock/syscall.c

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index b951a87da987..3c0d8830d3bc 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1005,6 +1005,9 @@ asmlinkage long sys_pidfd_send_signal(int pidfd, int sig,
 				       siginfo_t __user *info,
 				       unsigned int flags);
 asmlinkage long sys_pidfd_getfd(int pidfd, int fd, unsigned int flags);
+asmlinkage long sys_landlock(unsigned int command, unsigned int options,
+			     void __user *attr1_ptr, size_t attr1_size,
+			     void __user *attr2_ptr, size_t attr2_size);
 
 /*
  * Architecture-specific system calls
diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h
index 5141185e6487..0e5829f2178b 100644
--- a/include/uapi/linux/landlock.h
+++ b/include/uapi/linux/landlock.h
@@ -9,6 +9,230 @@
 #ifndef _UAPI__LINUX_LANDLOCK_H__
 #define _UAPI__LINUX_LANDLOCK_H__
 
+#include <linux/types.h>
+
+/**
+ * enum landlock_cmd - Landlock commands
+ *
+ * First argument of sys_landlock().
+ */
+enum landlock_cmd {
+	/**
+	 * @LANDLOCK_CMD_GET_FEATURES: Asks the kernel for supported Landlock
+	 * features.  The option argument must contains
+	 * %LANDLOCK_OPT_GET_FEATURES.  This commands fills the &struct
+	 * landlock_attr_features provided as first attribute.
+	 */
+	LANDLOCK_CMD_GET_FEATURES = 1,
+	/**
+	 * @LANDLOCK_CMD_CREATE_RULESET: Creates a new ruleset and return its
+	 * file descriptor on success.  The option argument must contains
+	 * %LANDLOCK_OPT_CREATE_RULESET.  The ruleset is defined by the &struct
+	 * landlock_attr_ruleset provided as first attribute.
+	 */
+	LANDLOCK_CMD_CREATE_RULESET,
+	/**
+	 * @LANDLOCK_CMD_ADD_RULE: Adds a rule to a ruleset.  The option
+	 * argument must contains %LANDLOCK_OPT_ADD_RULE_PATH_BENEATH.  The
+	 * ruleset and the rule are both defined by the &struct
+	 * landlock_attr_path_beneath provided as first attribute.
+	 */
+	LANDLOCK_CMD_ADD_RULE,
+	/**
+	 * @LANDLOCK_CMD_ENFORCE_RULESET: Enforces a ruleset on the current
+	 * process.  The option argument must contains
+	 * %LANDLOCK_OPT_ENFORCE_RULESET.  The ruleset is defined by the
+	 * &struct landlock_attr_enforce provided as first attribute.
+	 */
+	LANDLOCK_CMD_ENFORCE_RULESET,
+};
+
+/**
+ * DOC: options_intro
+ *
+ * These options may be used as second argument of sys_landlock().  Each
+ * command have a dedicated set of options, represented as bitmasks.  For two
+ * different commands, their options may overlap.  Each command have at least
+ * one option defining the used attribute type.  This also enables to always
+ * have a usable &struct landlock_attr_features (i.e. filled with bits).
+ */
+
+/**
+ * DOC: options_get_features
+ *
+ * Options for ``LANDLOCK_CMD_GET_FEATURES``
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * - %LANDLOCK_OPT_GET_FEATURES: the attr type is `struct
+ *   landlock_attr_features`.
+ */
+#define LANDLOCK_OPT_GET_FEATURES			(1 << 0)
+
+/**
+ * DOC: options_create_ruleset
+ *
+ * Options for ``LANDLOCK_CMD_CREATE_RULESET``
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * - %LANDLOCK_OPT_CREATE_RULESET: the attr type is `struct
+ *   landlock_attr_ruleset`.
+ */
+#define LANDLOCK_OPT_CREATE_RULESET			(1 << 0)
+
+/**
+ * DOC: options_add_rule
+ *
+ * Options for ``LANDLOCK_CMD_ADD_RULE``
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * - %LANDLOCK_OPT_ADD_RULE_PATH_BENEATH: the attr type is `struct
+ *   landlock_attr_path_beneath`.
+ */
+#define LANDLOCK_OPT_ADD_RULE_PATH_BENEATH		(1 << 0)
+
+/**
+ * DOC: options_enforce_ruleset
+ *
+ * Options for ``LANDLOCK_CMD_ENFORCE_RULESET``
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * - %LANDLOCK_OPT_ENFORCE_RULESET: the attr type is `struct
+ *   landlock_attr_enforce`.
+ */
+#define LANDLOCK_OPT_ENFORCE_RULESET			(1 << 0)
+
+/**
+ * struct landlock_attr_features - Receives the supported features
+ *
+ * This struct should be allocated by user space but it will be filled by the
+ * kernel to indicate the subset of Landlock features effectively handled by
+ * the running kernel.  This enables backward compatibility for applications
+ * which are developed on a newer kernel than the one running the application.
+ * This helps avoid hard errors that may entirely disable the use of Landlock
+ * features because some of them may not be supported.  Indeed, because
+ * Landlock is a security feature, even if the kernel doesn't support all the
+ * requested features, user space applications should still use the subset
+ * which is supported by the running kernel.  Indeed, a partial security policy
+ * can still improve the security of the application and better protect the
+ * user (i.e. best-effort approach).  The %LANDLOCK_CMD_GET_FEATURES command
+ * and &struct landlock_attr_features are future-proof because the future
+ * unknown fields requested by user space (i.e. a larger &struct
+ * landlock_attr_features) can still be filled with zeros.
+ *
+ * The Landlock commands will fail if an unsupported option or access is
+ * requested.  By firstly requesting the supported options and accesses, it is
+ * quite easy for the developer to binary AND these returned bitmasks with the
+ * used options and accesses from the attribute structs (e.g. &struct
+ * landlock_attr_ruleset), and even infer the supported Landlock commands.
+ * Indeed, because each command must support at least one option, the options_*
+ * fields are always filled if the related commands are supported.  The
+ * supported attributes are also discoverable thanks to the size_* fields.  All
+ * this data enable to create applications doing their best to sandbox
+ * themselves regardless of the running kernel.
+ */
+struct landlock_attr_features {
+	/**
+	 * @options_get_features: Options supported by the
+	 * %LANDLOCK_CMD_GET_FEATURES command. Cf. `Options`_.
+	 */
+	__u32 options_get_features;
+	/**
+	 * @options_create_ruleset: Options supported by the
+	 * %LANDLOCK_CMD_CREATE_RULESET command. Cf. `Options`_.
+	 */
+	__u32 options_create_ruleset;
+	/**
+	 * @options_add_rule: Options supported by the %LANDLOCK_CMD_ADD_RULE
+	 * command. Cf. `Options`_.
+	 */
+	__u32 options_add_rule;
+	/**
+	 * @options_enforce_ruleset: Options supported by the
+	 * %LANDLOCK_CMD_ENFORCE_RULESET command. Cf. `Options`_.
+	 */
+	__u32 options_enforce_ruleset;
+	/**
+	 * @size_attr_features: Size of the &struct landlock_attr_features as
+	 * known by the kernel (i.e.  ``sizeof(struct
+	 * landlock_attr_features)``).
+	 */
+	__u16 size_attr_features;
+	/**
+	 * @size_attr_ruleset: Size of the &struct landlock_attr_ruleset as
+	 * known by the kernel (i.e.  ``sizeof(struct
+	 * landlock_attr_ruleset)``).
+	 */
+	__u16 size_attr_ruleset;
+	/**
+	 * @size_attr_path_beneath: Size of the &struct
+	 * landlock_attr_path_beneath as known by the kernel (i.e.
+	 * ``sizeof(struct landlock_path_beneath)``).
+	 */
+	__u16 size_attr_path_beneath;
+	/**
+	 * @size_attr_enforce: Size of the &struct landlock_attr_enforce as
+	 * known by the kernel (i.e.  ``sizeof(struct landlock_enforce)``).
+	 */
+	__u16 size_attr_enforce;
+	/**
+	 * @access_fs: Subset of file system access supported by the running
+	 * kernel, used in &struct landlock_attr_ruleset and &struct
+	 * landlock_attr_path_beneath.  Cf. `Filesystem flags`_.
+	 */
+	__u64 access_fs;
+};
+
+/**
+ * struct landlock_attr_ruleset- Defines a new ruleset
+ *
+ * Used as first attribute for the %LANDLOCK_CMD_CREATE_RULESET command and
+ * with the %LANDLOCK_OPT_CREATE_RULESET option.
+ */
+struct landlock_attr_ruleset {
+	/**
+	 * @handled_access_fs: Bitmask of actions (cf. `Filesystem flags`_)
+	 * that is handled by this ruleset and should then be forbidden if no
+	 * rule explicitly allow them.  This is needed for backward
+	 * compatibility reasons.  The user space code should check the
+	 * effectively supported actions thanks to %LANDLOCK_CMD_GET_SUPPORTED
+	 * and &struct landlock_attr_features, and then adjust the arguments of
+	 * the next calls to sys_landlock() accordingly.
+	 */
+	__u64 handled_access_fs;
+};
+
+/**
+ * struct landlock_attr_path_beneath - Defines a path hierarchy
+ */
+struct landlock_attr_path_beneath {
+	/**
+	 * @ruleset_fd: File descriptor tied to the ruleset which should be
+	 * extended with this new access.
+	 */
+	__s32 ruleset_fd;
+	/**
+	 * @parent_fd: File descriptor, open with ``O_PATH``, which identify
+	 * the parent directory of a file hierarchy, or just a file.
+	 */
+	__s32 parent_fd;
+	/**
+	 * @allowed_access: Bitmask of allowed actions for this file hierarchy
+	 * (cf. `Filesystem flags`_).
+	 */
+	__u64 allowed_access;
+};
+
+/**
+ * struct landlock_attr_enforce - Describes the enforcement
+ */
+struct landlock_attr_enforce {
+	/**
+	 * @ruleset_fd: File descriptor tied to the ruleset to merge with the
+	 * current domain.
+	 */
+	__s32 ruleset_fd;
+};
+
 /**
  * DOC: fs_access
  *
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 3b69a560a7ac..70cea36eb7ce 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -264,6 +264,9 @@ COND_SYSCALL(request_key);
 COND_SYSCALL(keyctl);
 COND_SYSCALL_COMPAT(keyctl);
 
+/* security/landlock/syscall.c */
+COND_SYSCALL(landlock);
+
 /* arch/example/kernel/sys_example.c */
 
 /* mm/fadvise.c */
diff --git a/security/landlock/Makefile b/security/landlock/Makefile
index 92e3d80ab8ed..4388494779ec 100644
--- a/security/landlock/Makefile
+++ b/security/landlock/Makefile
@@ -1,4 +1,4 @@
 obj-$(CONFIG_SECURITY_LANDLOCK) := landlock.o
 
-landlock-y := setup.o object.o ruleset.o \
+landlock-y := setup.o syscall.o object.o ruleset.o \
 	cred.o ptrace.o fs.o
diff --git a/security/landlock/syscall.c b/security/landlock/syscall.c
new file mode 100644
index 000000000000..1559304bcfec
--- /dev/null
+++ b/security/landlock/syscall.c
@@ -0,0 +1,526 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - System call and user space interfaces
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#include <asm/current.h>
+#include <linux/anon_inodes.h>
+#include <linux/build_bug.h>
+#include <linux/capability.h>
+#include <linux/compiler_types.h>
+#include <linux/dcache.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/limits.h>
+#include <linux/mount.h>
+#include <linux/path.h>
+#include <linux/sched.h>
+#include <linux/security.h>
+#include <linux/stddef.h>
+#include <linux/syscalls.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <uapi/linux/landlock.h>
+
+#include "cred.h"
+#include "fs.h"
+#include "ruleset.h"
+#include "setup.h"
+
+/**
+ * copy_struct_if_any_from_user - Safe future-proof argument copying
+ *
+ * Extend copy_struct_from_user() to handle NULL @src, which allows for future
+ * use of @src even if it is not used right now.
+ *
+ * @dst: Kernel space pointer or NULL.
+ * @ksize: Actual size of the data pointed to by @dst.
+ * @ksize_min: Minimal required size to be copied.
+ * @src: User space pointer or NULL.
+ * @usize: (Alleged) size of the data pointed to by @src.
+ */
+static int copy_struct_if_any_from_user(void *const dst, const size_t ksize,
+		const size_t ksize_min, const void __user *const src,
+		const size_t usize)
+{
+	int ret;
+
+	/* Checks kernel buffer size inconsistencies. */
+	if (dst) {
+		if (WARN_ON_ONCE(ksize == 0))
+			return -EFAULT;
+	} else {
+		if (WARN_ON_ONCE(ksize != 0))
+			return -EFAULT;
+	}
+
+	/* Checks minimal size. */
+	if (WARN_ON_ONCE(ksize < ksize_min))
+		return -EFAULT;
+	if (usize < ksize_min)
+		return -EINVAL;
+
+	/* Handles empty user buffer. */
+	if (!src) {
+		if (usize != 0)
+			return -EFAULT;
+		if (dst)
+			memset(dst, 0, ksize);
+		return 0;
+	}
+
+	/* Checks user buffer size inconsistency and limit. */
+	if (usize == 0)
+		return -ENODATA;
+	if (usize > PAGE_SIZE)
+		return -E2BIG;
+
+	/* Copies user buffer and fills with zeros. */
+	if (dst)
+		return copy_struct_from_user(dst, ksize, src, usize);
+
+	/* Checks unknown user data. */
+	ret = check_zeroed_user(src, usize);
+	if (ret <= 0)
+		return ret ?: -E2BIG;
+	return 0;
+}
+
+/* Features */
+
+#define _LANDLOCK_OPT_GET_FEATURES_LAST		LANDLOCK_OPT_GET_FEATURES
+#define _LANDLOCK_OPT_GET_FEATURES_MASK		((_LANDLOCK_OPT_GET_FEATURES_LAST << 1) - 1)
+
+#define _LANDLOCK_OPT_CREATE_RULESET_LAST	LANDLOCK_OPT_CREATE_RULESET
+#define _LANDLOCK_OPT_CREATE_RULESET_MASK	((_LANDLOCK_OPT_CREATE_RULESET_LAST << 1) - 1)
+
+#define _LANDLOCK_OPT_ADD_RULE_LAST		LANDLOCK_OPT_ADD_RULE_PATH_BENEATH
+#define _LANDLOCK_OPT_ADD_RULE_MASK		((_LANDLOCK_OPT_ADD_RULE_LAST << 1) - 1)
+
+#define _LANDLOCK_OPT_ENFORCE_RULESET_LAST	LANDLOCK_OPT_ENFORCE_RULESET
+#define _LANDLOCK_OPT_ENFORCE_RULESET_MASK	((_LANDLOCK_OPT_ENFORCE_RULESET_LAST << 1) - 1)
+
+static int syscall_get_features(void __user *const attr_ptr,
+		const size_t attr_size)
+{
+	size_t data_size, fill_size;
+	const struct landlock_attr_features supported = {
+		.options_get_features = _LANDLOCK_OPT_GET_FEATURES_MASK,
+		.options_create_ruleset = _LANDLOCK_OPT_CREATE_RULESET_MASK,
+		.options_add_rule = _LANDLOCK_OPT_ADD_RULE_MASK,
+		.options_enforce_ruleset = _LANDLOCK_OPT_ENFORCE_RULESET_MASK,
+		.size_attr_features = sizeof(struct landlock_attr_features),
+		.size_attr_ruleset = sizeof(struct landlock_attr_ruleset),
+		.size_attr_path_beneath = sizeof(struct
+				landlock_attr_path_beneath),
+		.size_attr_enforce = sizeof(struct landlock_attr_enforce),
+		.access_fs = _LANDLOCK_ACCESS_FS_MASK,
+	};
+
+	BUILD_BUG_ON(!__same_type(supported.access_fs,
+		((struct landlock_attr_ruleset *)NULL)->handled_access_fs));
+	BUILD_BUG_ON(!__same_type(supported.access_fs,
+		((struct landlock_attr_path_beneath *)NULL)->allowed_access));
+
+	/* Checks attribute consistency. */
+	if (attr_size == 0)
+		return -ENODATA;
+	if (attr_size > PAGE_SIZE)
+		return -E2BIG;
+
+	/* Copy a subset of features to user space. */
+	data_size = min(sizeof(supported), attr_size);
+	if (copy_to_user(attr_ptr, &supported, data_size))
+		return -EFAULT;
+
+	/* Fills with zeros. */
+	fill_size = attr_size - data_size;
+	if (fill_size > 0 && clear_user(attr_ptr + data_size, fill_size))
+		return -EFAULT;
+	return 0;
+}
+
+/* Ruleset handling */
+
+static int fop_ruleset_release(struct inode *const inode,
+		struct file *const filp)
+{
+	struct landlock_ruleset *ruleset = filp->private_data;
+
+	landlock_put_ruleset(ruleset);
+	return 0;
+}
+
+static ssize_t fop_dummy_read(struct file *const filp, char __user *const buf,
+		const size_t size, loff_t *const ppos)
+{
+	/* Dummy handler to enable FMODE_CAN_READ. */
+	return -EINVAL;
+}
+
+static ssize_t fop_dummy_write(struct file *const filp,
+		const char __user *const buf, const size_t size,
+		loff_t *const ppos)
+{
+	/* Dummy handler to enable FMODE_CAN_WRITE. */
+	return -EINVAL;
+}
+
+/*
+ * A ruleset file descriptor enables to build a ruleset by adding (i.e.
+ * writing) rule after rule, without relying on the task's context.  This
+ * reentrant design is also used in a read way to enforce the ruleset on the
+ * current task.
+ */
+static const struct file_operations ruleset_fops = {
+	.release = fop_ruleset_release,
+	.read = fop_dummy_read,
+	.write = fop_dummy_write,
+};
+
+static int syscall_create_ruleset(const void __user *const attr_ptr,
+		const size_t attr_size)
+{
+	struct landlock_attr_ruleset attr_ruleset;
+	struct landlock_ruleset *ruleset;
+	int err, ruleset_fd;
+
+	/* Copies raw user space buffer. */
+	err = copy_struct_if_any_from_user(&attr_ruleset, sizeof(attr_ruleset),
+			offsetofend(typeof(attr_ruleset), handled_access_fs),
+			attr_ptr, attr_size);
+	if (err)
+		return err;
+
+	/* Checks content (and 32-bits cast). */
+	if ((attr_ruleset.handled_access_fs | _LANDLOCK_ACCESS_FS_MASK) !=
+			_LANDLOCK_ACCESS_FS_MASK)
+		return -EINVAL;
+
+	/* Checks arguments and transforms to kernel struct. */
+	ruleset = landlock_create_ruleset(attr_ruleset.handled_access_fs);
+	if (IS_ERR(ruleset))
+		return PTR_ERR(ruleset);
+
+	/* Creates anonymous FD referring to the ruleset. */
+	ruleset_fd = anon_inode_getfd("landlock-ruleset", &ruleset_fops,
+			ruleset, O_RDWR | O_CLOEXEC);
+	if (ruleset_fd < 0)
+		landlock_put_ruleset(ruleset);
+	return ruleset_fd;
+}
+
+/*
+ * Returns an owned ruleset from a FD. It is thus needed to call
+ * landlock_put_ruleset() on the return value.
+ */
+static struct landlock_ruleset *get_ruleset_from_fd(const s32 fd,
+		const fmode_t mode)
+{
+	struct fd ruleset_f;
+	struct landlock_ruleset *ruleset;
+	int err;
+
+	BUILD_BUG_ON(!__same_type(fd,
+		((struct landlock_attr_path_beneath *)NULL)->ruleset_fd));
+	BUILD_BUG_ON(!__same_type(fd,
+		((struct landlock_attr_enforce *)NULL)->ruleset_fd));
+
+	ruleset_f = fdget(fd);
+	if (!ruleset_f.file)
+		return ERR_PTR(-EBADF);
+
+	/* Checks FD type and access right. */
+	err = 0;
+	if (ruleset_f.file->f_op != &ruleset_fops)
+		err = -EBADFD;
+	else if (!(ruleset_f.file->f_mode & mode))
+		err = -EPERM;
+	if (!err) {
+		ruleset = ruleset_f.file->private_data;
+		landlock_get_ruleset(ruleset);
+	}
+	fdput(ruleset_f);
+	return err ? ERR_PTR(err) : ruleset;
+}
+
+/* Path handling */
+
+/*
+ * @path: Must call put_path(@path) after the call if it succeeded.
+ */
+static int get_path_from_fd(const s32 fd, struct path *const path)
+{
+	struct fd f;
+	int err = 0;
+
+	BUILD_BUG_ON(!__same_type(fd,
+		((struct landlock_attr_path_beneath *)NULL)->parent_fd));
+
+	/* Handles O_PATH. */
+	f = fdget_raw(fd);
+	if (!f.file)
+		return -EBADF;
+	/*
+	 * Only allows O_PATH file descriptor: enables to restrict ambient
+	 * filesystem access without requiring to open and risk leaking or
+	 * misusing a file descriptor.  Forbid internal filesystems (e.g.
+	 * nsfs), including pseudo filesystems that will never be mountable
+	 * (e.g. sockfs, pipefs).
+	 */
+	if (!(f.file->f_mode & FMODE_PATH) ||
+			(f.file->f_path.mnt->mnt_flags & MNT_INTERNAL) ||
+			(f.file->f_path.dentry->d_sb->s_flags & SB_NOUSER) ||
+			d_is_negative(f.file->f_path.dentry) ||
+			IS_PRIVATE(d_backing_inode(f.file->f_path.dentry))) {
+		err = -EBADFD;
+		goto out_fdput;
+	}
+	path->mnt = f.file->f_path.mnt;
+	path->dentry = f.file->f_path.dentry;
+	path_get(path);
+
+out_fdput:
+	fdput(f);
+	return err;
+}
+
+static int syscall_add_rule_path_beneath(const void __user *const attr_ptr,
+		const size_t attr_size)
+{
+	struct landlock_attr_path_beneath attr_path_beneath;
+	struct path path;
+	struct landlock_ruleset *ruleset;
+	int err;
+
+	/* Copies raw user space buffer. */
+	err = copy_struct_if_any_from_user(&attr_path_beneath,
+			sizeof(attr_path_beneath),
+			offsetofend(typeof(attr_path_beneath), allowed_access),
+			attr_ptr, attr_size);
+	if (err)
+		return err;
+
+	/* Gets and checks the ruleset. */
+	ruleset = get_ruleset_from_fd(attr_path_beneath.ruleset_fd,
+			FMODE_CAN_WRITE);
+	if (IS_ERR(ruleset))
+		return PTR_ERR(ruleset);
+
+	/*
+	 * Checks that allowed_access matches the @ruleset constraints
+	 * (ruleset->fs_access_mask is automatically upgraded to 64-bits).
+	 * Allows empty allowed_access i.e., deny @ruleset->fs_access_mask .
+	 */
+	if ((attr_path_beneath.allowed_access | ruleset->fs_access_mask) !=
+			ruleset->fs_access_mask) {
+		err = -EINVAL;
+		goto out_put_ruleset;
+	}
+
+	/* Gets and checks the new rule. */
+	err = get_path_from_fd(attr_path_beneath.parent_fd, &path);
+	if (err)
+		goto out_put_ruleset;
+
+	/* Imports the new rule. */
+	err = landlock_append_fs_rule(ruleset, &path,
+			attr_path_beneath.allowed_access);
+	path_put(&path);
+
+out_put_ruleset:
+	landlock_put_ruleset(ruleset);
+	return err;
+}
+
+/* Enforcement */
+
+static int syscall_enforce_ruleset(const void __user *const attr_ptr,
+		const size_t attr_size)
+{
+	struct landlock_ruleset *new_dom, *ruleset;
+	struct cred *new_cred;
+	struct landlock_cred_security *new_llcred;
+	struct landlock_attr_enforce attr_enforce;
+	int err;
+
+	/*
+	 * Enforcing a Landlock ruleset requires that the task has
+	 * CAP_SYS_ADMIN in its namespace or be running with no_new_privs.
+	 * This avoids scenarios where unprivileged tasks can affect the
+	 * behavior of privileged children.  These are similar checks as for
+	 * seccomp(2), except that an -EPERM may be returned.
+	 */
+	if (!task_no_new_privs(current)) {
+		err = security_capable(current_cred(), current_user_ns(),
+				CAP_SYS_ADMIN, CAP_OPT_NOAUDIT);
+		if (err)
+			return err;
+	}
+
+	/* Copies raw user space buffer. */
+	err = copy_struct_if_any_from_user(&attr_enforce, sizeof(attr_enforce),
+			offsetofend(typeof(attr_enforce), ruleset_fd),
+			attr_ptr, attr_size);
+	if (err)
+		return err;
+
+	/* Gets and checks the ruleset. */
+	ruleset = get_ruleset_from_fd(attr_enforce.ruleset_fd, FMODE_CAN_READ);
+	if (IS_ERR(ruleset))
+		return PTR_ERR(ruleset);
+
+	/* Prepares new credentials. */
+	new_cred = prepare_creds();
+	if (!new_cred) {
+		err = -ENOMEM;
+		goto out_put_ruleset;
+	}
+	new_llcred = landlock_cred(new_cred);
+
+	/*
+	 * There is no possible race condition while copying and manipulating
+	 * the current credentials because they are dedicated per thread.
+	 */
+	new_dom = landlock_merge_ruleset(new_llcred->domain, ruleset);
+	if (IS_ERR(new_dom)) {
+		err = PTR_ERR(new_dom);
+		goto out_put_creds;
+	}
+
+	/* Replaces the old (prepared) domain. */
+	landlock_put_ruleset(new_llcred->domain);
+	new_llcred->domain = new_dom;
+
+	landlock_put_ruleset(ruleset);
+	return commit_creds(new_cred);
+
+out_put_creds:
+	abort_creds(new_cred);
+	return err;
+
+out_put_ruleset:
+	landlock_put_ruleset(ruleset);
+	return err;
+}
+
+/*
+ * This function only contains arithmetic operations with constants, leading to
+ * BUILD_BUG_ON().  The related code is evaluated and checked at build time,
+ * but it is then ignored thanks to compiler optimizations.
+ */
+static void build_check_abi(void)
+{
+	size_t size_features, size_ruleset, size_path_beneath, size_enforce;
+
+	/*
+	 * For each user space ABI structures, first checks that there is no
+	 * hole in them, then checks that all architectures have the same
+	 * struct size.
+	 */
+	size_features = sizeof_field(struct landlock_attr_features, options_get_features);
+	size_features += sizeof_field(struct landlock_attr_features, options_create_ruleset);
+	size_features += sizeof_field(struct landlock_attr_features, options_add_rule);
+	size_features += sizeof_field(struct landlock_attr_features, options_enforce_ruleset);
+	size_features += sizeof_field(struct landlock_attr_features, size_attr_features);
+	size_features += sizeof_field(struct landlock_attr_features, size_attr_ruleset);
+	size_features += sizeof_field(struct landlock_attr_features, size_attr_path_beneath);
+	size_features += sizeof_field(struct landlock_attr_features, size_attr_enforce);
+	size_features += sizeof_field(struct landlock_attr_features, access_fs);
+	BUILD_BUG_ON(sizeof(struct landlock_attr_features) != size_features);
+	BUILD_BUG_ON(sizeof(struct landlock_attr_features) != 32);
+
+	size_ruleset = sizeof_field(struct landlock_attr_ruleset, handled_access_fs);
+	BUILD_BUG_ON(sizeof(struct landlock_attr_ruleset) != size_ruleset);
+	BUILD_BUG_ON(sizeof(struct landlock_attr_ruleset) != 8);
+
+	size_path_beneath = sizeof_field(struct landlock_attr_path_beneath, ruleset_fd);
+	size_path_beneath += sizeof_field(struct landlock_attr_path_beneath, parent_fd);
+	size_path_beneath += sizeof_field(struct landlock_attr_path_beneath, allowed_access);
+	BUILD_BUG_ON(sizeof(struct landlock_attr_path_beneath) != size_path_beneath);
+	BUILD_BUG_ON(sizeof(struct landlock_attr_path_beneath) != 16);
+
+	size_enforce = sizeof_field(struct landlock_attr_enforce, ruleset_fd);
+	BUILD_BUG_ON(sizeof(struct landlock_attr_enforce) != size_enforce);
+	BUILD_BUG_ON(sizeof(struct landlock_attr_enforce) != 4);
+}
+
+/**
+ * sys_landlock - System call to enable a process to safely sandbox itself
+ *
+ * @command: Landlock command to perform miscellaneous, but safe, actions. Cf.
+ *           `Commands`_.
+ * @options: Bitmask of options dedicated to one command. Cf. `Options`_.
+ * @attr1_ptr: Pointer to the first attribute. Cf. `Attributes`_.
+ * @attr1_size: First attribute size (i.e. size of the struct).
+ * @attr2_ptr: Unused for now, must be NULL.
+ * @attr2_size: Unused for now, must be 0.
+ *
+ * The @command and @options arguments enable a seccomp-bpf policy to control
+ * the requested actions.  However, it should be noted that Landlock is
+ * designed from the ground to enable unprivileged process to drop privileges
+ * and accesses in a way that can not harm other processes.  This syscall and
+ * all its arguments should then be allowed for any process, which will then
+ * enable applications to strengthen the security of the whole system.
+ *
+ * @attr2_ptr and @attr2_size describe a second attribute which could be used
+ * in the future to compose with the first attribute (e.g. a
+ * landlock_attr_path_beneath with a landlock_attr_ioctl).
+ *
+ * The order of return errors begins with ENOPKG (disabled Landlock),
+ * EOPNOTSUPP (unknown command or option) and then EINVAL (invalid attribute).
+ * The other error codes may be specific to each command.
+ */
+SYSCALL_DEFINE6(landlock, const unsigned int, command,
+		const unsigned int, options,
+		void __user *const, attr1_ptr, const size_t, attr1_size,
+		void __user *const, attr2_ptr, const size_t, attr2_size)
+{
+	build_check_abi();
+	/*
+	 * Enables user space to identify if Landlock is disabled, thanks to a
+	 * specific error code.
+	 */
+	if (!landlock_initialized)
+		return -ENOPKG;
+
+	switch ((enum landlock_cmd)command) {
+	case LANDLOCK_CMD_GET_FEATURES:
+		if (options == LANDLOCK_OPT_GET_FEATURES) {
+			if (attr2_ptr || attr2_size)
+				return -EINVAL;
+			return syscall_get_features(attr1_ptr, attr1_size);
+		}
+		return -EOPNOTSUPP;
+
+	case LANDLOCK_CMD_CREATE_RULESET:
+		if (options == LANDLOCK_OPT_CREATE_RULESET) {
+			if (attr2_ptr || attr2_size)
+				return -EINVAL;
+			return syscall_create_ruleset(attr1_ptr, attr1_size);
+		}
+		return -EOPNOTSUPP;
+
+	case LANDLOCK_CMD_ADD_RULE:
+		if (options == LANDLOCK_OPT_ADD_RULE_PATH_BENEATH) {
+			if (attr2_ptr || attr2_size)
+				return -EINVAL;
+			return syscall_add_rule_path_beneath(attr1_ptr,
+					attr1_size);
+		}
+		return -EOPNOTSUPP;
+
+	case LANDLOCK_CMD_ENFORCE_RULESET:
+		if (options == LANDLOCK_OPT_ENFORCE_RULESET) {
+			if (attr2_ptr || attr2_size)
+				return -EINVAL;
+			return syscall_enforce_ruleset(attr1_ptr, attr1_size);
+		}
+		return -EOPNOTSUPP;
+	}
+	return -EOPNOTSUPP;
+}
-- 
2.27.0


^ permalink raw reply related

* [PATCH v19 04/12] landlock: Add ptrace restrictions
From: Mickaël Salaün @ 2020-07-07 18:09 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mickaël Salaün, Al Viro, Andy Lutomirski, Anton Ivanov,
	Arnd Bergmann, Casey Schaufler, James Morris, Jann Horn,
	Jeff Dike, Jonathan Corbet, Kees Cook, Michael Kerrisk,
	Mickaël Salaün, Richard Weinberger, Serge E . Hallyn,
	Shuah Khan, Vincent Dagonneau, kernel-hardening, linux-api,
	linux-arch, linux-doc, linux-fsdevel, linux-kselftest,
	linux-security-module, x86
In-Reply-To: <20200707180955.53024-1-mic@digikod.net>

Using ptrace(2) and related debug features on a target process can lead
to a privilege escalation.  Indeed, ptrace(2) can be used by an attacker
to impersonate another task and to remain undetected while performing
malicious activities.  Thanks to  ptrace_may_access(), various part of
the kernel can check if a tracer is more privileged than a tracee.

A landlocked process has fewer privileges than a non-landlocked process
and must then be subject to additional restrictions when manipulating
processes. To be allowed to use ptrace(2) and related syscalls on a
target process, a landlocked process must have a subset of the target
process' rules (i.e. the tracee must be in a sub-domain of the tracer).

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: James Morris <jmorris@namei.org>
Cc: Jann Horn <jannh@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Serge E. Hallyn <serge@hallyn.com>
---

Changes since v14:
* Constify variables.

Changes since v13:
* Make the ptrace restriction mandatory, like in the v10.
* Remove the eBPF dependency.

Previous changes:
https://lore.kernel.org/lkml/20191104172146.30797-5-mic@digikod.net/
---
 security/landlock/Makefile |   2 +-
 security/landlock/ptrace.c | 120 +++++++++++++++++++++++++++++++++++++
 security/landlock/ptrace.h |  14 +++++
 security/landlock/setup.c  |   2 +
 4 files changed, 137 insertions(+), 1 deletion(-)
 create mode 100644 security/landlock/ptrace.c
 create mode 100644 security/landlock/ptrace.h

diff --git a/security/landlock/Makefile b/security/landlock/Makefile
index 041ea242e627..f1d1eb72fa76 100644
--- a/security/landlock/Makefile
+++ b/security/landlock/Makefile
@@ -1,4 +1,4 @@
 obj-$(CONFIG_SECURITY_LANDLOCK) := landlock.o
 
 landlock-y := setup.o object.o ruleset.o \
-	cred.o
+	cred.o ptrace.o
diff --git a/security/landlock/ptrace.c b/security/landlock/ptrace.c
new file mode 100644
index 000000000000..61df38b13f5c
--- /dev/null
+++ b/security/landlock/ptrace.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - Ptrace hooks
+ *
+ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2020 ANSSI
+ */
+
+#include <asm/current.h>
+#include <linux/cred.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/lsm_hooks.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+
+#include "common.h"
+#include "cred.h"
+#include "ptrace.h"
+#include "ruleset.h"
+#include "setup.h"
+
+/**
+ * domain_scope_le - Checks domain ordering for scoped ptrace
+ *
+ * @parent: Parent domain.
+ * @child: Potential child of @parent.
+ *
+ * Checks if the @parent domain is less or equal to (i.e. an ancestor, which
+ * means a subset of) the @child domain.
+ */
+static bool domain_scope_le(const struct landlock_ruleset *const parent,
+		const struct landlock_ruleset *const child)
+{
+	const struct landlock_hierarchy *walker;
+
+	if (!parent)
+		return true;
+	if (!child)
+		return false;
+	for (walker = child->hierarchy; walker; walker = walker->parent) {
+		if (walker == parent->hierarchy)
+			/* @parent is in the scoped hierarchy of @child. */
+			return true;
+	}
+	/* There is no relationship between @parent and @child. */
+	return false;
+}
+
+static bool task_is_scoped(const struct task_struct *const parent,
+		const struct task_struct *const child)
+{
+	bool is_scoped;
+	const struct landlock_ruleset *dom_parent, *dom_child;
+
+	rcu_read_lock();
+	dom_parent = landlock_get_task_domain(parent);
+	dom_child = landlock_get_task_domain(child);
+	is_scoped = domain_scope_le(dom_parent, dom_child);
+	rcu_read_unlock();
+	return is_scoped;
+}
+
+static int task_ptrace(const struct task_struct *const parent,
+		const struct task_struct *const child)
+{
+	/* Quick return for non-landlocked tasks. */
+	if (!landlocked(parent))
+		return 0;
+	if (task_is_scoped(parent, child))
+		return 0;
+	return -EPERM;
+}
+
+/**
+ * hook_ptrace_access_check - Determines whether the current process may access
+ *			      another
+ *
+ * @child: Process to be accessed.
+ * @mode: Mode of attachment.
+ *
+ * If the current task has Landlock rules, then the child must have at least
+ * the same rules.  Else denied.
+ *
+ * Determines whether a process may access another, returning 0 if permission
+ * granted, -errno if denied.
+ */
+static int hook_ptrace_access_check(struct task_struct *const child,
+		const unsigned int mode)
+{
+	return task_ptrace(current, child);
+}
+
+/**
+ * hook_ptrace_traceme - Determines whether another process may trace the
+ *			 current one
+ *
+ * @parent: Task proposed to be the tracer.
+ *
+ * If the parent has Landlock rules, then the current task must have the same
+ * or more rules.  Else denied.
+ *
+ * Determines whether the nominated task is permitted to trace the current
+ * process, returning 0 if permission is granted, -errno if denied.
+ */
+static int hook_ptrace_traceme(struct task_struct *const parent)
+{
+	return task_ptrace(parent, current);
+}
+
+static struct security_hook_list landlock_hooks[] __lsm_ro_after_init = {
+	LSM_HOOK_INIT(ptrace_access_check, hook_ptrace_access_check),
+	LSM_HOOK_INIT(ptrace_traceme, hook_ptrace_traceme),
+};
+
+__init void landlock_add_hooks_ptrace(void)
+{
+	security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),
+			LANDLOCK_NAME);
+}
diff --git a/security/landlock/ptrace.h b/security/landlock/ptrace.h
new file mode 100644
index 000000000000..6740c6a723de
--- /dev/null
+++ b/security/landlock/ptrace.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - Ptrace hooks
+ *
+ * Copyright © 2017-2019 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2019 ANSSI
+ */
+
+#ifndef _SECURITY_LANDLOCK_PTRACE_H
+#define _SECURITY_LANDLOCK_PTRACE_H
+
+__init void landlock_add_hooks_ptrace(void);
+
+#endif /* _SECURITY_LANDLOCK_PTRACE_H */
diff --git a/security/landlock/setup.c b/security/landlock/setup.c
index 39ee1766f175..5e7540fdeefa 100644
--- a/security/landlock/setup.c
+++ b/security/landlock/setup.c
@@ -11,6 +11,7 @@
 
 #include "common.h"
 #include "cred.h"
+#include "ptrace.h"
 #include "setup.h"
 
 struct lsm_blob_sizes landlock_blob_sizes __lsm_ro_after_init = {
@@ -20,6 +21,7 @@ struct lsm_blob_sizes landlock_blob_sizes __lsm_ro_after_init = {
 static int __init landlock_init(void)
 {
 	landlock_add_hooks_cred();
+	landlock_add_hooks_ptrace();
 	pr_info("Up and running.\n");
 	return 0;
 }
-- 
2.27.0


^ permalink raw reply related

* [PATCH v19 07/12] landlock: Support filesystem access-control
From: Mickaël Salaün @ 2020-07-07 18:09 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mickaël Salaün, Al Viro, Andy Lutomirski, Anton Ivanov,
	Arnd Bergmann, Casey Schaufler, James Morris, Jann Horn,
	Jeff Dike, Jonathan Corbet, Kees Cook, Michael Kerrisk,
	Mickaël Salaün, Richard Weinberger, Serge E . Hallyn,
	Shuah Khan, Vincent Dagonneau, kernel-hardening, linux-api,
	linux-arch, linux-doc, linux-fsdevel, linux-kselftest,
	linux-security-module, x86
In-Reply-To: <20200707180955.53024-1-mic@digikod.net>

Thanks to the Landlock objects and ruleset, it is possible to identify
inodes according to a process's domain.  To enable an unprivileged
process to express a file hierarchy, it first needs to open a directory
(or a file) and pass this file descriptor to the kernel through
landlock(2).  When checking if a file access request is allowed, we walk
from the requested dentry to the real root, following the different
mount layers.  The access to each "tagged" inodes are collected
according to their rule layer level, and ANDed to create access to the
requested file hierarchy.  This makes possible to identify a lot of
files without tagging every inodes nor modifying the filesystem, while
still following the view and understanding the user has from the
filesystem.

Add a new ARCH_EPHEMERAL_STATES for UML because it currently does not
keep the same struct inodes for the same inodes whereas these inodes are
in use.

This commit adds a minimal set of supported filesystem access-control
which doesn't enable to restrict all file-related actions.  This is the
result of multiple discussions to minimize the code of Landlock to ease
review.  Thanks to the Landlock design, extending this access-control
without breaking user space will not be a problem.  Moreover, seccomp
filters can be used to restrict the use of syscall families which may
not be currently handled by Landlock.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: James Morris <jmorris@namei.org>
Cc: Jann Horn <jannh@google.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Serge E. Hallyn <serge@hallyn.com>
---

Changes since v18:
* Remove useless include.
* Fix spelling.

Changes since v17:
* Replace landlock_release_inodes() with security_sb_delete() (requested
  by James Morris).
* Replace struct super_block->s_landlock_inode_refs with the LSM
  infrastructure management of the superblock (requested by James
  Morris).
* Fix mknod restriction with a zero mode (spotted by Vincent Dagonneau).
* Minimize executed code in path_mknod and file_open hooks when the
  current tasks is not sandboxed.
* Remove useless checks on the file pointer and inode in
  hook_file_open() .
* Constify domain pointers.
* Rename inode_landlock() to landlock_inode().
* Import include/uapi/linux/landlock.h and _LANDLOCK_ACCESS_FS_* from
  the ruleset and domain management patch.
* Explain the rational of this minimal set of access-control.
  https://lore.kernel.org/lkml/f646e1c7-33cf-333f-070c-0a40ad0468cd@digikod.net/

Changes since v16:
* Add ARCH_EPHEMERAL_STATES and enable it for UML.

Changes since v15:
* Replace layer_levels and layer_depth with a bitfield of layers: this
  enables to properly manage superset and subset of access rights,
  whatever their order in the stack of layers.
  Cf. https://lore.kernel.org/lkml/e07fe473-1801-01cc-12ae-b3167f95250e@digikod.net/
* Allow to open pipes and similar special files through /proc/self/fd/.
* Properly handle internal filesystems such as nsfs: always allow these
  kind of roots because disconnected path cannot be evaluated.
* Remove the LANDLOCK_ACCESS_FS_LINK_TO and
  LANDLOCK_ACCESS_FS_RENAME_{TO,FROM}, but use the
  LANDLOCK_ACCESS_FS_REMOVE_{FILE,DIR} and LANDLOCK_ACCESS_FS_MAKE_*
  instead.  Indeed, it is not possible for now (and not really useful)
  to express the semantic of a source and a destination.
* Check access rights to remove a directory or a file with rename(2).
* Forbid reparenting when linking or renaming.  This is needed to easily
  protect against possible privilege escalation by changing the place of
  a file or directory in relation to an enforced access policy (from the
  set of layers).  This will be relaxed in the future.
* Update hooks to take into account replacement of the object's self and
  beneath access bitfields with one.  Simplify the code.
* Check file related access rights.
* Check d_is_negative() instead of !d_backing_inode() in
  check_access_path_continue(), and continue the path walk while there
  is no mapped inode e.g., with rename(2).
* Check private inode in check_access_path().
* Optimize get_file_access() when dealing with a directory.
* Add missing atomic.h .

Changes since v14:
* Simplify the object, rule and ruleset management at the expense of a
  less aggressive memory freeing (contributed by Jann Horn, with
  additional modifications):
  - Rewrite release_inode() to use inode->sb->s_landlock_inode_refs.
  - Remove useless checks in landlock_release_inodes(), clean object
    pointer according to the new struct landlock_object and wait for all
    iput() to complete.
  - Rewrite get_inode_object() according to the new struct
    landlock_object.  If there is a race-condition when cleaning up an
    object, we retry until the concurrent thread finished the object
    cleaning.
  Cf. https://lore.kernel.org/lkml/CAG48ez21bEn0wL1bbmTiiu8j9jP5iEWtHOwz4tURUJ+ki0ydYw@mail.gmail.com/
* Fix nested domains by implementing a notion of layer level and depth:
  - Check for matching level ranges when walking through a file path.
  - Only allow access if every layer granted the access request.
* Handles files without mount points (e.g. pipes).
* Hardens path walk by checking inode pointer values.
* Prefetches d_parent when walking to the root directory.
* Remove useless inode_alloc_security hook() (suggested by Jann Horn):
  already initialized by lsm_inode_alloc().
* Remove the inode_free_security hook.
* Remove access checks that may be required for FD-only requests:
  truncate, getattr, lock, chmod, chown, chgrp, ioctl.  This will be
  handle in a future evolution of Landlock, but right now the goal is to
  lighten the code to ease review.
* Constify variables.
* Move ABI checks into syscall.c .
* Cosmetic variable renames.

Changes since v11:
* Add back, revamp and make a fully working filesystem access-control
  based on paths and inodes.
* Remove the eBPF dependency.

Previous changes:
https://lore.kernel.org/lkml/20190721213116.23476-6-mic@digikod.net/
---
 arch/Kconfig                  |   7 +
 arch/um/Kconfig               |   1 +
 include/uapi/linux/landlock.h |  78 +++++
 security/landlock/Kconfig     |   2 +-
 security/landlock/Makefile    |   2 +-
 security/landlock/fs.c        | 609 ++++++++++++++++++++++++++++++++++
 security/landlock/fs.h        |  60 ++++
 security/landlock/setup.c     |   7 +
 security/landlock/setup.h     |   2 +
 9 files changed, 766 insertions(+), 2 deletions(-)
 create mode 100644 include/uapi/linux/landlock.h
 create mode 100644 security/landlock/fs.c
 create mode 100644 security/landlock/fs.h

diff --git a/arch/Kconfig b/arch/Kconfig
index 8cc35dc556c7..483b7476ac69 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -845,6 +845,13 @@ config COMPAT_32BIT_TIME
 config ARCH_NO_PREEMPT
 	bool
 
+config ARCH_EPHEMERAL_STATES
+	def_bool n
+	help
+	  An arch should select this symbol if it do not keep an internal kernel
+	  state for kernel objects such as inodes, but instead rely on something
+	  else (e.g. the host kernel for an UML kernel).
+
 config ARCH_SUPPORTS_RT
 	bool
 
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 9318dc6d1a0c..4109fdc36adb 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -5,6 +5,7 @@ menu "UML-specific options"
 config UML
 	bool
 	default y
+	select ARCH_EPHEMERAL_STATES
 	select ARCH_HAS_KCOV
 	select ARCH_NO_PREEMPT
 	select HAVE_ARCH_AUDITSYSCALL
diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h
new file mode 100644
index 000000000000..5141185e6487
--- /dev/null
+++ b/include/uapi/linux/landlock.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Landlock - UAPI headers
+ *
+ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#ifndef _UAPI__LINUX_LANDLOCK_H__
+#define _UAPI__LINUX_LANDLOCK_H__
+
+/**
+ * DOC: fs_access
+ *
+ * A set of actions on kernel objects may be defined by an attribute (e.g.
+ * &struct landlock_attr_path_beneath) and a bitmask of access.
+ *
+ * Filesystem flags
+ * ~~~~~~~~~~~~~~~~
+ *
+ * These flags enable to restrict a sandbox process to a set of actions on
+ * files and directories.  Files or directories opened before the sandboxing
+ * are not subject to these restrictions.
+ *
+ * A file can only receive these access rights:
+ *
+ * - %LANDLOCK_ACCESS_FS_EXECUTE: Execute a file.
+ * - %LANDLOCK_ACCESS_FS_WRITE_FILE: Open a file with write access.
+ * - %LANDLOCK_ACCESS_FS_READ_FILE: Open a file with read access.
+ *
+ * A directory can receive access rights related to files or directories.  This
+ * set of access rights is applied to the directory itself, and the directories
+ * beneath it:
+ *
+ * - %LANDLOCK_ACCESS_FS_READ_DIR: Open a directory or list its content.
+ * - %LANDLOCK_ACCESS_FS_CHROOT: Change the root directory of the current
+ *   process.
+ *
+ * However, the following access rights only apply to the content of a
+ * directory, not the directory itself:
+ *
+ * - %LANDLOCK_ACCESS_FS_REMOVE_DIR: Remove an empty directory or rename one.
+ * - %LANDLOCK_ACCESS_FS_REMOVE_FILE: Unlink (or rename) a file.
+ * - %LANDLOCK_ACCESS_FS_MAKE_CHAR: Create (or rename or link) a character
+ *   device.
+ * - %LANDLOCK_ACCESS_FS_MAKE_DIR: Create (or rename) a directory.
+ * - %LANDLOCK_ACCESS_FS_MAKE_REG: Create (or rename or link) a regular file.
+ * - %LANDLOCK_ACCESS_FS_MAKE_SOCK: Create (or rename or link) a UNIX domain
+ *   socket.
+ * - %LANDLOCK_ACCESS_FS_MAKE_FIFO: Create (or rename or link) a named pipe.
+ * - %LANDLOCK_ACCESS_FS_MAKE_BLOCK: Create (or rename or link) a block device.
+ * - %LANDLOCK_ACCESS_FS_MAKE_SYM: Create (or rename or link) a symbolic link.
+ *
+ * .. warning::
+ *
+ *   It is currently not possible to restrict some file-related actions
+ *   accessible through these syscall families: :manpage:`chdir(2)`,
+ *   :manpage:`truncate(2)`, :manpage:`stat(2)`, :manpage:`flock(2)`,
+ *   :manpage:`chmod(2)`, :manpage:`chown(2)`, :manpage:`setxattr(2)`,
+ *   :manpage:`ioctl(2)`, :manpage:`fcntl(2)`.
+ *   Future Landlock evolutions will enable to restrict them.
+ */
+#define LANDLOCK_ACCESS_FS_EXECUTE			(1ULL << 0)
+#define LANDLOCK_ACCESS_FS_WRITE_FILE			(1ULL << 1)
+#define LANDLOCK_ACCESS_FS_READ_FILE			(1ULL << 2)
+#define LANDLOCK_ACCESS_FS_READ_DIR			(1ULL << 3)
+#define LANDLOCK_ACCESS_FS_CHROOT			(1ULL << 4)
+#define LANDLOCK_ACCESS_FS_REMOVE_DIR			(1ULL << 5)
+#define LANDLOCK_ACCESS_FS_REMOVE_FILE			(1ULL << 6)
+#define LANDLOCK_ACCESS_FS_MAKE_CHAR			(1ULL << 7)
+#define LANDLOCK_ACCESS_FS_MAKE_DIR			(1ULL << 8)
+#define LANDLOCK_ACCESS_FS_MAKE_REG			(1ULL << 9)
+#define LANDLOCK_ACCESS_FS_MAKE_SOCK			(1ULL << 10)
+#define LANDLOCK_ACCESS_FS_MAKE_FIFO			(1ULL << 11)
+#define LANDLOCK_ACCESS_FS_MAKE_BLOCK			(1ULL << 12)
+#define LANDLOCK_ACCESS_FS_MAKE_SYM			(1ULL << 13)
+
+#endif /* _UAPI__LINUX_LANDLOCK_H__ */
diff --git a/security/landlock/Kconfig b/security/landlock/Kconfig
index 9ec7593a534a..487d88328d98 100644
--- a/security/landlock/Kconfig
+++ b/security/landlock/Kconfig
@@ -2,7 +2,7 @@
 
 config SECURITY_LANDLOCK
 	bool "Landlock support"
-	depends on SECURITY
+	depends on SECURITY && !ARCH_EPHEMERAL_STATES
 	select SECURITY_PATH
 	help
 	  Landlock is a safe sandboxing mechanism which enables processes to
diff --git a/security/landlock/Makefile b/security/landlock/Makefile
index f1d1eb72fa76..92e3d80ab8ed 100644
--- a/security/landlock/Makefile
+++ b/security/landlock/Makefile
@@ -1,4 +1,4 @@
 obj-$(CONFIG_SECURITY_LANDLOCK) := landlock.o
 
 landlock-y := setup.o object.o ruleset.o \
-	cred.o ptrace.o
+	cred.o ptrace.o fs.o
diff --git a/security/landlock/fs.c b/security/landlock/fs.c
new file mode 100644
index 000000000000..7d5df26c3441
--- /dev/null
+++ b/security/landlock/fs.c
@@ -0,0 +1,609 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - Filesystem management and hooks
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#include <linux/atomic.h>
+#include <linux/compiler_types.h>
+#include <linux/dcache.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/lsm_hooks.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/path.h>
+#include <linux/prefetch.h>
+#include <linux/rcupdate.h>
+#include <linux/spinlock.h>
+#include <linux/stat.h>
+#include <linux/types.h>
+#include <linux/wait_bit.h>
+#include <linux/workqueue.h>
+#include <uapi/linux/landlock.h>
+
+#include "common.h"
+#include "cred.h"
+#include "fs.h"
+#include "object.h"
+#include "ruleset.h"
+#include "setup.h"
+
+/* Underlying object management */
+
+static void release_inode(struct landlock_object *const object)
+	__releases(object->lock)
+{
+	struct inode *const inode = object->underobj;
+	struct super_block *sb;
+
+	if (!inode) {
+		spin_unlock(&object->lock);
+		return;
+	}
+
+	spin_lock(&inode->i_lock);
+	/*
+	 * Make sure that if the filesystem is concurrently unmounted,
+	 * hook_sb_delete() will wait for us to finish iput().
+	 */
+	sb = inode->i_sb;
+	atomic_long_inc(&landlock_superblock(sb)->inode_refs);
+	rcu_assign_pointer(landlock_inode(inode)->object, NULL);
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&object->lock);
+	/*
+	 * Now, new rules can safely be tied to @inode.
+	 */
+
+	iput(inode);
+	if (atomic_long_dec_and_test(&landlock_superblock(sb)->inode_refs))
+		wake_up_var(&landlock_superblock(sb)->inode_refs);
+}
+
+static const struct landlock_object_underops landlock_fs_underops = {
+	.release = release_inode
+};
+
+/* Ruleset management */
+
+static struct landlock_object *get_inode_object(struct inode *const inode)
+{
+	struct landlock_object *object, *new_object;
+	struct landlock_inode_security *inode_sec = landlock_inode(inode);
+
+	rcu_read_lock();
+retry:
+	object = rcu_dereference(inode_sec->object);
+	if (object) {
+		if (likely(refcount_inc_not_zero(&object->usage))) {
+			rcu_read_unlock();
+			return object;
+		}
+		/*
+		 * We're racing with release_inode(), the object is going away.
+		 * Wait for release_inode(), then retry.
+		 */
+		spin_lock(&object->lock);
+		spin_unlock(&object->lock);
+		goto retry;
+	}
+	rcu_read_unlock();
+
+	/*
+	 * If there is no object tied to @inode, then create a new one (without
+	 * holding any locks).
+	 */
+	new_object = landlock_create_object(&landlock_fs_underops, inode);
+
+	spin_lock(&inode->i_lock);
+	object = rcu_dereference_protected(inode_sec->object,
+			lockdep_is_held(&inode->i_lock));
+	if (unlikely(object)) {
+		/* Someone else just created the object, bail out and retry. */
+		kfree(new_object);
+		spin_unlock(&inode->i_lock);
+
+		rcu_read_lock();
+		goto retry;
+	} else {
+		rcu_assign_pointer(inode_sec->object, new_object);
+		/*
+		 * @inode will be released by hook_sb_delete() on its
+		 * superblock shutdown.
+		 */
+		ihold(inode);
+		spin_unlock(&inode->i_lock);
+		return new_object;
+	}
+}
+
+/* All access rights which can be tied to files. */
+#define ACCESS_FILE ( \
+	LANDLOCK_ACCESS_FS_EXECUTE | \
+	LANDLOCK_ACCESS_FS_WRITE_FILE | \
+	LANDLOCK_ACCESS_FS_READ_FILE)
+
+/*
+ * @path: Should have been checked by get_path_from_fd().
+ */
+int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
+		const struct path *const path, u32 access_rights)
+{
+	int err;
+	struct landlock_rule rule = {};
+
+	/* Files only get access rights that make sense. */
+	if (!d_is_dir(path->dentry) && (access_rights | ACCESS_FILE) !=
+			ACCESS_FILE)
+		return -EINVAL;
+
+	/* Transforms relative access rights to absolute ones. */
+	access_rights |= _LANDLOCK_ACCESS_FS_MASK & ~ruleset->fs_access_mask;
+	rule.access = access_rights;
+	rule.object = get_inode_object(d_backing_inode(path->dentry));
+	mutex_lock(&ruleset->lock);
+	err = landlock_insert_rule(ruleset, &rule, false);
+	mutex_unlock(&ruleset->lock);
+	/*
+	 * No need to check for an error because landlock_insert_rule()
+	 * increments the refcount for the new rule, if any.
+	 */
+	landlock_put_object(rule.object);
+	return err;
+}
+
+/* Access-control management */
+
+static bool check_access_path_continue(
+		const struct landlock_ruleset *const domain,
+		const struct path *const path, const u32 access_request,
+		bool *const allow, u64 *const layer_mask)
+{
+	const struct landlock_rule *rule;
+	const struct inode *inode;
+	bool next = true;
+
+	prefetch(path->dentry->d_parent);
+	if (d_is_negative(path->dentry))
+		/* Continues to walk while there is no mapped inode. */
+		return true;
+	inode = d_backing_inode(path->dentry);
+	rcu_read_lock();
+	rule = landlock_find_rule(domain,
+			rcu_dereference(landlock_inode(inode)->object));
+	rcu_read_unlock();
+
+	/* Checks for matching layers. */
+	if (rule && (rule->layers | *layer_mask)) {
+		*allow = (rule->access & access_request) == access_request;
+		if (*allow) {
+			*layer_mask &= ~rule->layers;
+			/* Stops when a rule from each layer granted access. */
+			next = !!*layer_mask;
+		} else {
+			next = false;
+		}
+	}
+	return next;
+}
+
+static int check_access_path(const struct landlock_ruleset *const domain,
+		const struct path *const path, u32 access_request)
+{
+	bool allow = false;
+	struct path walker_path;
+	u64 layer_mask;
+
+	if (WARN_ON_ONCE(!domain || !path))
+		return 0;
+	/*
+	 * Allows access to pseudo filesystems that will never be mountable
+	 * (e.g. sockfs, pipefs), but can still be reachable through
+	 * /proc/self/fd .
+	 */
+	if ((path->dentry->d_sb->s_flags & SB_NOUSER) ||
+			(d_is_positive(path->dentry) &&
+			 unlikely(IS_PRIVATE(d_backing_inode(path->dentry)))))
+		return 0;
+	if (WARN_ON_ONCE(domain->nb_layers < 1))
+		return -EACCES;
+
+	layer_mask = GENMASK_ULL(domain->nb_layers - 1, 0);
+	/*
+	 * An access request which is not handled by the domain should be
+	 * allowed.
+	 */
+	access_request &= domain->fs_access_mask;
+	if (access_request == 0)
+		return 0;
+	walker_path = *path;
+	path_get(&walker_path);
+	/*
+	 * We need to walk through all the hierarchy to not miss any relevant
+	 * restriction.
+	 */
+	while (check_access_path_continue(domain, &walker_path, access_request,
+				&allow, &layer_mask)) {
+		struct dentry *parent_dentry;
+
+jump_up:
+		/*
+		 * Does not work with orphaned/private mounts like overlayfs
+		 * layers for now (cf. ovl_path_real() and ovl_path_open()).
+		 */
+		if (walker_path.dentry == walker_path.mnt->mnt_root) {
+			if (follow_up(&walker_path)) {
+				/* Ignores hidden mount points. */
+				goto jump_up;
+			} else {
+				/*
+				 * Stops at the real root.  Denies access
+				 * because not all layers have granted access.
+				 */
+				allow = false;
+				break;
+			}
+		}
+		if (unlikely(IS_ROOT(walker_path.dentry))) {
+			/*
+			 * Stops at disconnected root directories.  Only allows
+			 * access to internal filesystems (e.g. nsfs which is
+			 * reachable through /proc/self/ns).
+			 */
+			allow = !!(walker_path.mnt->mnt_flags & MNT_INTERNAL);
+			break;
+		}
+		parent_dentry = dget_parent(walker_path.dentry);
+		dput(walker_path.dentry);
+		walker_path.dentry = parent_dentry;
+	}
+	path_put(&walker_path);
+	return allow ? 0 : -EACCES;
+}
+
+static inline int current_check_access_path(const struct path *const path,
+		const u32 access_request)
+{
+	const struct landlock_ruleset *const dom =
+		landlock_get_current_domain();
+
+	if (!dom)
+		return 0;
+	return check_access_path(dom, path, access_request);
+}
+
+/* Super-block hooks */
+
+/*
+ * Release the inodes used in a security policy.
+ *
+ * Cf. fsnotify_unmount_inodes()
+ */
+static void hook_sb_delete(struct super_block *const sb)
+{
+	struct inode *inode, *iput_inode = NULL;
+
+	if (!landlock_initialized)
+		return;
+
+	spin_lock(&sb->s_inode_list_lock);
+	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+		struct landlock_inode_security *inode_sec =
+			landlock_inode(inode);
+		struct landlock_object *object;
+		bool do_put = false;
+
+		rcu_read_lock();
+		object = rcu_dereference(inode_sec->object);
+		if (!object) {
+			rcu_read_unlock();
+			continue;
+		}
+
+		spin_lock(&object->lock);
+		if (object->underobj) {
+			object->underobj = NULL;
+			do_put = true;
+			spin_lock(&inode->i_lock);
+			rcu_assign_pointer(inode_sec->object, NULL);
+			spin_unlock(&inode->i_lock);
+		}
+		spin_unlock(&object->lock);
+		rcu_read_unlock();
+		if (!do_put)
+			/*
+			 * A concurrent iput() in release_inode() is ongoing
+			 * and we will just wait for it to finish.
+			 */
+			continue;
+
+		/*
+		 * At this point, we own the ihold() reference that was
+		 * originally set up by get_inode_object(). Therefore we can
+		 * drop the list lock and know that the inode won't disappear
+		 * from under us until the next loop walk.
+		 */
+		spin_unlock(&sb->s_inode_list_lock);
+		/*
+		 * We can now actually put the previous inode, which is not
+		 * needed anymore for the loop walk.
+		 */
+		if (iput_inode)
+			iput(iput_inode);
+		iput_inode = inode;
+		spin_lock(&sb->s_inode_list_lock);
+	}
+	spin_unlock(&sb->s_inode_list_lock);
+	if (iput_inode)
+		iput(iput_inode);
+
+	/*
+	 * Wait for pending iput() in release_inode().
+	 */
+	wait_var_event(&landlock_superblock(sb)->inode_refs, !atomic_long_read(
+				&landlock_superblock(sb)->inode_refs));
+}
+
+/*
+ * Because a Landlock security policy is defined according to the filesystem
+ * layout (i.e. the mount namespace), changing it may grant access to files not
+ * previously allowed.
+ *
+ * To make it simple, deny any filesystem layout modification by landlocked
+ * processes.  Non-landlocked processes may still change the namespace of a
+ * landlocked process, but this kind of threat must be handled by a system-wide
+ * access-control security policy.
+ *
+ * This could be lifted in the future if Landlock can safely handle mount
+ * namespace updates requested by a landlocked process.  Indeed, we could
+ * update the current domain (which is currently read-only) by taking into
+ * account the accesses of the source and the destination of a new mount point.
+ * However, it would also require to make all the child domains dynamically
+ * inherit these new constraints.  Anyway, for backward compatibility reasons,
+ * a dedicated user space option would be required (e.g. as a ruleset command
+ * option).
+ */
+static int hook_sb_mount(const char *const dev_name,
+		const struct path *const path, const char *const type,
+		const unsigned long flags, void *const data)
+{
+	if (!landlock_get_current_domain())
+		return 0;
+	return -EPERM;
+}
+
+static int hook_move_mount(const struct path *const from_path,
+		const struct path *const to_path)
+{
+	if (!landlock_get_current_domain())
+		return 0;
+	return -EPERM;
+}
+
+/*
+ * Removing a mount point may reveal a previously hidden file hierarchy, which
+ * may then grant access to files, which may have previously been forbidden.
+ */
+static int hook_sb_umount(struct vfsmount *const mnt, const int flags)
+{
+	if (!landlock_get_current_domain())
+		return 0;
+	return -EPERM;
+}
+
+static int hook_sb_remount(struct super_block *const sb, void *const mnt_opts)
+{
+	if (!landlock_get_current_domain())
+		return 0;
+	return -EPERM;
+}
+
+/*
+ * pivot_root(2), like mount(2), changes the current mount namespace.  It must
+ * then be forbidden for a landlocked process.
+ *
+ * However, chroot(2) may be allowed because it only changes the relative root
+ * directory of the current process.
+ */
+static int hook_sb_pivotroot(const struct path *const old_path,
+		const struct path *const new_path)
+{
+	if (!landlock_get_current_domain())
+		return 0;
+	return -EPERM;
+}
+
+/* Path hooks */
+
+static inline u32 get_mode_access(const umode_t mode)
+{
+	switch (mode & S_IFMT) {
+	case S_IFLNK:
+		return LANDLOCK_ACCESS_FS_MAKE_SYM;
+	case 0:
+		/* A zero mode translates to S_IFREG. */
+	case S_IFREG:
+		return LANDLOCK_ACCESS_FS_MAKE_REG;
+	case S_IFDIR:
+		return LANDLOCK_ACCESS_FS_MAKE_DIR;
+	case S_IFCHR:
+		return LANDLOCK_ACCESS_FS_MAKE_CHAR;
+	case S_IFBLK:
+		return LANDLOCK_ACCESS_FS_MAKE_BLOCK;
+	case S_IFIFO:
+		return LANDLOCK_ACCESS_FS_MAKE_FIFO;
+	case S_IFSOCK:
+		return LANDLOCK_ACCESS_FS_MAKE_SOCK;
+	default:
+		WARN_ON_ONCE(1);
+		return 0;
+	}
+}
+
+/*
+ * Creating multiple links or renaming may lead to privilege escalations if not
+ * handled properly.  Indeed, we must be sure that the source doesn't gain more
+ * privileges by being accessible from the destination.  This is getting more
+ * complex when dealing with multiple layers.  The whole picture can be seen as
+ * a multilayer partial ordering problem.  A future version of Landlock will
+ * deal with that.
+ */
+static int hook_path_link(struct dentry *const old_dentry,
+		const struct path *const new_dir,
+		struct dentry *const new_dentry)
+{
+	const struct landlock_ruleset *const dom =
+		landlock_get_current_domain();
+
+	if (!dom)
+		return 0;
+	/* The mount points are the same for old and new paths, cf. EXDEV. */
+	if (old_dentry->d_parent != new_dir->dentry)
+		/* For now, forbid reparenting. */
+		return -EACCES;
+	if (unlikely(d_is_negative(old_dentry)))
+		return -EACCES;
+	return check_access_path(dom, new_dir,
+			get_mode_access(d_backing_inode(old_dentry)->i_mode));
+}
+
+static inline u32 maybe_remove(const struct dentry *const dentry)
+{
+	if (d_is_negative(dentry))
+		return 0;
+	return d_is_dir(dentry) ? LANDLOCK_ACCESS_FS_REMOVE_DIR :
+		LANDLOCK_ACCESS_FS_REMOVE_FILE;
+}
+
+static int hook_path_rename(const struct path *const old_dir,
+		struct dentry *const old_dentry,
+		const struct path *const new_dir,
+		struct dentry *const new_dentry)
+{
+	const struct landlock_ruleset *const dom =
+		landlock_get_current_domain();
+
+	if (!dom)
+		return 0;
+	/* The mount points are the same for old and new paths, cf. EXDEV. */
+	if (old_dir->dentry != new_dir->dentry)
+		/* For now, forbid reparenting. */
+		return -EACCES;
+	if (WARN_ON_ONCE(d_is_negative(old_dentry)))
+		return -EACCES;
+	/* RENAME_EXCHANGE is handled because directories are the same. */
+	return check_access_path(dom, old_dir, maybe_remove(old_dentry) |
+			maybe_remove(new_dentry) |
+			get_mode_access(d_backing_inode(old_dentry)->i_mode));
+}
+
+static int hook_path_mkdir(const struct path *const dir,
+		struct dentry *const dentry, const umode_t mode)
+{
+	return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_DIR);
+}
+
+static int hook_path_mknod(const struct path *const dir,
+		struct dentry *const dentry, const umode_t mode,
+		const unsigned int dev)
+{
+	const struct landlock_ruleset *const dom =
+		landlock_get_current_domain();
+
+	if (!dom)
+		return 0;
+	return check_access_path(dom, dir, get_mode_access(mode));
+}
+
+static int hook_path_symlink(const struct path *const dir,
+		struct dentry *const dentry, const char *const old_name)
+{
+	return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_SYM);
+}
+
+static int hook_path_unlink(const struct path *const dir,
+		struct dentry *const dentry)
+{
+	return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_FILE);
+}
+
+static int hook_path_rmdir(const struct path *const dir,
+		struct dentry *const dentry)
+{
+	return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_DIR);
+}
+
+static int hook_path_chroot(const struct path *const path)
+{
+	return current_check_access_path(path, LANDLOCK_ACCESS_FS_CHROOT);
+}
+
+/* File hooks */
+
+static inline u32 get_file_access(const struct file *const file)
+{
+	u32 access = 0;
+
+	if (file->f_mode & FMODE_READ) {
+		/* A directory can only be opened in read mode. */
+		if (S_ISDIR(file_inode(file)->i_mode))
+			return LANDLOCK_ACCESS_FS_READ_DIR;
+		access = LANDLOCK_ACCESS_FS_READ_FILE;
+	}
+	/*
+	 * A LANDLOCK_ACCESS_FS_APPEND could be added but we also need to check
+	 * fcntl(2).
+	 */
+	if (file->f_mode & FMODE_WRITE)
+		access |= LANDLOCK_ACCESS_FS_WRITE_FILE;
+	/* __FMODE_EXEC is indeed part of f_flags, not f_mode. */
+	if (file->f_flags & __FMODE_EXEC)
+		access |= LANDLOCK_ACCESS_FS_EXECUTE;
+	return access;
+}
+
+static int hook_file_open(struct file *const file)
+{
+	const struct landlock_ruleset *const dom =
+		landlock_get_current_domain();
+
+	if (!dom)
+		return 0;
+	/*
+	 * Because a file may be opened with O_PATH, get_file_access() may
+	 * return 0.  This case will be handled with a future Landlock
+	 * evolution.
+	 */
+	return current_check_access_path(&file->f_path, get_file_access(file));
+}
+
+static struct security_hook_list landlock_hooks[] __lsm_ro_after_init = {
+	LSM_HOOK_INIT(sb_delete, hook_sb_delete),
+	LSM_HOOK_INIT(sb_mount, hook_sb_mount),
+	LSM_HOOK_INIT(move_mount, hook_move_mount),
+	LSM_HOOK_INIT(sb_umount, hook_sb_umount),
+	LSM_HOOK_INIT(sb_remount, hook_sb_remount),
+	LSM_HOOK_INIT(sb_pivotroot, hook_sb_pivotroot),
+
+	LSM_HOOK_INIT(path_link, hook_path_link),
+	LSM_HOOK_INIT(path_rename, hook_path_rename),
+	LSM_HOOK_INIT(path_mkdir, hook_path_mkdir),
+	LSM_HOOK_INIT(path_mknod, hook_path_mknod),
+	LSM_HOOK_INIT(path_symlink, hook_path_symlink),
+	LSM_HOOK_INIT(path_unlink, hook_path_unlink),
+	LSM_HOOK_INIT(path_rmdir, hook_path_rmdir),
+	LSM_HOOK_INIT(path_chroot, hook_path_chroot),
+
+	LSM_HOOK_INIT(file_open, hook_file_open),
+};
+
+__init void landlock_add_hooks_fs(void)
+{
+	security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),
+			LANDLOCK_NAME);
+}
diff --git a/security/landlock/fs.h b/security/landlock/fs.h
new file mode 100644
index 000000000000..58b462eb7f10
--- /dev/null
+++ b/security/landlock/fs.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - Filesystem management and hooks
+ *
+ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#ifndef _SECURITY_LANDLOCK_FS_H
+#define _SECURITY_LANDLOCK_FS_H
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/rcupdate.h>
+#include <uapi/linux/landlock.h>
+
+#include "ruleset.h"
+#include "setup.h"
+
+#define _LANDLOCK_ACCESS_FS_LAST	LANDLOCK_ACCESS_FS_MAKE_SYM
+#define _LANDLOCK_ACCESS_FS_MASK	((_LANDLOCK_ACCESS_FS_LAST << 1) - 1)
+
+struct landlock_inode_security {
+	/*
+	 * @object: Weak pointer to an allocated object.  All writes (i.e.
+	 * creating a new object or removing one) are protected by the
+	 * underlying inode->i_lock.  Disassociating @object from the inode is
+	 * additionally protected by @object->lock, from the time @object's
+	 * usage refcount drops to zero to the time this pointer is nulled out.
+	 * Cf. release_inode().
+	 */
+	struct landlock_object __rcu *object;
+};
+
+struct landlock_superblock_security {
+	/*
+	 * @inode_refs: References to Landlock underlying objects.
+	 * Cf. struct super_block->s_fsnotify_inode_refs .
+	 */
+	atomic_long_t inode_refs;
+};
+
+static inline struct landlock_inode_security *landlock_inode(
+		const struct inode *const inode)
+{
+	return inode->i_security + landlock_blob_sizes.lbs_inode;
+}
+
+static inline struct landlock_superblock_security *landlock_superblock(
+		const struct super_block *const superblock)
+{
+	return superblock->s_security + landlock_blob_sizes.lbs_superblock;
+}
+
+__init void landlock_add_hooks_fs(void);
+
+int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
+		const struct path *const path, u32 access_hierarchy);
+
+#endif /* _SECURITY_LANDLOCK_FS_H */
diff --git a/security/landlock/setup.c b/security/landlock/setup.c
index 5e7540fdeefa..722cbea82324 100644
--- a/security/landlock/setup.c
+++ b/security/landlock/setup.c
@@ -11,17 +11,24 @@
 
 #include "common.h"
 #include "cred.h"
+#include "fs.h"
 #include "ptrace.h"
 #include "setup.h"
 
+bool landlock_initialized __lsm_ro_after_init = false;
+
 struct lsm_blob_sizes landlock_blob_sizes __lsm_ro_after_init = {
 	.lbs_cred = sizeof(struct landlock_cred_security),
+	.lbs_inode = sizeof(struct landlock_inode_security),
+	.lbs_superblock = sizeof(struct landlock_superblock_security),
 };
 
 static int __init landlock_init(void)
 {
 	landlock_add_hooks_cred();
 	landlock_add_hooks_ptrace();
+	landlock_add_hooks_fs();
+	landlock_initialized = true;
 	pr_info("Up and running.\n");
 	return 0;
 }
diff --git a/security/landlock/setup.h b/security/landlock/setup.h
index 9fdbf33fcc33..1daffab1ab4b 100644
--- a/security/landlock/setup.h
+++ b/security/landlock/setup.h
@@ -11,6 +11,8 @@
 
 #include <linux/lsm_hooks.h>
 
+extern bool landlock_initialized;
+
 extern struct lsm_blob_sizes landlock_blob_sizes;
 
 #endif /* _SECURITY_LANDLOCK_SETUP_H */
-- 
2.27.0


^ permalink raw reply related

* [PATCH v19 00/12] Landlock LSM
From: Mickaël Salaün @ 2020-07-07 18:09 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mickaël Salaün, Al Viro, Andy Lutomirski, Anton Ivanov,
	Arnd Bergmann, Casey Schaufler, James Morris, Jann Horn,
	Jeff Dike, Jonathan Corbet, Kees Cook, Michael Kerrisk,
	Mickaël Salaün, Richard Weinberger, Serge E . Hallyn,
	Shuah Khan, Vincent Dagonneau, kernel-hardening, linux-api,
	linux-arch, linux-doc, linux-fsdevel, linux-kselftest,
	linux-security-module, x86

Hi,

This new patch series is a light update of the previous one, with some
minor fixes and cosmetic changes.  All reviews have been taken into
account.

The SLOC count is 1299 for security/landlock/ and 1752 for
tools/testing/selftest/landlock/ .  Test coverage for security/landlock/
is 93.6% of lines.  The code not covered only deals with internal kernel
errors (e.g. memory allocation) and race conditions.

The compiled documentation is available here:
https://landlock.io/linux-doc/landlock-v19/security/landlock/index.html

This series can be applied on top of v5.8-rc4 .  This can be tested with
CONFIG_SECURITY_LANDLOCK and CONFIG_SAMPLE_LANDLOCK.  This patch series
can be found in a Git repository here:
https://github.com/landlock-lsm/linux/commits/landlock-v19
I would really appreciate constructive comments on this patch series.


# Landlock LSM

The goal of Landlock is to enable to restrict ambient rights (e.g.
global filesystem access) for a set of processes.  Because Landlock is a
stackable LSM [1], it makes possible to create safe security sandboxes
as new security layers in addition to the existing system-wide
access-controls. This kind of sandbox is expected to help mitigate the
security impact of bugs or unexpected/malicious behaviors in user-space
applications. Landlock empowers any process, including unprivileged
ones, to securely restrict themselves.

Landlock is inspired by seccomp-bpf but instead of filtering syscalls
and their raw arguments, a Landlock rule can restrict the use of kernel
objects like file hierarchies, according to the kernel semantic.
Landlock also takes inspiration from other OS sandbox mechanisms: XNU
Sandbox, FreeBSD Capsicum or OpenBSD Pledge/Unveil.

In this current form, Landlock misses some access-control features.
This enables to minimize this patch series and ease review.  This series
still addresses multiple use cases, especially with the combined use of
seccomp-bpf: applications with built-in sandboxing, init systems,
security sandbox tools and security-oriented APIs [2].

Previous version:
https://lore.kernel.org/lkml/20200526205322.23465-1-mic@digikod.net/

[1] https://lore.kernel.org/lkml/50db058a-7dde-441b-a7f9-f6837fe8b69f@schaufler-ca.com/
[2] https://lore.kernel.org/lkml/f646e1c7-33cf-333f-070c-0a40ad0468cd@digikod.net/


Casey Schaufler (1):
  LSM: Infrastructure management of the superblock

Mickaël Salaün (11):
  landlock: Add object management
  landlock: Add ruleset and domain management
  landlock: Set up the security framework and manage credentials
  landlock: Add ptrace restrictions
  fs,security: Add sb_delete hook
  landlock: Support filesystem access-control
  landlock: Add syscall implementation
  arch: Wire up landlock() syscall
  selftests/landlock: Add initial tests
  samples/landlock: Add a sandbox manager example
  landlock: Add user and kernel documentation

 Documentation/security/index.rst              |    1 +
 Documentation/security/landlock/index.rst     |   18 +
 Documentation/security/landlock/kernel.rst    |   69 +
 Documentation/security/landlock/user.rst      |  268 +++
 MAINTAINERS                                   |   11 +
 arch/Kconfig                                  |    7 +
 arch/alpha/kernel/syscalls/syscall.tbl        |    1 +
 arch/arm/tools/syscall.tbl                    |    1 +
 arch/arm64/include/asm/unistd.h               |    2 +-
 arch/arm64/include/asm/unistd32.h             |    2 +
 arch/ia64/kernel/syscalls/syscall.tbl         |    1 +
 arch/m68k/kernel/syscalls/syscall.tbl         |    1 +
 arch/microblaze/kernel/syscalls/syscall.tbl   |    1 +
 arch/mips/kernel/syscalls/syscall_n32.tbl     |    1 +
 arch/mips/kernel/syscalls/syscall_n64.tbl     |    1 +
 arch/mips/kernel/syscalls/syscall_o32.tbl     |    1 +
 arch/parisc/kernel/syscalls/syscall.tbl       |    1 +
 arch/powerpc/kernel/syscalls/syscall.tbl      |    1 +
 arch/s390/kernel/syscalls/syscall.tbl         |    1 +
 arch/sh/kernel/syscalls/syscall.tbl           |    1 +
 arch/sparc/kernel/syscalls/syscall.tbl        |    1 +
 arch/um/Kconfig                               |    1 +
 arch/x86/entry/syscalls/syscall_32.tbl        |    1 +
 arch/x86/entry/syscalls/syscall_64.tbl        |    1 +
 arch/xtensa/kernel/syscalls/syscall.tbl       |    1 +
 fs/super.c                                    |    1 +
 include/linux/lsm_hook_defs.h                 |    1 +
 include/linux/lsm_hooks.h                     |    3 +
 include/linux/security.h                      |    4 +
 include/linux/syscalls.h                      |    3 +
 include/uapi/asm-generic/unistd.h             |    4 +-
 include/uapi/linux/landlock.h                 |  302 +++
 kernel/sys_ni.c                               |    3 +
 samples/Kconfig                               |    7 +
 samples/Makefile                              |    1 +
 samples/landlock/.gitignore                   |    1 +
 samples/landlock/Makefile                     |   15 +
 samples/landlock/sandboxer.c                  |  228 +++
 security/Kconfig                              |   11 +-
 security/Makefile                             |    2 +
 security/landlock/Kconfig                     |   18 +
 security/landlock/Makefile                    |    4 +
 security/landlock/common.h                    |   20 +
 security/landlock/cred.c                      |   46 +
 security/landlock/cred.h                      |   58 +
 security/landlock/fs.c                        |  609 ++++++
 security/landlock/fs.h                        |   60 +
 security/landlock/object.c                    |   66 +
 security/landlock/object.h                    |   91 +
 security/landlock/ptrace.c                    |  120 ++
 security/landlock/ptrace.h                    |   14 +
 security/landlock/ruleset.c                   |  342 ++++
 security/landlock/ruleset.h                   |  157 ++
 security/landlock/setup.c                     |   40 +
 security/landlock/setup.h                     |   18 +
 security/landlock/syscall.c                   |  526 +++++
 security/security.c                           |   51 +-
 security/selinux/hooks.c                      |   58 +-
 security/selinux/include/objsec.h             |    6 +
 security/selinux/ss/services.c                |    3 +-
 security/smack/smack.h                        |    6 +
 security/smack/smack_lsm.c                    |   35 +-
 tools/testing/selftests/Makefile              |    1 +
 tools/testing/selftests/landlock/.gitignore   |    2 +
 tools/testing/selftests/landlock/Makefile     |   29 +
 tools/testing/selftests/landlock/base_test.c  |  163 ++
 tools/testing/selftests/landlock/common.h     |   93 +
 tools/testing/selftests/landlock/config       |    5 +
 tools/testing/selftests/landlock/fs_test.c    | 1740 +++++++++++++++++
 .../testing/selftests/landlock/ptrace_test.c  |  321 +++
 tools/testing/selftests/landlock/true.c       |    5 +
 71 files changed, 5611 insertions(+), 77 deletions(-)
 create mode 100644 Documentation/security/landlock/index.rst
 create mode 100644 Documentation/security/landlock/kernel.rst
 create mode 100644 Documentation/security/landlock/user.rst
 create mode 100644 include/uapi/linux/landlock.h
 create mode 100644 samples/landlock/.gitignore
 create mode 100644 samples/landlock/Makefile
 create mode 100644 samples/landlock/sandboxer.c
 create mode 100644 security/landlock/Kconfig
 create mode 100644 security/landlock/Makefile
 create mode 100644 security/landlock/common.h
 create mode 100644 security/landlock/cred.c
 create mode 100644 security/landlock/cred.h
 create mode 100644 security/landlock/fs.c
 create mode 100644 security/landlock/fs.h
 create mode 100644 security/landlock/object.c
 create mode 100644 security/landlock/object.h
 create mode 100644 security/landlock/ptrace.c
 create mode 100644 security/landlock/ptrace.h
 create mode 100644 security/landlock/ruleset.c
 create mode 100644 security/landlock/ruleset.h
 create mode 100644 security/landlock/setup.c
 create mode 100644 security/landlock/setup.h
 create mode 100644 security/landlock/syscall.c
 create mode 100644 tools/testing/selftests/landlock/.gitignore
 create mode 100644 tools/testing/selftests/landlock/Makefile
 create mode 100644 tools/testing/selftests/landlock/base_test.c
 create mode 100644 tools/testing/selftests/landlock/common.h
 create mode 100644 tools/testing/selftests/landlock/config
 create mode 100644 tools/testing/selftests/landlock/fs_test.c
 create mode 100644 tools/testing/selftests/landlock/ptrace_test.c
 create mode 100644 tools/testing/selftests/landlock/true.c

-- 
2.27.0


^ permalink raw reply

* [PATCH v19 01/12] landlock: Add object management
From: Mickaël Salaün @ 2020-07-07 18:09 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mickaël Salaün, Al Viro, Andy Lutomirski, Anton Ivanov,
	Arnd Bergmann, Casey Schaufler, James Morris, Jann Horn,
	Jeff Dike, Jonathan Corbet, Kees Cook, Michael Kerrisk,
	Mickaël Salaün, Richard Weinberger, Serge E . Hallyn,
	Shuah Khan, Vincent Dagonneau, kernel-hardening, linux-api,
	linux-arch, linux-doc, linux-fsdevel, linux-kselftest,
	linux-security-module, x86
In-Reply-To: <20200707180955.53024-1-mic@digikod.net>

A Landlock object enables to identify a kernel object (e.g. an inode).
A Landlock rule is a set of access rights allowed on an object.  Rules
are grouped in rulesets that may be tied to a set of processes (i.e.
subjects) to enforce a scoped access-control (i.e. a domain).

Because Landlock's goal is to empower any process (especially
unprivileged ones) to sandbox themselves, we can't rely on a system-wide
object identification such as file extended attributes.  Indeed, we need
innocuous, composable and modular access-controls.

The main challenge with these constraints is to identify kernel objects
while this identification is useful (i.e. when a security policy makes
use of this object).  But this identification data should be freed once
no policy is using it.  This ephemeral tagging should not and may not be
written in the filesystem.  We then need to manage the lifetime of a
rule according to the lifetime of its object.  To avoid a global lock,
this implementation make use of RCU and counters to safely reference
objects.

A following commit uses this generic object management for inodes.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: James Morris <jmorris@namei.org>
Cc: Jann Horn <jannh@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Serge E. Hallyn <serge@hallyn.com>
---

Changes since v18:
* Account objects to kmemcg.

Changes since v14:
* Simplify the object, rule and ruleset management at the expense of a
  less aggressive memory freeing (contributed by Jann Horn, with
  additional modifications):
  - Remove object->list aggregating the rules tied to an object.
  - Remove landlock_get_object(), landlock_drop_object(),
    {get,put}_object_cleaner() and landlock_rule_is_disabled().
  - Rewrite landlock_put_object() to use a more simple mechanism
    (no tricky RCU).
  - Replace enum landlock_object_type and landlock_release_object() with
    landlock_object_underops->release()
  - Adjust unions and Sparse annotations.
  Cf. https://lore.kernel.org/lkml/CAG48ez21bEn0wL1bbmTiiu8j9jP5iEWtHOwz4tURUJ+ki0ydYw@mail.gmail.com/
* Merge struct landlock_rule into landlock_ruleset_elem to simplify the
  rule management.
* Constify variables.
* Improve kernel documentation.
* Cosmetic variable renames.
* Remove the "default" in the Kconfig (suggested by Jann Horn).
* Only use refcount_inc() through getter helpers.
* Update Kconfig description.

Changes since v13:
* New dedicated implementation, removing the need for eBPF.

Previous changes:
https://lore.kernel.org/lkml/20190721213116.23476-6-mic@digikod.net/
---
 MAINTAINERS                | 10 +++++
 security/Kconfig           |  1 +
 security/Makefile          |  2 +
 security/landlock/Kconfig  | 18 ++++++++
 security/landlock/Makefile |  3 ++
 security/landlock/object.c | 66 +++++++++++++++++++++++++++
 security/landlock/object.h | 91 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 191 insertions(+)
 create mode 100644 security/landlock/Kconfig
 create mode 100644 security/landlock/Makefile
 create mode 100644 security/landlock/object.c
 create mode 100644 security/landlock/object.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 1d4aa7f942de..4c229c961d0d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9630,6 +9630,16 @@ F:	net/core/sock_map.c
 F:	net/ipv4/tcp_bpf.c
 F:	net/ipv4/udp_bpf.c
 
+LANDLOCK SECURITY MODULE
+M:	Mickaël Salaün <mic@digikod.net>
+L:	linux-security-module@vger.kernel.org
+S:	Supported
+W:	https://landlock.io
+T:	git https://github.com/landlock-lsm/linux.git
+F:	security/landlock/
+K:	landlock
+K:	LANDLOCK
+
 LANTIQ / INTEL Ethernet drivers
 M:	Hauke Mehrtens <hauke@hauke-m.de>
 L:	netdev@vger.kernel.org
diff --git a/security/Kconfig b/security/Kconfig
index cd3cc7da3a55..582fd777a757 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -238,6 +238,7 @@ source "security/loadpin/Kconfig"
 source "security/yama/Kconfig"
 source "security/safesetid/Kconfig"
 source "security/lockdown/Kconfig"
+source "security/landlock/Kconfig"
 
 source "security/integrity/Kconfig"
 
diff --git a/security/Makefile b/security/Makefile
index 3baf435de541..c688f4907a1b 100644
--- a/security/Makefile
+++ b/security/Makefile
@@ -13,6 +13,7 @@ subdir-$(CONFIG_SECURITY_LOADPIN)	+= loadpin
 subdir-$(CONFIG_SECURITY_SAFESETID)    += safesetid
 subdir-$(CONFIG_SECURITY_LOCKDOWN_LSM)	+= lockdown
 subdir-$(CONFIG_BPF_LSM)		+= bpf
+subdir-$(CONFIG_SECURITY_LANDLOCK)		+= landlock
 
 # always enable default capabilities
 obj-y					+= commoncap.o
@@ -32,6 +33,7 @@ obj-$(CONFIG_SECURITY_SAFESETID)       += safesetid/
 obj-$(CONFIG_SECURITY_LOCKDOWN_LSM)	+= lockdown/
 obj-$(CONFIG_CGROUPS)			+= device_cgroup.o
 obj-$(CONFIG_BPF_LSM)			+= bpf/
+obj-$(CONFIG_SECURITY_LANDLOCK)	+= landlock/
 
 # Object integrity file lists
 subdir-$(CONFIG_INTEGRITY)		+= integrity
diff --git a/security/landlock/Kconfig b/security/landlock/Kconfig
new file mode 100644
index 000000000000..9ec7593a534a
--- /dev/null
+++ b/security/landlock/Kconfig
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config SECURITY_LANDLOCK
+	bool "Landlock support"
+	depends on SECURITY
+	select SECURITY_PATH
+	help
+	  Landlock is a safe sandboxing mechanism which enables processes to
+	  restrict themselves (and their future children) by gradually
+	  enforcing tailored access control policies.  A security policy is a
+	  set of access rights (e.g. open a file in read-only, make a
+	  directory) tied to a file hierarchy.  The configuration can be set by
+	  any processes, including unprivileged ones, thanks to the landlock()
+	  system call.
+
+	  See Documentation/security/landlock/ for further information.
+
+	  If you are unsure how to answer this question, answer N.
diff --git a/security/landlock/Makefile b/security/landlock/Makefile
new file mode 100644
index 000000000000..cb6deefbf4c0
--- /dev/null
+++ b/security/landlock/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_SECURITY_LANDLOCK) := landlock.o
+
+landlock-y := object.o
diff --git a/security/landlock/object.c b/security/landlock/object.c
new file mode 100644
index 000000000000..67937650885b
--- /dev/null
+++ b/security/landlock/object.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - Object management
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#include <linux/bug.h>
+#include <linux/compiler_types.h>
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/refcount.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "object.h"
+
+struct landlock_object *landlock_create_object(
+		const struct landlock_object_underops *underops,
+		void *const underobj)
+{
+	struct landlock_object *new_object;
+
+	if (WARN_ON_ONCE(!underops || !underobj))
+		return NULL;
+	new_object = kzalloc(sizeof(*new_object), GFP_KERNEL_ACCOUNT);
+	if (!new_object)
+		return NULL;
+	refcount_set(&new_object->usage, 1);
+	spin_lock_init(&new_object->lock);
+	new_object->underops = underops;
+	new_object->underobj = underobj;
+	return new_object;
+}
+
+/*
+ * The caller must own the object (i.e. thanks to object->usage) to safely put
+ * it.
+ */
+void landlock_put_object(struct landlock_object *const object)
+{
+	/*
+	 * The call to @object->underops->release(object) might sleep e.g.,
+	 * because of iput().
+	 */
+	might_sleep();
+	if (!object)
+		return;
+
+	/*
+	 * If the @object's refcount can't drop to zero, we can just decrement
+	 * the refcount without holding a lock. Otherwise, the decrement must
+	 * happen under @object->lock for synchronization with things like
+	 * get_inode_object().
+	 */
+	if (refcount_dec_and_lock(&object->usage, &object->lock)) {
+		__acquire(&object->lock);
+		/*
+		 * With @object->lock initially held, remove the reference from
+		 * @object->underobj to @object (if it still exists).
+		 */
+		object->underops->release(object);
+		kfree_rcu(object, rcu_free);
+	}
+}
diff --git a/security/landlock/object.h b/security/landlock/object.h
new file mode 100644
index 000000000000..942bc0e18064
--- /dev/null
+++ b/security/landlock/object.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - Object management
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#ifndef _SECURITY_LANDLOCK_OBJECT_H
+#define _SECURITY_LANDLOCK_OBJECT_H
+
+#include <linux/compiler_types.h>
+#include <linux/refcount.h>
+#include <linux/spinlock.h>
+
+struct landlock_object;
+
+/**
+ * struct landlock_object_underops - Operations on an underlying object
+ */
+struct landlock_object_underops {
+	/**
+	 * @release: Releases the underlying object (e.g. iput() for an inode).
+	 */
+	void (*release)(struct landlock_object *const object)
+		__releases(object->lock);
+};
+
+/**
+ * struct landlock_object - Security blob tied to a kernel object
+ *
+ * The goal of this structure is to enable to tie a set of ephemeral access
+ * rights (pertaining to different domains) to a kernel object (e.g an inode)
+ * in a safe way.  This imply to handle concurrent use and modification.
+ *
+ * The lifetime of a &struct landlock_object depends of the rules referring to
+ * it.
+ */
+struct landlock_object {
+	/**
+	 * @usage: This counter is used to tie an object to the rules matching
+	 * it or to keep it alive while adding a new rule.  If this counter
+	 * reaches zero, this struct must not be modified, but this counter can
+	 * still be read from within an RCU read-side critical section.  When
+	 * adding a new rule to an object with a usage counter of zero, we must
+	 * wait until the pointer to this object is set to NULL (or recycled).
+	 */
+	refcount_t usage;
+	/**
+	 * @lock: Guards against concurrent modifications.  This lock must be
+	 * from the time @usage drops to zero until any weak references from
+	 * @underobj to this object have been cleaned up.
+	 *
+	 * Lock ordering: inode->i_lock nests inside this.
+	 */
+	spinlock_t lock;
+	/**
+	 * @underobj: Used when cleaning up an object and to mark an object as
+	 * tied to its underlying kernel structure.  This pointer is protected
+	 * by @lock.  Cf. landlock_release_inodes() and release_inode().
+	 */
+	void *underobj;
+	union {
+		/**
+		 * @rcu_free: Enables lockless use of @usage, @lock and
+		 * @underobj from within an RCU read-side critical section.
+		 * @rcu_free and @underops are only used by
+		 * landlock_put_object().
+		 */
+		struct rcu_head rcu_free;
+		/**
+		 * @underops: Enables landlock_put_object() to release the
+		 * underlying object (e.g. inode).
+		 */
+		const struct landlock_object_underops *underops;
+	};
+};
+
+struct landlock_object *landlock_create_object(
+		const struct landlock_object_underops *const underops,
+		void *const underojb);
+
+void landlock_put_object(struct landlock_object *const object);
+
+static inline void landlock_get_object(struct landlock_object *const object)
+{
+	if (object)
+		refcount_inc(&object->usage);
+}
+
+#endif /* _SECURITY_LANDLOCK_OBJECT_H */
-- 
2.27.0


^ permalink raw reply related

* [PATCH v19 06/12] fs,security: Add sb_delete hook
From: Mickaël Salaün @ 2020-07-07 18:09 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mickaël Salaün, Al Viro, Andy Lutomirski, Anton Ivanov,
	Arnd Bergmann, Casey Schaufler, James Morris, Jann Horn,
	Jeff Dike, Jonathan Corbet, Kees Cook, Michael Kerrisk,
	Mickaël Salaün, Richard Weinberger, Serge E . Hallyn,
	Shuah Khan, Vincent Dagonneau, kernel-hardening, linux-api,
	linux-arch, linux-doc, linux-fsdevel, linux-kselftest,
	linux-security-module, x86
In-Reply-To: <20200707180955.53024-1-mic@digikod.net>

The sb_delete security hook is called when shutting down a superblock,
which may be useful to release kernel objects tied to the superblock's
lifetime (e.g. inodes).

This new hook is needed by Landlock to release (ephemerally) tagged
struct inodes.  This comes from the unprivileged nature of Landlock
described in the next commit.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: James Morris <jmorris@namei.org>
Cc: Jann Horn <jannh@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Serge E. Hallyn <serge@hallyn.com>
---

Changes since v17:
* Initial patch to replace the direct call to landlock_release_inodes()
  (requested by James Morris).
  https://lore.kernel.org/lkml/alpine.LRH.2.21.2005150536440.7929@namei.org/
---
 fs/super.c                    | 1 +
 include/linux/lsm_hook_defs.h | 1 +
 include/linux/lsm_hooks.h     | 2 ++
 include/linux/security.h      | 4 ++++
 security/security.c           | 5 +++++
 5 files changed, 13 insertions(+)

diff --git a/fs/super.c b/fs/super.c
index 904459b35119..d5517e49ccdf 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -454,6 +454,7 @@ void generic_shutdown_super(struct super_block *sb)
 		evict_inodes(sb);
 		/* only nonzero refcount inodes can have marks */
 		fsnotify_sb_delete(sb);
+		security_sb_delete(sb);
 
 		if (sb->s_dio_done_wq) {
 			destroy_workqueue(sb->s_dio_done_wq);
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index af998f93d256..2b855d6a299e 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -59,6 +59,7 @@ LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc,
 LSM_HOOK(int, -ENOPARAM, fs_context_parse_param, struct fs_context *fc,
 	 struct fs_parameter *param)
 LSM_HOOK(int, 0, sb_alloc_security, struct super_block *sb)
+LSM_HOOK(void, LSM_RET_VOID, sb_delete, struct super_block *sb)
 LSM_HOOK(void, LSM_RET_VOID, sb_free_security, struct super_block *sb)
 LSM_HOOK(void, LSM_RET_VOID, sb_free_mnt_opts, void *mnt_opts)
 LSM_HOOK(int, 0, sb_eat_lsm_opts, char *orig, void **mnt_opts)
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 80c629d8a2ad..499f19a70a19 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -108,6 +108,8 @@
  *	allocated.
  *	@sb contains the super_block structure to be modified.
  *	Return 0 if operation was successful.
+ * @sb_delete:
+ *	Release objects tied to a superblock (e.g. inodes).
  * @sb_free_security:
  *	Deallocate and clear the sb->s_security field.
  *	@sb contains the super_block structure to be modified.
diff --git a/include/linux/security.h b/include/linux/security.h
index 0a0a03b36a3b..28b0ee6c7239 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -286,6 +286,7 @@ void security_bprm_committed_creds(struct linux_binprm *bprm);
 int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc);
 int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param);
 int security_sb_alloc(struct super_block *sb);
+void security_sb_delete(struct super_block *sb);
 void security_sb_free(struct super_block *sb);
 void security_free_mnt_opts(void **mnt_opts);
 int security_sb_eat_lsm_opts(char *options, void **mnt_opts);
@@ -614,6 +615,9 @@ static inline int security_sb_alloc(struct super_block *sb)
 	return 0;
 }
 
+static inline void security_sb_delete(struct super_block *sb)
+{ }
+
 static inline void security_sb_free(struct super_block *sb)
 { }
 
diff --git a/security/security.c b/security/security.c
index d60aa835b670..9337b511306c 100644
--- a/security/security.c
+++ b/security/security.c
@@ -898,6 +898,11 @@ int security_sb_alloc(struct super_block *sb)
 	return rc;
 }
 
+void security_sb_delete(struct super_block *sb)
+{
+	call_void_hook(sb_delete, sb);
+}
+
 void security_sb_free(struct super_block *sb)
 {
 	call_void_hook(sb_free_security, sb);
-- 
2.27.0


^ permalink raw reply related

* Re: [PATCH v10 9/9] ima: add FIRMWARE_PARTIAL_READ support
From: Scott Branden @ 2020-07-07 17:13 UTC (permalink / raw)
  To: Kees Cook
  Cc: Luis Chamberlain, Wolfram Sang, Greg Kroah-Hartman, David Brown,
	Alexander Viro, Shuah Khan, bjorn.andersson, Shuah Khan,
	Arnd Bergmann, Mimi Zohar, Rafael J . Wysocki, linux-kernel,
	linux-arm-msm, linux-fsdevel, BCM Kernel Feedback, Olof Johansson,
	Andrew Morton, Dan Carpenter, Colin Ian King, Takashi Iwai,
	linux-kselftest, Andy Gross, linux-integrity,
	linux-security-module
In-Reply-To: <202007061950.F6B3D9E6A@keescook>

Hi Kees,

You and others are certainly more experts in the filesystem and security
infrastructure of the kernel.
What I am trying to accomplish is a simple operation:
request part of a file into a buffer rather than the whole file.
If someone could add such support I would be more than happy to use it.

This has now bubbled into many other designs issues in the existing 
codebase.
I will need more details on your comments - see below.


On 2020-07-06 8:08 p.m., Kees Cook wrote:
> On Mon, Jul 06, 2020 at 04:23:09PM -0700, Scott Branden wrote:
>> Add FIRMWARE_PARTIAL_READ support for integrity
>> measurement on partial reads of firmware files.
> Hi,
>
> Several versions ago I'd suggested that the LSM infrastructure handle
> the "full read" semantics so that individual LSMs don't need to each
> duplicate the same efforts. As it happens, only IMA is impacted (SELinux
> ignores everything except modules, and LoadPin only cares about origin
> not contents).
Does your patch series "Fix misused kernel_read_file() enums" handle this
because this suggestion is outside the scope of my change?
>
> Next is the problem that enum kernel_read_file_id is an object
> TYPE enum, not a HOW enum. (And it seems I missed the addition of
> READING_FIRMWARE_PREALLOC_BUFFER, which may share a similar problem.)
> That it's a partial read doesn't change _what_ you're reading: that's an
> internal API detail. What happens when I attempt to do a partial read of
> a kexec image?
It does not appear there is any user of partial reads of kexec images?
I have been informed by Greg K-H to not add apis that are not used so 
such support
doesn't make sense to add at this time.
>   I'll use kernel_pread_file() and pass READING_KEXEC_IMAGE,
> but the LSMs will have no idea it's a partial read.
The addition I am adding is for request_partial_firmware_into_buf.
In order to do so it adds internal support for partial reads of firmware 
files,
not kexec image.

The above seems outside the scope of my patch?
>
> Finally, what keeps the contents of the file from changing between the
> first call (which IMA will read the entire file for) and the next reads
> which will bypass IMA?
The request is for a partial read.  IMA ensures the whole file integrity 
even though I only do a partial read.
The next partial read will re-read and check integrity of file.
>   I'd suggested that the open file must have writes
> disabled on it (as execve() does).
The file will be reopened and integrity checked on the next partial read 
(if there is one).
So I don't think there is any change to be made here.
If writes aren't already disabled for a whole file read then that is 
something that needs to be fixed in the existing code.
>
> So, please redesign this:
> - do not add an enum
I used existing infrastructure provided by Mimi but now looks like it 
will have to fit with your patches from yesterday.
> - make the file unwritable for the life of having the handle open
It's no different than a full file read so no change to be made here.
> - make the "full read" happen as part of the first partial read so the
>    LSMs don't have to reimplement everything
Each partial read is an individual operation so I think a "full read" is 
performed every time
if your security IMA is enabled.  If someone wants to add a file lock 
and then partial reads in the kernel
then that would be different than what is needed by the kernel driver.
>
> -Kees
>
Regards,
Scott

^ permalink raw reply

* Re: [PATCH v3 13/16] exit: Factor thread_group_exited out of pidfd_poll
From: Eric W. Biederman @ 2020-07-07 17:09 UTC (permalink / raw)
  To: Christian Brauner
  Cc: Alexei Starovoitov, linux-kernel, David Miller,
	Greg Kroah-Hartman, Tetsuo Handa, Kees Cook, Andrew Morton,
	Alexei Starovoitov, Al Viro, bpf, linux-fsdevel, Daniel Borkmann,
	Jakub Kicinski, Masahiro Yamada, Gary Lin, Bruno Meneguele,
	LSM List, Casey Schaufler, Luis Chamberlain, Linus Torvalds
In-Reply-To: <20200704155052.kmrest5useyxcfnu@wittgenstein>

Christian Brauner <christian.brauner@ubuntu.com> writes:

> On Fri, Jul 03, 2020 at 04:37:47PM -0500, Eric W. Biederman wrote:
>> Alexei Starovoitov <alexei.starovoitov@gmail.com> writes:
>> 
>> > On Thu, Jul 02, 2020 at 11:41:37AM -0500, Eric W. Biederman wrote:
>> >> Create an independent helper thread_group_exited report return true
>> >> when all threads have passed exit_notify in do_exit.  AKA all of the
>> >> threads are at least zombies and might be dead or completely gone.
>> >> 
>> >> Create this helper by taking the logic out of pidfd_poll where
>> >> it is already tested, and adding a missing READ_ONCE on
>> >> the read of task->exit_state.
>> >> 
>> >> I will be changing the user mode driver code to use this same logic
>> >> to know when a user mode driver needs to be restarted.
>> >> 
>> >> Place the new helper thread_group_exited in kernel/exit.c and
>> >> EXPORT it so it can be used by modules.
>> >> 
>> >> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
>> >> ---
>> >>  include/linux/sched/signal.h |  2 ++
>> >>  kernel/exit.c                | 24 ++++++++++++++++++++++++
>> >>  kernel/fork.c                |  6 +-----
>> >>  3 files changed, 27 insertions(+), 5 deletions(-)
>> >> 
>> >> diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
>> >> index 0ee5e696c5d8..1bad18a1d8ba 100644
>> >> --- a/include/linux/sched/signal.h
>> >> +++ b/include/linux/sched/signal.h
>> >> @@ -674,6 +674,8 @@ static inline int thread_group_empty(struct task_struct *p)
>> >>  #define delay_group_leader(p) \
>> >>  		(thread_group_leader(p) && !thread_group_empty(p))
>> >>  
>> >> +extern bool thread_group_exited(struct pid *pid);
>> >> +
>> >>  extern struct sighand_struct *__lock_task_sighand(struct task_struct *task,
>> >>  							unsigned long *flags);
>> >>  
>> >> diff --git a/kernel/exit.c b/kernel/exit.c
>> >> index d3294b611df1..a7f112feb0f6 100644
>> >> --- a/kernel/exit.c
>> >> +++ b/kernel/exit.c
>> >> @@ -1713,6 +1713,30 @@ COMPAT_SYSCALL_DEFINE5(waitid,
>> >>  }
>> >>  #endif
>> >>  
>> >> +/**
>> >> + * thread_group_exited - check that a thread group has exited
>> >> + * @pid: tgid of thread group to be checked.
>> >> + *
>> >> + * Test if thread group is has exited (all threads are zombies, dead
>> >> + * or completely gone).
>> >> + *
>> >> + * Return: true if the thread group has exited. false otherwise.
>> >> + */
>> >> +bool thread_group_exited(struct pid *pid)
>> >> +{
>> >> +	struct task_struct *task;
>> >> +	bool exited;
>> >> +
>> >> +	rcu_read_lock();
>> >> +	task = pid_task(pid, PIDTYPE_PID);
>> >> +	exited = !task ||
>> >> +		(READ_ONCE(task->exit_state) && thread_group_empty(task));
>> >> +	rcu_read_unlock();
>> >> +
>> >> +	return exited;
>> >> +}
>> >
>> > I'm not sure why you think READ_ONCE was missing.
>> > It's different in wait_consider_task() where READ_ONCE is needed because
>> > of multiple checks. Here it's done once.
>> 
>> In practice it probably has no effect on the generated code.  But
>> READ_ONCE is about telling the compiler not to be clever.  Don't use
>> tearing loads or stores etc.  When all of the other readers are using
>> READ_ONCE I just get nervous if we have a case that doesn't.
>
> That's not true. The only place where READ_ONCE(->exit_state) is used is
> in wait_consider_task() and nowhere else. We had that discussion a while
> ago where I or someone proposed to simply place a READ_ONCE() around all
> accesses to exit_state for the sake of kcsan and we agreed that it's
> unnecessary and not to do this.
> But it obviously doesn't hurt to have it.

There is a larger discussion to be had around the proper handling of
exit_state.

In this particular case because we are accessing exit_state with
only rcu_read_lock protection, because the outcome of the read
is about correctness, and because the compiler has nothing else
telling it not to re-read exit_state, I believe we actually need
the READ_ONCE.

At the same time it would take a pretty special compiler to want to
reaccess that field in thread_group_exited.

I have looked through and I don't find any of the other access of
exit_state where the result is about correctness (so that we care)
and we don't hold tasklist_lock.

But I have removed the necessary wording from the commit comment.

There is a much larger discussion to be had about what to do with
exit_state, because I think I found about half the accesses were
slightly buggy in one form or another.

Eric



^ permalink raw reply

* Re: [PATCH 2/4] fs: Remove FIRMWARE_PREALLOC_BUFFER from kernel_read_file() enums
From: Scott Branden @ 2020-07-07 16:42 UTC (permalink / raw)
  To: Kees Cook, James Morris
  Cc: Luis Chamberlain, Mimi Zohar, Greg Kroah-Hartman,
	Rafael J. Wysocki, Alexander Viro, Jessica Yu, Dmitry Kasatkin,
	Serge E. Hallyn, Casey Schaufler, Eric W. Biederman,
	Peter Zijlstra, Matthew Garrett, David Howells,
	Mauro Carvalho Chehab, Randy Dunlap, Joel Fernandes (Google),
	KP Singh, Dave Olsthoorn, Hans de Goede, Peter Jones,
	Andrew Morton, Stephen Boyd, Paul Moore, linux-kernel,
	linux-fsdevel, linux-integrity, linux-security-module,
	Christoph Hellwig
In-Reply-To: <20200707081926.3688096-3-keescook@chromium.org>



On 2020-07-07 1:19 a.m., Kees Cook wrote:
> FIRMWARE_PREALLOC_BUFFER is a "how", not a "what", and confuses the LSMs
> that are interested in filtering between types of things. The "how"
> should be an internal detail made uninteresting to the LSMs.
>
> Fixes: a098ecd2fa7d ("firmware: support loading into a pre-allocated buffer")
> Fixes: fd90bc559bfb ("ima: based on policy verify firmware signatures (pre-allocated buffer)")
> Fixes: 4f0496d8ffa3 ("ima: based on policy warn about loading firmware (pre-allocated buffer)")
> Signed-off-by: Kees Cook <keescook@chromium.org>
> ---
>   drivers/base/firmware_loader/main.c | 5 ++---
>   fs/exec.c                           | 7 ++++---
>   include/linux/fs.h                  | 2 +-
>   security/integrity/ima/ima_main.c   | 6 ++----
>   4 files changed, 9 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c
> index ca871b13524e..c2f57cedcd6f 100644
> --- a/drivers/base/firmware_loader/main.c
> +++ b/drivers/base/firmware_loader/main.c
> @@ -465,14 +465,12 @@ fw_get_filesystem_firmware(struct device *device, struct fw_priv *fw_priv,
>   	int i, len;
>   	int rc = -ENOENT;
>   	char *path;
> -	enum kernel_read_file_id id = READING_FIRMWARE;
>   	size_t msize = INT_MAX;
>   	void *buffer = NULL;
>   
>   	/* Already populated data member means we're loading into a buffer */
>   	if (!decompress && fw_priv->data) {
>   		buffer = fw_priv->data;
> -		id = READING_FIRMWARE_PREALLOC_BUFFER;
>   		msize = fw_priv->allocated_size;
>   	}
>   
> @@ -496,7 +494,8 @@ fw_get_filesystem_firmware(struct device *device, struct fw_priv *fw_priv,
>   
>   		/* load firmware files from the mount namespace of init */
>   		rc = kernel_read_file_from_path_initns(path, &buffer,
> -						       &size, msize, id);
> +						       &size, msize,
> +						       READING_FIRMWARE);
>   		if (rc) {
>   			if (rc != -ENOENT)
>   				dev_warn(device, "loading %s failed with error %d\n",
> diff --git a/fs/exec.c b/fs/exec.c
> index e6e8a9a70327..2bf549757ce7 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -927,6 +927,7 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size,
>   {
>   	loff_t i_size, pos;
>   	ssize_t bytes = 0;
> +	void *allocated = NULL;
>   	int ret;
>   
>   	if (!S_ISREG(file_inode(file)->i_mode) || max_size < 0)
> @@ -950,8 +951,8 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size,
>   		goto out;
>   	}
>   
> -	if (id != READING_FIRMWARE_PREALLOC_BUFFER)
> -		*buf = vmalloc(i_size);
> +	if (!*buf)
> +		*buf = allocated = vmalloc(i_size);
>   	if (!*buf) {
>   		ret = -ENOMEM;
>   		goto out;
> @@ -980,7 +981,7 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size,
>   
>   out_free:
>   	if (ret < 0) {
> -		if (id != READING_FIRMWARE_PREALLOC_BUFFER) {
> +		if (allocated) {
>   			vfree(*buf);
>   			*buf = NULL;
>   		}
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 3f881a892ea7..95fc775ed937 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -2993,10 +2993,10 @@ static inline void i_readcount_inc(struct inode *inode)
>   #endif
>   extern int do_pipe_flags(int *, int);
>   
> +/* This is a list of *what* is being read, not *how*. */
>   #define __kernel_read_file_id(id) \
>   	id(UNKNOWN, unknown)		\
>   	id(FIRMWARE, firmware)		\
With this change, I'm trying to figure out how the partial firmware read 
is going to work on top of this reachitecture.
Is it going to be ok to add READING_PARTIAL_FIRMWARE here as that is a 
"what"?
> -	id(FIRMWARE_PREALLOC_BUFFER, firmware)	\
My patch series gets rejected any time I make a change to the 
kernel_read_file* region in linux/fs.h.
The requirement is for this api to move to another header file outside 
of linux/fs.h
It seems the same should apply to your change.
Could you please add the following patch to the start of you patch 
series to move the kernel_read_file* to its own include file?
https://patchwork.kernel.org/patch/11647063/
>   	id(FIRMWARE_EFI_EMBEDDED, firmware)	\
>   	id(MODULE, kernel-module)		\
>   	id(KEXEC_IMAGE, kexec-image)		\
> diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
> index c1583d98c5e5..f80ee4ce4669 100644
> --- a/security/integrity/ima/ima_main.c
> +++ b/security/integrity/ima/ima_main.c
> @@ -611,19 +611,17 @@ void ima_post_path_mknod(struct dentry *dentry)
>   int ima_read_file(struct file *file, enum kernel_read_file_id read_id)
>   {
>   	/*
> -	 * READING_FIRMWARE_PREALLOC_BUFFER
> -	 *
>   	 * Do devices using pre-allocated memory run the risk of the
>   	 * firmware being accessible to the device prior to the completion
>   	 * of IMA's signature verification any more than when using two
> -	 * buffers?
> +	 * buffers? It may be desirable to include the buffer address
> +	 * in this API and walk all the dma_map_single() mappings to check.
>   	 */
>   	return 0;
>   }
>   
>   const int read_idmap[READING_MAX_ID] = {
>   	[READING_FIRMWARE] = FIRMWARE_CHECK,
> -	[READING_FIRMWARE_PREALLOC_BUFFER] = FIRMWARE_CHECK,
>   	[READING_MODULE] = MODULE_CHECK,
>   	[READING_KEXEC_IMAGE] = KEXEC_KERNEL_CHECK,
>   	[READING_KEXEC_INITRAMFS] = KEXEC_INITRAMFS_CHECK,


^ permalink raw reply

* Re: [PATCH v4 3/3] prctl: Allow ptrace capable processes to change /proc/self/exe
From: Christian Brauner @ 2020-07-07 15:45 UTC (permalink / raw)
  To: Nicolas Viennot, Jann Horn
  Cc: Paul Moore, Serge E. Hallyn, Adrian Reber, Eric Biederman,
	Pavel Emelyanov, Oleg Nesterov, Dmitry Safonov, Andrei Vagin,
	Michał Cłapiński, Kamil Yurtsever, Dirk Petersen,
	Christine Flood, Casey Schaufler, Mike Rapoport,
	Radostin Stoyanov, Cyrill Gorcunov, Stephen Smalley,
	Sargun Dhillon, Arnd Bergmann,
	linux-security-module@vger.kernel.org,
	linux-kernel@vger.kernel.org, selinux@vger.kernel.org, Eric Paris,
	Jann Horn, linux-fsdevel@vger.kernel.org
In-Reply-To: <20200706174437.zpshxlul7rl3vmmq@wittgenstein>

On Mon, Jul 06, 2020 at 07:44:38PM +0200, Christian Brauner wrote:
> On Mon, Jul 06, 2020 at 05:13:35PM +0000, Nicolas Viennot wrote:
> > > > This is scary.  But I believe it is safe.
> > > >
> > > > Reviewed-by: Serge Hallyn <serge@hallyn.com>
> > > >
> > > > I am a bit curious about the implications of the selinux patch.
> > > > IIUC you are using the permission of the tracing process to execute
> > > > the file without transition, so this is a way to work around the
> > > > policy which might prevent the tracee from doing so.
> > > > Given that SELinux wants to be MAC, I'm not *quite* sure that's
> > > > considered kosher.  You also are skipping the PROCESS__PTRACE to
> > > > SECCLASS_PROCESS check which selinux_bprm_set_creds does later on.
> > > > Again I'm just not quite sure what's considered normal there these
> > > > days.
> > > >
> > > > Paul, do you have input there?
> > >
> > > I agree, the SELinux hook looks wrong.  Building on what Christian said, this looks more like a ptrace operation than an exec operation.
> > 
> > Serge, Paul, Christian,
> > 
> > I made a PoC to demonstrate the change of /proc/self/exe without CAP_SYS_ADMIN using only ptrace and execve.
> > You may find it here: https://github.com/nviennot/run_as_exe
> > 
> > What do you recommend to relax the security checks in the kernel when it comes to changing the exe link?
> 
> Looks fun! Yeah, so that this is possible is known afaict. But you're
> not really circumventing the kernel check but are mucking with the EFL
> by changing the auxv, right?
> 
> Originally, you needed to be userns root, i.e. only uid 0 could
> change the /proc/self/exe link (cf. [1]). This was changed to
> ns_capable(CAP_SYS_ADMIN) in [2].
> 
> The original reasoning in [1] is interesting as it basically already
> points to your poc:
> 
> "Still note that updating exe-file link now doesn't require sys-resource
>  capability anymore, after all there is no much profit in preventing
>  setup own file link (there are a number of ways to execute own code --
>  ptrace, ld-preload, so that the only reliable way to find which exactly
>  code is executed is to inspect running program memory).  Still we
>  require the caller to be at least user-namespace root user."
> 
> There were arguments being made that /proc/<pid>/exe needs to be sm that
> userspace can have a decent amount of trust in but I believe that that's
> not a great argument.
> 
> But let me dig a little into the original discussion and see what the
> thread-model was.
> At this point I'm starting to believe that it was people being cautios
> but better be sure.

Ok, so the original patch proposal was presented in [4] in 2014. The
final version of that patch added the PR_SET_MM_MAP we know today. The
initial version presented in [4] did not require _any_ privilege.

So the reasoning for only placing the /proc/<pid>/exe link under
ns_capable(CAP_SYS_ADMIN) is very thin. to quote from [5]:

"Controlling exe_fd without privileges may turn out to be dangerous. At
 least things like tomoyo examine it for making policy decisions (see
 tomoyo_manager())."

So yes, tomoyo_get_exe() is what this was retained for apparently:

const char *tomoyo_get_exe(void)
{
	struct file *exe_file;
	const char *cp;
	struct mm_struct *mm = current->mm;

	if (!mm)
		return NULL;
	exe_file = get_mm_exe_file(mm);
	if (!exe_file)
		return NULL;

	cp = tomoyo_realpath_from_path(&exe_file->f_path);
	fput(exe_file);
	return cp;
}

The exe path is literally used in tomoyo_manager() to verify that you
are allowed to change policy. That seems like a bad idea to me but then
again, I don't know enough about Tomoyo. In any case, I think that means
we can't remove CAP_SYS_ADMIN because that would make things worse than
they are right now for Tomoyo but I also don't see why placing this
under ns_capable(CAP_SYS_ADMIN) || ns_capable(CAP_CHECKPOINT_RESTORE)
would make this any worse.

And Cyrill (and later in that thread Andrei) already mentioned it in [6]:
"@exe_fd is just a hint and as I mentioned if we have ptrace/preload
 rights there damn a lot of ways to inject own code into any program so
 that a user won't even notice ;)"

Another place where the exe file is relevant is for the coredump with
the -E option. But it only uses the path when generating the coredump
pattern and if that's a security issue than your poc shows that this can
already be achieved today.

Christian

> 
> [1]: f606b77f1a9e ("prctl: PR_SET_MM -- introduce PR_SET_MM_MAP operation")
> [2]: 4d28df6152aa ("prctl: Allow local CAP_SYS_ADMIN changing exe_file")
> [3]: https://lore.kernel.org/patchwork/patch/697304/

[4]: https://lore.kernel.org/lkml/20140703151102.842945837@openvz.org/ 
[5]: https://lore.kernel.org/lkml/CAGXu5jL3exT4j+8rjMv1O54uJWQ5UHL69Z-24b61rhXROqZamQ@mail.gmail.com/
[6]: https://lore.kernel.org/lkml/20140722203614.GF838@moon/

^ permalink raw reply

* Re: [PATCH 0/4] Fix misused kernel_read_file() enums
From: Mimi Zohar @ 2020-07-07 15:36 UTC (permalink / raw)
  To: Kees Cook, James Morris
  Cc: Luis Chamberlain, Scott Branden, Greg Kroah-Hartman,
	Rafael J. Wysocki, Alexander Viro, Jessica Yu, Dmitry Kasatkin,
	Serge E. Hallyn, Casey Schaufler, Eric W. Biederman,
	Peter Zijlstra, Matthew Garrett, David Howells,
	Mauro Carvalho Chehab, Randy Dunlap, Joel Fernandes (Google),
	KP Singh, Dave Olsthoorn, Hans de Goede, Peter Jones,
	Andrew Morton, Stephen Boyd, Paul Moore, linux-kernel,
	linux-fsdevel, linux-integrity, linux-security-module
In-Reply-To: <20200707081926.3688096-1-keescook@chromium.org>

Hi Kees,

On Tue, 2020-07-07 at 01:19 -0700, Kees Cook wrote:
> Hi,
> 
> In looking for closely at the additions that got made to the
> kernel_read_file() enums, I noticed that FIRMWARE_PREALLOC_BUFFER
> and FIRMWARE_EFI_EMBEDDED were added, but they are not appropriate
> *kinds* of files for the LSM to reason about. They are a "how" and
> "where", respectively. Remove these improper aliases and refactor the
> code to adapt to the changes.

Thank you for adding the missing calls and the firmware pre allocated
buffer comment update.

> 
> Additionally adds in missing calls to security_kernel_post_read_file()
> in the platform firmware fallback path (to match the sysfs firmware
> fallback path) and in module loading. I considered entirely removing
> security_kernel_post_read_file() hook since it is technically unused,
> but IMA probably wants to be able to measure EFI-stored firmware images,
> so I wired it up and matched it for modules, in case anyone wants to
> move the module signature checks out of the module core and into an LSM
> to avoid the current layering violations.

IMa has always verified kernel module signatures.  Recently appended
kernel module signature support was added to IMA.  The same appended
signature format is also being used to sign and verify the kexec
kernel image.

With IMA's new kernel module appended signature support and patch 4/4
in this series, IMA won't be limit to the finit_module syscall, but
could support the init_module syscall as well.

> 
> This touches several trees, and I suspect it would be best to go through
> James's LSM tree.

Sure.

thanks!

Mimi


^ permalink raw reply

* Re: [PATCH 0/4] Fix misused kernel_read_file() enums
From: Greg Kroah-Hartman @ 2020-07-07  9:31 UTC (permalink / raw)
  To: Kees Cook
  Cc: James Morris, Luis Chamberlain, Mimi Zohar, Scott Branden,
	Rafael J. Wysocki, Alexander Viro, Jessica Yu, Dmitry Kasatkin,
	Serge E. Hallyn, Casey Schaufler, Eric W. Biederman,
	Peter Zijlstra, Matthew Garrett, David Howells,
	Mauro Carvalho Chehab, Randy Dunlap, Joel Fernandes (Google),
	KP Singh, Dave Olsthoorn, Hans de Goede, Peter Jones,
	Andrew Morton, Stephen Boyd, Paul Moore, linux-kernel,
	linux-fsdevel, linux-integrity, linux-security-module
In-Reply-To: <20200707081926.3688096-1-keescook@chromium.org>

On Tue, Jul 07, 2020 at 01:19:22AM -0700, Kees Cook wrote:
> Hi,
> 
> In looking for closely at the additions that got made to the
> kernel_read_file() enums, I noticed that FIRMWARE_PREALLOC_BUFFER
> and FIRMWARE_EFI_EMBEDDED were added, but they are not appropriate
> *kinds* of files for the LSM to reason about. They are a "how" and
> "where", respectively. Remove these improper aliases and refactor the
> code to adapt to the changes.
> 
> Additionally adds in missing calls to security_kernel_post_read_file()
> in the platform firmware fallback path (to match the sysfs firmware
> fallback path) and in module loading. I considered entirely removing
> security_kernel_post_read_file() hook since it is technically unused,
> but IMA probably wants to be able to measure EFI-stored firmware images,
> so I wired it up and matched it for modules, in case anyone wants to
> move the module signature checks out of the module core and into an LSM
> to avoid the current layering violations.
> 
> This touches several trees, and I suspect it would be best to go through
> James's LSM tree.
> 
> Thanks!
> 
> -Kees
> 
> Kees Cook (4):
>   firmware_loader: EFI firmware loader must handle pre-allocated buffer
>   fs: Remove FIRMWARE_PREALLOC_BUFFER from kernel_read_file() enums
>   fs: Remove FIRMWARE_EFI_EMBEDDED from kernel_read_file() enums
>   module: Add hook for security_kernel_post_read_file()

Looks good to me, thanks for fixing this up:

Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

^ permalink raw reply

* [PATCH 4/4] module: Add hook for security_kernel_post_read_file()
From: Kees Cook @ 2020-07-07  8:19 UTC (permalink / raw)
  To: James Morris
  Cc: Kees Cook, Jessica Yu, Luis Chamberlain, Mimi Zohar,
	Scott Branden, Greg Kroah-Hartman, Rafael J. Wysocki,
	Alexander Viro, Dmitry Kasatkin, Serge E. Hallyn, Casey Schaufler,
	Eric W. Biederman, Peter Zijlstra, Matthew Garrett, David Howells,
	Mauro Carvalho Chehab, Randy Dunlap, Joel Fernandes (Google),
	KP Singh, Dave Olsthoorn, Hans de Goede, Peter Jones,
	Andrew Morton, Stephen Boyd, Paul Moore, linux-kernel,
	linux-fsdevel, linux-integrity, linux-security-module
In-Reply-To: <20200707081926.3688096-1-keescook@chromium.org>

Calls to security_kernel_load_data() should be paired with a call to
security_kernel_post_read_file() with a NULL file argument. Add the
missing call so the module contents are visible to the LSMs interested
in measuring the module content. (This also paves the way for moving
module signature checking out of the module core and into an LSM.)

Cc: Jessica Yu <jeyu@kernel.org>
Fixes: c77b8cdf745d ("module: replace the existing LSM hook in init_module")
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 kernel/module.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/kernel/module.c b/kernel/module.c
index 0c6573b98c36..af9679f8e5c6 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2980,7 +2980,12 @@ static int copy_module_from_user(const void __user *umod, unsigned long len,
 		return -EFAULT;
 	}
 
-	return 0;
+	err = security_kernel_post_read_file(NULL, (char *)info->hdr,
+					     info->len, READING_MODULE);
+	if (err)
+		vfree(info->hdr);
+
+	return err;
 }
 
 static void free_copy(struct load_info *info)
-- 
2.25.1


^ permalink raw reply related

* [PATCH 0/4] Fix misused kernel_read_file() enums
From: Kees Cook @ 2020-07-07  8:19 UTC (permalink / raw)
  To: James Morris
  Cc: Kees Cook, Luis Chamberlain, Mimi Zohar, Scott Branden,
	Greg Kroah-Hartman, Rafael J. Wysocki, Alexander Viro, Jessica Yu,
	Dmitry Kasatkin, Serge E. Hallyn, Casey Schaufler,
	Eric W. Biederman, Peter Zijlstra, Matthew Garrett, David Howells,
	Mauro Carvalho Chehab, Randy Dunlap, Joel Fernandes (Google),
	KP Singh, Dave Olsthoorn, Hans de Goede, Peter Jones,
	Andrew Morton, Stephen Boyd, Paul Moore, linux-kernel,
	linux-fsdevel, linux-integrity, linux-security-module

Hi,

In looking for closely at the additions that got made to the
kernel_read_file() enums, I noticed that FIRMWARE_PREALLOC_BUFFER
and FIRMWARE_EFI_EMBEDDED were added, but they are not appropriate
*kinds* of files for the LSM to reason about. They are a "how" and
"where", respectively. Remove these improper aliases and refactor the
code to adapt to the changes.

Additionally adds in missing calls to security_kernel_post_read_file()
in the platform firmware fallback path (to match the sysfs firmware
fallback path) and in module loading. I considered entirely removing
security_kernel_post_read_file() hook since it is technically unused,
but IMA probably wants to be able to measure EFI-stored firmware images,
so I wired it up and matched it for modules, in case anyone wants to
move the module signature checks out of the module core and into an LSM
to avoid the current layering violations.

This touches several trees, and I suspect it would be best to go through
James's LSM tree.

Thanks!

-Kees

Kees Cook (4):
  firmware_loader: EFI firmware loader must handle pre-allocated buffer
  fs: Remove FIRMWARE_PREALLOC_BUFFER from kernel_read_file() enums
  fs: Remove FIRMWARE_EFI_EMBEDDED from kernel_read_file() enums
  module: Add hook for security_kernel_post_read_file()

 drivers/base/firmware_loader/fallback_platform.c | 12 ++++++++++--
 drivers/base/firmware_loader/main.c              |  5 ++---
 fs/exec.c                                        |  7 ++++---
 include/linux/fs.h                               |  3 +--
 include/linux/lsm_hooks.h                        |  6 +++++-
 kernel/module.c                                  |  7 ++++++-
 security/integrity/ima/ima_main.c                |  6 ++----
 7 files changed, 30 insertions(+), 16 deletions(-)

-- 
2.25.1


^ permalink raw reply

* [PATCH 3/4] fs: Remove FIRMWARE_EFI_EMBEDDED from kernel_read_file() enums
From: Kees Cook @ 2020-07-07  8:19 UTC (permalink / raw)
  To: James Morris
  Cc: Kees Cook, Luis Chamberlain, Mimi Zohar, Scott Branden,
	Greg Kroah-Hartman, Rafael J. Wysocki, Alexander Viro, Jessica Yu,
	Dmitry Kasatkin, Serge E. Hallyn, Casey Schaufler,
	Eric W. Biederman, Peter Zijlstra, Matthew Garrett, David Howells,
	Mauro Carvalho Chehab, Randy Dunlap, Joel Fernandes (Google),
	KP Singh, Dave Olsthoorn, Hans de Goede, Peter Jones,
	Andrew Morton, Stephen Boyd, Paul Moore, linux-kernel,
	linux-fsdevel, linux-integrity, linux-security-module
In-Reply-To: <20200707081926.3688096-1-keescook@chromium.org>

The "FIRMWARE_EFI_EMBEDDED" enum is a "where", not a "what". It
should not be distinguished separately from just "FIRMWARE", as this
confuses the LSMs about what is being loaded. Additionally, there was
no actual validation of the firmware contents happening. Add call to
security_kernel_post_read_file() so the contents can be measured/verified,
just as the firmware sysfs fallback does. This would allow for IMA (or
other LSMs) to validate known-good EFI firmware images.

Fixes: e4c2c0ff00ec ("firmware: Add new platform fallback mechanism and firmware_request_platform()")
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 drivers/base/firmware_loader/fallback_platform.c | 7 ++++++-
 include/linux/fs.h                               | 3 +--
 include/linux/lsm_hooks.h                        | 6 +++++-
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/base/firmware_loader/fallback_platform.c b/drivers/base/firmware_loader/fallback_platform.c
index 685edb7dd05a..76e0c4a7835f 100644
--- a/drivers/base/firmware_loader/fallback_platform.c
+++ b/drivers/base/firmware_loader/fallback_platform.c
@@ -17,7 +17,7 @@ int firmware_fallback_platform(struct fw_priv *fw_priv, u32 opt_flags)
 	if (!(opt_flags & FW_OPT_FALLBACK_PLATFORM))
 		return -ENOENT;
 
-	rc = security_kernel_load_data(LOADING_FIRMWARE_EFI_EMBEDDED);
+	rc = security_kernel_load_data(LOADING_FIRMWARE);
 	if (rc)
 		return rc;
 
@@ -25,6 +25,11 @@ int firmware_fallback_platform(struct fw_priv *fw_priv, u32 opt_flags)
 	if (rc)
 		return rc; /* rc == -ENOENT when the fw was not found */
 
+	rc = security_kernel_post_read_file(NULL, (char *)data, size,
+					    READING_FIRMWARE);
+	if (rc)
+		return rc;
+
 	if (fw_priv->data && size > fw_priv->allocated_size)
 		return -ENOMEM;
 	if (!fw_priv->data)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 95fc775ed937..f50a35d54a61 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2993,11 +2993,10 @@ static inline void i_readcount_inc(struct inode *inode)
 #endif
 extern int do_pipe_flags(int *, int);
 
-/* This is a list of *what* is being read, not *how*. */
+/* This is a list of *what* is being read, not *how* nor *where*. */
 #define __kernel_read_file_id(id) \
 	id(UNKNOWN, unknown)		\
 	id(FIRMWARE, firmware)		\
-	id(FIRMWARE_EFI_EMBEDDED, firmware)	\
 	id(MODULE, kernel-module)		\
 	id(KEXEC_IMAGE, kexec-image)		\
 	id(KEXEC_INITRAMFS, kexec-initramfs)	\
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 95b7c1d32062..7cfc3166a1e2 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -633,15 +633,19 @@
  *	@kmod_name name of the module requested by the kernel
  *	Return 0 if successful.
  * @kernel_load_data:
- *	Load data provided by userspace.
+ *	Load data provided by a non-file source (usually userspace buffer).
  *	@id kernel load data identifier
  *	Return 0 if permission is granted.
+ *	This may be paired with a kernel_post_read_file() with a NULL
+ *	@file, but contains the actual data loaded.
  * @kernel_read_file:
  *	Read a file specified by userspace.
  *	@file contains the file structure pointing to the file being read
  *	by the kernel.
  *	@id kernel read file identifier
  *	Return 0 if permission is granted.
+ *	This must be paired with a kernel_post_read_file(), which contains
+ *	the actual data read from @file.
  * @kernel_post_read_file:
  *	Read a file specified by userspace.
  *	@file contains the file structure pointing to the file being read
-- 
2.25.1


^ permalink raw reply related

* [PATCH 1/4] firmware_loader: EFI firmware loader must handle pre-allocated buffer
From: Kees Cook @ 2020-07-07  8:19 UTC (permalink / raw)
  To: James Morris
  Cc: Kees Cook, stable, Luis Chamberlain, Mimi Zohar, Scott Branden,
	Greg Kroah-Hartman, Rafael J. Wysocki, Alexander Viro, Jessica Yu,
	Dmitry Kasatkin, Serge E. Hallyn, Casey Schaufler,
	Eric W. Biederman, Peter Zijlstra, Matthew Garrett, David Howells,
	Mauro Carvalho Chehab, Randy Dunlap, Joel Fernandes (Google),
	KP Singh, Dave Olsthoorn, Hans de Goede, Peter Jones,
	Andrew Morton, Stephen Boyd, Paul Moore, linux-kernel,
	linux-fsdevel, linux-integrity, linux-security-module
In-Reply-To: <20200707081926.3688096-1-keescook@chromium.org>

The EFI platform firmware fallback would clobber any pre-allocated
buffers. Instead, correctly refuse to reallocate when too small (as
already done in the sysfs fallback), or perform allocation normally
when needed.

Fixes: e4c2c0ff00ec ("firmware: Add new platform fallback mechanism and firm ware_request_platform()")
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 drivers/base/firmware_loader/fallback_platform.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/base/firmware_loader/fallback_platform.c b/drivers/base/firmware_loader/fallback_platform.c
index cdd2c9a9f38a..685edb7dd05a 100644
--- a/drivers/base/firmware_loader/fallback_platform.c
+++ b/drivers/base/firmware_loader/fallback_platform.c
@@ -25,7 +25,10 @@ int firmware_fallback_platform(struct fw_priv *fw_priv, u32 opt_flags)
 	if (rc)
 		return rc; /* rc == -ENOENT when the fw was not found */
 
-	fw_priv->data = vmalloc(size);
+	if (fw_priv->data && size > fw_priv->allocated_size)
+		return -ENOMEM;
+	if (!fw_priv->data)
+		fw_priv->data = vmalloc(size);
 	if (!fw_priv->data)
 		return -ENOMEM;
 
-- 
2.25.1


^ permalink raw reply related

* [PATCH 2/4] fs: Remove FIRMWARE_PREALLOC_BUFFER from kernel_read_file() enums
From: Kees Cook @ 2020-07-07  8:19 UTC (permalink / raw)
  To: James Morris
  Cc: Kees Cook, Luis Chamberlain, Mimi Zohar, Scott Branden,
	Greg Kroah-Hartman, Rafael J. Wysocki, Alexander Viro, Jessica Yu,
	Dmitry Kasatkin, Serge E. Hallyn, Casey Schaufler,
	Eric W. Biederman, Peter Zijlstra, Matthew Garrett, David Howells,
	Mauro Carvalho Chehab, Randy Dunlap, Joel Fernandes (Google),
	KP Singh, Dave Olsthoorn, Hans de Goede, Peter Jones,
	Andrew Morton, Stephen Boyd, Paul Moore, linux-kernel,
	linux-fsdevel, linux-integrity, linux-security-module
In-Reply-To: <20200707081926.3688096-1-keescook@chromium.org>

FIRMWARE_PREALLOC_BUFFER is a "how", not a "what", and confuses the LSMs
that are interested in filtering between types of things. The "how"
should be an internal detail made uninteresting to the LSMs.

Fixes: a098ecd2fa7d ("firmware: support loading into a pre-allocated buffer")
Fixes: fd90bc559bfb ("ima: based on policy verify firmware signatures (pre-allocated buffer)")
Fixes: 4f0496d8ffa3 ("ima: based on policy warn about loading firmware (pre-allocated buffer)")
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 drivers/base/firmware_loader/main.c | 5 ++---
 fs/exec.c                           | 7 ++++---
 include/linux/fs.h                  | 2 +-
 security/integrity/ima/ima_main.c   | 6 ++----
 4 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c
index ca871b13524e..c2f57cedcd6f 100644
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -465,14 +465,12 @@ fw_get_filesystem_firmware(struct device *device, struct fw_priv *fw_priv,
 	int i, len;
 	int rc = -ENOENT;
 	char *path;
-	enum kernel_read_file_id id = READING_FIRMWARE;
 	size_t msize = INT_MAX;
 	void *buffer = NULL;
 
 	/* Already populated data member means we're loading into a buffer */
 	if (!decompress && fw_priv->data) {
 		buffer = fw_priv->data;
-		id = READING_FIRMWARE_PREALLOC_BUFFER;
 		msize = fw_priv->allocated_size;
 	}
 
@@ -496,7 +494,8 @@ fw_get_filesystem_firmware(struct device *device, struct fw_priv *fw_priv,
 
 		/* load firmware files from the mount namespace of init */
 		rc = kernel_read_file_from_path_initns(path, &buffer,
-						       &size, msize, id);
+						       &size, msize,
+						       READING_FIRMWARE);
 		if (rc) {
 			if (rc != -ENOENT)
 				dev_warn(device, "loading %s failed with error %d\n",
diff --git a/fs/exec.c b/fs/exec.c
index e6e8a9a70327..2bf549757ce7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -927,6 +927,7 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size,
 {
 	loff_t i_size, pos;
 	ssize_t bytes = 0;
+	void *allocated = NULL;
 	int ret;
 
 	if (!S_ISREG(file_inode(file)->i_mode) || max_size < 0)
@@ -950,8 +951,8 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size,
 		goto out;
 	}
 
-	if (id != READING_FIRMWARE_PREALLOC_BUFFER)
-		*buf = vmalloc(i_size);
+	if (!*buf)
+		*buf = allocated = vmalloc(i_size);
 	if (!*buf) {
 		ret = -ENOMEM;
 		goto out;
@@ -980,7 +981,7 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size,
 
 out_free:
 	if (ret < 0) {
-		if (id != READING_FIRMWARE_PREALLOC_BUFFER) {
+		if (allocated) {
 			vfree(*buf);
 			*buf = NULL;
 		}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3f881a892ea7..95fc775ed937 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2993,10 +2993,10 @@ static inline void i_readcount_inc(struct inode *inode)
 #endif
 extern int do_pipe_flags(int *, int);
 
+/* This is a list of *what* is being read, not *how*. */
 #define __kernel_read_file_id(id) \
 	id(UNKNOWN, unknown)		\
 	id(FIRMWARE, firmware)		\
-	id(FIRMWARE_PREALLOC_BUFFER, firmware)	\
 	id(FIRMWARE_EFI_EMBEDDED, firmware)	\
 	id(MODULE, kernel-module)		\
 	id(KEXEC_IMAGE, kexec-image)		\
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index c1583d98c5e5..f80ee4ce4669 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -611,19 +611,17 @@ void ima_post_path_mknod(struct dentry *dentry)
 int ima_read_file(struct file *file, enum kernel_read_file_id read_id)
 {
 	/*
-	 * READING_FIRMWARE_PREALLOC_BUFFER
-	 *
 	 * Do devices using pre-allocated memory run the risk of the
 	 * firmware being accessible to the device prior to the completion
 	 * of IMA's signature verification any more than when using two
-	 * buffers?
+	 * buffers? It may be desirable to include the buffer address
+	 * in this API and walk all the dma_map_single() mappings to check.
 	 */
 	return 0;
 }
 
 const int read_idmap[READING_MAX_ID] = {
 	[READING_FIRMWARE] = FIRMWARE_CHECK,
-	[READING_FIRMWARE_PREALLOC_BUFFER] = FIRMWARE_CHECK,
 	[READING_MODULE] = MODULE_CHECK,
 	[READING_KEXEC_IMAGE] = KEXEC_KERNEL_CHECK,
 	[READING_KEXEC_INITRAMFS] = KEXEC_INITRAMFS_CHECK,
-- 
2.25.1


^ permalink raw reply related

* Re: [PATCH v34 11/24] x86/sgx: Add SGX enclave driver
From: Sean Christopherson @ 2020-07-07  4:46 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Jarkko Sakkinen, x86, linux-sgx, linux-kernel,
	linux-security-module, linux-mm, Andrew Morton, Jethro Beekman,
	Haitao Huang, Chunyang Hui, Jordan Hand, Nathaniel McCallum,
	Seth Moore, Suresh Siddha, andriy.shevchenko, asapek, bp,
	cedric.xing, chenalexchen, conradparker, cyhanish, dave.hansen,
	haitao.huang, josh, kai.huang, kai.svahn, kmoy, ludloff, luto,
	nhorman, puiterwijk, rientjes, tglx, yaozhangx
In-Reply-To: <20200707043904.GJ25523@casper.infradead.org>

On Tue, Jul 07, 2020 at 05:39:04AM +0100, Matthew Wilcox wrote:
> although I think you have a simpler task.
> 
> 	XA_STATE(xas, ..., start_index);
> 
> 	for (;;) {
> 		struct page *page = xas_next(&xas);
> 
> 		if (!page || (~page->vm_max_prot_bits & vm_prot_bits))
> 			return -EACCES;
> 	}
> 
> 	return 0;
> 
> should do the trick, I think.

Ah, neato.  Thanks!

^ permalink raw reply

* Re: [PATCH v34 11/24] x86/sgx: Add SGX enclave driver
From: Matthew Wilcox @ 2020-07-07  4:39 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: Jarkko Sakkinen, x86, linux-sgx, linux-kernel,
	linux-security-module, linux-mm, Andrew Morton, Jethro Beekman,
	Haitao Huang, Chunyang Hui, Jordan Hand, Nathaniel McCallum,
	Seth Moore, Suresh Siddha, andriy.shevchenko, asapek, bp,
	cedric.xing, chenalexchen, conradparker, cyhanish, dave.hansen,
	haitao.huang, josh, kai.huang, kai.svahn, kmoy, ludloff, luto,
	nhorman, puiterwijk, rientjes, tglx, yaozhangx
In-Reply-To: <20200707042904.GD5208@linux.intel.com>

On Mon, Jul 06, 2020 at 09:29:04PM -0700, Sean Christopherson wrote:
> > > > +	idx_start = PFN_DOWN(start);
> > > > +	idx_end = PFN_DOWN(end - 1);
> > > > +
> > > > +	for (idx = idx_start; idx <= idx_end; ++idx) {
> > > > +		mutex_lock(&encl->lock);
> > > > +		page = radix_tree_lookup(&encl->page_tree, idx);
> > > > +		mutex_unlock(&encl->lock);
> > > > +
> > > > +		if (!page || (~page->vm_max_prot_bits & vm_prot_bits))
> > > > +			return -EACCES;
> > > 
> > > You should really use an iterator here instead of repeated lookups.
> > > xas_for_each() will probably be what you want.
> > 
> > Thank you for your remarks. I'll look into using xarray for this.
> 
> Question for Matthew:
> 
> To enforce the "page must be populated" rule, is there a clean way to retrieve
> the index of the current entry?  Our entries/pages don't have information
> about their index.  Or should we just count the number of entries and check
> 'em at the end? E.g.
> 
>         xas_for_each(...) {
>                 if (~page->vm_max_prot_bits & vm_prot_bits)
>                         return -EACCES;
>                 nr_entries++;
>         }
> 
>         if (nr_entries != (end_index - start_index))
>                 return -EACCES;

Probably best just to steal the implementation from here:

pgoff_t page_cache_next_miss(struct address_space *mapping,
                             pgoff_t index, unsigned long max_scan)
{
        XA_STATE(xas, &mapping->i_pages, index);

        while (max_scan--) {
                void *entry = xas_next(&xas);
                if (!entry || xa_is_value(entry))
                        break;
                if (xas.xa_index == 0)
                        break;
        }

        return xas.xa_index;
}

although I think you have a simpler task.

	XA_STATE(xas, ..., start_index);

	for (;;) {
		struct page *page = xas_next(&xas);

		if (!page || (~page->vm_max_prot_bits & vm_prot_bits))
			return -EACCES;
	}

	return 0;

should do the trick, I think.

^ permalink raw reply

* Re: [PATCH v34 11/24] x86/sgx: Add SGX enclave driver
From: Sean Christopherson @ 2020-07-07  4:29 UTC (permalink / raw)
  To: Jarkko Sakkinen
  Cc: Matthew Wilcox, x86, linux-sgx, linux-kernel,
	linux-security-module, linux-mm, Andrew Morton, Jethro Beekman,
	Haitao Huang, Chunyang Hui, Jordan Hand, Nathaniel McCallum,
	Seth Moore, Suresh Siddha, andriy.shevchenko, asapek, bp,
	cedric.xing, chenalexchen, conradparker, cyhanish, dave.hansen,
	haitao.huang, josh, kai.huang, kai.svahn, kmoy, ludloff, luto,
	nhorman, puiterwijk, rientjes, tglx, yaozhangx
In-Reply-To: <20200707041151.GE143804@linux.intel.com>

Man, I really need to type faster.

On Tue, Jul 07, 2020 at 07:11:51AM +0300, Jarkko Sakkinen wrote:
> On Tue, Jul 07, 2020 at 04:36:17AM +0100, Matthew Wilcox wrote:
> > What's a leaf function?  Is it like a CPU instruction?
> 
> Yeah, the opcode is ENCLS for ring-0 (enclave management and
> construction) and ENCLU for ring-3 (entrance to the enclave etc).
> The leaf function number goes to EAX.

To add to Jarkko's comments, for all intents and purposes they are individual
instructions, e.g. all of their own entries in the SDM, but are buried behind
a single opcode that switches on EAX, e.g. ECREATE is EAX=0,  EADD is EAX=1,
EINIT is EAX=2.  It's purely a way to save opcode space when the extra
overhead is a non-issue, e.g. SMX/TXT's GETSEC does the same shenanigans.

> > > +	atomic_set(&encl->flags, 0);
> > > +	kref_init(&encl->refcount);
> > > +	INIT_RADIX_TREE(&encl->page_tree, GFP_KERNEL);
> > 
> > Why are you using a radix tree instead of an xarray?
> 
> Because xarray did not exist in 2017 and nobody has pointed out to use
> it. Now I know it exists (yet do not know what it is).

I've followed xarrays a little, but obviously not closely enough to
understand their advantages over radix trees.  At a glance, range-based
iteration alone is probably justification enough to switch.

> > > +int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
> > > +		     unsigned long end, unsigned long vm_prot_bits)
> > > +{
> > > +	unsigned long idx, idx_start, idx_end;
> > > +	struct sgx_encl_page *page;
> > > +
> > > +	/*
> > > +	 * Disallow RIE tasks as their VMA permissions might conflict with the
> > > +	 * enclave page permissions.
> > > +	 */
> > > +	if (!!(current->personality & READ_IMPLIES_EXEC))
> > > +		return -EACCES;
> > > +
> > > +	idx_start = PFN_DOWN(start);
> > > +	idx_end = PFN_DOWN(end - 1);
> > > +
> > > +	for (idx = idx_start; idx <= idx_end; ++idx) {
> > > +		mutex_lock(&encl->lock);
> > > +		page = radix_tree_lookup(&encl->page_tree, idx);
> > > +		mutex_unlock(&encl->lock);
> > > +
> > > +		if (!page || (~page->vm_max_prot_bits & vm_prot_bits))
> > > +			return -EACCES;
> > 
> > You should really use an iterator here instead of repeated lookups.
> > xas_for_each() will probably be what you want.
> 
> Thank you for your remarks. I'll look into using xarray for this.

Question for Matthew:

To enforce the "page must be populated" rule, is there a clean way to retrieve
the index of the current entry?  Our entries/pages don't have information
about their index.  Or should we just count the number of entries and check
'em at the end? E.g.

        xas_for_each(...) {
                if (~page->vm_max_prot_bits & vm_prot_bits)
                        return -EACCES;
                nr_entries++;
        }

        if (nr_entries != (end_index - start_index))
                return -EACCES;

^ permalink raw reply

* Re: [PATCH v34 11/24] x86/sgx: Add SGX enclave driver
From: Jarkko Sakkinen @ 2020-07-07  4:11 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: x86, linux-sgx, linux-kernel, linux-security-module, linux-mm,
	Andrew Morton, Jethro Beekman, Haitao Huang, Chunyang Hui,
	Jordan Hand, Nathaniel McCallum, Seth Moore, Sean Christopherson,
	Suresh Siddha, andriy.shevchenko, asapek, bp, cedric.xing,
	chenalexchen, conradparker, cyhanish, dave.hansen, haitao.huang,
	josh, kai.huang, kai.svahn, kmoy, ludloff, luto, nhorman,
	puiterwijk, rientjes, tglx, yaozhangx
In-Reply-To: <20200707033617.GF25523@casper.infradead.org>

On Tue, Jul 07, 2020 at 04:36:17AM +0100, Matthew Wilcox wrote:
> On Tue, Jul 07, 2020 at 06:01:51AM +0300, Jarkko Sakkinen wrote:
> > Intel Software Guard eXtensions (SGX) is a set of CPU instructions that
> > can be used by applications to set aside private regions of code and
> > data. The code outside the SGX hosted software entity is disallowed to
> 
> s/disallowed to/prevented from/
> 
> > access the memory inside the enclave enforced by the CPU. We call these
> 
> s/enforced//
> 
> > entities enclaves.
> > 
> > Add a driver that provides an ioctl API to construct and run enclaves.
> > Enclaves are constructed from pages residing in reserved physical memory
> > areas. The contents of these pages can only be accessed when they are
> > mapped as part of an enclave, by a hardware thread running inside the
> > enclave.
> > 
> > The starting state of an enclave consists of a fixed measured set of
> > pages that are copied to the EPC during the construction process by
> > using ENCLS leaf functions and Software Enclave Control Structure (SECS)
> > that defines the enclave properties.
> > 
> > Enclaves are constructed by using ENCLS leaf functions ECREATE, EADD and
> > EINIT. ECREATE initializes SECS, EADD copies pages from system memory to
> > the EPC and EINIT checks a given signed measurement and moves the enclave
> > into a state ready for execution.
> 
> What's a leaf function?  Is it like a CPU instruction?

Yeah, the opcode is ENCLS for ring-0 (enclave management and
construction) and ENCLU for ring-3 (entrance to the enclave etc).
The leaf function number goes to EAX.

> 
> > The mmap() permissions are capped by the contained enclave page
> > permissions. The mapped areas must also be opaque, i.e. each page address
> > must contain a page. This logic is implemented in sgx_encl_may_map().
> 
> do you mean "populated" instead of "opaque"?

Yes, that would be a better word to use. I'll change this.

> 
> > +	atomic_set(&encl->flags, 0);
> > +	kref_init(&encl->refcount);
> > +	INIT_RADIX_TREE(&encl->page_tree, GFP_KERNEL);
> 
> Why are you using a radix tree instead of an xarray?

Because xarray did not exist in 2017 and nobody has pointed out to use
it. Now I know it exists (yet do not know what it is).

> 
> > +int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
> > +		     unsigned long end, unsigned long vm_prot_bits)
> > +{
> > +	unsigned long idx, idx_start, idx_end;
> > +	struct sgx_encl_page *page;
> > +
> > +	/*
> > +	 * Disallow RIE tasks as their VMA permissions might conflict with the
> > +	 * enclave page permissions.
> > +	 */
> > +	if (!!(current->personality & READ_IMPLIES_EXEC))
> > +		return -EACCES;
> > +
> > +	idx_start = PFN_DOWN(start);
> > +	idx_end = PFN_DOWN(end - 1);
> > +
> > +	for (idx = idx_start; idx <= idx_end; ++idx) {
> > +		mutex_lock(&encl->lock);
> > +		page = radix_tree_lookup(&encl->page_tree, idx);
> > +		mutex_unlock(&encl->lock);
> > +
> > +		if (!page || (~page->vm_max_prot_bits & vm_prot_bits))
> > +			return -EACCES;
> 
> You should really use an iterator here instead of repeated lookups.
> xas_for_each() will probably be what you want.

Thank you for your remarks. I'll look into using xarray for this.

/Jarkko

^ permalink raw reply

* Re: [PATCH v9 2/2] tpm: Add support for event log pointer found in TPM2 ACPI table
From: Stefan Berger @ 2020-07-07  4:09 UTC (permalink / raw)
  To: Jarkko Sakkinen
  Cc: Stefan Berger, linux-integrity, linux-kernel, linux-acpi,
	linux-security-module
In-Reply-To: <20200707040325.GB143804@linux.intel.com>

On 7/7/20 12:03 AM, Jarkko Sakkinen wrote:
> On Mon, Jul 06, 2020 at 11:08:12PM -0400, Stefan Berger wrote:
>> On 7/6/20 10:24 PM, Jarkko Sakkinen wrote:
>>> On Mon, Jul 06, 2020 at 07:55:26PM -0400, Stefan Berger wrote:
>>>> On 7/6/20 7:09 PM, Jarkko Sakkinen wrote:
>>>>> On Mon, Jul 06, 2020 at 02:19:53PM -0400, Stefan Berger wrote:
>>>>>> From: Stefan Berger <stefanb@linux.ibm.com>
>>>>>>
>>>>>> In case a TPM2 is attached, search for a TPM2 ACPI table when trying
>>>>>> to get the event log from ACPI. If one is found, use it to get the
>>>>>> start and length of the log area. This allows non-UEFI systems, such
>>>>>> as SeaBIOS, to pass an event log when using a TPM2.
>>>>>>
>>>>>> Signed-off-by: Stefan Berger <stefanb@linux.ibm.com>
>>>>> Do you think that QEMU with TPM 1.2 emulator turned on would be a viable
>>>>> way to test this?
>>>> Yes.
>>> Is the emulator bundled with QEMU or does it have to be installed
>>> separately?
>> It has to be installed separately. On Fedora 31 it would just be a `sudo dnf
>> -y install swtpm-tools` and you should be good to go with libvirt /
>> virt-manager.
> Is there some packaging for Debian/Ubuntu available?


So far may not be available yet. I had *experimented* with a PPA once: 
https://launchpad.net/~stefanberger/+archive/ubuntu/swtpm-focal



> /Jarkko



^ permalink raw reply

* Re: [PATCH v9 2/2] tpm: Add support for event log pointer found in TPM2 ACPI table
From: Jarkko Sakkinen @ 2020-07-07  4:03 UTC (permalink / raw)
  To: Stefan Berger
  Cc: Stefan Berger, linux-integrity, linux-kernel, linux-acpi,
	linux-security-module
In-Reply-To: <f3e0fb50-8617-da40-1456-158531a070cb@linux.ibm.com>

On Mon, Jul 06, 2020 at 11:08:12PM -0400, Stefan Berger wrote:
> On 7/6/20 10:24 PM, Jarkko Sakkinen wrote:
> > On Mon, Jul 06, 2020 at 07:55:26PM -0400, Stefan Berger wrote:
> > > On 7/6/20 7:09 PM, Jarkko Sakkinen wrote:
> > > > On Mon, Jul 06, 2020 at 02:19:53PM -0400, Stefan Berger wrote:
> > > > > From: Stefan Berger <stefanb@linux.ibm.com>
> > > > > 
> > > > > In case a TPM2 is attached, search for a TPM2 ACPI table when trying
> > > > > to get the event log from ACPI. If one is found, use it to get the
> > > > > start and length of the log area. This allows non-UEFI systems, such
> > > > > as SeaBIOS, to pass an event log when using a TPM2.
> > > > > 
> > > > > Signed-off-by: Stefan Berger <stefanb@linux.ibm.com>
> > > > Do you think that QEMU with TPM 1.2 emulator turned on would be a viable
> > > > way to test this?
> > > 
> > > Yes.
> > Is the emulator bundled with QEMU or does it have to be installed
> > separately?
> 
> It has to be installed separately. On Fedora 31 it would just be a `sudo dnf
> -y install swtpm-tools` and you should be good to go with libvirt /
> virt-manager.

Is there some packaging for Debian/Ubuntu available?

/Jarkko

^ permalink raw reply

* [PATCH v35 15/24] x86/sgx: Allow a limited use of ATTRIBUTE.PROVISIONKEY for attestation
From: Jarkko Sakkinen @ 2020-07-07  3:37 UTC (permalink / raw)
  To: x86, linux-sgx
  Cc: linux-kernel, Jarkko Sakkinen, linux-security-module,
	Jethro Beekman, Andy Lutomirski, akpm, andriy.shevchenko, asapek,
	bp, cedric.xing, chenalexchen, conradparker, cyhanish,
	dave.hansen, haitao.huang, josh, kai.huang, kai.svahn, kmoy,
	ludloff, nhorman, npmccallum, puiterwijk, rientjes,
	sean.j.christopherson, tglx, yaozhangx
In-Reply-To: <20200707033747.142828-1-jarkko.sakkinen@linux.intel.com>

Provisioning Certification Enclave (PCE), the root of trust for other
enclaves, generates a signing key from a fused key called Provisioning
Certification Key. PCE can then use this key to certify an attestation key
of a Quoting Enclave (QE), e.g. we get the chain of trust down to the
hardware if the Intel signed PCE is used.

To use the needed keys, ATTRIBUTE.PROVISIONKEY is required but should be
only allowed for those who actually need it so that only the trusted
parties can certify QE's.

Obviously the attestation service should know the public key of the used
PCE and that way detect illegit attestation, but whitelisting the legit
users still adds an additional layer of defence.

Add new device file called /dev/sgx/provision. The sole purpose of this
file is to provide file descriptors that act as privilege tokens to allow
to build enclaves with ATTRIBUTE.PROVISIONKEY set. A new ioctl called
SGX_IOC_ENCLAVE_SET_ATTRIBUTE is used to assign this token to an enclave.

Cc: linux-security-module@vger.kernel.org
Acked-by: Jethro Beekman <jethro@fortanix.com>
Suggested-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 arch/x86/include/uapi/asm/sgx.h  | 11 ++++++++
 arch/x86/kernel/cpu/sgx/driver.c | 18 ++++++++++++
 arch/x86/kernel/cpu/sgx/driver.h |  2 ++
 arch/x86/kernel/cpu/sgx/ioctl.c  | 47 ++++++++++++++++++++++++++++++++
 4 files changed, 78 insertions(+)

diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h
index 5edb08ab8fd0..57d0d30c79b3 100644
--- a/arch/x86/include/uapi/asm/sgx.h
+++ b/arch/x86/include/uapi/asm/sgx.h
@@ -25,6 +25,8 @@ enum sgx_page_flags {
 	_IOWR(SGX_MAGIC, 0x01, struct sgx_enclave_add_pages)
 #define SGX_IOC_ENCLAVE_INIT \
 	_IOW(SGX_MAGIC, 0x02, struct sgx_enclave_init)
+#define SGX_IOC_ENCLAVE_SET_ATTRIBUTE \
+	_IOW(SGX_MAGIC, 0x03, struct sgx_enclave_set_attribute)
 
 /**
  * struct sgx_enclave_create - parameter structure for the
@@ -63,4 +65,13 @@ struct sgx_enclave_init {
 	__u64 sigstruct;
 };
 
+/**
+ * struct sgx_enclave_set_attribute - parameter structure for the
+ *				      %SGX_IOC_ENCLAVE_SET_ATTRIBUTE ioctl
+ * @attribute_fd:	file handle of the attribute file in the securityfs
+ */
+struct sgx_enclave_set_attribute {
+	__u64 attribute_fd;
+};
+
 #endif /* _UAPI_ASM_X86_SGX_H */
diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
index 20c3254675e9..1cebb6e9c9b7 100644
--- a/arch/x86/kernel/cpu/sgx/driver.c
+++ b/arch/x86/kernel/cpu/sgx/driver.c
@@ -139,6 +139,10 @@ static const struct file_operations sgx_encl_fops = {
 	.get_unmapped_area	= sgx_get_unmapped_area,
 };
 
+const struct file_operations sgx_provision_fops = {
+	.owner			= THIS_MODULE,
+};
+
 static struct miscdevice sgx_dev_enclave = {
 	.minor = MISC_DYNAMIC_MINOR,
 	.name = "enclave",
@@ -146,6 +150,13 @@ static struct miscdevice sgx_dev_enclave = {
 	.fops = &sgx_encl_fops,
 };
 
+static struct miscdevice sgx_dev_provision = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "provision",
+	.nodename = "sgx/provision",
+	.fops = &sgx_provision_fops,
+};
+
 int __init sgx_drv_init(void)
 {
 	unsigned int eax, ebx, ecx, edx;
@@ -186,5 +197,12 @@ int __init sgx_drv_init(void)
 		return ret;
 	}
 
+	ret = misc_register(&sgx_dev_provision);
+	if (ret) {
+		pr_err("Creating /dev/sgx/provision failed with %d.\n", ret);
+		misc_deregister(&sgx_dev_enclave);
+		return ret;
+	}
+
 	return 0;
 }
diff --git a/arch/x86/kernel/cpu/sgx/driver.h b/arch/x86/kernel/cpu/sgx/driver.h
index e4063923115b..72747d01c046 100644
--- a/arch/x86/kernel/cpu/sgx/driver.h
+++ b/arch/x86/kernel/cpu/sgx/driver.h
@@ -23,6 +23,8 @@ extern u64 sgx_attributes_reserved_mask;
 extern u64 sgx_xfrm_reserved_mask;
 extern u32 sgx_xsave_size_tbl[64];
 
+extern const struct file_operations sgx_provision_fops;
+
 long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 
 int sgx_drv_init(void);
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 599bd30c6d05..07aa45e77dd0 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -670,6 +670,50 @@ static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg)
 	return ret;
 }
 
+/**
+ * sgx_ioc_enclave_set_attribute - handler for %SGX_IOC_ENCLAVE_SET_ATTRIBUTE
+ * @filep:	open file to /dev/sgx
+ * @arg:	userspace pointer to a struct sgx_enclave_set_attribute instance
+ *
+ * Mark the enclave as being allowed to access a restricted attribute bit.
+ * The requested attribute is specified via the attribute_fd field in the
+ * provided struct sgx_enclave_set_attribute.  The attribute_fd must be a
+ * handle to an SGX attribute file, e.g. "/dev/sgx/provision".
+ *
+ * Failure to explicitly request access to a restricted attribute will cause
+ * sgx_ioc_enclave_init() to fail.  Currently, the only restricted attribute
+ * is access to the PROVISION_KEY.
+ *
+ * Note, access to the EINITTOKEN_KEY is disallowed entirely.
+ *
+ * Return: 0 on success, -errno otherwise
+ */
+static long sgx_ioc_enclave_set_attribute(struct sgx_encl *encl,
+					  void __user *arg)
+{
+	struct sgx_enclave_set_attribute params;
+	struct file *attribute_file;
+	int ret;
+
+	if (copy_from_user(&params, arg, sizeof(params)))
+		return -EFAULT;
+
+	attribute_file = fget(params.attribute_fd);
+	if (!attribute_file)
+		return -EINVAL;
+
+	if (attribute_file->f_op != &sgx_provision_fops) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	encl->allowed_attributes |= SGX_ATTR_PROVISIONKEY;
+	ret = 0;
+
+out:
+	fput(attribute_file);
+	return ret;
+}
 
 long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
 {
@@ -695,6 +739,9 @@ long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
 	case SGX_IOC_ENCLAVE_INIT:
 		ret = sgx_ioc_enclave_init(encl, (void __user *)arg);
 		break;
+	case SGX_IOC_ENCLAVE_SET_ATTRIBUTE:
+		ret = sgx_ioc_enclave_set_attribute(encl, (void __user *)arg);
+		break;
 	default:
 		ret = -ENOIOCTLCMD;
 		break;
-- 
2.25.1


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox