Linux userland API discussions
 help / color / mirror / Atom feed
* [PATCH bpf-next v9 09/10] bpf,landlock: Add tests for Landlock
From: Mickaël Salaün @ 2019-06-25 21:52 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mickaël Salaün, Aleksa Sarai, Alexander Viro,
	Alexei Starovoitov, Andrew Morton, Andy Lutomirski,
	Arnaldo Carvalho de Melo, Casey Schaufler, Daniel Borkmann,
	David Drysdale, David S . Miller, Eric W . Biederman,
	James Morris, Jann Horn, John Johansen, Jonathan Corbet,
	Kees Cook, Michael Kerrisk
In-Reply-To: <20190625215239.11136-1-mic@digikod.net>

Test basic context access, ptrace protection and filesystem hooks.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: James Morris <jmorris@namei.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Serge E. Hallyn <serge@hallyn.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Will Drewry <wad@chromium.org>
---

Changes since v8:
* update eBPF include path for macros
* use TEST_GEN_PROGS and use the generic "clean" target
* add more verbose errors
* update the bpf/verifier files
* remove chain tests (from landlock and bpf/verifier)
* replace the whitelist tests with blacklist tests (because of stateless
  Landlock programs): remove "dotdot" tests and other depth tests
* sync the landlock Makefile with its bpf sibling directory and use
  bpf_load_program_xattr()

Changes since v7:
* update tests and add new ones for filesystem hierarchy and Landlock
  chains.

Changes since v6:
* use the new kselftest_harness.h
* use const variables
* replace ASSERT_STEP with ASSERT_*
* rename BPF_PROG_TYPE_LANDLOCK to BPF_PROG_TYPE_LANDLOCK_RULE
* force sample library rebuild
* fix install target

Changes since v5:
* add subtype test
* add ptrace tests
* split and rename files
* cleanup and rebase
---
 tools/testing/selftests/Makefile              |   1 +
 tools/testing/selftests/bpf/bpf_helpers.h     |   2 +
 tools/testing/selftests/bpf/test_verifier.c   |   1 +
 .../testing/selftests/bpf/verifier/landlock.c |  35 +++
 .../testing/selftests/bpf/verifier/subtype.c  |  10 +
 tools/testing/selftests/landlock/.gitignore   |   4 +
 tools/testing/selftests/landlock/Makefile     |  39 +++
 tools/testing/selftests/landlock/test.h       |  48 ++++
 tools/testing/selftests/landlock/test_base.c  |  24 ++
 tools/testing/selftests/landlock/test_fs.c    | 257 ++++++++++++++++++
 .../testing/selftests/landlock/test_ptrace.c  | 154 +++++++++++
 11 files changed, 575 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/verifier/landlock.c
 create mode 100644 tools/testing/selftests/landlock/.gitignore
 create mode 100644 tools/testing/selftests/landlock/Makefile
 create mode 100644 tools/testing/selftests/landlock/test.h
 create mode 100644 tools/testing/selftests/landlock/test_base.c
 create mode 100644 tools/testing/selftests/landlock/test_fs.c
 create mode 100644 tools/testing/selftests/landlock/test_ptrace.c

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 9781ca79794a..342a7d714fb9 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -21,6 +21,7 @@ TARGETS += ir
 TARGETS += kcmp
 TARGETS += kexec
 TARGETS += kvm
+TARGETS += landlock
 TARGETS += lib
 TARGETS += livepatch
 TARGETS += membarrier
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 1a5b1accf091..0b15c49fac3f 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -225,6 +225,8 @@ static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk,
 static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) =
 	(void *)BPF_FUNC_sk_storage_delete;
 static int (*bpf_send_signal)(unsigned sig) = (void *)BPF_FUNC_send_signal;
+static unsigned long long (*bpf_inode_map_lookup)(void *map, void *key) =
+	(void *) BPF_FUNC_inode_map_lookup;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 93faffd31fc3..c67218ffebf9 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -30,6 +30,7 @@
 #include <linux/bpf.h>
 #include <linux/if_ether.h>
 #include <linux/btf.h>
+#include <linux/landlock.h>
 
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
diff --git a/tools/testing/selftests/bpf/verifier/landlock.c b/tools/testing/selftests/bpf/verifier/landlock.c
new file mode 100644
index 000000000000..7ed4e24c0a88
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/landlock.c
@@ -0,0 +1,35 @@
+{
+	"landlock/fs_pick: always accept",
+	.insns = {
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_LANDLOCK_HOOK,
+	.has_prog_subtype = true,
+	.prog_subtype = {
+		.landlock_hook = {
+			.type = LANDLOCK_HOOK_FS_PICK,
+			.triggers = LANDLOCK_TRIGGER_FS_PICK_READ,
+		}
+	},
+},
+{
+	"landlock/fs_pick: read context",
+	.insns = {
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+		BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_6,
+			offsetof(struct landlock_ctx_fs_pick, inode)),
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_LANDLOCK_HOOK,
+	.has_prog_subtype = true,
+	.prog_subtype = {
+		.landlock_hook = {
+			.type = LANDLOCK_HOOK_FS_PICK,
+			.triggers = LANDLOCK_TRIGGER_FS_PICK_READ,
+		}
+	},
+},
diff --git a/tools/testing/selftests/bpf/verifier/subtype.c b/tools/testing/selftests/bpf/verifier/subtype.c
index cf614223d53f..6bb7ef4b39b5 100644
--- a/tools/testing/selftests/bpf/verifier/subtype.c
+++ b/tools/testing/selftests/bpf/verifier/subtype.c
@@ -8,3 +8,13 @@
 	.result = REJECT,
 	.has_prog_subtype = true,
 },
+{
+	"missing subtype",
+	.insns = {
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.errstr = "",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_LANDLOCK_HOOK,
+},
diff --git a/tools/testing/selftests/landlock/.gitignore b/tools/testing/selftests/landlock/.gitignore
new file mode 100644
index 000000000000..25b9cd834c3c
--- /dev/null
+++ b/tools/testing/selftests/landlock/.gitignore
@@ -0,0 +1,4 @@
+/test_base
+/test_fs
+/test_ptrace
+/tmp_*
diff --git a/tools/testing/selftests/landlock/Makefile b/tools/testing/selftests/landlock/Makefile
new file mode 100644
index 000000000000..7a253bf6d580
--- /dev/null
+++ b/tools/testing/selftests/landlock/Makefile
@@ -0,0 +1,39 @@
+LIBDIR := ../../../lib
+BPFDIR := $(LIBDIR)/bpf
+APIDIR := ../../../include/uapi
+GENDIR := ../../../../include/generated
+GENHDR := $(GENDIR)/autoconf.h
+
+ifneq ($(wildcard $(GENHDR)),)
+  GENFLAGS := -DHAVE_GENHDR
+endif
+
+BPFOBJS := $(BPFDIR)/bpf.o $(BPFDIR)/nlattr.o
+LOADOBJ := ../../../../samples/bpf/bpf_load.o
+
+CFLAGS += -Wl,-no-as-needed -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
+LDFLAGS += -lelf
+
+test_src = $(wildcard test_*.c)
+
+test_objs := $(test_src:.c=)
+
+TEST_GEN_PROGS := $(test_objs)
+
+.PHONY: all clean force
+
+all: $(test_objs)
+
+# force a rebuild of BPFOBJS when its dependencies are updated
+force:
+
+# rebuild bpf.o as a workaround for the samples/bpf bug
+$(BPFOBJS): $(LOADOBJ) force
+	$(MAKE) -C $(BPFDIR)
+
+$(LOADOBJ): force
+	$(MAKE) -C $(dir $(LOADOBJ))
+
+$(test_objs): $(BPFOBJS) $(LOADOBJ) ../kselftest_harness.h
+
+include ../lib.mk
diff --git a/tools/testing/selftests/landlock/test.h b/tools/testing/selftests/landlock/test.h
new file mode 100644
index 000000000000..7d412d94148c
--- /dev/null
+++ b/tools/testing/selftests/landlock/test.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Landlock helpers
+ *
+ * Copyright © 2017-2019 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2019 ANSSI
+ */
+
+#include <bpf/bpf.h>
+#include <errno.h>
+#include <linux/filter.h>
+#include <linux/landlock.h>
+#include <linux/seccomp.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+
+#include "../kselftest_harness.h"
+#include "../../../../samples/bpf/bpf_load.h"
+
+#ifndef SECCOMP_PREPEND_LANDLOCK_PROG
+#define SECCOMP_PREPEND_LANDLOCK_PROG	4
+#endif
+
+#ifndef seccomp
+static int __attribute__((unused)) seccomp(unsigned int op, unsigned int flags,
+		void *args)
+{
+	errno = 0;
+	return syscall(__NR_seccomp, op, flags, args);
+}
+#endif
+
+/* bpf_load_program() with subtype */
+static int __attribute__((unused)) ll_bpf_load_program(
+		const struct bpf_insn *insns, size_t insns_cnt, char *log_buf,
+		size_t log_buf_sz, const union bpf_prog_subtype *subtype)
+{
+	struct bpf_load_program_attr load_attr;
+
+	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
+	load_attr.prog_type = BPF_PROG_TYPE_LANDLOCK_HOOK;
+	load_attr.prog_subtype = subtype;
+	load_attr.insns = insns;
+	load_attr.insns_cnt = insns_cnt;
+	load_attr.license = "GPL";
+
+	return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
+}
diff --git a/tools/testing/selftests/landlock/test_base.c b/tools/testing/selftests/landlock/test_base.c
new file mode 100644
index 000000000000..db46f39048cb
--- /dev/null
+++ b/tools/testing/selftests/landlock/test_base.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Landlock tests - base
+ *
+ * Copyright © 2017-2019 Mickaël Salaün <mic@digikod.net>
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+
+#include "test.h"
+
+TEST(seccomp_landlock)
+{
+	int ret;
+
+	ret = seccomp(SECCOMP_PREPEND_LANDLOCK_PROG, 0, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EFAULT, errno) {
+		TH_LOG("Kernel does not support CONFIG_SECURITY_LANDLOCK");
+	}
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/test_fs.c b/tools/testing/selftests/landlock/test_fs.c
new file mode 100644
index 000000000000..dba726ea4994
--- /dev/null
+++ b/tools/testing/selftests/landlock/test_fs.c
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Landlock tests - file system
+ *
+ * Copyright © 2018-2019 Mickaël Salaün <mic@digikod.net>
+ */
+
+#include <fcntl.h> /* O_DIRECTORY */
+#include <sys/stat.h> /* statbuf */
+#include <unistd.h> /* faccessat() */
+
+#include "test.h"
+
+#define TEST_PATH_TRIGGERS ( \
+		LANDLOCK_TRIGGER_FS_PICK_OPEN | \
+		LANDLOCK_TRIGGER_FS_PICK_READDIR | \
+		LANDLOCK_TRIGGER_FS_PICK_EXECUTE | \
+		LANDLOCK_TRIGGER_FS_PICK_GETATTR)
+
+static void test_path_rel(struct __test_metadata *_metadata, int dirfd,
+		const char *path, int ret)
+{
+	int fd;
+	struct stat statbuf;
+
+	ASSERT_EQ(ret, faccessat(dirfd, path, R_OK | X_OK, 0));
+	ASSERT_EQ(ret, fstatat(dirfd, path, &statbuf, 0));
+	fd = openat(dirfd, path, O_DIRECTORY);
+	if (ret) {
+		ASSERT_EQ(-1, fd);
+	} else {
+		ASSERT_NE(-1, fd);
+		EXPECT_EQ(0, close(fd));
+	}
+}
+
+static void test_path(struct __test_metadata *_metadata, const char *path,
+		int ret)
+{
+	return test_path_rel(_metadata, AT_FDCWD, path, ret);
+}
+
+static const char d1[] = "/usr";
+static const char d2[] = "/usr/share";
+static const char d3[] = "/usr/share/doc";
+
+TEST(fs_base)
+{
+	test_path(_metadata, d1, 0);
+	test_path(_metadata, d2, 0);
+	test_path(_metadata, d3, 0);
+}
+
+#define MAP_VALUE_DENY 1
+
+static int create_denied_inode_map(struct __test_metadata *_metadata,
+		const char *const dirs[])
+{
+	int map, key, dirs_len, i;
+	__u64 value = MAP_VALUE_DENY;
+
+	ASSERT_NE(NULL, dirs) {
+		TH_LOG("No directory list\n");
+	}
+	ASSERT_NE(NULL, dirs[0]) {
+		TH_LOG("Empty directory list\n");
+	}
+
+	/* get the number of dir entries */
+	for (dirs_len = 0; dirs[dirs_len]; dirs_len++);
+	map = bpf_create_map(BPF_MAP_TYPE_INODE, sizeof(key), sizeof(value),
+			dirs_len, 0);
+	ASSERT_NE(-1, map) {
+		TH_LOG("Failed to create a map of %d elements: %s\n", dirs_len,
+				strerror(errno));
+	}
+
+	for (i = 0; dirs[i]; i++) {
+		key = open(dirs[i], O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+		ASSERT_NE(-1, key) {
+			TH_LOG("Failed to open directory \"%s\": %s\n", dirs[i],
+					strerror(errno));
+		}
+		ASSERT_EQ(0, bpf_map_update_elem(map, &key, &value, BPF_ANY)) {
+			TH_LOG("Failed to update the map with \"%s\": %s\n",
+					dirs[i], strerror(errno));
+		}
+		close(key);
+	}
+	return map;
+}
+
+static void enforce_map(struct __test_metadata *_metadata, int map,
+		bool subpath)
+{
+	const struct bpf_insn prog_deny[] = {
+		BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+		/* look for the requested inode in the map */
+		BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6,
+			offsetof(struct landlock_ctx_fs_walk, inode)),
+		BPF_LD_MAP_FD(BPF_REG_1, map), /* 2 instructions */
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				BPF_FUNC_inode_map_lookup),
+		/* if it is there, then deny access to the inode, otherwise
+		 * allow it */
+		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, MAP_VALUE_DENY, 2),
+		BPF_MOV32_IMM(BPF_REG_0, LANDLOCK_RET_ALLOW),
+		BPF_EXIT_INSN(),
+		BPF_MOV32_IMM(BPF_REG_0, LANDLOCK_RET_DENY),
+		BPF_EXIT_INSN(),
+	};
+	union bpf_prog_subtype subtype = {};
+	int fd_walk = -1, fd_pick;
+	char log[1024] = "";
+
+	if (subpath) {
+		subtype.landlock_hook.type = LANDLOCK_HOOK_FS_WALK;
+		fd_walk = ll_bpf_load_program((const struct bpf_insn *)&prog_deny,
+				sizeof(prog_deny) / sizeof(struct bpf_insn),
+				log, sizeof(log), &subtype);
+		ASSERT_NE(-1, fd_walk) {
+			TH_LOG("Failed to load fs_walk program: %s\n%s",
+					strerror(errno), log);
+		}
+		ASSERT_EQ(0, seccomp(SECCOMP_PREPEND_LANDLOCK_PROG, 0, &fd_walk)) {
+			TH_LOG("Failed to apply Landlock program: %s", strerror(errno));
+		}
+		EXPECT_EQ(0, close(fd_walk));
+	}
+
+	subtype.landlock_hook.type = LANDLOCK_HOOK_FS_PICK;
+	subtype.landlock_hook.triggers = TEST_PATH_TRIGGERS;
+	fd_pick = ll_bpf_load_program((const struct bpf_insn *)&prog_deny,
+			sizeof(prog_deny) / sizeof(struct bpf_insn), log,
+			sizeof(log), &subtype);
+	ASSERT_NE(-1, fd_pick) {
+		TH_LOG("Failed to load fs_pick program: %s\n%s",
+				strerror(errno), log);
+	}
+	ASSERT_EQ(0, seccomp(SECCOMP_PREPEND_LANDLOCK_PROG, 0, &fd_pick)) {
+		TH_LOG("Failed to apply Landlock program: %s", strerror(errno));
+	}
+	EXPECT_EQ(0, close(fd_pick));
+}
+
+static void check_map_blacklist(struct __test_metadata *_metadata,
+		bool subpath)
+{
+	int map = create_denied_inode_map(_metadata, (const char *const [])
+			{ d2, NULL });
+	ASSERT_NE(-1, map);
+	enforce_map(_metadata, map, subpath);
+	test_path(_metadata, d1, 0);
+	test_path(_metadata, d2, -1);
+	test_path(_metadata, d3, subpath ? -1 : 0);
+	EXPECT_EQ(0, close(map));
+}
+
+TEST(fs_map_blacklist_literal)
+{
+	check_map_blacklist(_metadata, false);
+}
+
+TEST(fs_map_blacklist_subpath)
+{
+	check_map_blacklist(_metadata, true);
+}
+
+static const char r2[] = ".";
+static const char r3[] = "./doc";
+
+enum relative_access {
+	REL_OPEN,
+	REL_CHDIR,
+	REL_CHROOT,
+};
+
+static void check_access(struct __test_metadata *_metadata,
+		bool enforce, enum relative_access rel)
+{
+	int dirfd;
+	int map = -1;
+
+	if (rel == REL_CHROOT)
+		ASSERT_NE(-1, chdir(d2));
+	if (enforce) {
+		map = create_denied_inode_map(_metadata, (const char *const [])
+				{ d3, NULL });
+		ASSERT_NE(-1, map);
+		enforce_map(_metadata, map, true);
+	}
+	switch (rel) {
+	case REL_OPEN:
+		dirfd = open(d2, O_DIRECTORY);
+		ASSERT_NE(-1, dirfd);
+		break;
+	case REL_CHDIR:
+		ASSERT_NE(-1, chdir(d2));
+		dirfd = AT_FDCWD;
+		break;
+	case REL_CHROOT:
+		ASSERT_NE(-1, chroot(d2)) {
+			TH_LOG("Failed to chroot: %s\n", strerror(errno));
+		}
+		dirfd = AT_FDCWD;
+		break;
+	default:
+		ASSERT_TRUE(false);
+		return;
+	}
+
+	test_path_rel(_metadata, dirfd, r2, 0);
+	test_path_rel(_metadata, dirfd, r3, enforce ? -1 : 0);
+
+	if (rel == REL_OPEN)
+		EXPECT_EQ(0, close(dirfd));
+	if (enforce)
+		EXPECT_EQ(0, close(map));
+}
+
+TEST(fs_allow_open)
+{
+	/* no enforcement, via open */
+	check_access(_metadata, false, REL_OPEN);
+}
+
+TEST(fs_allow_chdir)
+{
+	/* no enforcement, via chdir */
+	check_access(_metadata, false, REL_CHDIR);
+}
+
+TEST(fs_allow_chroot)
+{
+	/* no enforcement, via chroot */
+	check_access(_metadata, false, REL_CHROOT);
+}
+
+TEST(fs_deny_open)
+{
+	/* enforcement without tag, via open */
+	check_access(_metadata, true, REL_OPEN);
+}
+
+TEST(fs_deny_chdir)
+{
+	/* enforcement without tag, via chdir */
+	check_access(_metadata, true, REL_CHDIR);
+}
+
+TEST(fs_deny_chroot)
+{
+	/* enforcement without tag, via chroot */
+	check_access(_metadata, true, REL_CHROOT);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/test_ptrace.c b/tools/testing/selftests/landlock/test_ptrace.c
new file mode 100644
index 000000000000..2f3e346288bc
--- /dev/null
+++ b/tools/testing/selftests/landlock/test_ptrace.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Landlock tests - ptrace
+ *
+ * Copyright © 2017-2019 Mickaël Salaün <mic@digikod.net>
+ */
+
+#define _GNU_SOURCE
+#include <signal.h> /* raise */
+#include <sys/ptrace.h>
+#include <sys/types.h> /* waitpid */
+#include <sys/wait.h> /* waitpid */
+#include <unistd.h> /* fork, pipe */
+
+#include "test.h"
+
+static void apply_null_sandbox(struct __test_metadata *_metadata)
+{
+	const struct bpf_insn prog_accept[] = {
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	const union bpf_prog_subtype subtype = {
+		.landlock_hook = {
+			.type = LANDLOCK_HOOK_FS_PICK,
+			.triggers = LANDLOCK_TRIGGER_FS_PICK_OPEN,
+		}
+	};
+	int prog;
+	char log[256] = "";
+
+	prog = ll_bpf_load_program((const struct bpf_insn *)&prog_accept,
+			sizeof(prog_accept) / sizeof(struct bpf_insn), log,
+			sizeof(log), &subtype);
+	ASSERT_NE(-1, prog) {
+		TH_LOG("Failed to load minimal rule: %s\n%s",
+				strerror(errno), log);
+	}
+	ASSERT_EQ(0, seccomp(SECCOMP_PREPEND_LANDLOCK_PROG, 0, &prog)) {
+		TH_LOG("Failed to apply minimal rule: %s", strerror(errno));
+	}
+	EXPECT_EQ(0, close(prog));
+}
+
+/* PTRACE_TRACEME and PTRACE_ATTACH without Landlock rules effect */
+static void check_ptrace(struct __test_metadata *_metadata,
+		int sandbox_both, int sandbox_parent, int sandbox_child,
+		int expect_ptrace)
+{
+	pid_t child;
+	int status;
+	int pipefd[2];
+
+	ASSERT_EQ(0, pipe(pipefd));
+	if (sandbox_both)
+		apply_null_sandbox(_metadata);
+
+	child = fork();
+	ASSERT_LE(0, child);
+	if (child == 0) {
+		char buf;
+
+		EXPECT_EQ(0, close(pipefd[1]));
+		if (sandbox_child)
+			apply_null_sandbox(_metadata);
+
+		/* test traceme */
+		ASSERT_EQ(expect_ptrace, ptrace(PTRACE_TRACEME));
+		if (expect_ptrace) {
+			ASSERT_EQ(EPERM, errno);
+		} else {
+			ASSERT_EQ(0, raise(SIGSTOP));
+		}
+
+		/* sync */
+		ASSERT_EQ(1, read(pipefd[0], &buf, 1)) {
+			TH_LOG("Failed to read() sync from parent");
+		}
+		ASSERT_EQ('.', buf);
+		_exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+	}
+
+	EXPECT_EQ(0, close(pipefd[0]));
+	if (sandbox_parent)
+		apply_null_sandbox(_metadata);
+
+	/* test traceme */
+	if (!expect_ptrace) {
+		ASSERT_EQ(child, waitpid(child, &status, 0));
+		ASSERT_EQ(1, WIFSTOPPED(status));
+		ASSERT_EQ(0, ptrace(PTRACE_DETACH, child, NULL, 0));
+	}
+	/* test attach */
+	ASSERT_EQ(expect_ptrace, ptrace(PTRACE_ATTACH, child, NULL, 0));
+	if (expect_ptrace) {
+		ASSERT_EQ(EPERM, errno);
+	} else {
+		ASSERT_EQ(child, waitpid(child, &status, 0));
+		ASSERT_EQ(1, WIFSTOPPED(status));
+		ASSERT_EQ(0, ptrace(PTRACE_CONT, child, NULL, 0));
+	}
+
+	/* sync */
+	ASSERT_EQ(1, write(pipefd[1], ".", 1)) {
+		TH_LOG("Failed to write() sync to child");
+	}
+	ASSERT_EQ(child, waitpid(child, &status, 0));
+	if (WIFSIGNALED(status) || WEXITSTATUS(status))
+		_metadata->passed = 0;
+}
+
+TEST(ptrace_allow_without_sandbox)
+{
+	/* no sandbox */
+	check_ptrace(_metadata, 0, 0, 0, 0);
+}
+
+TEST(ptrace_allow_with_one_sandbox)
+{
+	/* child sandbox */
+	check_ptrace(_metadata, 0, 0, 1, 0);
+}
+
+TEST(ptrace_allow_with_nested_sandbox)
+{
+	/* inherited and child sandbox */
+	check_ptrace(_metadata, 1, 0, 1, 0);
+}
+
+TEST(ptrace_deny_with_parent_sandbox)
+{
+	/* parent sandbox */
+	check_ptrace(_metadata, 0, 1, 0, -1);
+}
+
+TEST(ptrace_deny_with_nested_and_parent_sandbox)
+{
+	/* inherited and parent sandbox */
+	check_ptrace(_metadata, 1, 1, 0, -1);
+}
+
+TEST(ptrace_deny_with_forked_sandbox)
+{
+	/* inherited, parent and child sandbox */
+	check_ptrace(_metadata, 1, 1, 1, -1);
+}
+
+TEST(ptrace_deny_with_sibling_sandbox)
+{
+	/* parent and child sandbox */
+	check_ptrace(_metadata, 0, 1, 1, -1);
+}
+
+TEST_HARNESS_MAIN
-- 
2.20.1

^ permalink raw reply related

* [PATCH bpf-next v9 08/10] bpf: Add a Landlock sandbox example
From: Mickaël Salaün @ 2019-06-25 21:52 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mickaël Salaün, Aleksa Sarai, Alexander Viro,
	Alexei Starovoitov, Andrew Morton, Andy Lutomirski,
	Arnaldo Carvalho de Melo, Casey Schaufler, Daniel Borkmann,
	David Drysdale, David S . Miller, Eric W . Biederman,
	James Morris, Jann Horn, John Johansen, Jonathan Corbet,
	Kees Cook, Michael Kerrisk
In-Reply-To: <20190625215239.11136-1-mic@digikod.net>

Add a basic sandbox tool to launch a command which is denied access to a
list of files and directories.

Add to the bpf_load library the ability to handle a BPF program subtype.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: James Morris <jmorris@namei.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Serge E. Hallyn <serge@hallyn.com>
---

Changes since v8:
* rewrite the landlock1 sample which deny access to a set of files or
  directories (i.e. simple blacklist) to fit with the previous patches
* add "landlock1" to .gitignore
* in bpf_load.c, pass the subtype with a call to
  bpf_load_program_xattr()

Changes since v7:
* rewrite the example using an inode map
* add to bpf_load the ability to handle subtypes per program type

Changes since v6:
* check return value of load_and_attach()
* allow to write on pipes
* rename BPF_PROG_TYPE_LANDLOCK to BPF_PROG_TYPE_LANDLOCK_RULE
* rename Landlock version to ABI to better reflect its purpose
* use const variable (suggested by Kees Cook)
* remove useless definitions (suggested by Kees Cook)
* add detailed explanations (suggested by Kees Cook)

Changes since v5:
* cosmetic fixes
* rebase

Changes since v4:
* write Landlock rule in C and compiled it with LLVM
* remove cgroup handling
* remove path handling: only handle a read-only environment
* remove errno return codes

Changes since v3:
* remove seccomp and origin field: completely free from seccomp programs
* handle more FS-related hooks
* handle inode hooks and directory traversal
* add faked but consistent view thanks to ENOENT
* add /lib64 in the example
* fix spelling
* rename some types and definitions (e.g. SECCOMP_ADD_LANDLOCK_RULE)

Changes since v2:
* use BPF_PROG_ATTACH for cgroup handling
---
 samples/bpf/.gitignore       |   1 +
 samples/bpf/Makefile         |   3 +
 samples/bpf/bpf_load.c       |  76 ++++++++++++++++-
 samples/bpf/bpf_load.h       |   7 ++
 samples/bpf/landlock1.h      |   8 ++
 samples/bpf/landlock1_kern.c | 104 +++++++++++++++++++++++
 samples/bpf/landlock1_user.c | 157 +++++++++++++++++++++++++++++++++++
 7 files changed, 352 insertions(+), 4 deletions(-)
 create mode 100644 samples/bpf/landlock1.h
 create mode 100644 samples/bpf/landlock1_kern.c
 create mode 100644 samples/bpf/landlock1_user.c

diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore
index 74d31fd3c99c..a4c9c806f739 100644
--- a/samples/bpf/.gitignore
+++ b/samples/bpf/.gitignore
@@ -2,6 +2,7 @@ cpustat
 fds_example
 hbm
 ibumad
+landlock1
 lathist
 lwt_len_hist
 map_perf_test
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 0917f8cf4fab..da246eaa8bf8 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -53,6 +53,7 @@ hostprogs-y += task_fd_query
 hostprogs-y += xdp_sample_pkts
 hostprogs-y += ibumad
 hostprogs-y += hbm
+hostprogs-y += landlock1
 
 # Libbpf dependencies
 LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -109,6 +110,7 @@ task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
 xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
 ibumad-objs := bpf_load.o ibumad_user.o $(TRACE_HELPERS)
 hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS)
+landlock1-objs := bpf_load.o landlock1_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -168,6 +170,7 @@ always += task_fd_query_kern.o
 always += xdp_sample_pkts_kern.o
 always += ibumad_kern.o
 always += hbm_out_kern.o
+always += landlock1_kern.o
 
 KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include
 KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/bpf/
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 4574b1939e49..bf62d965f606 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -12,6 +12,7 @@
 #include <stdlib.h>
 #include <linux/bpf.h>
 #include <linux/filter.h>
+#include <linux/landlock.h>
 #include <linux/perf_event.h>
 #include <linux/netlink.h>
 #include <linux/rtnetlink.h>
@@ -42,6 +43,9 @@ int prog_array_fd = -1;
 struct bpf_map_data map_data[MAX_MAPS];
 int map_data_count;
 
+struct bpf_subtype_data subtype_data[MAX_PROGS];
+int subtype_data_count;
+
 static int populate_prog_array(const char *event, int prog_fd)
 {
 	int ind = atoi(event), err;
@@ -87,11 +91,15 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 	bool is_sockops = strncmp(event, "sockops", 7) == 0;
 	bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0;
 	bool is_sk_msg = strncmp(event, "sk_msg", 6) == 0;
+	bool is_landlock = strncmp(event, "landlock", 8) == 0;
 	size_t insns_cnt = size / sizeof(struct bpf_insn);
 	enum bpf_prog_type prog_type;
 	char buf[256];
 	int fd, efd, err, id;
 	struct perf_event_attr attr = {};
+	union bpf_prog_subtype *st = NULL;
+	struct bpf_subtype_data *sd = NULL;
+	struct bpf_load_program_attr load_attr;
 
 	attr.type = PERF_TYPE_TRACEPOINT;
 	attr.sample_type = PERF_SAMPLE_RAW;
@@ -120,6 +128,32 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 		prog_type = BPF_PROG_TYPE_SK_SKB;
 	} else if (is_sk_msg) {
 		prog_type = BPF_PROG_TYPE_SK_MSG;
+	} else if (is_landlock) {
+		int i, prog_id;
+		const char *event_id = (event + 8);
+
+		if (!isdigit(*event_id)) {
+			printf("invalid prog number\n");
+			return -1;
+		}
+		prog_id = atoi(event_id);
+		for (i = 0; i < subtype_data_count; i++) {
+			if (subtype_data[i].name && strcmp(event,
+						subtype_data[i].name) == 0) {
+				/* save the prog_id for a next program */
+				sd = &subtype_data[i];
+				sd->prog_id = prog_id;
+				st = &sd->subtype;
+				free(sd->name);
+				sd->name = NULL;
+				break;
+			}
+		}
+		if (!st) {
+			printf("missing subtype\n");
+			return -1;
+		}
+		prog_type = BPF_PROG_TYPE_LANDLOCK_HOOK;
 	} else {
 		printf("Unknown event '%s'\n", event);
 		return -1;
@@ -128,16 +162,25 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 	if (prog_cnt == MAX_PROGS)
 		return -1;
 
-	fd = bpf_load_program(prog_type, prog, insns_cnt, license, kern_version,
-			      bpf_log_buf, BPF_LOG_BUF_SIZE);
+	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
+	load_attr.prog_type = prog_type;
+	load_attr.prog_subtype = st;
+	load_attr.insns = prog;
+	load_attr.insns_cnt = insns_cnt;
+	load_attr.license = license;
+	load_attr.kern_version = kern_version;
+	fd = bpf_load_program_xattr(&load_attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
 	if (fd < 0) {
 		printf("bpf_load_program() err=%d\n%s", errno, bpf_log_buf);
 		return -1;
 	}
+	if (sd)
+		sd->prog_fd = fd;
 
 	prog_fd[prog_cnt++] = fd;
 
-	if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
+	if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk ||
+	    is_landlock)
 		return 0;
 
 	if (is_socket || is_sockops || is_sk_skb || is_sk_msg) {
@@ -519,6 +562,7 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
 	kern_version = 0;
 	memset(license, 0, sizeof(license));
 	memset(processed_sec, 0, sizeof(processed_sec));
+	subtype_data_count = 0;
 
 	if (elf_version(EV_CURRENT) == EV_NONE)
 		return 1;
@@ -567,6 +611,29 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
 			data_maps = data;
 			for (j = 0; j < MAX_MAPS; j++)
 				map_data[j].fd = -1;
+		} else if (strncmp(shname, "subtype", 7) == 0) {
+			processed_sec[i] = true;
+			if (*(shname + 7) != '/') {
+				printf("invalid name of subtype section");
+				return 1;
+			}
+			if (data->d_size != sizeof(union bpf_prog_subtype)) {
+				printf("invalid size of subtype section: %zd\n",
+				       data->d_size);
+				printf("ref: %zd\n",
+				       sizeof(union bpf_prog_subtype));
+				return 1;
+			}
+			if (subtype_data_count >= MAX_PROGS) {
+				printf("too many subtype sections");
+				return 1;
+			}
+			memcpy(&subtype_data[subtype_data_count].subtype,
+					data->d_buf,
+					sizeof(union bpf_prog_subtype));
+			subtype_data[subtype_data_count].name =
+				strdup((shname + 8));
+			subtype_data_count++;
 		} else if (shdr.sh_type == SHT_SYMTAB) {
 			strtabidx = shdr.sh_link;
 			symbols = data;
@@ -643,7 +710,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
 		    memcmp(shname, "cgroup/", 7) == 0 ||
 		    memcmp(shname, "sockops", 7) == 0 ||
 		    memcmp(shname, "sk_skb", 6) == 0 ||
-		    memcmp(shname, "sk_msg", 6) == 0) {
+		    memcmp(shname, "sk_msg", 6) == 0 ||
+		    memcmp(shname, "landlock", 8) == 0) {
 			ret = load_and_attach(shname, data->d_buf,
 					      data->d_size);
 			if (ret != 0)
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
index 814894a12974..e210b5fdf8ee 100644
--- a/samples/bpf/bpf_load.h
+++ b/samples/bpf/bpf_load.h
@@ -24,6 +24,13 @@ struct bpf_map_data {
 	struct bpf_load_map_def def;
 };
 
+struct bpf_subtype_data {
+	char *name;
+	int prog_id;
+	int prog_fd;
+	union bpf_prog_subtype subtype;
+};
+
 typedef void (*fixup_map_cb)(struct bpf_map_data *map, int idx);
 
 extern int prog_fd[MAX_PROGS];
diff --git a/samples/bpf/landlock1.h b/samples/bpf/landlock1.h
new file mode 100644
index 000000000000..53b0a9447855
--- /dev/null
+++ b/samples/bpf/landlock1.h
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Landlock sample 1 - common header
+ *
+ * Copyright © 2018-2019 Mickaël Salaün <mic@digikod.net>
+ */
+
+#define MAP_FLAG_DENY		(1ULL << 0)
diff --git a/samples/bpf/landlock1_kern.c b/samples/bpf/landlock1_kern.c
new file mode 100644
index 000000000000..0298d98dd06a
--- /dev/null
+++ b/samples/bpf/landlock1_kern.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Landlock sample 1 - whitelist of read only or read-write file hierarchy
+ *
+ * Copyright © 2017-2019 Mickaël Salaün <mic@digikod.net>
+ */
+
+/*
+ * This file contains a function that will be compiled to eBPF bytecode thanks
+ * to LLVM/Clang.
+ *
+ * Each SEC() means that the following function or variable will be part of a
+ * custom ELF section. This sections are then processed by the userspace part
+ * (see landlock1_user.c) to extract eBPF bytecode and take into account
+ * variables describing the eBPF program subtype or its license.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/landlock.h>
+
+#include "bpf_helpers.h"
+#include "landlock1.h" /* MAP_FLAG_DENY */
+
+#define MAP_MAX_ENTRIES		20
+
+SEC("maps")
+struct bpf_map_def inode_map = {
+	.type = BPF_MAP_TYPE_INODE,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(u64),
+	.max_entries = MAP_MAX_ENTRIES,
+};
+
+static __always_inline __u64 get_access(void *inode)
+{
+	if (bpf_inode_map_lookup(&inode_map, inode) & MAP_FLAG_DENY)
+		return LANDLOCK_RET_DENY;
+	return LANDLOCK_RET_ALLOW;
+}
+
+SEC("subtype/landlock1")
+static union bpf_prog_subtype _subtype1 = {
+	.landlock_hook = {
+		.type = LANDLOCK_HOOK_FS_WALK,
+	}
+};
+
+/*
+ * The function fs_walk() is a simple Landlock program enforced on a set of
+ * processes. This program will be run for each walk through a file path.
+ *
+ * The argument ctx contains the context of the program when it is run, which
+ * enable to evaluate the file path.  This context can change for each run of
+ * the program.
+ */
+SEC("landlock1")
+int fs_walk(struct landlock_ctx_fs_walk *ctx)
+{
+	return get_access((void *)ctx->inode);
+}
+
+SEC("subtype/landlock2")
+static union bpf_prog_subtype _subtype2 = {
+	.landlock_hook = {
+		.type = LANDLOCK_HOOK_FS_PICK,
+		/*
+		 * allowed:
+		 * - LANDLOCK_TRIGGER_FS_PICK_LINK
+		 * - LANDLOCK_TRIGGER_FS_PICK_LINKTO
+		 * - LANDLOCK_TRIGGER_FS_PICK_RECEIVE
+		 * - LANDLOCK_TRIGGER_FS_PICK_MOUNTON
+		 */
+		.triggers =
+			    LANDLOCK_TRIGGER_FS_PICK_APPEND |
+			    LANDLOCK_TRIGGER_FS_PICK_CHDIR |
+			    LANDLOCK_TRIGGER_FS_PICK_CHROOT |
+			    LANDLOCK_TRIGGER_FS_PICK_CREATE |
+			    LANDLOCK_TRIGGER_FS_PICK_EXECUTE |
+			    LANDLOCK_TRIGGER_FS_PICK_FCNTL |
+			    LANDLOCK_TRIGGER_FS_PICK_GETATTR |
+			    LANDLOCK_TRIGGER_FS_PICK_IOCTL |
+			    LANDLOCK_TRIGGER_FS_PICK_LOCK |
+			    LANDLOCK_TRIGGER_FS_PICK_MAP |
+			    LANDLOCK_TRIGGER_FS_PICK_OPEN |
+			    LANDLOCK_TRIGGER_FS_PICK_READ |
+			    LANDLOCK_TRIGGER_FS_PICK_READDIR |
+			    LANDLOCK_TRIGGER_FS_PICK_RENAME |
+			    LANDLOCK_TRIGGER_FS_PICK_RENAMETO |
+			    LANDLOCK_TRIGGER_FS_PICK_RMDIR |
+			    LANDLOCK_TRIGGER_FS_PICK_SETATTR |
+			    LANDLOCK_TRIGGER_FS_PICK_TRANSFER |
+			    LANDLOCK_TRIGGER_FS_PICK_UNLINK |
+			    LANDLOCK_TRIGGER_FS_PICK_WRITE,
+	}
+};
+
+SEC("landlock2")
+int fs_pick_ro(struct landlock_ctx_fs_pick *ctx)
+{
+	return get_access((void *)ctx->inode);
+}
+
+SEC("license")
+static const char _license[] = "GPL";
diff --git a/samples/bpf/landlock1_user.c b/samples/bpf/landlock1_user.c
new file mode 100644
index 000000000000..aa45932d36a8
--- /dev/null
+++ b/samples/bpf/landlock1_user.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Landlock sample 1 - deny access to a set of directories (blacklisting)
+ *
+ * Copyright © 2017-2019 Mickaël Salaün <mic@digikod.net>
+ */
+
+#include "bpf/libbpf.h"
+#include "bpf_load.h"
+#include "landlock1.h" /* MAP_FLAG_DENY */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h> /* open() */
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/landlock.h>
+#include <linux/prctl.h>
+#include <linux/seccomp.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#ifndef seccomp
+static int seccomp(unsigned int op, unsigned int flags, void *args)
+{
+	errno = 0;
+	return syscall(__NR_seccomp, op, flags, args);
+}
+#endif
+
+static int apply_sandbox(int prog_fd)
+{
+	int ret = 0;
+
+	/* set up the test sandbox */
+	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+		perror("prctl(no_new_priv)");
+		return 1;
+	}
+	if (seccomp(SECCOMP_PREPEND_LANDLOCK_PROG, 0, &prog_fd)) {
+		perror("seccomp(set_hook)");
+		ret = 1;
+	}
+	close(prog_fd);
+
+	return ret;
+}
+
+#define ENV_FS_PATH_DENY_NAME "LL_PATH_DENY"
+#define ENV_PATH_TOKEN ":"
+
+static int parse_path(char *env_path, const char ***path_list)
+{
+	int i, path_nb = 0;
+
+	if (env_path) {
+		path_nb++;
+		for (i = 0; env_path[i]; i++) {
+			if (env_path[i] == ENV_PATH_TOKEN[0])
+				path_nb++;
+		}
+	}
+	*path_list = malloc(path_nb * sizeof(**path_list));
+	for (i = 0; i < path_nb; i++)
+		(*path_list)[i] = strsep(&env_path, ENV_PATH_TOKEN);
+
+	return path_nb;
+}
+
+static int populate_map(const char *env_var, unsigned long long value,
+		int map_fd)
+{
+	int path_nb, ref_fd, i;
+	char *env_path_name;
+	const char **path_list = NULL;
+
+	env_path_name = getenv(env_var);
+	if (!env_path_name)
+		return 0;
+	env_path_name = strdup(env_path_name);
+	path_nb = parse_path(env_path_name, &path_list);
+
+	for (i = 0; i < path_nb; i++) {
+		ref_fd = open(path_list[i], O_RDONLY | O_CLOEXEC);
+		if (ref_fd < 0) {
+			fprintf(stderr, "Failed to open \"%s\": %s\n",
+					path_list[i],
+					strerror(errno));
+			return 1;
+		}
+		if (bpf_map_update_elem(map_fd, &ref_fd, &value, BPF_ANY)) {
+			fprintf(stderr, "Failed to update the map with"
+					" \"%s\": %s\n", path_list[i],
+					strerror(errno));
+			return 1;
+		}
+		close(ref_fd);
+	}
+	free(env_path_name);
+	return 0;
+}
+
+int main(int argc, char * const argv[], char * const *envp)
+{
+	char filename[256];
+	char *cmd_path;
+	char * const *cmd_argv;
+	int ll_prog_walk, ll_prog_pick;
+
+	if (argc < 2) {
+		fprintf(stderr, "usage: %s <cmd> [args]...\n\n", argv[0]);
+		fprintf(stderr, "Launch a command in a restricted environment.\n\n");
+		fprintf(stderr, "Environment variables containing paths, each separated by a colon:\n");
+		fprintf(stderr, "* %s: list of files and directories which are denied\n",
+				ENV_FS_PATH_DENY_NAME);
+		fprintf(stderr, "\nexample:\n"
+				"%s=\"${HOME}/.ssh:${HOME}/Images\" "
+				"%s /bin/sh -i\n",
+				ENV_FS_PATH_DENY_NAME, argv[0]);
+		return 1;
+	}
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+	ll_prog_walk = prog_fd[0]; /* fs_walk */
+	ll_prog_pick = prog_fd[1]; /* fs_pick */
+	if (!ll_prog_walk || !ll_prog_pick) {
+		if (errno)
+			printf("load_bpf_file: %s\n", strerror(errno));
+		else
+			printf("load_bpf_file: Error\n");
+		return 1;
+	}
+
+	if (populate_map(ENV_FS_PATH_DENY_NAME, MAP_FLAG_DENY, map_fd[0]))
+		return 1;
+	close(map_fd[0]);
+
+	fprintf(stderr, "Launching a new sandboxed process\n");
+	if (apply_sandbox(ll_prog_walk))
+		return 1;
+	if (apply_sandbox(ll_prog_pick))
+		return 1;
+	cmd_path = argv[1];
+	cmd_argv = argv + 1;
+	execve(cmd_path, cmd_argv, envp);
+	perror("Failed to call execve");
+	return 1;
+}
-- 
2.20.1

^ permalink raw reply related

* [PATCH bpf-next v9 07/10] landlock: Add ptrace restrictions
From: Mickaël Salaün @ 2019-06-25 21:52 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mickaël Salaün, Aleksa Sarai, Alexander Viro,
	Alexei Starovoitov, Andrew Morton, Andy Lutomirski,
	Arnaldo Carvalho de Melo, Casey Schaufler, Daniel Borkmann,
	David Drysdale, David S . Miller, Eric W . Biederman,
	James Morris, Jann Horn, John Johansen, Jonathan Corbet,
	Kees Cook, Michael Kerrisk
In-Reply-To: <20190625215239.11136-1-mic@digikod.net>

A landlocked process has less privileges than a non-landlocked process
and must then be subject to additional restrictions when manipulating
processes. To be allowed to use ptrace(2) and related syscalls on a
target process, a landlocked process must have a subset of the target
process' rules.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: James Morris <jmorris@namei.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Serge E. Hallyn <serge@hallyn.com>
---

Changes since v6:
* factor out ptrace check
* constify pointers
* cleanup headers
* use the new security_add_hooks()
---
 security/landlock/Makefile       |   2 +-
 security/landlock/hooks_ptrace.c | 121 +++++++++++++++++++++++++++++++
 security/landlock/hooks_ptrace.h |   8 ++
 security/landlock/init.c         |   2 +
 4 files changed, 132 insertions(+), 1 deletion(-)
 create mode 100644 security/landlock/hooks_ptrace.c
 create mode 100644 security/landlock/hooks_ptrace.h

diff --git a/security/landlock/Makefile b/security/landlock/Makefile
index 270ece5d93de..4500ddb0767e 100644
--- a/security/landlock/Makefile
+++ b/security/landlock/Makefile
@@ -2,4 +2,4 @@ obj-$(CONFIG_SECURITY_LANDLOCK) := landlock.o
 
 landlock-y := init.o \
 	enforce.o enforce_seccomp.o \
-	hooks.o hooks_fs.o
+	hooks.o hooks_fs.o hooks_ptrace.o
diff --git a/security/landlock/hooks_ptrace.c b/security/landlock/hooks_ptrace.c
new file mode 100644
index 000000000000..7f5e8b994e93
--- /dev/null
+++ b/security/landlock/hooks_ptrace.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - ptrace hooks
+ *
+ * Copyright © 2017 Mickaël Salaün <mic@digikod.net>
+ */
+
+#include <asm/current.h>
+#include <linux/errno.h>
+#include <linux/kernel.h> /* ARRAY_SIZE */
+#include <linux/lsm_hooks.h>
+#include <linux/sched.h> /* struct task_struct */
+#include <linux/seccomp.h>
+
+#include "common.h" /* struct landlock_prog_set */
+#include "hooks.h" /* landlocked() */
+#include "hooks_ptrace.h"
+
+static bool progs_are_subset(const struct landlock_prog_set *parent,
+		const struct landlock_prog_set *child)
+{
+	size_t i;
+
+	if (!parent || !child)
+		return false;
+	if (parent == child)
+		return true;
+
+	for (i = 0; i < ARRAY_SIZE(child->programs); i++) {
+		struct landlock_prog_list *walker;
+		bool found_parent = false;
+
+		if (!parent->programs[i])
+			continue;
+		for (walker = child->programs[i]; walker;
+				walker = walker->prev) {
+			if (walker == parent->programs[i]) {
+				found_parent = true;
+				break;
+			}
+		}
+		if (!found_parent)
+			return false;
+	}
+	return true;
+}
+
+static bool task_has_subset_progs(const struct task_struct *parent,
+		const struct task_struct *child)
+{
+#ifdef CONFIG_SECCOMP_FILTER
+	if (progs_are_subset(parent->seccomp.landlock_prog_set,
+				child->seccomp.landlock_prog_set))
+		/* must be ANDed with other providers (i.e. cgroup) */
+		return true;
+#endif /* CONFIG_SECCOMP_FILTER */
+	return false;
+}
+
+static int task_ptrace(const struct task_struct *parent,
+		const struct task_struct *child)
+{
+	if (!landlocked(parent))
+		return 0;
+
+	if (!landlocked(child))
+		return -EPERM;
+
+	if (task_has_subset_progs(parent, child))
+		return 0;
+
+	return -EPERM;
+}
+
+/**
+ * hook_ptrace_access_check - determine whether the current process may access
+ *			      another
+ *
+ * @child: the process to be accessed
+ * @mode: the mode of attachment
+ *
+ * If the current task has Landlock programs, then the child must have at least
+ * the same programs.  Else denied.
+ *
+ * Determine whether a process may access another, returning 0 if permission
+ * granted, -errno if denied.
+ */
+static int hook_ptrace_access_check(struct task_struct *child,
+		unsigned int mode)
+{
+	return task_ptrace(current, child);
+}
+
+/**
+ * hook_ptrace_traceme - determine whether another process may trace the
+ *			 current one
+ *
+ * @parent: the task proposed to be the tracer
+ *
+ * If the parent has Landlock programs, then the current task must have the
+ * same or more programs.
+ * Else denied.
+ *
+ * Determine whether the nominated task is permitted to trace the current
+ * process, returning 0 if permission is granted, -errno if denied.
+ */
+static int hook_ptrace_traceme(struct task_struct *parent)
+{
+	return task_ptrace(parent, current);
+}
+
+static struct security_hook_list landlock_hooks[] = {
+	LSM_HOOK_INIT(ptrace_access_check, hook_ptrace_access_check),
+	LSM_HOOK_INIT(ptrace_traceme, hook_ptrace_traceme),
+};
+
+__init void landlock_add_hooks_ptrace(void)
+{
+	security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),
+			LANDLOCK_NAME);
+}
diff --git a/security/landlock/hooks_ptrace.h b/security/landlock/hooks_ptrace.h
new file mode 100644
index 000000000000..2c2b8a13037f
--- /dev/null
+++ b/security/landlock/hooks_ptrace.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - ptrace hooks
+ *
+ * Copyright © 2017 Mickaël Salaün <mic@digikod.net>
+ */
+
+__init void landlock_add_hooks_ptrace(void);
diff --git a/security/landlock/init.c b/security/landlock/init.c
index 68def2a7af71..d58aa94124b0 100644
--- a/security/landlock/init.c
+++ b/security/landlock/init.c
@@ -13,6 +13,7 @@
 
 #include "common.h" /* LANDLOCK_* */
 #include "hooks_fs.h"
+#include "hooks_ptrace.h"
 
 static bool bpf_landlock_is_valid_access(int off, int size,
 		enum bpf_access_type type, const struct bpf_prog *prog,
@@ -143,6 +144,7 @@ const struct bpf_prog_ops landlock_prog_ops = {};
 static int __init landlock_init(void)
 {
 	pr_info(LANDLOCK_NAME ": Initializing (sandbox with seccomp)\n");
+	landlock_add_hooks_ptrace();
 	landlock_add_hooks_fs();
 	return 0;
 }
-- 
2.20.1

^ permalink raw reply related

* [PATCH bpf-next v9 06/10] landlock: Handle filesystem access control
From: Mickaël Salaün @ 2019-06-25 21:52 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mickaël Salaün, Aleksa Sarai, Alexander Viro,
	Alexei Starovoitov, Andrew Morton, Andy Lutomirski,
	Arnaldo Carvalho de Melo, Casey Schaufler, Daniel Borkmann,
	David Drysdale, David S . Miller, Eric W . Biederman,
	James Morris, Jann Horn, John Johansen, Jonathan Corbet,
	Kees Cook, Michael Kerrisk
In-Reply-To: <20190625215239.11136-1-mic@digikod.net>

This add two Landlock hooks: FS_WALK and FS_PICK.

The FS_WALK hook is used to walk through a file path. A program tied to
this hook will be evaluated for each directory traversal except the last
one if it is the leaf of the path.  It is important to differentiate
this hook from FS_PICK to enable more powerful path evaluation in the
future (cf. Landlock patch v8).

The FS_PICK hook is used to validate a set of actions requested on a
file. This actions are defined with triggers (e.g. read, write, open,
append...).

The Landlock LSM hook registration is done after other LSM to only run
actions from user-space, via eBPF programs, if the access was granted by
major (privileged) LSMs.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: James Morris <jmorris@namei.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Serge E. Hallyn <serge@hallyn.com>
---

Changes since v8:
* add a new LSM_ORDER_LAST, cf. commit e2bc445b66ca ("LSM: Introduce
  enum lsm_order")
* add WARN_ON() for pointer dereferencement
* remove the FS_GET subtype which rely on program chaining
* remove the subtype option which was only used for chaining (with the
  "previous" field)
* remove inode_lookup which depends on the (removed) nameidata security
  blob
* remove eBPF helpers to get and set Landlock inode tags
* do not use task LSM credentials (for now)

Changes since v7:
* major rewrite with clean Landlock hooks able to deal with file paths

Changes since v6:
* add 3 more sub-events: IOCTL, LOCK, FCNTL
  https://lkml.kernel.org/r/2fbc99a6-f190-f335-bd14-04bdeed35571@digikod.net
* use the new security_add_hooks()
* explain the -Werror=unused-function
* constify pointers
* cleanup headers

Changes since v5:
* split hooks.[ch] into hooks.[ch] and hooks_fs.[ch]
* add more documentation
* cosmetic fixes
* rebase (SCALAR_VALUE)

Changes since v4:
* add LSM hook abstraction called Landlock event
  * use the compiler type checking to verify hooks use by an event
  * handle all filesystem related LSM hooks (e.g. file_permission,
    mmap_file, sb_mount...)
* register BPF programs for Landlock just after LSM hooks registration
* move hooks registration after other LSMs
* add failsafes to check if a hook is not used by the kernel
* allow partial raw value access form the context (needed for programs
  generated by LLVM)

Changes since v3:
* split commit
* add hooks dealing with struct inode and struct path pointers:
  inode_permission and inode_getattr
* add abstraction over eBPF helper arguments thanks to wrapping structs
---
 include/linux/lsm_hooks.h    |   1 +
 security/landlock/Makefile   |   3 +-
 security/landlock/common.h   |  10 +
 security/landlock/hooks.c    |  95 ++++++
 security/landlock/hooks.h    |  31 ++
 security/landlock/hooks_fs.c | 568 +++++++++++++++++++++++++++++++++++
 security/landlock/hooks_fs.h |  31 ++
 security/landlock/init.c     |  47 +++
 security/security.c          |  15 +
 9 files changed, 800 insertions(+), 1 deletion(-)
 create mode 100644 security/landlock/hooks.c
 create mode 100644 security/landlock/hooks.h
 create mode 100644 security/landlock/hooks_fs.c
 create mode 100644 security/landlock/hooks_fs.h

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 47f58cfb6a19..eefe9f214f05 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -2092,6 +2092,7 @@ extern void security_add_hooks(struct security_hook_list *hooks, int count,
 enum lsm_order {
 	LSM_ORDER_FIRST = -1,	/* This is only for capabilities. */
 	LSM_ORDER_MUTABLE = 0,
+	LSM_ORDER_LAST = 1,	/* potentially-unprivileged LSM */
 };
 
 struct lsm_info {
diff --git a/security/landlock/Makefile b/security/landlock/Makefile
index 2a1a7082a365..270ece5d93de 100644
--- a/security/landlock/Makefile
+++ b/security/landlock/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_SECURITY_LANDLOCK) := landlock.o
 
 landlock-y := init.o \
-	enforce.o enforce_seccomp.o
+	enforce.o enforce_seccomp.o \
+	hooks.o hooks_fs.o
diff --git a/security/landlock/common.h b/security/landlock/common.h
index 0c9b5904e7f5..49b892515144 100644
--- a/security/landlock/common.h
+++ b/security/landlock/common.h
@@ -11,6 +11,7 @@
 
 #include <linux/bpf.h> /* enum bpf_prog_aux */
 #include <linux/filter.h> /* bpf_prog */
+#include <linux/lsm_hooks.h> /* lsm_blob_sizes */
 #include <linux/refcount.h> /* refcount_t */
 #include <uapi/linux/landlock.h> /* enum landlock_hook_type */
 
@@ -68,4 +69,13 @@ static inline enum landlock_hook_type get_type(struct bpf_prog *prog)
 	return prog->aux->extra->subtype.landlock_hook.type;
 }
 
+__maybe_unused
+static bool current_has_prog_type(enum landlock_hook_type hook_type)
+{
+	struct landlock_prog_set *prog_set;
+
+	prog_set = current->seccomp.landlock_prog_set;
+	return (prog_set && prog_set->programs[get_index(hook_type)]);
+}
+
 #endif /* _SECURITY_LANDLOCK_COMMON_H */
diff --git a/security/landlock/hooks.c b/security/landlock/hooks.c
new file mode 100644
index 000000000000..a1620d0481eb
--- /dev/null
+++ b/security/landlock/hooks.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - hook helpers
+ *
+ * Copyright © 2016-2019 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2019 ANSSI
+ */
+
+#include <asm/current.h>
+#include <linux/bpf.h> /* enum bpf_prog_aux */
+#include <linux/errno.h>
+#include <linux/filter.h> /* BPF_PROG_RUN() */
+#include <linux/rculist.h> /* list_add_tail_rcu */
+#include <uapi/linux/landlock.h> /* struct landlock_context */
+
+#include "common.h" /* struct landlock_rule, get_index() */
+#include "hooks.h" /* landlock_hook_ctx */
+
+#include "hooks_fs.h"
+
+/* return a Landlock program context (e.g. hook_ctx->fs_walk.prog_ctx) */
+static const void *get_ctx(enum landlock_hook_type hook_type,
+		struct landlock_hook_ctx *hook_ctx)
+{
+	switch (hook_type) {
+	case LANDLOCK_HOOK_FS_WALK:
+		return landlock_get_ctx_fs_walk(hook_ctx->fs_walk);
+	case LANDLOCK_HOOK_FS_PICK:
+		return landlock_get_ctx_fs_pick(hook_ctx->fs_pick);
+	}
+	WARN_ON(1);
+	return NULL;
+}
+
+/**
+ * landlock_access_deny - run Landlock programs tied to a hook
+ *
+ * @hook_idx: hook index in the programs array
+ * @ctx: non-NULL valid eBPF context
+ * @prog_set: Landlock program set pointer
+ * @triggers: a bitmask to check if a program should be run
+ *
+ * Return true if at least one program return deny.
+ */
+static bool landlock_access_deny(enum landlock_hook_type hook_type,
+		struct landlock_hook_ctx *hook_ctx,
+		struct landlock_prog_set *prog_set, u64 triggers)
+{
+	struct landlock_prog_list *prog_list, *prev_list = NULL;
+	u32 hook_idx = get_index(hook_type);
+
+	if (!prog_set)
+		return false;
+
+	for (prog_list = prog_set->programs[hook_idx];
+			prog_list; prog_list = prog_list->prev) {
+		u32 ret;
+		const void *prog_ctx;
+
+		/* check if @prog expect at least one of this triggers */
+		if (triggers && !(triggers & prog_list->prog->aux->extra->
+					subtype.landlock_hook.triggers))
+			continue;
+		prog_ctx = get_ctx(hook_type, hook_ctx);
+		if (!prog_ctx || WARN_ON(IS_ERR(prog_ctx)))
+			return true;
+		rcu_read_lock();
+		ret = BPF_PROG_RUN(prog_list->prog, prog_ctx);
+		rcu_read_unlock();
+		/* deny access if a program returns a value different than 0 */
+		if (ret)
+			return true;
+		if (prev_list && prog_list->prev && prog_list->prev->prog->
+				aux->extra->subtype.landlock_hook.type ==
+				prev_list->prog->aux->extra->
+				subtype.landlock_hook.type)
+			WARN_ON(prog_list->prev != prev_list);
+		prev_list = prog_list;
+	}
+	return false;
+}
+
+int landlock_decide(enum landlock_hook_type hook_type,
+		struct landlock_hook_ctx *hook_ctx, u64 triggers)
+{
+	bool deny = false;
+
+#ifdef CONFIG_SECCOMP_FILTER
+	deny = landlock_access_deny(hook_type, hook_ctx,
+			current->seccomp.landlock_prog_set, triggers);
+#endif /* CONFIG_SECCOMP_FILTER */
+
+	/* should we use -EPERM or -EACCES? */
+	return deny ? -EACCES : 0;
+}
diff --git a/security/landlock/hooks.h b/security/landlock/hooks.h
new file mode 100644
index 000000000000..31446e6629fb
--- /dev/null
+++ b/security/landlock/hooks.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - hooks helpers
+ *
+ * Copyright © 2016-2019 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2019 ANSSI
+ */
+
+#include <asm/current.h>
+#include <linux/sched.h> /* struct task_struct */
+#include <linux/seccomp.h>
+
+#include "hooks_fs.h"
+
+struct landlock_hook_ctx {
+	union {
+		struct landlock_hook_ctx_fs_walk *fs_walk;
+		struct landlock_hook_ctx_fs_pick *fs_pick;
+	};
+};
+
+static inline bool landlocked(const struct task_struct *task)
+{
+#ifdef CONFIG_SECCOMP_FILTER
+	return !!(task->seccomp.landlock_prog_set);
+#else
+	return false;
+#endif /* CONFIG_SECCOMP_FILTER */
+}
+
+int landlock_decide(enum landlock_hook_type, struct landlock_hook_ctx *, u64);
diff --git a/security/landlock/hooks_fs.c b/security/landlock/hooks_fs.c
new file mode 100644
index 000000000000..c3f0f60d72a7
--- /dev/null
+++ b/security/landlock/hooks_fs.c
@@ -0,0 +1,568 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - filesystem hooks
+ *
+ * Copyright © 2016-2019 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2019 ANSSI
+ */
+
+#include <linux/bpf.h> /* enum bpf_access_type */
+#include <linux/kernel.h> /* ARRAY_SIZE */
+#include <linux/lsm_hooks.h>
+#include <linux/rcupdate.h> /* synchronize_rcu() */
+#include <linux/stat.h> /* S_ISDIR */
+#include <linux/stddef.h> /* offsetof */
+#include <linux/types.h> /* uintptr_t */
+#include <linux/workqueue.h> /* INIT_WORK() */
+
+/* permissions translation */
+#include <linux/fs.h> /* MAY_* */
+#include <linux/mman.h> /* PROT_* */
+#include <linux/namei.h>
+
+/* hook arguments */
+#include <linux/dcache.h> /* struct dentry */
+#include <linux/fs.h> /* struct inode, struct iattr */
+#include <linux/mm_types.h> /* struct vm_area_struct */
+#include <linux/mount.h> /* struct vfsmount */
+#include <linux/path.h> /* struct path */
+#include <linux/sched.h> /* struct task_struct */
+#include <linux/time.h> /* struct timespec */
+
+#include "common.h"
+#include "hooks_fs.h"
+#include "hooks.h"
+
+/* fs_pick */
+
+#include <asm/page.h> /* PAGE_SIZE */
+#include <asm/syscall.h>
+#include <linux/dcache.h> /* d_path, dentry_path_raw */
+#include <linux/err.h> /* *_ERR */
+#include <linux/gfp.h> /* __get_free_page, GFP_KERNEL */
+#include <linux/path.h> /* struct path */
+
+bool landlock_is_valid_access_fs_pick(int off, enum bpf_access_type type,
+		enum bpf_reg_type *reg_type, int *max_size)
+{
+	switch (off) {
+	case offsetof(struct landlock_ctx_fs_pick, inode):
+		if (type != BPF_READ)
+			return false;
+		*reg_type = PTR_TO_INODE;
+		*max_size = sizeof(u64);
+		return true;
+	default:
+		return false;
+	}
+}
+
+bool landlock_is_valid_access_fs_walk(int off, enum bpf_access_type type,
+		enum bpf_reg_type *reg_type, int *max_size)
+{
+	switch (off) {
+	case offsetof(struct landlock_ctx_fs_walk, inode):
+		if (type != BPF_READ)
+			return false;
+		*reg_type = PTR_TO_INODE;
+		*max_size = sizeof(u64);
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* fs_walk */
+
+struct landlock_hook_ctx_fs_walk {
+	struct landlock_ctx_fs_walk prog_ctx;
+};
+
+const struct landlock_ctx_fs_walk *landlock_get_ctx_fs_walk(
+		const struct landlock_hook_ctx_fs_walk *hook_ctx)
+{
+	if (WARN_ON(!hook_ctx))
+		return NULL;
+
+	return &hook_ctx->prog_ctx;
+}
+
+static int decide_fs_walk(int may_mask, struct inode *inode)
+{
+	struct landlock_hook_ctx_fs_walk fs_walk = {};
+	struct landlock_hook_ctx hook_ctx = {
+		.fs_walk = &fs_walk,
+	};
+	const enum landlock_hook_type hook_type = LANDLOCK_HOOK_FS_WALK;
+
+	if (!current_has_prog_type(hook_type))
+		/* no fs_walk */
+		return 0;
+	if (WARN_ON(!inode))
+		return -EFAULT;
+
+	/* init common data: inode, is_dot, is_dotdot, is_root */
+	fs_walk.prog_ctx.inode = (uintptr_t)inode;
+	return landlock_decide(hook_type, &hook_ctx, 0);
+}
+
+/* fs_pick */
+
+struct landlock_hook_ctx_fs_pick {
+	__u64 triggers;
+	struct landlock_ctx_fs_pick prog_ctx;
+};
+
+const struct landlock_ctx_fs_pick *landlock_get_ctx_fs_pick(
+		const struct landlock_hook_ctx_fs_pick *hook_ctx)
+{
+	if (WARN_ON(!hook_ctx))
+		return NULL;
+
+	return &hook_ctx->prog_ctx;
+}
+
+static int decide_fs_pick(__u64 triggers, struct inode *inode)
+{
+	struct landlock_hook_ctx_fs_pick fs_pick = {};
+	struct landlock_hook_ctx hook_ctx = {
+		.fs_pick = &fs_pick,
+	};
+	const enum landlock_hook_type hook_type = LANDLOCK_HOOK_FS_PICK;
+
+	if (WARN_ON(!triggers))
+		return 0;
+	if (!current_has_prog_type(hook_type))
+		/* no fs_pick */
+		return 0;
+	if (WARN_ON(!inode))
+		return -EFAULT;
+
+	fs_pick.triggers = triggers,
+	/* init common data: inode */
+	fs_pick.prog_ctx.inode = (uintptr_t)inode;
+	return landlock_decide(hook_type, &hook_ctx, fs_pick.triggers);
+}
+
+/* helpers */
+
+static u64 fs_may_to_triggers(int may_mask, umode_t mode)
+{
+	u64 ret = 0;
+
+	if (may_mask & MAY_EXEC)
+		ret |= LANDLOCK_TRIGGER_FS_PICK_EXECUTE;
+	if (may_mask & MAY_READ) {
+		if (S_ISDIR(mode))
+			ret |= LANDLOCK_TRIGGER_FS_PICK_READDIR;
+		else
+			ret |= LANDLOCK_TRIGGER_FS_PICK_READ;
+	}
+	if (may_mask & MAY_WRITE)
+		ret |= LANDLOCK_TRIGGER_FS_PICK_WRITE;
+	if (may_mask & MAY_APPEND)
+		ret |= LANDLOCK_TRIGGER_FS_PICK_APPEND;
+	if (may_mask & MAY_OPEN)
+		ret |= LANDLOCK_TRIGGER_FS_PICK_OPEN;
+	if (may_mask & MAY_CHROOT)
+		ret |= LANDLOCK_TRIGGER_FS_PICK_CHROOT;
+	else if (may_mask & MAY_CHDIR)
+		ret |= LANDLOCK_TRIGGER_FS_PICK_CHDIR;
+	/* XXX: ignore MAY_ACCESS */
+	WARN_ON(!ret);
+	return ret;
+}
+
+static inline u64 mem_prot_to_triggers(unsigned long prot, bool private)
+{
+	u64 ret = LANDLOCK_TRIGGER_FS_PICK_MAP;
+
+	/* private mapping do not write to files */
+	if (!private && (prot & PROT_WRITE))
+		ret |= LANDLOCK_TRIGGER_FS_PICK_WRITE;
+	if (prot & PROT_READ)
+		ret |= LANDLOCK_TRIGGER_FS_PICK_READ;
+	if (prot & PROT_EXEC)
+		ret |= LANDLOCK_TRIGGER_FS_PICK_EXECUTE;
+	WARN_ON(!ret);
+	return ret;
+}
+
+/* binder hooks */
+
+static int hook_binder_transfer_file(struct task_struct *from,
+		struct task_struct *to, struct file *file)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!file))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_TRANSFER,
+			file_inode(file));
+}
+
+/* sb hooks */
+
+static int hook_sb_statfs(struct dentry *dentry)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!dentry))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_GETATTR,
+			dentry->d_inode);
+}
+
+/* TODO: handle mount source and remount */
+static int hook_sb_mount(const char *dev_name, const struct path *path,
+		const char *type, unsigned long flags, void *data)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!path))
+		return 0;
+	if (WARN_ON(!path->dentry))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_MOUNTON,
+			path->dentry->d_inode);
+}
+
+/*
+ * The @old_path is similar to a destination mount point.
+ */
+static int hook_sb_pivotroot(const struct path *old_path,
+		const struct path *new_path)
+{
+	int err;
+
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!old_path))
+		return 0;
+	if (WARN_ON(!old_path->dentry))
+		return 0;
+	err = decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_MOUNTON,
+			old_path->dentry->d_inode);
+	if (err)
+		return err;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_CHROOT,
+			new_path->dentry->d_inode);
+}
+
+/* inode hooks */
+
+/* a directory inode contains only one dentry */
+static int hook_inode_create(struct inode *dir, struct dentry *dentry,
+		umode_t mode)
+{
+	if (!landlocked(current))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_CREATE, dir);
+}
+
+static int hook_inode_link(struct dentry *old_dentry, struct inode *dir,
+		struct dentry *new_dentry)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!old_dentry)) {
+		int ret = decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_LINK,
+				old_dentry->d_inode);
+		if (ret)
+			return ret;
+	}
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_LINKTO, dir);
+}
+
+static int hook_inode_unlink(struct inode *dir, struct dentry *dentry)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!dentry))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_UNLINK,
+			dentry->d_inode);
+}
+
+static int hook_inode_symlink(struct inode *dir, struct dentry *dentry,
+		const char *old_name)
+{
+	if (!landlocked(current))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_CREATE, dir);
+}
+
+static int hook_inode_mkdir(struct inode *dir, struct dentry *dentry,
+		umode_t mode)
+{
+	if (!landlocked(current))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_CREATE, dir);
+}
+
+static int hook_inode_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!dentry))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_RMDIR, dentry->d_inode);
+}
+
+static int hook_inode_mknod(struct inode *dir, struct dentry *dentry,
+		umode_t mode, dev_t dev)
+{
+	if (!landlocked(current))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_CREATE, dir);
+}
+
+static int hook_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
+		struct inode *new_dir, struct dentry *new_dentry)
+{
+	if (!landlocked(current))
+		return 0;
+	/* TODO: add artificial walk session from old_dir to old_dentry */
+	if (!WARN_ON(!old_dentry)) {
+		int ret = decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_RENAME,
+				old_dentry->d_inode);
+		if (ret)
+			return ret;
+	}
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_RENAMETO, new_dir);
+}
+
+static int hook_inode_readlink(struct dentry *dentry)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!dentry))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_READ, dentry->d_inode);
+}
+
+/*
+ * ignore the inode_follow_link hook (could set is_symlink in the fs_walk
+ * context)
+ */
+
+static int hook_inode_permission(struct inode *inode, int mask)
+{
+	u64 triggers;
+
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!inode))
+		return 0;
+
+	triggers = fs_may_to_triggers(mask, inode->i_mode);
+	/*
+	 * decide_fs_walk() is exclusive with decide_fs_pick(): in a path walk,
+	 * ignore execute-only access on directory for any fs_pick program
+	 */
+	if (triggers == LANDLOCK_TRIGGER_FS_PICK_EXECUTE &&
+			S_ISDIR(inode->i_mode))
+		return decide_fs_walk(mask, inode);
+
+	return decide_fs_pick(triggers, inode);
+}
+
+static int hook_inode_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!dentry))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_SETATTR,
+			dentry->d_inode);
+}
+
+static int hook_inode_getattr(const struct path *path)
+{
+	/* TODO: link parent inode and path */
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!path))
+		return 0;
+	if (WARN_ON(!path->dentry))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_GETATTR,
+			path->dentry->d_inode);
+}
+
+static int hook_inode_setxattr(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!dentry))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_SETATTR,
+			dentry->d_inode);
+}
+
+static int hook_inode_getxattr(struct dentry *dentry, const char *name)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!dentry))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_GETATTR,
+			dentry->d_inode);
+}
+
+static int hook_inode_listxattr(struct dentry *dentry)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!dentry))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_GETATTR,
+			dentry->d_inode);
+}
+
+static int hook_inode_removexattr(struct dentry *dentry, const char *name)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!dentry))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_SETATTR,
+			dentry->d_inode);
+}
+
+static int hook_inode_getsecurity(struct inode *inode, const char *name,
+		void **buffer, bool alloc)
+{
+	if (!landlocked(current))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_GETATTR, inode);
+}
+
+static int hook_inode_setsecurity(struct inode *inode, const char *name,
+		const void *value, size_t size, int flag)
+{
+	if (!landlocked(current))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_SETATTR, inode);
+}
+
+static int hook_inode_listsecurity(struct inode *inode, char *buffer,
+		size_t buffer_size)
+{
+	if (!landlocked(current))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_GETATTR, inode);
+}
+
+/* file hooks */
+
+static int hook_file_ioctl(struct file *file, unsigned int cmd,
+		unsigned long arg)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!file))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_IOCTL,
+			file_inode(file));
+}
+
+static int hook_file_lock(struct file *file, unsigned int cmd)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!file))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_LOCK, file_inode(file));
+}
+
+static int hook_file_fcntl(struct file *file, unsigned int cmd,
+		unsigned long arg)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!file))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_FCNTL,
+			file_inode(file));
+}
+
+static int hook_mmap_file(struct file *file, unsigned long reqprot,
+		unsigned long prot, unsigned long flags)
+{
+	if (!landlocked(current))
+		return 0;
+	/* file can be null for anonymous mmap */
+	if (!file)
+		return 0;
+	return decide_fs_pick(mem_prot_to_triggers(prot, flags & MAP_PRIVATE),
+			file_inode(file));
+}
+
+static int hook_file_mprotect(struct vm_area_struct *vma,
+		unsigned long reqprot, unsigned long prot)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!vma))
+		return 0;
+	if (!vma->vm_file)
+		return 0;
+	return decide_fs_pick(mem_prot_to_triggers(prot,
+				!(vma->vm_flags & VM_SHARED)),
+			file_inode(vma->vm_file));
+}
+
+static int hook_file_receive(struct file *file)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!file))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_RECEIVE,
+			file_inode(file));
+}
+
+static struct security_hook_list landlock_hooks[] = {
+	LSM_HOOK_INIT(binder_transfer_file, hook_binder_transfer_file),
+
+	LSM_HOOK_INIT(sb_statfs, hook_sb_statfs),
+	LSM_HOOK_INIT(sb_mount, hook_sb_mount),
+	LSM_HOOK_INIT(sb_pivotroot, hook_sb_pivotroot),
+
+	LSM_HOOK_INIT(inode_create, hook_inode_create),
+	LSM_HOOK_INIT(inode_link, hook_inode_link),
+	LSM_HOOK_INIT(inode_unlink, hook_inode_unlink),
+	LSM_HOOK_INIT(inode_symlink, hook_inode_symlink),
+	LSM_HOOK_INIT(inode_mkdir, hook_inode_mkdir),
+	LSM_HOOK_INIT(inode_rmdir, hook_inode_rmdir),
+	LSM_HOOK_INIT(inode_mknod, hook_inode_mknod),
+	LSM_HOOK_INIT(inode_rename, hook_inode_rename),
+	LSM_HOOK_INIT(inode_readlink, hook_inode_readlink),
+	LSM_HOOK_INIT(inode_permission, hook_inode_permission),
+	LSM_HOOK_INIT(inode_setattr, hook_inode_setattr),
+	LSM_HOOK_INIT(inode_getattr, hook_inode_getattr),
+	LSM_HOOK_INIT(inode_setxattr, hook_inode_setxattr),
+	LSM_HOOK_INIT(inode_getxattr, hook_inode_getxattr),
+	LSM_HOOK_INIT(inode_listxattr, hook_inode_listxattr),
+	LSM_HOOK_INIT(inode_removexattr, hook_inode_removexattr),
+	LSM_HOOK_INIT(inode_getsecurity, hook_inode_getsecurity),
+	LSM_HOOK_INIT(inode_setsecurity, hook_inode_setsecurity),
+	LSM_HOOK_INIT(inode_listsecurity, hook_inode_listsecurity),
+
+	/* do not handle file_permission for now */
+	LSM_HOOK_INIT(file_ioctl, hook_file_ioctl),
+	LSM_HOOK_INIT(file_lock, hook_file_lock),
+	LSM_HOOK_INIT(file_fcntl, hook_file_fcntl),
+	LSM_HOOK_INIT(mmap_file, hook_mmap_file),
+	LSM_HOOK_INIT(file_mprotect, hook_file_mprotect),
+	LSM_HOOK_INIT(file_receive, hook_file_receive),
+	/* file_open is not handled, use inode_permission instead */
+};
+
+__init void landlock_add_hooks_fs(void)
+{
+	security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),
+			LANDLOCK_NAME);
+}
diff --git a/security/landlock/hooks_fs.h b/security/landlock/hooks_fs.h
new file mode 100644
index 000000000000..eeae4dcd842f
--- /dev/null
+++ b/security/landlock/hooks_fs.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - filesystem hooks
+ *
+ * Copyright © 2017-2019 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2019 ANSSI
+ */
+
+#include <linux/bpf.h> /* enum bpf_access_type */
+
+__init void landlock_add_hooks_fs(void);
+
+/* fs_pick */
+
+struct landlock_hook_ctx_fs_pick;
+
+bool landlock_is_valid_access_fs_pick(int off, enum bpf_access_type type,
+		enum bpf_reg_type *reg_type, int *max_size);
+
+const struct landlock_ctx_fs_pick *landlock_get_ctx_fs_pick(
+		const struct landlock_hook_ctx_fs_pick *hook_ctx);
+
+/* fs_walk */
+
+struct landlock_hook_ctx_fs_walk;
+
+bool landlock_is_valid_access_fs_walk(int off, enum bpf_access_type type,
+		enum bpf_reg_type *reg_type, int *max_size);
+
+const struct landlock_ctx_fs_walk *landlock_get_ctx_fs_walk(
+		const struct landlock_hook_ctx_fs_walk *hook_ctx);
diff --git a/security/landlock/init.c b/security/landlock/init.c
index 03073cd0fc4e..68def2a7af71 100644
--- a/security/landlock/init.c
+++ b/security/landlock/init.c
@@ -9,8 +9,10 @@
 #include <linux/bpf.h> /* enum bpf_access_type */
 #include <linux/capability.h> /* capable */
 #include <linux/filter.h> /* struct bpf_prog */
+#include <linux/lsm_hooks.h>
 
 #include "common.h" /* LANDLOCK_* */
+#include "hooks_fs.h"
 
 static bool bpf_landlock_is_valid_access(int off, int size,
 		enum bpf_access_type type, const struct bpf_prog *prog,
@@ -29,6 +31,23 @@ static bool bpf_landlock_is_valid_access(int off, int size,
 	if (size <= 0 || size > sizeof(__u64))
 		return false;
 
+	/* set register type and max size */
+	switch (prog_subtype->landlock_hook.type) {
+	case LANDLOCK_HOOK_FS_PICK:
+		if (!landlock_is_valid_access_fs_pick(off, type, &reg_type,
+					&max_size))
+			return false;
+		break;
+	case LANDLOCK_HOOK_FS_WALK:
+		if (!landlock_is_valid_access_fs_walk(off, type, &reg_type,
+					&max_size))
+			return false;
+		break;
+	default:
+		WARN_ON(1);
+		return false;
+	}
+
 	/* check memory range access */
 	switch (reg_type) {
 	case NOT_INIT:
@@ -98,6 +117,18 @@ static const struct bpf_func_proto *bpf_landlock_func_proto(
 	default:
 		break;
 	}
+
+	switch (hook_type) {
+	case LANDLOCK_HOOK_FS_WALK:
+	case LANDLOCK_HOOK_FS_PICK:
+		switch (func_id) {
+		case BPF_FUNC_inode_map_lookup:
+			return &bpf_inode_map_lookup_proto;
+		default:
+			break;
+		}
+		break;
+	}
 	return NULL;
 }
 
@@ -108,3 +139,19 @@ const struct bpf_verifier_ops landlock_verifier_ops = {
 };
 
 const struct bpf_prog_ops landlock_prog_ops = {};
+
+static int __init landlock_init(void)
+{
+	pr_info(LANDLOCK_NAME ": Initializing (sandbox with seccomp)\n");
+	landlock_add_hooks_fs();
+	return 0;
+}
+
+struct lsm_blob_sizes landlock_blob_sizes __lsm_ro_after_init = {};
+
+DEFINE_LSM(LANDLOCK_NAME) = {
+	.name = LANDLOCK_NAME,
+	.order = LSM_ORDER_LAST,
+	.blobs = &landlock_blob_sizes,
+	.init = landlock_init,
+};
diff --git a/security/security.c b/security/security.c
index f493db0bf62a..05a23995407d 100644
--- a/security/security.c
+++ b/security/security.c
@@ -263,6 +263,21 @@ static void __init ordered_lsm_parse(const char *order, const char *origin)
 		}
 	}
 
+	/*
+	 * In case of an unprivileged access-control, we don't want to give the
+	 * ability to any process to do some checks (e.g. through an eBPF
+	 * program) on kernel objects (e.g. files) if a privileged security
+	 * policy forbid their access.  We must then load
+	 * potentially-unprivileged security modules after all other LSMs.
+	 *
+	 * LSM_ORDER_LAST is always last and does not appear in the modifiable
+	 * ordered list of enabled LSMs.
+	 */
+	for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) {
+		if (lsm->order == LSM_ORDER_LAST)
+			append_ordered_lsm(lsm, "last");
+	}
+
 	/* Disable all LSMs not in the ordered list. */
 	for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) {
 		if (exists_ordered_lsm(lsm))
-- 
2.20.1

^ permalink raw reply related

* [PATCH bpf-next v9 05/10] bpf,landlock: Add a new map type: inode
From: Mickaël Salaün @ 2019-06-25 21:52 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mickaël Salaün, Aleksa Sarai, Alexander Viro,
	Alexei Starovoitov, Andrew Morton, Andy Lutomirski,
	Arnaldo Carvalho de Melo, Casey Schaufler, Daniel Borkmann,
	David Drysdale, David S . Miller, Eric W . Biederman,
	James Morris, Jann Horn, John Johansen, Jonathan Corbet,
	Kees Cook, Michael Kerrisk
In-Reply-To: <20190625215239.11136-1-mic@digikod.net>

This new map store arbitrary 64-bits values referenced by inode keys.
The map can be updated from user space with file descriptor pointing to
inodes tied to a file system.  From an eBPF (Landlock) program point of
view, such a map is read-only and can only be used to retrieved a
64-bits value tied to a given inode.  This is useful to recognize an
inode tagged by user space, without access right to this inode (i.e. no
need to have a write access to this inode).

Add dedicated BPF functions to handle this type of map:
* bpf_inode_map_update_elem()
* bpf_inode_map_lookup_elem()
* bpf_inode_map_delete_elem()

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: James Morris <jmorris@namei.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Serge E. Hallyn <serge@hallyn.com>
Cc: Jann Horn <jann@thejh.net>
---

Changes since v8:
* remove prog chaining and object tagging to ease review
* use bpf_map_init_from_attr()

Changes since v7:
* new design with a dedicated map and a BPF function to tie a value to
  an inode
* add the ability to set or get a tag on an inode from a Landlock
  program

Changes since v6:
* remove WARN_ON() for missing dentry->d_inode
* refactor bpf_landlock_func_proto() (suggested by Kees Cook)

Changes since v5:
* cosmetic fixes and rebase

Changes since v4:
* use a file abstraction (handle) to wrap inode, dentry, path and file
  structs
* remove bpf_landlock_cmp_fs_beneath()
* rename the BPF helper and move it to kernel/bpf/
* tighten helpers accessible by a Landlock rule

Changes since v3:
* remove bpf_landlock_cmp_fs_prop() (suggested by Alexei Starovoitov)
* add hooks dealing with struct inode and struct path pointers:
  inode_permission and inode_getattr
* add abstraction over eBPF helper arguments thanks to wrapping structs
* add bpf_landlock_get_fs_mode() helper to check file type and mode
* merge WARN_ON() (suggested by Kees Cook)
* fix and update bpf_helpers.h
* use BPF_CALL_* for eBPF helpers (suggested by Alexei Starovoitov)
* make handle arraymap safe (RCU) and remove buggy synchronize_rcu()
* factor out the arraymay walk
* use size_t to index array (suggested by Jann Horn)

Changes since v2:
* add MNT_INTERNAL check to only add file handle from user-visible FS
  (e.g. no anonymous inode)
* replace struct file* with struct path* in map_landlock_handle
* add BPF protos
* fix bpf_landlock_cmp_fs_prop_with_struct_file()
---
 include/linux/bpf.h            |   9 +
 include/linux/bpf_types.h      |   3 +
 include/uapi/linux/bpf.h       |  12 +-
 kernel/bpf/Makefile            |   3 +
 kernel/bpf/core.c              |   2 +
 kernel/bpf/inodemap.c          | 315 +++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c           |  27 ++-
 kernel/bpf/verifier.c          |  14 ++
 tools/include/uapi/linux/bpf.h |  12 +-
 tools/lib/bpf/libbpf_probes.c  |   1 +
 10 files changed, 395 insertions(+), 3 deletions(-)
 create mode 100644 kernel/bpf/inodemap.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index da167d3afecc..cc72ec18f0f6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -208,6 +208,8 @@ enum bpf_arg_type {
 	ARG_PTR_TO_INT,		/* pointer to int */
 	ARG_PTR_TO_LONG,	/* pointer to long */
 	ARG_PTR_TO_SOCKET,	/* pointer to bpf_sock (fullsock) */
+
+	ARG_PTR_TO_INODE,	/* pointer to a struct inode */
 };
 
 /* type of values returned from helper functions */
@@ -278,6 +280,7 @@ enum bpf_reg_type {
 	PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
 	PTR_TO_TP_BUFFER,	 /* reg points to a writable raw tp's buffer */
 	PTR_TO_XDP_SOCK,	 /* reg points to struct xdp_sock */
+	PTR_TO_INODE,		 /* reg points to struct inode */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -485,6 +488,7 @@ struct bpf_event_entry {
 	struct rcu_head rcu;
 };
 
+
 bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
 int bpf_prog_calc_tag(struct bpf_prog *fp);
 
@@ -689,6 +693,10 @@ int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
 int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
 				void *key, void *value, u64 map_flags);
 int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
+int bpf_inode_map_update_elem(struct bpf_map *map, int *key, u64 *value,
+			      u64 flags);
+int bpf_inode_map_lookup_elem(struct bpf_map *map, int *key, u64 *value);
+int bpf_inode_map_delete_elem(struct bpf_map *map, int *key);
 
 int bpf_get_file_flag(int flags);
 int bpf_check_uarg_tail_zero(void __user *uaddr, size_t expected_size,
@@ -1059,6 +1067,7 @@ extern const struct bpf_func_proto bpf_spin_unlock_proto;
 extern const struct bpf_func_proto bpf_get_local_storage_proto;
 extern const struct bpf_func_proto bpf_strtol_proto;
 extern const struct bpf_func_proto bpf_strtoul_proto;
+extern const struct bpf_func_proto bpf_inode_map_lookup_proto;
 
 /* Shared helpers among cBPF and eBPF. */
 void bpf_user_rnd_init_once(void);
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index dee8b82e31b1..9e385473b57a 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -79,3 +79,6 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
+#ifdef CONFIG_SECURITY_LANDLOCK
+BPF_MAP_TYPE(BPF_MAP_TYPE_INODE, inode_ops)
+#endif
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 50145d448bc3..08ff720835ba 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -134,6 +134,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_QUEUE,
 	BPF_MAP_TYPE_STACK,
 	BPF_MAP_TYPE_SK_STORAGE,
+	BPF_MAP_TYPE_INODE,
 };
 
 /* Note that tracing related programs such as
@@ -2716,6 +2717,14 @@ union bpf_attr {
  *		**-EPERM** if no permission to send the *sig*.
  *
  *		**-EAGAIN** if bpf program can try again.
+ *
+ * u64 bpf_inode_map_lookup(map, key)
+ * 	Description
+ * 		Perform a lookup in *map* for an entry associated to an inode
+ * 		*key*.
+ * 	Return
+ * 		Map value associated to *key*, or **NULL** if no entry was
+ * 		found.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2827,7 +2836,8 @@ union bpf_attr {
 	FN(strtoul),			\
 	FN(sk_storage_get),		\
 	FN(sk_storage_delete),		\
-	FN(send_signal),
+	FN(send_signal),		\
+	FN(inode_map_lookup),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 29d781061cd5..e6fe613b3105 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -22,3 +22,6 @@ obj-$(CONFIG_CGROUP_BPF) += cgroup.o
 ifeq ($(CONFIG_INET),y)
 obj-$(CONFIG_BPF_SYSCALL) += reuseport_array.o
 endif
+ifeq ($(CONFIG_SECURITY_LANDLOCK),y)
+obj-$(CONFIG_BPF_SYSCALL) += inodemap.o
+endif
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 8ad392e52328..3cf5d16a8496 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2032,6 +2032,8 @@ const struct bpf_func_proto bpf_get_current_comm_proto __weak;
 const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
 const struct bpf_func_proto bpf_get_local_storage_proto __weak;
 
+const struct bpf_func_proto bpf_inode_map_update_proto __weak;
+
 const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 {
 	return NULL;
diff --git a/kernel/bpf/inodemap.c b/kernel/bpf/inodemap.c
new file mode 100644
index 000000000000..fcad0de51557
--- /dev/null
+++ b/kernel/bpf/inodemap.c
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * inode map for Landlock
+ *
+ * Copyright © 2017-2019 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2019 ANSSI
+ */
+
+#include <asm/resource.h> /* RLIMIT_NOFILE */
+#include <linux/bpf.h>
+#include <linux/err.h>
+#include <linux/file.h> /* fput() */
+#include <linux/filter.h> /* BPF_CALL_2() */
+#include <linux/fs.h> /* struct file */
+#include <linux/mm.h>
+#include <linux/mount.h> /* MNT_INTERNAL */
+#include <linux/path.h> /* struct path */
+#include <linux/sched/signal.h> /* rlimit() */
+#include <linux/security.h>
+#include <linux/slab.h>
+
+struct inode_elem {
+	struct inode *inode;
+	u64 value;
+};
+
+struct inode_array {
+	struct bpf_map map;
+	size_t nb_entries;
+	struct inode_elem elems[0];
+};
+
+/* must call iput(inode) after this call */
+static struct inode *inode_from_fd(int ufd, bool check_access)
+{
+	struct inode *ret;
+	struct fd f;
+	int deny;
+
+	f = fdget(ufd);
+	if (unlikely(!f.file || !file_inode(f.file))) {
+		ret = ERR_PTR(-EBADF);
+		goto put_fd;
+	}
+	/* TODO: add this check when called from an eBPF program too (already
+	 * checked by the LSM parent hooks anyway) */
+	if (unlikely(IS_PRIVATE(file_inode(f.file)))) {
+		ret = ERR_PTR(-EINVAL);
+		goto put_fd;
+	}
+	/* check if the FD is tied to a mount point */
+	/* TODO: add this check when called from an eBPF program too */
+	if (unlikely(!f.file->f_path.mnt || f.file->f_path.mnt->mnt_flags &
+				MNT_INTERNAL)) {
+		ret = ERR_PTR(-EINVAL);
+		goto put_fd;
+	}
+	if (check_access) {
+		/*
+		 * need to be allowed to access attributes from this file to
+		 * then be able to compare an inode to this entry
+		 */
+		deny = security_inode_getattr(&f.file->f_path);
+		if (deny) {
+			ret = ERR_PTR(deny);
+			goto put_fd;
+		}
+	}
+	ret = file_inode(f.file);
+	ihold(ret);
+
+put_fd:
+	fdput(f);
+	return ret;
+}
+
+/* (never) called from eBPF program */
+static int fake_map_delete_elem(struct bpf_map *map, void *key)
+{
+	WARN_ON(1);
+	return -EINVAL;
+}
+
+/* called from syscall */
+static int sys_inode_map_delete_elem(struct bpf_map *map, struct inode *key)
+{
+	struct inode_array *array = container_of(map, struct inode_array, map);
+	struct inode *inode;
+	int i;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	for (i = 0; i < array->map.max_entries; i++) {
+		if (array->elems[i].inode == key) {
+			inode = xchg(&array->elems[i].inode, NULL);
+			array->nb_entries--;
+			iput(inode);
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
+/* called from syscall */
+int bpf_inode_map_delete_elem(struct bpf_map *map, int *key)
+{
+	struct inode *inode;
+	int err;
+
+	inode = inode_from_fd(*key, false);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+	err = sys_inode_map_delete_elem(map, inode);
+	iput(inode);
+	return err;
+}
+
+static void inode_map_free(struct bpf_map *map)
+{
+	struct inode_array *array = container_of(map, struct inode_array, map);
+	int i;
+
+	synchronize_rcu();
+	for (i = 0; i < array->map.max_entries; i++)
+		iput(array->elems[i].inode);
+	bpf_map_area_free(array);
+}
+
+static struct bpf_map *inode_map_alloc(union bpf_attr *attr)
+{
+	int numa_node = bpf_map_attr_numa_node(attr);
+	struct inode_array *array;
+	u64 array_size;
+
+	/* only allow root to create this type of map (for now), should be
+	 * removed when Landlock will be usable by unprivileged users */
+	if (!capable(CAP_SYS_ADMIN))
+		return ERR_PTR(-EPERM);
+
+	/* the key is a file descriptor and the value must be 64-bits (for
+	 * now) */
+	if (attr->max_entries == 0 || attr->key_size != sizeof(u32) ||
+	    attr->value_size != FIELD_SIZEOF(struct inode_elem, value) ||
+	    attr->map_flags & ~(BPF_F_RDONLY | BPF_F_WRONLY) ||
+	    numa_node != NUMA_NO_NODE)
+		return ERR_PTR(-EINVAL);
+
+	if (attr->value_size > KMALLOC_MAX_SIZE)
+		/* if value_size is bigger, the user space won't be able to
+		 * access the elements.
+		 */
+		return ERR_PTR(-E2BIG);
+
+	/*
+	 * Limit number of entries in an inode map to the maximum number of
+	 * open files for the current process. The maximum number of file
+	 * references (including all inode maps) for a process is then
+	 * (RLIMIT_NOFILE - 1) * RLIMIT_NOFILE. If the process' RLIMIT_NOFILE
+	 * is 0, then any entry update is forbidden.
+	 *
+	 * An eBPF program can inherit all the inode map FD. The worse case is
+	 * to fill a bunch of arraymaps, create an eBPF program, close the
+	 * inode map FDs, and start again. The maximum number of inode map
+	 * entries can then be close to RLIMIT_NOFILE^3.
+	 */
+	if (attr->max_entries > rlimit(RLIMIT_NOFILE))
+		return ERR_PTR(-EMFILE);
+
+	array_size = sizeof(*array);
+	array_size += (u64) attr->max_entries * sizeof(struct inode_elem);
+
+	/* make sure there is no u32 overflow later in round_up() */
+	if (array_size >= U32_MAX - PAGE_SIZE)
+		return ERR_PTR(-ENOMEM);
+
+	/* allocate all map elements and zero-initialize them */
+	array = bpf_map_area_alloc(array_size, numa_node);
+	if (!array)
+		return ERR_PTR(-ENOMEM);
+
+	/* copy mandatory map attributes */
+	bpf_map_init_from_attr(&array->map, attr);
+	array->map.memory.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;
+
+	return &array->map;
+}
+
+/* (never) called from eBPF program */
+static void *fake_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	WARN_ON(1);
+	return ERR_PTR(-EINVAL);
+}
+
+/* called from syscall (wrapped) and eBPF program */
+static u64 inode_map_lookup_elem(struct bpf_map *map, struct inode *key)
+{
+	struct inode_array *array = container_of(map, struct inode_array, map);
+	size_t i;
+	u64 ret = 0;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	/* TODO: use rbtree to switch to O(log n) */
+	for (i = 0; i < array->map.max_entries; i++) {
+		if (array->elems[i].inode == key) {
+			ret = array->elems[i].value;
+			break;
+		}
+	}
+	return ret;
+}
+
+/*
+ * The key is a FD when called from a syscall, but an inode pointer when called
+ * from an eBPF program.
+ */
+
+/* called from syscall */
+int bpf_inode_map_lookup_elem(struct bpf_map *map, int *key, u64 *value)
+{
+	struct inode *inode;
+
+	inode = inode_from_fd(*key, false);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+	*value = inode_map_lookup_elem(map, inode);
+	iput(inode);
+	if (!value)
+		return -ENOENT;
+	return 0;
+}
+
+/* (never) called from eBPF program */
+static int fake_map_update_elem(struct bpf_map *map, void *key, void *value,
+				u64 flags)
+{
+	WARN_ON(1);
+	/* do not leak an inode accessed by a Landlock program */
+	return -EINVAL;
+}
+
+/* called from syscall */
+static int sys_inode_map_update_elem(struct bpf_map *map, struct inode *key,
+		u64 *value, u64 flags)
+{
+	struct inode_array *array = container_of(map, struct inode_array, map);
+	size_t i;
+
+	if (unlikely(flags != BPF_ANY))
+		return -EINVAL;
+
+	if (unlikely(array->nb_entries >= array->map.max_entries))
+		/* all elements were pre-allocated, cannot insert a new one */
+		return -E2BIG;
+
+	for (i = 0; i < array->map.max_entries; i++) {
+		if (!array->elems[i].inode) {
+			/* the inode (key) is already grabbed by the caller */
+			ihold(key);
+			array->elems[i].inode = key;
+			array->elems[i].value = *value;
+			array->nb_entries++;
+			return 0;
+		}
+	}
+	WARN_ON(1);
+	return -ENOENT;
+}
+
+/* called from syscall */
+int bpf_inode_map_update_elem(struct bpf_map *map, int *key, u64 *value,
+			      u64 flags)
+{
+	struct inode *inode;
+	int err;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	inode = inode_from_fd(*key, true);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+	err = sys_inode_map_update_elem(map, inode, value, flags);
+	iput(inode);
+	return err;
+}
+
+/* called from syscall or (never) from eBPF program */
+static int fake_map_get_next_key(struct bpf_map *map, void *key,
+				 void *next_key)
+{
+	/* do not leak a file descriptor */
+	return -EINVAL;
+}
+
+/* void map for eBPF program */
+const struct bpf_map_ops inode_ops = {
+	.map_alloc = inode_map_alloc,
+	.map_free = inode_map_free,
+	.map_get_next_key = fake_map_get_next_key,
+	.map_lookup_elem = fake_map_lookup_elem,
+	.map_delete_elem = fake_map_delete_elem,
+	.map_update_elem = fake_map_update_elem,
+};
+
+BPF_CALL_2(bpf_inode_map_lookup, struct bpf_map *, map, void *, key)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	return inode_map_lookup_elem(map, key);
+}
+
+const struct bpf_func_proto bpf_inode_map_lookup_proto = {
+	.func		= bpf_inode_map_lookup,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_INODE,
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7dd3376904d4..ba2a09a7f813 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -720,6 +720,22 @@ static void *__bpf_copy_key(void __user *ukey, u64 key_size)
 	return NULL;
 }
 
+int __weak bpf_inode_map_update_elem(struct bpf_map *map, int *key,
+				     u64 *value, u64 flags)
+{
+	return -ENOTSUPP;
+}
+
+int __weak bpf_inode_map_lookup_elem(struct bpf_map *map, int *key, u64 *value)
+{
+	return -ENOTSUPP;
+}
+
+int __weak bpf_inode_map_delete_elem(struct bpf_map *map, int *key)
+{
+	return -ENOTSUPP;
+}
+
 /* last field in 'union bpf_attr' used by this command */
 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags
 
@@ -801,6 +817,8 @@ static int map_lookup_elem(union bpf_attr *attr)
 	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
 		   map->map_type == BPF_MAP_TYPE_STACK) {
 		err = map->ops->map_peek_elem(map, value);
+	} else if (map->map_type == BPF_MAP_TYPE_INODE) {
+		err = bpf_inode_map_lookup_elem(map, key, value);
 	} else {
 		rcu_read_lock();
 		if (map->ops->map_lookup_elem_sys_only)
@@ -951,6 +969,10 @@ static int map_update_elem(union bpf_attr *attr)
 	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
 		   map->map_type == BPF_MAP_TYPE_STACK) {
 		err = map->ops->map_push_elem(map, value, attr->flags);
+	} else if (map->map_type == BPF_MAP_TYPE_INODE) {
+		rcu_read_lock();
+		err = bpf_inode_map_update_elem(map, key, value, attr->flags);
+		rcu_read_unlock();
 	} else {
 		rcu_read_lock();
 		err = map->ops->map_update_elem(map, key, value, attr->flags);
@@ -1006,7 +1028,10 @@ static int map_delete_elem(union bpf_attr *attr)
 	preempt_disable();
 	__this_cpu_inc(bpf_prog_active);
 	rcu_read_lock();
-	err = map->ops->map_delete_elem(map, key);
+	if (map->map_type == BPF_MAP_TYPE_INODE)
+		err = bpf_inode_map_delete_elem(map, key);
+	else
+		err = map->ops->map_delete_elem(map, key);
 	rcu_read_unlock();
 	__this_cpu_dec(bpf_prog_active);
 	preempt_enable();
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 930260683d0a..ce3cd7fd8882 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -400,6 +400,7 @@ static const char * const reg_type_str[] = {
 	[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
 	[PTR_TO_TP_BUFFER]	= "tp_buffer",
 	[PTR_TO_XDP_SOCK]	= "xdp_sock",
+	[PTR_TO_INODE]		= "inode",
 };
 
 static char slot_type_char[] = {
@@ -1801,6 +1802,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	case PTR_TO_TCP_SOCK:
 	case PTR_TO_TCP_SOCK_OR_NULL:
 	case PTR_TO_XDP_SOCK:
+	case PTR_TO_INODE:
 		return true;
 	default:
 		return false;
@@ -3254,6 +3256,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			verbose(env, "verifier internal error\n");
 			return -EFAULT;
 		}
+	} else if (arg_type == ARG_PTR_TO_INODE) {
+		expected_type = PTR_TO_INODE;
+		if (type != expected_type)
+			goto err_type;
 	} else if (arg_type_is_mem_ptr(arg_type)) {
 		expected_type = PTR_TO_STACK;
 		/* One exception here. In case function allows for NULL to be
@@ -3462,6 +3468,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    func_id != BPF_FUNC_sk_storage_delete)
 			goto error;
 		break;
+	case BPF_MAP_TYPE_INODE:
+		if (func_id != BPF_FUNC_inode_map_lookup)
+			goto error;
+		break;
 	default:
 		break;
 	}
@@ -3530,6 +3540,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
 			goto error;
 		break;
+	case BPF_FUNC_inode_map_lookup:
+		if (map->map_type != BPF_MAP_TYPE_INODE)
+			goto error;
+		break;
 	default:
 		break;
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 50145d448bc3..08ff720835ba 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -134,6 +134,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_QUEUE,
 	BPF_MAP_TYPE_STACK,
 	BPF_MAP_TYPE_SK_STORAGE,
+	BPF_MAP_TYPE_INODE,
 };
 
 /* Note that tracing related programs such as
@@ -2716,6 +2717,14 @@ union bpf_attr {
  *		**-EPERM** if no permission to send the *sig*.
  *
  *		**-EAGAIN** if bpf program can try again.
+ *
+ * u64 bpf_inode_map_lookup(map, key)
+ * 	Description
+ * 		Perform a lookup in *map* for an entry associated to an inode
+ * 		*key*.
+ * 	Return
+ * 		Map value associated to *key*, or **NULL** if no entry was
+ * 		found.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2827,7 +2836,8 @@ union bpf_attr {
 	FN(strtoul),			\
 	FN(sk_storage_get),		\
 	FN(sk_storage_delete),		\
-	FN(send_signal),
+	FN(send_signal),		\
+	FN(inode_map_lookup),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index f4f34cb8869a..000319a95bfb 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -249,6 +249,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
 	case BPF_MAP_TYPE_XSKMAP:
 	case BPF_MAP_TYPE_SOCKHASH:
 	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
+	case BPF_MAP_TYPE_INODE:
 	default:
 		break;
 	}
-- 
2.20.1

^ permalink raw reply related

* [PATCH bpf-next v9 04/10] seccomp,landlock: Enforce Landlock programs per process hierarchy
From: Mickaël Salaün @ 2019-06-25 21:52 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mickaël Salaün, Aleksa Sarai, Alexander Viro,
	Alexei Starovoitov, Andrew Morton, Andy Lutomirski,
	Arnaldo Carvalho de Melo, Casey Schaufler, Daniel Borkmann,
	David Drysdale, David S . Miller, Eric W . Biederman,
	James Morris, Jann Horn, John Johansen, Jonathan Corbet,
	Kees Cook, Michael Kerrisk
In-Reply-To: <20190625215239.11136-1-mic@digikod.net>

The seccomp(2) syscall can be used by a task to apply a Landlock program
to itself. As a seccomp filter, a Landlock program is enforced for the
current task and all its future children. A program is immutable and a
task can only add new restricting programs to itself, forming a list of
programss.

A Landlock program is tied to a Landlock hook. If the action on a kernel
object is allowed by the other Linux security mechanisms (e.g. DAC,
capabilities, other LSM), then a Landlock hook related to this kind of
object is triggered. The list of programs for this hook is then
evaluated. Each program return a 32-bit value which can deny the action
on a kernel object with a non-zero value. If every programs of the list
return zero, then the action on the object is allowed.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: James Morris <jmorris@namei.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Serge E. Hallyn <serge@hallyn.com>
Cc: Will Drewry <wad@chromium.org>
Link: https://lkml.kernel.org/r/c10a503d-5e35-7785-2f3d-25ed8dd63fab@digikod.net
---

Changes since v8:
* Remove the chaining concept from the eBPF program contexts (chain and
  cookie). We need to keep these subtypes this way to be able to make
  them evolve, though.

Changes since v7:
* handle and verify program chains
* split and rename providers.c to enforce.c and enforce_seccomp.c
* rename LANDLOCK_SUBTYPE_* to LANDLOCK_*

Changes since v6:
* rename some functions with more accurate names to reflect that an eBPF
  program for Landlock could be used for something else than a rule
* reword rule "appending" to "prepending" and explain it
* remove the superfluous no_new_privs check, only check global
  CAP_SYS_ADMIN when prepending a Landlock rule (needed for containers)
* create and use {get,put}_seccomp_landlock() (suggested by Kees Cook)
* replace ifdef with static inlined function (suggested by Kees Cook)
* use get_user() (suggested by Kees Cook)
* replace atomic_t with refcount_t (requested by Kees Cook)
* move struct landlock_{rule,events} from landlock.h to common.h
* cleanup headers

Changes since v5:
* remove struct landlock_node and use a similar inheritance mechanisme
  as seccomp-bpf (requested by Andy Lutomirski)
* rename SECCOMP_ADD_LANDLOCK_RULE to SECCOMP_APPEND_LANDLOCK_RULE
* rename file manager.c to providers.c
* add comments
* typo and cosmetic fixes

Changes since v4:
* merge manager and seccomp patches
* return -EFAULT in seccomp(2) when user_bpf_fd is null to easely check
  if Landlock is supported
* only allow a process with the global CAP_SYS_ADMIN to use Landlock
  (will be lifted in the future)
* add an early check to exit as soon as possible if the current process
  does not have Landlock rules

Changes since v3:
* remove the hard link with seccomp (suggested by Andy Lutomirski and
  Kees Cook):
  * remove the cookie which could imply multiple evaluation of Landlock
    rules
  * remove the origin field in struct landlock_data
* remove documentation fix (merged upstream)
* rename the new seccomp command to SECCOMP_ADD_LANDLOCK_RULE
* internal renaming
* split commit
* new design to be able to inherit on the fly the parent rules

Changes since v2:
* Landlock programs can now be run without seccomp filter but for any
  syscall (from the process) or interruption
* move Landlock related functions and structs into security/landlock/*
  (to manage cgroups as well)
* fix seccomp filter handling: run Landlock programs for each of their
  legitimate seccomp filter
* properly clean up all seccomp results
* cosmetic changes to ease the understanding
* fix some ifdef
---
 include/linux/landlock.h            |  34 ++++
 include/linux/seccomp.h             |   5 +
 include/uapi/linux/seccomp.h        |   1 +
 kernel/fork.c                       |   8 +-
 kernel/seccomp.c                    |   4 +
 security/landlock/Makefile          |   3 +-
 security/landlock/common.h          |  45 +++++
 security/landlock/enforce.c         | 272 ++++++++++++++++++++++++++++
 security/landlock/enforce.h         |  18 ++
 security/landlock/enforce_seccomp.c |  92 ++++++++++
 10 files changed, 480 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/landlock.h
 create mode 100644 security/landlock/enforce.c
 create mode 100644 security/landlock/enforce.h
 create mode 100644 security/landlock/enforce_seccomp.c

diff --git a/include/linux/landlock.h b/include/linux/landlock.h
new file mode 100644
index 000000000000..8ac7942f50fc
--- /dev/null
+++ b/include/linux/landlock.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Landlock LSM - public kernel headers
+ *
+ * Copyright © 2016-2019 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2019 ANSSI
+ */
+
+#ifndef _LINUX_LANDLOCK_H
+#define _LINUX_LANDLOCK_H
+
+#include <linux/errno.h>
+#include <linux/sched.h> /* task_struct */
+
+#if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_SECURITY_LANDLOCK)
+extern int landlock_seccomp_prepend_prog(unsigned int flags,
+		const int __user *user_bpf_fd);
+extern void put_seccomp_landlock(struct task_struct *tsk);
+extern void get_seccomp_landlock(struct task_struct *tsk);
+#else /* CONFIG_SECCOMP_FILTER && CONFIG_SECURITY_LANDLOCK */
+static inline int landlock_seccomp_prepend_prog(unsigned int flags,
+		const int __user *user_bpf_fd)
+{
+		return -EINVAL;
+}
+static inline void put_seccomp_landlock(struct task_struct *tsk)
+{
+}
+static inline void get_seccomp_landlock(struct task_struct *tsk)
+{
+}
+#endif /* CONFIG_SECCOMP_FILTER && CONFIG_SECURITY_LANDLOCK */
+
+#endif /* _LINUX_LANDLOCK_H */
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 84868d37b35d..106a0ceff3d7 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -11,6 +11,7 @@
 
 #ifdef CONFIG_SECCOMP
 
+#include <linux/landlock.h>
 #include <linux/thread_info.h>
 #include <asm/seccomp.h>
 
@@ -22,6 +23,7 @@ struct seccomp_filter;
  *         system calls available to a process.
  * @filter: must always point to a valid seccomp-filter or NULL as it is
  *          accessed without locking during system call entry.
+ * @landlock_prog_set: contains a set of Landlock programs.
  *
  *          @filter must only be accessed from the context of current as there
  *          is no read locking.
@@ -29,6 +31,9 @@ struct seccomp_filter;
 struct seccomp {
 	int mode;
 	struct seccomp_filter *filter;
+#if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_SECURITY_LANDLOCK)
+	struct landlock_prog_set *landlock_prog_set;
+#endif /* CONFIG_SECCOMP_FILTER && CONFIG_SECURITY_LANDLOCK */
 };
 
 #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 90734aa5aa36..bce6534e7feb 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -16,6 +16,7 @@
 #define SECCOMP_SET_MODE_FILTER		1
 #define SECCOMP_GET_ACTION_AVAIL	2
 #define SECCOMP_GET_NOTIF_SIZES		3
+#define SECCOMP_PREPEND_LANDLOCK_PROG	4
 
 /* Valid flags for SECCOMP_SET_MODE_FILTER */
 #define SECCOMP_FILTER_FLAG_TSYNC		(1UL << 0)
diff --git a/kernel/fork.c b/kernel/fork.c
index 75675b9bf6df..a1ad5e80611b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -51,6 +51,7 @@
 #include <linux/security.h>
 #include <linux/hugetlb.h>
 #include <linux/seccomp.h>
+#include <linux/landlock.h>
 #include <linux/swap.h>
 #include <linux/syscalls.h>
 #include <linux/jiffies.h>
@@ -454,6 +455,7 @@ void free_task(struct task_struct *tsk)
 	rt_mutex_debug_task_free(tsk);
 	ftrace_graph_exit_task(tsk);
 	put_seccomp_filter(tsk);
+	put_seccomp_landlock(tsk);
 	arch_release_task_struct(tsk);
 	if (tsk->flags & PF_KTHREAD)
 		free_kthread_struct(tsk);
@@ -884,7 +886,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	 * the usage counts on the error path calling free_task.
 	 */
 	tsk->seccomp.filter = NULL;
-#endif
+#ifdef CONFIG_SECURITY_LANDLOCK
+	tsk->seccomp.landlock_prog_set = NULL;
+#endif /* CONFIG_SECURITY_LANDLOCK */
+#endif /* CONFIG_SECCOMP */
 
 	setup_thread_stack(tsk, orig);
 	clear_user_return_notifier(tsk);
@@ -1598,6 +1603,7 @@ static void copy_seccomp(struct task_struct *p)
 
 	/* Ref-count the new filter user, and assign it. */
 	get_seccomp_filter(current);
+	get_seccomp_landlock(current);
 	p->seccomp = current->seccomp;
 
 	/*
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 811b4a86cdf6..e5005a644b23 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -41,6 +41,7 @@
 #include <linux/tracehook.h>
 #include <linux/uaccess.h>
 #include <linux/anon_inodes.h>
+#include <linux/landlock.h>
 
 enum notify_state {
 	SECCOMP_NOTIFY_INIT,
@@ -1397,6 +1398,9 @@ static long do_seccomp(unsigned int op, unsigned int flags,
 			return -EINVAL;
 
 		return seccomp_get_notif_sizes(uargs);
+	case SECCOMP_PREPEND_LANDLOCK_PROG:
+		return landlock_seccomp_prepend_prog(flags,
+				(const int __user *)uargs);
 	default:
 		return -EINVAL;
 	}
diff --git a/security/landlock/Makefile b/security/landlock/Makefile
index 7205f9a7a2ee..2a1a7082a365 100644
--- a/security/landlock/Makefile
+++ b/security/landlock/Makefile
@@ -1,3 +1,4 @@
 obj-$(CONFIG_SECURITY_LANDLOCK) := landlock.o
 
-landlock-y := init.o
+landlock-y := init.o \
+	enforce.o enforce_seccomp.o
diff --git a/security/landlock/common.h b/security/landlock/common.h
index fd63ed1592a7..0c9b5904e7f5 100644
--- a/security/landlock/common.h
+++ b/security/landlock/common.h
@@ -23,4 +23,49 @@
 #define _LANDLOCK_TRIGGER_FS_PICK_LAST	LANDLOCK_TRIGGER_FS_PICK_WRITE
 #define _LANDLOCK_TRIGGER_FS_PICK_MASK	((_LANDLOCK_TRIGGER_FS_PICK_LAST << 1ULL) - 1)
 
+extern struct lsm_blob_sizes landlock_blob_sizes;
+
+struct landlock_prog_list {
+	struct landlock_prog_list *prev;
+	struct bpf_prog *prog;
+	refcount_t usage;
+};
+
+/**
+ * struct landlock_prog_set - Landlock programs enforced on a thread
+ *
+ * This is used for low performance impact when forking a process. Instead of
+ * copying the full array and incrementing the usage of each entries, only
+ * create a pointer to &struct landlock_prog_set and increments its usage. When
+ * prepending a new program, if &struct landlock_prog_set is shared with other
+ * tasks, then duplicate it and prepend the program to this new &struct
+ * landlock_prog_set.
+ *
+ * @usage: reference count to manage the object lifetime. When a thread need to
+ *	   add Landlock programs and if @usage is greater than 1, then the
+ *	   thread must duplicate &struct landlock_prog_set to not change the
+ *	   children's programs as well.
+ * @programs: array of non-NULL &struct landlock_prog_list pointers
+ */
+struct landlock_prog_set {
+	struct landlock_prog_list *programs[_LANDLOCK_HOOK_LAST];
+	refcount_t usage;
+};
+
+/**
+ * get_index - get an index for the programs of struct landlock_prog_set
+ *
+ * @type: a Landlock hook type
+ */
+static inline int get_index(enum landlock_hook_type type)
+{
+	/* type ID > 0 for loaded programs */
+	return type - 1;
+}
+
+static inline enum landlock_hook_type get_type(struct bpf_prog *prog)
+{
+	return prog->aux->extra->subtype.landlock_hook.type;
+}
+
 #endif /* _SECURITY_LANDLOCK_COMMON_H */
diff --git a/security/landlock/enforce.c b/security/landlock/enforce.c
new file mode 100644
index 000000000000..c06063d9d43d
--- /dev/null
+++ b/security/landlock/enforce.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - enforcing helpers
+ *
+ * Copyright © 2016-2019 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2019 ANSSI
+ */
+
+#include <asm/barrier.h> /* smp_store_release() */
+#include <asm/page.h> /* PAGE_SIZE */
+#include <linux/bpf.h> /* bpf_prog_put() */
+#include <linux/compiler.h> /* READ_ONCE() */
+#include <linux/err.h> /* PTR_ERR() */
+#include <linux/errno.h>
+#include <linux/filter.h> /* struct bpf_prog */
+#include <linux/refcount.h>
+#include <linux/slab.h> /* alloc(), kfree() */
+
+#include "common.h" /* struct landlock_prog_list */
+
+/* TODO: use a dedicated kmem_cache_alloc() instead of k*alloc() */
+
+static void put_landlock_prog_list(struct landlock_prog_list *prog_list)
+{
+	struct landlock_prog_list *orig = prog_list;
+
+	/* clean up single-reference branches iteratively */
+	while (orig && refcount_dec_and_test(&orig->usage)) {
+		struct landlock_prog_list *freeme = orig;
+
+		if (orig->prog)
+			bpf_prog_put(orig->prog);
+		orig = orig->prev;
+		kfree(freeme);
+	}
+}
+
+void landlock_put_prog_set(struct landlock_prog_set *prog_set)
+{
+	if (prog_set && refcount_dec_and_test(&prog_set->usage)) {
+		size_t i;
+
+		for (i = 0; i < ARRAY_SIZE(prog_set->programs); i++)
+			put_landlock_prog_list(prog_set->programs[i]);
+		kfree(prog_set);
+	}
+}
+
+void landlock_get_prog_set(struct landlock_prog_set *prog_set)
+{
+	if (!prog_set)
+		return;
+	refcount_inc(&prog_set->usage);
+}
+
+static struct landlock_prog_set *new_landlock_prog_set(void)
+{
+	struct landlock_prog_set *ret;
+
+	/* array filled with NULL values */
+	ret = kzalloc(sizeof(*ret), GFP_KERNEL);
+	if (!ret)
+		return ERR_PTR(-ENOMEM);
+	refcount_set(&ret->usage, 1);
+	return ret;
+}
+
+/**
+ * store_landlock_prog - prepend and deduplicate a Landlock prog_list
+ *
+ * Prepend @prog to @init_prog_set while ignoring @prog
+ * if they are already in @ref_prog_set.  Whatever is the result of this
+ * function call, you can call bpf_prog_put(@prog) after.
+ *
+ * @init_prog_set: empty prog_set to prepend to
+ * @ref_prog_set: prog_set to check for duplicate programs
+ * @prog: program to prepend
+ *
+ * Return -errno on error or 0 if @prog was successfully stored.
+ */
+static int store_landlock_prog(struct landlock_prog_set *init_prog_set,
+		const struct landlock_prog_set *ref_prog_set,
+		struct bpf_prog *prog)
+{
+	struct landlock_prog_list *tmp_list = NULL;
+	int err;
+	u32 hook_idx;
+	enum landlock_hook_type last_type;
+	struct bpf_prog *new = prog;
+
+	/* allocate all the memory we need */
+	struct landlock_prog_list *new_list;
+
+	last_type = get_type(new);
+
+	/* ignore duplicate programs */
+	if (ref_prog_set) {
+		struct landlock_prog_list *ref;
+
+		hook_idx = get_index(get_type(new));
+		for (ref = ref_prog_set->programs[hook_idx];
+				ref; ref = ref->prev) {
+			if (ref->prog == new)
+				return -EINVAL;
+		}
+	}
+
+	new = bpf_prog_inc(new);
+	if (IS_ERR(new)) {
+		err = PTR_ERR(new);
+		goto put_tmp_list;
+	}
+	new_list = kzalloc(sizeof(*new_list), GFP_KERNEL);
+	if (!new_list) {
+		bpf_prog_put(new);
+		err = -ENOMEM;
+		goto put_tmp_list;
+	}
+	/* ignore Landlock types in this tmp_list */
+	new_list->prog = new;
+	new_list->prev = tmp_list;
+	refcount_set(&new_list->usage, 1);
+	tmp_list = new_list;
+
+	if (!tmp_list)
+		/* inform user space that this program was already added */
+		return -EEXIST;
+
+	/* properly store the list (without error cases) */
+	while (tmp_list) {
+		struct landlock_prog_list *new_list;
+
+		new_list = tmp_list;
+		tmp_list = tmp_list->prev;
+		/* do not increment the previous prog list usage */
+		hook_idx = get_index(get_type(new_list->prog));
+		new_list->prev = init_prog_set->programs[hook_idx];
+		/* no need to add from the last program to the first because
+		 * each of them are a different Landlock type */
+		smp_store_release(&init_prog_set->programs[hook_idx], new_list);
+	}
+	return 0;
+
+put_tmp_list:
+	put_landlock_prog_list(tmp_list);
+	return err;
+}
+
+/* limit Landlock programs set to 256KB */
+#define LANDLOCK_PROGRAMS_MAX_PAGES (1 << 6)
+
+/**
+ * landlock_prepend_prog - attach a Landlock prog_list to @current_prog_set
+ *
+ * Whatever is the result of this function call, you can call
+ * bpf_prog_put(@prog) after.
+ *
+ * @current_prog_set: landlock_prog_set pointer, must be locked (if needed) to
+ *                    prevent a concurrent put/free. This pointer must not be
+ *                    freed after the call.
+ * @prog: non-NULL Landlock prog_list to prepend to @current_prog_set. @prog
+ *	  will be owned by landlock_prepend_prog() and freed if an error
+ *	  happened.
+ *
+ * Return @current_prog_set or a new pointer when OK. Return a pointer error
+ * otherwise.
+ */
+struct landlock_prog_set *landlock_prepend_prog(
+		struct landlock_prog_set *current_prog_set,
+		struct bpf_prog *prog)
+{
+	struct landlock_prog_set *new_prog_set = current_prog_set;
+	unsigned long pages;
+	int err;
+	size_t i;
+	struct landlock_prog_set tmp_prog_set = {};
+
+	if (prog->type != BPF_PROG_TYPE_LANDLOCK_HOOK)
+		return ERR_PTR(-EINVAL);
+
+	/* validate memory size allocation */
+	pages = prog->pages;
+	if (current_prog_set) {
+		size_t i;
+
+		for (i = 0; i < ARRAY_SIZE(current_prog_set->programs); i++) {
+			struct landlock_prog_list *walker_p;
+
+			for (walker_p = current_prog_set->programs[i];
+					walker_p; walker_p = walker_p->prev)
+				pages += walker_p->prog->pages;
+		}
+		/* count a struct landlock_prog_set if we need to allocate one */
+		if (refcount_read(&current_prog_set->usage) != 1)
+			pages += round_up(sizeof(*current_prog_set), PAGE_SIZE)
+				/ PAGE_SIZE;
+	}
+	if (pages > LANDLOCK_PROGRAMS_MAX_PAGES)
+		return ERR_PTR(-E2BIG);
+
+	/* ensure early that we can allocate enough memory for the new
+	 * prog_lists */
+	err = store_landlock_prog(&tmp_prog_set, current_prog_set, prog);
+	if (err)
+		return ERR_PTR(err);
+
+	/*
+	 * Each task_struct points to an array of prog list pointers.  These
+	 * tables are duplicated when additions are made (which means each
+	 * table needs to be refcounted for the processes using it). When a new
+	 * table is created, all the refcounters on the prog_list are bumped (to
+	 * track each table that references the prog). When a new prog is
+	 * added, it's just prepended to the list for the new table to point
+	 * at.
+	 *
+	 * Manage all the possible errors before this step to not uselessly
+	 * duplicate current_prog_set and avoid a rollback.
+	 */
+	if (!new_prog_set) {
+		/*
+		 * If there is no Landlock program set used by the current task,
+		 * then create a new one.
+		 */
+		new_prog_set = new_landlock_prog_set();
+		if (IS_ERR(new_prog_set))
+			goto put_tmp_lists;
+	} else if (refcount_read(&current_prog_set->usage) > 1) {
+		/*
+		 * If the current task is not the sole user of its Landlock
+		 * program set, then duplicate them.
+		 */
+		new_prog_set = new_landlock_prog_set();
+		if (IS_ERR(new_prog_set))
+			goto put_tmp_lists;
+		for (i = 0; i < ARRAY_SIZE(new_prog_set->programs); i++) {
+			new_prog_set->programs[i] =
+				READ_ONCE(current_prog_set->programs[i]);
+			if (new_prog_set->programs[i])
+				refcount_inc(&new_prog_set->programs[i]->usage);
+		}
+
+		/*
+		 * Landlock program set from the current task will not be freed
+		 * here because the usage is strictly greater than 1. It is
+		 * only prevented to be freed by another task thanks to the
+		 * caller of landlock_prepend_prog() which should be locked if
+		 * needed.
+		 */
+		landlock_put_prog_set(current_prog_set);
+	}
+
+	/* prepend tmp_prog_set to new_prog_set */
+	for (i = 0; i < ARRAY_SIZE(tmp_prog_set.programs); i++) {
+		/* get the last new list */
+		struct landlock_prog_list *last_list =
+			tmp_prog_set.programs[i];
+
+		if (last_list) {
+			while (last_list->prev)
+				last_list = last_list->prev;
+			/* no need to increment usage (pointer replacement) */
+			last_list->prev = new_prog_set->programs[i];
+			new_prog_set->programs[i] = tmp_prog_set.programs[i];
+		}
+	}
+	return new_prog_set;
+
+put_tmp_lists:
+	for (i = 0; i < ARRAY_SIZE(tmp_prog_set.programs); i++)
+		put_landlock_prog_list(tmp_prog_set.programs[i]);
+	return new_prog_set;
+}
diff --git a/security/landlock/enforce.h b/security/landlock/enforce.h
new file mode 100644
index 000000000000..39b800d9999f
--- /dev/null
+++ b/security/landlock/enforce.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - enforcing helpers headers
+ *
+ * Copyright © 2016-2019 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2019 ANSSI
+ */
+
+#ifndef _SECURITY_LANDLOCK_ENFORCE_H
+#define _SECURITY_LANDLOCK_ENFORCE_H
+
+struct landlock_prog_set *landlock_prepend_prog(
+		struct landlock_prog_set *current_prog_set,
+		struct bpf_prog *prog);
+void landlock_put_prog_set(struct landlock_prog_set *prog_set);
+void landlock_get_prog_set(struct landlock_prog_set *prog_set);
+
+#endif /* _SECURITY_LANDLOCK_ENFORCE_H */
diff --git a/security/landlock/enforce_seccomp.c b/security/landlock/enforce_seccomp.c
new file mode 100644
index 000000000000..c38c81e6b01a
--- /dev/null
+++ b/security/landlock/enforce_seccomp.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - enforcing with seccomp
+ *
+ * Copyright © 2016-2018 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2019 ANSSI
+ */
+
+#ifdef CONFIG_SECCOMP_FILTER
+
+#include <linux/bpf.h> /* bpf_prog_put() */
+#include <linux/capability.h>
+#include <linux/err.h> /* PTR_ERR() */
+#include <linux/errno.h>
+#include <linux/filter.h> /* struct bpf_prog */
+#include <linux/landlock.h>
+#include <linux/refcount.h>
+#include <linux/sched.h> /* current */
+#include <linux/uaccess.h> /* get_user() */
+
+#include "enforce.h"
+
+/* headers in include/linux/landlock.h */
+
+/**
+ * landlock_seccomp_prepend_prog - attach a Landlock program to the current
+ *                                 process
+ *
+ * current->seccomp.landlock_state->prog_set is lazily allocated. When a
+ * process fork, only a pointer is copied.  When a new program is added by a
+ * process, if there is other references to this process' prog_set, then a new
+ * allocation is made to contain an array pointing to Landlock program lists.
+ * This design enable low-performance impact and is memory efficient while
+ * keeping the property of prepend-only programs.
+ *
+ * For now, installing a Landlock prog requires that the requesting task has
+ * the global CAP_SYS_ADMIN. We cannot force the use of no_new_privs to not
+ * exclude containers where a process may legitimately acquire more privileges
+ * thanks to an SUID binary.
+ *
+ * @flags: not used for now, but could be used for TSYNC
+ * @user_bpf_fd: file descriptor pointing to a loaded Landlock prog
+ */
+int landlock_seccomp_prepend_prog(unsigned int flags,
+		const int __user *user_bpf_fd)
+{
+	struct landlock_prog_set *new_prog_set;
+	struct bpf_prog *prog;
+	int bpf_fd, err;
+
+	/* planned to be replaced with a no_new_privs check to allow
+	 * unprivileged tasks */
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	/* enable to check if Landlock is supported with early EFAULT */
+	if (!user_bpf_fd)
+		return -EFAULT;
+	if (flags)
+		return -EINVAL;
+	err = get_user(bpf_fd, user_bpf_fd);
+	if (err)
+		return err;
+
+	prog = bpf_prog_get(bpf_fd);
+	if (IS_ERR(prog))
+		return PTR_ERR(prog);
+
+	/*
+	 * We don't need to lock anything for the current process hierarchy,
+	 * everything is guarded by the atomic counters.
+	 */
+	new_prog_set = landlock_prepend_prog(
+			current->seccomp.landlock_prog_set, prog);
+	bpf_prog_put(prog);
+	/* @prog is managed/freed by landlock_prepend_prog() */
+	if (IS_ERR(new_prog_set))
+		return PTR_ERR(new_prog_set);
+	current->seccomp.landlock_prog_set = new_prog_set;
+	return 0;
+}
+
+void put_seccomp_landlock(struct task_struct *tsk)
+{
+	landlock_put_prog_set(tsk->seccomp.landlock_prog_set);
+}
+
+void get_seccomp_landlock(struct task_struct *tsk)
+{
+	landlock_get_prog_set(tsk->seccomp.landlock_prog_set);
+}
+
+#endif /* CONFIG_SECCOMP_FILTER */
-- 
2.20.1

^ permalink raw reply related

* [PATCH bpf-next v9 03/10] bpf,landlock: Define an eBPF program type for Landlock hooks
From: Mickaël Salaün @ 2019-06-25 21:52 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mickaël Salaün, Aleksa Sarai, Alexander Viro,
	Alexei Starovoitov, Andrew Morton, Andy Lutomirski,
	Arnaldo Carvalho de Melo, Casey Schaufler, Daniel Borkmann,
	David Drysdale, David S . Miller, Eric W . Biederman,
	James Morris, Jann Horn, John Johansen, Jonathan Corbet,
	Kees Cook, Michael Kerrisk
In-Reply-To: <20190625215239.11136-1-mic@digikod.net>

Add a new type of eBPF program used by Landlock hooks. This type of
program can be chained with the same eBPF program type (according to
subtype rules). A state can be kept with a value available in the
program's context (e.g. named "cookie" for Landlock programs).

This new BPF program type will be registered with the Landlock LSM
initialization.

Add an initial Landlock Kconfig and update the MAINTAINERS file.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: James Morris <jmorris@namei.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Serge E. Hallyn <serge@hallyn.com>
---

Changes since v8:
* Remove the chaining concept from the eBPF program contexts (chain and
  cookie). We need to keep these subtypes this way to be able to make
  them evolve, though.
* remove bpf_landlock_put_extra() because there is no more a "previous"
  field to free (for now)

Changes since v7:
* cosmetic fixes
* rename LANDLOCK_SUBTYPE_* to LANDLOCK_*
* cleanup UAPI definitions and move them from bpf.h to landlock.h
  (suggested by Alexei Starovoitov)
* disable Landlock by default (suggested by Alexei Starovoitov)
* rename BPF_PROG_TYPE_LANDLOCK_{RULE,HOOK}
* update the Kconfig
* update the MAINTAINERS file
* replace the IOCTL, LOCK and FCNTL events with FS_PICK, FS_WALK and
  FS_GET hook types
* add the ability to chain programs with an eBPF program file descriptor
  (i.e. the "previous" field in a Landlock subtype) and keep a state
  with a "cookie" value available from the context
* add a "triggers" subtype bitfield to match specific actions (e.g.
  append, chdir, read...)

Changes since v6:
* add 3 more sub-events: IOCTL, LOCK, FCNTL
  https://lkml.kernel.org/r/2fbc99a6-f190-f335-bd14-04bdeed35571@digikod.net
* rename LANDLOCK_VERSION to LANDLOCK_ABI to better reflect its purpose,
  and move it from landlock.h to common.h
* rename BPF_PROG_TYPE_LANDLOCK to BPF_PROG_TYPE_LANDLOCK_RULE: an eBPF
  program could be used for something else than a rule
* simplify struct landlock_context by removing the arch and syscall_nr fields
* remove all eBPF map functions call, remove ABILITY_WRITE
* refactor bpf_landlock_func_proto() (suggested by Kees Cook)
* constify pointers
* fix doc inclusion

Changes since v5:
* rename file hooks.c to init.c
* fix spelling

Changes since v4:
* merge a minimal (not enabled) LSM code and Kconfig in this commit

Changes since v3:
* split commit
* revamp the landlock_context:
  * add arch, syscall_nr and syscall_cmd (ioctl, fcntl…) to be able to
    cross-check action with the event type
  * replace args array with dedicated fields to ease the addition of new
    fields
---
 MAINTAINERS                         |  13 ++++
 include/linux/bpf_types.h           |   3 +
 include/uapi/linux/bpf.h            |   1 +
 include/uapi/linux/landlock.h       | 109 +++++++++++++++++++++++++++
 security/Kconfig                    |   1 +
 security/Makefile                   |   2 +
 security/landlock/Kconfig           |  18 +++++
 security/landlock/Makefile          |   3 +
 security/landlock/common.h          |  26 +++++++
 security/landlock/init.c            | 110 ++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h      |   1 +
 tools/include/uapi/linux/landlock.h | 109 +++++++++++++++++++++++++++
 tools/lib/bpf/libbpf.c              |   1 +
 tools/lib/bpf/libbpf_probes.c       |   1 +
 14 files changed, 398 insertions(+)
 create mode 100644 include/uapi/linux/landlock.h
 create mode 100644 security/landlock/Kconfig
 create mode 100644 security/landlock/Makefile
 create mode 100644 security/landlock/common.h
 create mode 100644 security/landlock/init.c
 create mode 100644 tools/include/uapi/linux/landlock.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 606d1f80bc49..4a5edc14ee84 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8807,6 +8807,19 @@ F:	net/core/skmsg.c
 F:	net/core/sock_map.c
 F:	net/ipv4/tcp_bpf.c
 
+LANDLOCK SECURITY MODULE
+M:	Mickaël Salaün <mic@digikod.net>
+S:	Supported
+F:	Documentation/security/landlock/
+F:	include/linux/landlock.h
+F:	include/uapi/linux/landlock.h
+F:	samples/bpf/landlock*
+F:	security/landlock/
+F:	tools/include/uapi/linux/landlock.h
+F:	tools/testing/selftests/landlock/
+K:	landlock
+K:	LANDLOCK
+
 LANTIQ / INTEL Ethernet drivers
 M:	Hauke Mehrtens <hauke@hauke-m.de>
 L:	netdev@vger.kernel.org
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 5a9975678d6f..dee8b82e31b1 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -37,6 +37,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
 #ifdef CONFIG_INET
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
 #endif
+#ifdef CONFIG_SECURITY_LANDLOCK
+BPF_PROG_TYPE(BPF_PROG_TYPE_LANDLOCK_HOOK, landlock)
+#endif
 
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ddae50373d58..50145d448bc3 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -170,6 +170,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_FLOW_DISSECTOR,
 	BPF_PROG_TYPE_CGROUP_SYSCTL,
 	BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
+	BPF_PROG_TYPE_LANDLOCK_HOOK,
 };
 
 enum bpf_attach_type {
diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h
new file mode 100644
index 000000000000..9e6d8e10ec2c
--- /dev/null
+++ b/include/uapi/linux/landlock.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Landlock - UAPI headers
+ *
+ * Copyright © 2017-2019 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2019 ANSSI
+ */
+
+#ifndef _UAPI__LINUX_LANDLOCK_H__
+#define _UAPI__LINUX_LANDLOCK_H__
+
+#include <linux/types.h>
+
+#define LANDLOCK_RET_ALLOW	0
+#define LANDLOCK_RET_DENY	1
+
+/**
+ * enum landlock_hook_type - hook type for which a Landlock program is called
+ *
+ * A hook is a policy decision point which exposes the same context type for
+ * each program evaluation.
+ *
+ * @LANDLOCK_HOOK_FS_PICK: called for the last element of a file path
+ * @LANDLOCK_HOOK_FS_WALK: called for each directory of a file path (excluding
+ *			   the directory passed to fs_pick, if any)
+ */
+enum landlock_hook_type {
+	LANDLOCK_HOOK_FS_PICK = 1,
+	LANDLOCK_HOOK_FS_WALK,
+};
+
+/**
+ * DOC: landlock_triggers
+ *
+ * A landlock trigger is used as a bitmask in subtype.landlock_hook.triggers
+ * for a fs_pick program.  It defines a set of actions for which the program
+ * should verify an access request.
+ *
+ * - %LANDLOCK_TRIGGER_FS_PICK_APPEND
+ * - %LANDLOCK_TRIGGER_FS_PICK_CHDIR
+ * - %LANDLOCK_TRIGGER_FS_PICK_CHROOT
+ * - %LANDLOCK_TRIGGER_FS_PICK_CREATE
+ * - %LANDLOCK_TRIGGER_FS_PICK_EXECUTE
+ * - %LANDLOCK_TRIGGER_FS_PICK_FCNTL
+ * - %LANDLOCK_TRIGGER_FS_PICK_GETATTR
+ * - %LANDLOCK_TRIGGER_FS_PICK_IOCTL
+ * - %LANDLOCK_TRIGGER_FS_PICK_LINK
+ * - %LANDLOCK_TRIGGER_FS_PICK_LINKTO
+ * - %LANDLOCK_TRIGGER_FS_PICK_LOCK
+ * - %LANDLOCK_TRIGGER_FS_PICK_MAP
+ * - %LANDLOCK_TRIGGER_FS_PICK_MOUNTON
+ * - %LANDLOCK_TRIGGER_FS_PICK_OPEN
+ * - %LANDLOCK_TRIGGER_FS_PICK_READ
+ * - %LANDLOCK_TRIGGER_FS_PICK_READDIR
+ * - %LANDLOCK_TRIGGER_FS_PICK_RECEIVE
+ * - %LANDLOCK_TRIGGER_FS_PICK_RENAME
+ * - %LANDLOCK_TRIGGER_FS_PICK_RENAMETO
+ * - %LANDLOCK_TRIGGER_FS_PICK_RMDIR
+ * - %LANDLOCK_TRIGGER_FS_PICK_SETATTR
+ * - %LANDLOCK_TRIGGER_FS_PICK_TRANSFER
+ * - %LANDLOCK_TRIGGER_FS_PICK_UNLINK
+ * - %LANDLOCK_TRIGGER_FS_PICK_WRITE
+ */
+#define LANDLOCK_TRIGGER_FS_PICK_APPEND			(1ULL << 0)
+#define LANDLOCK_TRIGGER_FS_PICK_CHDIR			(1ULL << 1)
+#define LANDLOCK_TRIGGER_FS_PICK_CHROOT			(1ULL << 2)
+#define LANDLOCK_TRIGGER_FS_PICK_CREATE			(1ULL << 3)
+#define LANDLOCK_TRIGGER_FS_PICK_EXECUTE		(1ULL << 4)
+#define LANDLOCK_TRIGGER_FS_PICK_FCNTL			(1ULL << 5)
+#define LANDLOCK_TRIGGER_FS_PICK_GETATTR		(1ULL << 6)
+#define LANDLOCK_TRIGGER_FS_PICK_IOCTL			(1ULL << 7)
+#define LANDLOCK_TRIGGER_FS_PICK_LINK			(1ULL << 8)
+#define LANDLOCK_TRIGGER_FS_PICK_LINKTO			(1ULL << 9)
+#define LANDLOCK_TRIGGER_FS_PICK_LOCK			(1ULL << 10)
+#define LANDLOCK_TRIGGER_FS_PICK_MAP			(1ULL << 11)
+#define LANDLOCK_TRIGGER_FS_PICK_MOUNTON		(1ULL << 12)
+#define LANDLOCK_TRIGGER_FS_PICK_OPEN			(1ULL << 13)
+#define LANDLOCK_TRIGGER_FS_PICK_READ			(1ULL << 14)
+#define LANDLOCK_TRIGGER_FS_PICK_READDIR		(1ULL << 15)
+#define LANDLOCK_TRIGGER_FS_PICK_RECEIVE		(1ULL << 16)
+#define LANDLOCK_TRIGGER_FS_PICK_RENAME			(1ULL << 17)
+#define LANDLOCK_TRIGGER_FS_PICK_RENAMETO		(1ULL << 18)
+#define LANDLOCK_TRIGGER_FS_PICK_RMDIR			(1ULL << 19)
+#define LANDLOCK_TRIGGER_FS_PICK_SETATTR		(1ULL << 20)
+#define LANDLOCK_TRIGGER_FS_PICK_TRANSFER		(1ULL << 21)
+#define LANDLOCK_TRIGGER_FS_PICK_UNLINK			(1ULL << 22)
+#define LANDLOCK_TRIGGER_FS_PICK_WRITE			(1ULL << 23)
+
+/**
+ * struct landlock_ctx_fs_pick - context accessible to a fs_pick program
+ *
+ * @inode: pointer to the current kernel object that can be used to compare
+ *         inodes from an inode map.
+ */
+struct landlock_ctx_fs_pick {
+	__u64 inode;
+};
+
+/**
+ * struct landlock_ctx_fs_walk - context accessible to a fs_walk program
+ *
+ * @inode: pointer to the current kernel object that can be used to compare
+ *         inodes from an inode map.
+ */
+struct landlock_ctx_fs_walk {
+	__u64 inode;
+};
+
+#endif /* _UAPI__LINUX_LANDLOCK_H__ */
diff --git a/security/Kconfig b/security/Kconfig
index 466cc1f8ffed..d3c070a01470 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -237,6 +237,7 @@ source "security/apparmor/Kconfig"
 source "security/loadpin/Kconfig"
 source "security/yama/Kconfig"
 source "security/safesetid/Kconfig"
+source "security/landlock/Kconfig"
 
 source "security/integrity/Kconfig"
 
diff --git a/security/Makefile b/security/Makefile
index c598b904938f..396ff107f70d 100644
--- a/security/Makefile
+++ b/security/Makefile
@@ -11,6 +11,7 @@ subdir-$(CONFIG_SECURITY_APPARMOR)	+= apparmor
 subdir-$(CONFIG_SECURITY_YAMA)		+= yama
 subdir-$(CONFIG_SECURITY_LOADPIN)	+= loadpin
 subdir-$(CONFIG_SECURITY_SAFESETID)    += safesetid
+subdir-$(CONFIG_SECURITY_LANDLOCK)		+= landlock
 
 # always enable default capabilities
 obj-y					+= commoncap.o
@@ -27,6 +28,7 @@ obj-$(CONFIG_SECURITY_APPARMOR)		+= apparmor/
 obj-$(CONFIG_SECURITY_YAMA)		+= yama/
 obj-$(CONFIG_SECURITY_LOADPIN)		+= loadpin/
 obj-$(CONFIG_SECURITY_SAFESETID)       += safesetid/
+obj-$(CONFIG_SECURITY_LANDLOCK)	+= landlock/
 obj-$(CONFIG_CGROUP_DEVICE)		+= device_cgroup.o
 
 # Object integrity file lists
diff --git a/security/landlock/Kconfig b/security/landlock/Kconfig
new file mode 100644
index 000000000000..8bd103102008
--- /dev/null
+++ b/security/landlock/Kconfig
@@ -0,0 +1,18 @@
+config SECURITY_LANDLOCK
+	bool "Landlock support"
+	depends on SECURITY
+	depends on BPF_SYSCALL
+	depends on SECCOMP_FILTER
+	default n
+	help
+	  This selects Landlock, a programmatic access control.  It enables to
+	  restrict processes on the fly (i.e. create a sandbox).  The security
+	  policy is a set of eBPF programs, dedicated to deny a list of actions
+	  on specific kernel objects (e.g. file).
+
+	  You need to enable seccomp filter to apply a security policy to a
+	  process hierarchy (e.g. application with built-in sandboxing).
+
+	  See Documentation/security/landlock/ for further information.
+
+	  If you are unsure how to answer this question, answer N.
diff --git a/security/landlock/Makefile b/security/landlock/Makefile
new file mode 100644
index 000000000000..7205f9a7a2ee
--- /dev/null
+++ b/security/landlock/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_SECURITY_LANDLOCK) := landlock.o
+
+landlock-y := init.o
diff --git a/security/landlock/common.h b/security/landlock/common.h
new file mode 100644
index 000000000000..fd63ed1592a7
--- /dev/null
+++ b/security/landlock/common.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - private headers
+ *
+ * Copyright © 2016-2019 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2019 ANSSI
+ */
+
+#ifndef _SECURITY_LANDLOCK_COMMON_H
+#define _SECURITY_LANDLOCK_COMMON_H
+
+#include <linux/bpf.h> /* enum bpf_prog_aux */
+#include <linux/filter.h> /* bpf_prog */
+#include <linux/refcount.h> /* refcount_t */
+#include <uapi/linux/landlock.h> /* enum landlock_hook_type */
+
+#define LANDLOCK_NAME "landlock"
+
+/* UAPI bounds and bitmasks */
+
+#define _LANDLOCK_HOOK_LAST LANDLOCK_HOOK_FS_WALK
+
+#define _LANDLOCK_TRIGGER_FS_PICK_LAST	LANDLOCK_TRIGGER_FS_PICK_WRITE
+#define _LANDLOCK_TRIGGER_FS_PICK_MASK	((_LANDLOCK_TRIGGER_FS_PICK_LAST << 1ULL) - 1)
+
+#endif /* _SECURITY_LANDLOCK_COMMON_H */
diff --git a/security/landlock/init.c b/security/landlock/init.c
new file mode 100644
index 000000000000..03073cd0fc4e
--- /dev/null
+++ b/security/landlock/init.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - init
+ *
+ * Copyright © 2016-2019 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2019 ANSSI
+ */
+
+#include <linux/bpf.h> /* enum bpf_access_type */
+#include <linux/capability.h> /* capable */
+#include <linux/filter.h> /* struct bpf_prog */
+
+#include "common.h" /* LANDLOCK_* */
+
+static bool bpf_landlock_is_valid_access(int off, int size,
+		enum bpf_access_type type, const struct bpf_prog *prog,
+		struct bpf_insn_access_aux *info)
+{
+	const union bpf_prog_subtype *prog_subtype;
+	enum bpf_reg_type reg_type = NOT_INIT;
+	int max_size = 0;
+
+	if (WARN_ON(!prog->aux->extra))
+		return false;
+	prog_subtype = &prog->aux->extra->subtype;
+
+	if (off < 0)
+		return false;
+	if (size <= 0 || size > sizeof(__u64))
+		return false;
+
+	/* check memory range access */
+	switch (reg_type) {
+	case NOT_INIT:
+		return false;
+	case SCALAR_VALUE:
+		/* allow partial raw value */
+		if (size > max_size)
+			return false;
+		info->ctx_field_size = max_size;
+		break;
+	default:
+		/* deny partial pointer */
+		if (size != max_size)
+			return false;
+	}
+
+	info->reg_type = reg_type;
+	return true;
+}
+
+static bool bpf_landlock_is_valid_subtype(struct bpf_prog_extra *prog_extra)
+{
+	const union bpf_prog_subtype *subtype;
+
+	if (!prog_extra)
+		return false;
+	subtype = &prog_extra->subtype;
+
+	switch (subtype->landlock_hook.type) {
+	case LANDLOCK_HOOK_FS_PICK:
+		if (!subtype->landlock_hook.triggers ||
+				subtype->landlock_hook.triggers &
+				~_LANDLOCK_TRIGGER_FS_PICK_MASK)
+			return false;
+		break;
+	case LANDLOCK_HOOK_FS_WALK:
+		if (subtype->landlock_hook.triggers)
+			return false;
+		break;
+	default:
+		return false;
+	}
+
+	return true;
+}
+
+static const struct bpf_func_proto *bpf_landlock_func_proto(
+		enum bpf_func_id func_id,
+		const struct bpf_prog *prog)
+{
+	u64 hook_type;
+
+	if (WARN_ON(!prog->aux->extra))
+		return NULL;
+	hook_type = prog->aux->extra->subtype.landlock_hook.type;
+
+	/* generic functions */
+	/* TODO: do we need/want update/delete functions for every LL prog?
+	 * => impurity vs. audit */
+	switch (func_id) {
+	case BPF_FUNC_map_lookup_elem:
+		return &bpf_map_lookup_elem_proto;
+	case BPF_FUNC_map_update_elem:
+		return &bpf_map_update_elem_proto;
+	case BPF_FUNC_map_delete_elem:
+		return &bpf_map_delete_elem_proto;
+	default:
+		break;
+	}
+	return NULL;
+}
+
+const struct bpf_verifier_ops landlock_verifier_ops = {
+	.get_func_proto	= bpf_landlock_func_proto,
+	.is_valid_access = bpf_landlock_is_valid_access,
+	.is_valid_subtype = bpf_landlock_is_valid_subtype,
+};
+
+const struct bpf_prog_ops landlock_prog_ops = {};
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index ddae50373d58..50145d448bc3 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -170,6 +170,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_FLOW_DISSECTOR,
 	BPF_PROG_TYPE_CGROUP_SYSCTL,
 	BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
+	BPF_PROG_TYPE_LANDLOCK_HOOK,
 };
 
 enum bpf_attach_type {
diff --git a/tools/include/uapi/linux/landlock.h b/tools/include/uapi/linux/landlock.h
new file mode 100644
index 000000000000..9e6d8e10ec2c
--- /dev/null
+++ b/tools/include/uapi/linux/landlock.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Landlock - UAPI headers
+ *
+ * Copyright © 2017-2019 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2019 ANSSI
+ */
+
+#ifndef _UAPI__LINUX_LANDLOCK_H__
+#define _UAPI__LINUX_LANDLOCK_H__
+
+#include <linux/types.h>
+
+#define LANDLOCK_RET_ALLOW	0
+#define LANDLOCK_RET_DENY	1
+
+/**
+ * enum landlock_hook_type - hook type for which a Landlock program is called
+ *
+ * A hook is a policy decision point which exposes the same context type for
+ * each program evaluation.
+ *
+ * @LANDLOCK_HOOK_FS_PICK: called for the last element of a file path
+ * @LANDLOCK_HOOK_FS_WALK: called for each directory of a file path (excluding
+ *			   the directory passed to fs_pick, if any)
+ */
+enum landlock_hook_type {
+	LANDLOCK_HOOK_FS_PICK = 1,
+	LANDLOCK_HOOK_FS_WALK,
+};
+
+/**
+ * DOC: landlock_triggers
+ *
+ * A landlock trigger is used as a bitmask in subtype.landlock_hook.triggers
+ * for a fs_pick program.  It defines a set of actions for which the program
+ * should verify an access request.
+ *
+ * - %LANDLOCK_TRIGGER_FS_PICK_APPEND
+ * - %LANDLOCK_TRIGGER_FS_PICK_CHDIR
+ * - %LANDLOCK_TRIGGER_FS_PICK_CHROOT
+ * - %LANDLOCK_TRIGGER_FS_PICK_CREATE
+ * - %LANDLOCK_TRIGGER_FS_PICK_EXECUTE
+ * - %LANDLOCK_TRIGGER_FS_PICK_FCNTL
+ * - %LANDLOCK_TRIGGER_FS_PICK_GETATTR
+ * - %LANDLOCK_TRIGGER_FS_PICK_IOCTL
+ * - %LANDLOCK_TRIGGER_FS_PICK_LINK
+ * - %LANDLOCK_TRIGGER_FS_PICK_LINKTO
+ * - %LANDLOCK_TRIGGER_FS_PICK_LOCK
+ * - %LANDLOCK_TRIGGER_FS_PICK_MAP
+ * - %LANDLOCK_TRIGGER_FS_PICK_MOUNTON
+ * - %LANDLOCK_TRIGGER_FS_PICK_OPEN
+ * - %LANDLOCK_TRIGGER_FS_PICK_READ
+ * - %LANDLOCK_TRIGGER_FS_PICK_READDIR
+ * - %LANDLOCK_TRIGGER_FS_PICK_RECEIVE
+ * - %LANDLOCK_TRIGGER_FS_PICK_RENAME
+ * - %LANDLOCK_TRIGGER_FS_PICK_RENAMETO
+ * - %LANDLOCK_TRIGGER_FS_PICK_RMDIR
+ * - %LANDLOCK_TRIGGER_FS_PICK_SETATTR
+ * - %LANDLOCK_TRIGGER_FS_PICK_TRANSFER
+ * - %LANDLOCK_TRIGGER_FS_PICK_UNLINK
+ * - %LANDLOCK_TRIGGER_FS_PICK_WRITE
+ */
+#define LANDLOCK_TRIGGER_FS_PICK_APPEND			(1ULL << 0)
+#define LANDLOCK_TRIGGER_FS_PICK_CHDIR			(1ULL << 1)
+#define LANDLOCK_TRIGGER_FS_PICK_CHROOT			(1ULL << 2)
+#define LANDLOCK_TRIGGER_FS_PICK_CREATE			(1ULL << 3)
+#define LANDLOCK_TRIGGER_FS_PICK_EXECUTE		(1ULL << 4)
+#define LANDLOCK_TRIGGER_FS_PICK_FCNTL			(1ULL << 5)
+#define LANDLOCK_TRIGGER_FS_PICK_GETATTR		(1ULL << 6)
+#define LANDLOCK_TRIGGER_FS_PICK_IOCTL			(1ULL << 7)
+#define LANDLOCK_TRIGGER_FS_PICK_LINK			(1ULL << 8)
+#define LANDLOCK_TRIGGER_FS_PICK_LINKTO			(1ULL << 9)
+#define LANDLOCK_TRIGGER_FS_PICK_LOCK			(1ULL << 10)
+#define LANDLOCK_TRIGGER_FS_PICK_MAP			(1ULL << 11)
+#define LANDLOCK_TRIGGER_FS_PICK_MOUNTON		(1ULL << 12)
+#define LANDLOCK_TRIGGER_FS_PICK_OPEN			(1ULL << 13)
+#define LANDLOCK_TRIGGER_FS_PICK_READ			(1ULL << 14)
+#define LANDLOCK_TRIGGER_FS_PICK_READDIR		(1ULL << 15)
+#define LANDLOCK_TRIGGER_FS_PICK_RECEIVE		(1ULL << 16)
+#define LANDLOCK_TRIGGER_FS_PICK_RENAME			(1ULL << 17)
+#define LANDLOCK_TRIGGER_FS_PICK_RENAMETO		(1ULL << 18)
+#define LANDLOCK_TRIGGER_FS_PICK_RMDIR			(1ULL << 19)
+#define LANDLOCK_TRIGGER_FS_PICK_SETATTR		(1ULL << 20)
+#define LANDLOCK_TRIGGER_FS_PICK_TRANSFER		(1ULL << 21)
+#define LANDLOCK_TRIGGER_FS_PICK_UNLINK			(1ULL << 22)
+#define LANDLOCK_TRIGGER_FS_PICK_WRITE			(1ULL << 23)
+
+/**
+ * struct landlock_ctx_fs_pick - context accessible to a fs_pick program
+ *
+ * @inode: pointer to the current kernel object that can be used to compare
+ *         inodes from an inode map.
+ */
+struct landlock_ctx_fs_pick {
+	__u64 inode;
+};
+
+/**
+ * struct landlock_ctx_fs_walk - context accessible to a fs_walk program
+ *
+ * @inode: pointer to the current kernel object that can be used to compare
+ *         inodes from an inode map.
+ */
+struct landlock_ctx_fs_walk {
+	__u64 inode;
+};
+
+#endif /* _UAPI__LINUX_LANDLOCK_H__ */
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 68f45a96769f..1b99c8da7a67 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2646,6 +2646,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
 	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
 	case BPF_PROG_TYPE_PERF_EVENT:
 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
+	case BPF_PROG_TYPE_LANDLOCK_HOOK:
 		return false;
 	case BPF_PROG_TYPE_KPROBE:
 	default:
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 6635a31a7a16..f4f34cb8869a 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -101,6 +101,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
 	case BPF_PROG_TYPE_SK_REUSEPORT:
 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
+	case BPF_PROG_TYPE_LANDLOCK_HOOK:
 	default:
 		break;
 	}
-- 
2.20.1

^ permalink raw reply related

* [PATCH bpf-next v9 02/10] bpf: Add eBPF program subtype and is_valid_subtype() verifier
From: Mickaël Salaün @ 2019-06-25 21:52 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mickaël Salaün, Aleksa Sarai, Alexander Viro,
	Alexei Starovoitov, Andrew Morton, Andy Lutomirski,
	Arnaldo Carvalho de Melo, Casey Schaufler, Daniel Borkmann,
	David Drysdale, David S . Miller, Eric W . Biederman,
	James Morris, Jann Horn, John Johansen, Jonathan Corbet,
	Kees Cook, Michael Kerrisk
In-Reply-To: <20190625215239.11136-1-mic@digikod.net>

The goal of the program subtype is to be able to have different static
fine-grained verifications for a unique program type.

The struct bpf_verifier_ops gets a new optional function:
is_valid_subtype(). This new verifier is called at the beginning of the
eBPF program verification to check if the (optional) program subtype is
valid.

The new helper bpf_load_program_xattr() enables to verify a program with
subtypes.

For now, only Landlock eBPF programs are using a program subtype (see
next commits) but this could be used by other program types in the
future.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Link: https://lkml.kernel.org/r/20160827205559.GA43880@ast-mbp.thefacebook.com
---

Changes since v8:
* use bpf_load_program_xattr() instead of bpf_load_program() and add
  bpf_verify_program_xattr() to deal with subtypes
* remove put_extra() since there is no more "previous" field (for now)

Changes since v7:
* rename LANDLOCK_SUBTYPE_* to LANDLOCK_*
* move subtype in bpf_prog_aux and use only one bit for has_subtype
  (suggested by Alexei Starovoitov)
* wrap the prog_subtype with a prog_extra to be able to reference kernel
  pointers:
  * add an optional put_extra() function to struct bpf_prog_ops to be
    able to free the pointed data
  * replace all the prog_subtype with prog_extra in the struct
    bpf_verifier_ops functions
* remove the ABI field (requested by Alexei Starovoitov)
* rename subtype fields

Changes since v6:
* rename Landlock version to ABI to better reflect its purpose
* fix unsigned integer checks
* fix pointer cast
* constify pointers
* rebase

Changes since v5:
* use a prog_subtype pointer and make it future-proof
* add subtype test
* constify bpf_load_program()'s subtype argument
* cleanup subtype initialization
* rebase

Changes since v4:
* replace the "status" field with "version" (more generic)
* replace the "access" field with "ability" (less confusing)

Changes since v3:
* remove the "origin" field
* add an "option" field
* cleanup comments
---
 include/linux/bpf.h                           |  8 +++++
 include/uapi/linux/bpf.h                      |  9 ++++++
 kernel/bpf/core.c                             |  1 +
 kernel/bpf/syscall.c                          | 32 ++++++++++++++++++-
 kernel/bpf/verifier.c                         | 11 +++++++
 net/core/filter.c                             | 25 ++++++++-------
 tools/include/uapi/linux/bpf.h                |  9 ++++++
 tools/lib/bpf/bpf.c                           | 10 +++++-
 tools/lib/bpf/bpf.h                           |  2 ++
 tools/lib/bpf/libbpf.map                      |  1 +
 tools/testing/selftests/bpf/test_verifier.c   | 26 +++++++++++++--
 .../testing/selftests/bpf/verifier/subtype.c  | 10 ++++++
 12 files changed, 128 insertions(+), 16 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/verifier/subtype.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a62e7889b0b6..da167d3afecc 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -294,6 +294,11 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size)
 	aux->ctx_field_size = size;
 }
 
+/* specific data per program type */
+struct bpf_prog_extra {
+	union bpf_prog_subtype subtype;
+};
+
 struct bpf_prog_ops {
 	int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr,
 			union bpf_attr __user *uattr);
@@ -319,6 +324,8 @@ struct bpf_verifier_ops {
 				  const struct bpf_insn *src,
 				  struct bpf_insn *dst,
 				  struct bpf_prog *prog, u32 *target_size);
+	// TODO?: convert to bool (*is_valid_subtype)(struct bpf_prog *prog);
+	bool (*is_valid_subtype)(struct bpf_prog_extra *prog_extra);
 };
 
 struct bpf_prog_offload_ops {
@@ -418,6 +425,7 @@ struct bpf_prog_aux {
 		struct work_struct work;
 		struct rcu_head	rcu;
 	};
+	struct bpf_prog_extra *extra;
 };
 
 struct bpf_array {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b077507efa3f..ddae50373d58 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -355,6 +355,13 @@ struct bpf_stack_build_id {
 	};
 };
 
+union bpf_prog_subtype {
+	struct {
+		__u32		type; /* enum landlock_hook_type */
+		__aligned_u64	triggers; /* LANDLOCK_TRIGGER_* */
+	} landlock_hook;
+} __attribute__((aligned(8)));
+
 union bpf_attr {
 	struct { /* anonymous struct used by BPF_MAP_CREATE command */
 		__u32	map_type;	/* one of enum bpf_map_type */
@@ -409,6 +416,8 @@ union bpf_attr {
 		__u32		line_info_rec_size;	/* userspace bpf_line_info size */
 		__aligned_u64	line_info;	/* line info */
 		__u32		line_info_cnt;	/* number of bpf_line_info records */
+		__aligned_u64	prog_subtype;	/* bpf_prog_subtype address */
+		__u32		prog_subtype_size;
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ad3be85f1411..8ad392e52328 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -255,6 +255,7 @@ void __bpf_prog_free(struct bpf_prog *fp)
 {
 	if (fp->aux) {
 		free_percpu(fp->aux->stats);
+		kfree(fp->aux->extra);
 		kfree(fp->aux);
 	}
 	vfree(fp);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7713cf39795a..7dd3376904d4 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1596,7 +1596,7 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
 }
 
 /* last field in 'union bpf_attr' used by this command */
-#define	BPF_PROG_LOAD_LAST_FIELD line_info_cnt
+#define	BPF_PROG_LOAD_LAST_FIELD prog_subtype_size
 
 static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 {
@@ -1686,6 +1686,36 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 	if (err)
 		goto free_prog;
 
+	/* copy eBPF program subtype from user space */
+	if (attr->prog_subtype) {
+		u32 size;
+
+		err = bpf_check_uarg_tail_zero(
+				u64_to_user_ptr(attr->prog_subtype),
+				sizeof(prog->aux->extra->subtype),
+				attr->prog_subtype_size);
+		if (err)
+			goto free_prog;
+		size = min_t(u32, attr->prog_subtype_size,
+			     sizeof(prog->aux->extra->subtype));
+
+		prog->aux->extra = kzalloc(sizeof(*prog->aux->extra),
+					   GFP_KERNEL | GFP_USER);
+		if (!prog->aux->extra) {
+			err = -ENOMEM;
+			goto free_prog;
+		}
+		if (copy_from_user(&prog->aux->extra->subtype,
+				   u64_to_user_ptr(attr->prog_subtype), size)
+				   != 0) {
+			err = -EFAULT;
+			goto free_prog;
+		}
+	} else if (attr->prog_subtype_size != 0) {
+		err = -EINVAL;
+		goto free_prog;
+	}
+
 	/* run eBPF verifier */
 	err = bpf_check(&prog, attr, uattr);
 	if (err < 0)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 0e079b2298f8..930260683d0a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -9167,6 +9167,17 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 	if (ret < 0)
 		goto skip_full_check;
 
+	if (env->ops->is_valid_subtype) {
+		if (!env->ops->is_valid_subtype(env->prog->aux->extra)) {
+			ret = -EINVAL;
+			goto err_unlock;
+		}
+	} else if (env->prog->aux->extra) {
+		/* do not accept a subtype if the program does not handle it */
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
 	if (bpf_prog_is_dev_bound(env->prog->aux)) {
 		ret = bpf_prog_offload_verifier_prep(env->prog);
 		if (ret)
diff --git a/net/core/filter.c b/net/core/filter.c
index 2014d76e0d2a..0160237e5cd8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5846,7 +5846,8 @@ bool bpf_helper_changes_pkt_data(void *func)
 }
 
 static const struct bpf_func_proto *
-bpf_base_func_proto(enum bpf_func_id func_id)
+bpf_base_func_proto(enum bpf_func_id func_id,
+		    const struct bpf_prog_extra *prog_extra)
 {
 	switch (func_id) {
 	case BPF_FUNC_map_lookup_elem:
@@ -5902,7 +5903,7 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_get_local_storage:
 		return &bpf_get_local_storage_proto;
 	default:
-		return bpf_base_func_proto(func_id);
+		return bpf_base_func_proto(func_id, prog->aux->extra);
 	}
 }
 
@@ -5942,7 +5943,7 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_sk_storage_delete:
 		return &bpf_sk_storage_delete_proto;
 	default:
-		return bpf_base_func_proto(func_id);
+		return bpf_base_func_proto(func_id, prog->aux->extra);
 	}
 }
 
@@ -5959,7 +5960,7 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_get_socket_uid:
 		return &bpf_get_socket_uid_proto;
 	default:
-		return bpf_base_func_proto(func_id);
+		return bpf_base_func_proto(func_id, prog->aux->extra);
 	}
 }
 
@@ -6096,7 +6097,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_skb_ecn_set_ce_proto;
 #endif
 	default:
-		return bpf_base_func_proto(func_id);
+		return bpf_base_func_proto(func_id, prog->aux->extra);
 	}
 }
 
@@ -6135,7 +6136,7 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_tcp_check_syncookie_proto;
 #endif
 	default:
-		return bpf_base_func_proto(func_id);
+		return bpf_base_func_proto(func_id, prog->aux->extra);
 	}
 }
 
@@ -6171,7 +6172,7 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_tcp_sock_proto;
 #endif /* CONFIG_INET */
 	default:
-		return bpf_base_func_proto(func_id);
+		return bpf_base_func_proto(func_id, prog->aux->extra);
 	}
 }
 
@@ -6197,7 +6198,7 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_msg_pop_data:
 		return &bpf_msg_pop_data_proto;
 	default:
-		return bpf_base_func_proto(func_id);
+		return bpf_base_func_proto(func_id, prog->aux->extra);
 	}
 }
 
@@ -6237,7 +6238,7 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_skc_lookup_tcp_proto;
 #endif
 	default:
-		return bpf_base_func_proto(func_id);
+		return bpf_base_func_proto(func_id, prog->aux->extra);
 	}
 }
 
@@ -6248,7 +6249,7 @@ flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_skb_load_bytes:
 		return &bpf_flow_dissector_load_bytes_proto;
 	default:
-		return bpf_base_func_proto(func_id);
+		return bpf_base_func_proto(func_id, prog->aux->extra);
 	}
 }
 
@@ -6275,7 +6276,7 @@ lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_skb_under_cgroup:
 		return &bpf_skb_under_cgroup_proto;
 	default:
-		return bpf_base_func_proto(func_id);
+		return bpf_base_func_proto(func_id, prog->aux->extra);
 	}
 }
 
@@ -8611,7 +8612,7 @@ sk_reuseport_func_proto(enum bpf_func_id func_id,
 	case BPF_FUNC_skb_load_bytes_relative:
 		return &sk_reuseport_load_bytes_relative_proto;
 	default:
-		return bpf_base_func_proto(func_id);
+		return bpf_base_func_proto(func_id, prog->aux->extra);
 	}
 }
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b077507efa3f..ddae50373d58 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -355,6 +355,13 @@ struct bpf_stack_build_id {
 	};
 };
 
+union bpf_prog_subtype {
+	struct {
+		__u32		type; /* enum landlock_hook_type */
+		__aligned_u64	triggers; /* LANDLOCK_TRIGGER_* */
+	} landlock_hook;
+} __attribute__((aligned(8)));
+
 union bpf_attr {
 	struct { /* anonymous struct used by BPF_MAP_CREATE command */
 		__u32	map_type;	/* one of enum bpf_map_type */
@@ -409,6 +416,8 @@ union bpf_attr {
 		__u32		line_info_rec_size;	/* userspace bpf_line_info size */
 		__aligned_u64	line_info;	/* line info */
 		__u32		line_info_cnt;	/* number of bpf_line_info records */
+		__aligned_u64	prog_subtype;	/* bpf_prog_subtype address */
+		__u32		prog_subtype_size;
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index c7d7993c44bb..ecd894344bbd 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -227,6 +227,9 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 
 	memset(&attr, 0, sizeof(attr));
 	attr.prog_type = load_attr->prog_type;
+	attr.prog_subtype = ptr_to_u64(load_attr->prog_subtype);
+	attr.prog_subtype_size = load_attr->prog_subtype ?
+		sizeof(*load_attr->prog_subtype) : 0;
 	attr.expected_attach_type = load_attr->expected_attach_type;
 	attr.insn_cnt = (__u32)load_attr->insns_cnt;
 	attr.insns = ptr_to_u64(load_attr->insns);
@@ -332,6 +335,11 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 	return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
 }
 
+int bpf_verify_program_xattr(union bpf_attr *attr, size_t attr_sz)
+{
+	return sys_bpf_prog_load(attr, attr_sz);
+}
+
 int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 		       size_t insns_cnt, __u32 prog_flags, const char *license,
 		       __u32 kern_version, char *log_buf, size_t log_buf_sz,
@@ -351,7 +359,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 	attr.kern_version = kern_version;
 	attr.prog_flags = prog_flags;
 
-	return sys_bpf_prog_load(&attr, sizeof(attr));
+	return bpf_verify_program_xattr(&attr, sizeof(attr));
 }
 
 int bpf_map_update_elem(int fd, const void *key, const void *value,
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index ff42ca043dc8..7cd600d49f9e 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -88,6 +88,7 @@ struct bpf_load_program_attr {
 	__u32 line_info_cnt;
 	__u32 log_level;
 	__u32 prog_flags;
+	const union bpf_prog_subtype *prog_subtype;
 };
 
 /* Flags to direct loading requirements */
@@ -102,6 +103,7 @@ LIBBPF_API int bpf_load_program(enum bpf_prog_type type,
 				const struct bpf_insn *insns, size_t insns_cnt,
 				const char *license, __u32 kern_version,
 				char *log_buf, size_t log_buf_sz);
+LIBBPF_API int bpf_verify_program_xattr(union bpf_attr *attr, size_t attr_sz);
 LIBBPF_API int bpf_verify_program(enum bpf_prog_type type,
 				  const struct bpf_insn *insns,
 				  size_t insns_cnt, __u32 prog_flags,
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 2c6d835620d2..48c9269a0496 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -107,6 +107,7 @@ LIBBPF_0.0.1 {
 		bpf_set_link_xdp_fd;
 		bpf_task_fd_query;
 		bpf_verify_program;
+		bpf_verify_program_xattr;
 		btf__fd;
 		btf__find_by_name;
 		btf__free;
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index c5514daf8865..93faffd31fc3 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -105,6 +105,8 @@ struct bpf_test {
 			__u64 data64[TEST_DATA_LEN / 8];
 		};
 	} retvals[MAX_TEST_RUNS];
+	bool has_prog_subtype;
+	union bpf_prog_subtype prog_subtype;
 };
 
 /* Note we want this to be 64 bit aligned so that the end of our array is
@@ -122,6 +124,11 @@ struct other_val {
 	long long bar;
 };
 
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+	return (__u64) (unsigned long) ptr;
+}
+
 static void bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self)
 {
 	/* test: {skb->data[0], vlan_push} x 51 + {skb->data[0], vlan_pop} x 51 */
@@ -856,6 +863,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	int fixup_skips;
 	__u32 pflags;
 	int i, err;
+	union bpf_attr attr;
+	union bpf_prog_subtype *prog_subtype =
+		test->has_prog_subtype ? &test->prog_subtype : NULL;
 
 	for (i = 0; i < MAX_NR_MAPS; i++)
 		map_fds[i] = -1;
@@ -881,8 +891,20 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 		pflags |= BPF_F_STRICT_ALIGNMENT;
 	if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
 		pflags |= BPF_F_ANY_ALIGNMENT;
-	fd_prog = bpf_verify_program(prog_type, prog, prog_len, pflags,
-				     "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 4);
+
+	memset(&attr, 0, sizeof(attr));
+	attr.prog_type = prog_type;
+	attr.prog_subtype = ptr_to_u64(prog_subtype);
+	attr.prog_subtype_size = prog_subtype ? sizeof(*prog_subtype) : 0;
+	attr.insn_cnt = (__u32)prog_len;
+	attr.insns = ptr_to_u64(prog);
+	attr.license = ptr_to_u64("GPL");
+	bpf_vlog[0] = 0;
+	attr.log_buf = ptr_to_u64(bpf_vlog);
+	attr.log_size = sizeof(bpf_vlog);
+	attr.log_level = 4;
+	attr.prog_flags = pflags;
+	fd_prog = bpf_verify_program_xattr(&attr, sizeof(attr));
 	if (fd_prog < 0 && !bpf_probe_prog_type(prog_type, 0)) {
 		printf("SKIP (unsupported program type %d)\n", prog_type);
 		skips++;
diff --git a/tools/testing/selftests/bpf/verifier/subtype.c b/tools/testing/selftests/bpf/verifier/subtype.c
new file mode 100644
index 000000000000..cf614223d53f
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/subtype.c
@@ -0,0 +1,10 @@
+{
+	"superfluous subtype",
+	.insns = {
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.errstr = "",
+	.result = REJECT,
+	.has_prog_subtype = true,
+},
-- 
2.20.1

^ permalink raw reply related

* [PATCH bpf-next v9 01/10] fs,security: Add a new file access type: MAY_CHROOT
From: Mickaël Salaün @ 2019-06-25 21:52 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mickaël Salaün, Aleksa Sarai, Alexander Viro,
	Alexei Starovoitov, Andrew Morton, Andy Lutomirski,
	Arnaldo Carvalho de Melo, Casey Schaufler, Daniel Borkmann,
	David Drysdale, David S . Miller, Eric W . Biederman,
	James Morris, Jann Horn, John Johansen, Jonathan Corbet,
	Kees Cook, Michael Kerrisk
In-Reply-To: <20190625215239.11136-1-mic@digikod.net>

For compatibility reason, MAY_CHROOT is always set with MAY_CHDIR.
However, this new flag enable to differentiate a chdir form a chroot.

This is needed for the Landlock LSM to be able to evaluate a new root
directory.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Casey Schaufler <casey@schaufler-ca.com>
Cc: James Morris <jmorris@namei.org>
Cc: John Johansen <john.johansen@canonical.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Paul Moore <paul@paul-moore.com>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: linux-fsdevel@vger.kernel.org
---
 fs/open.c          | 3 ++-
 include/linux/fs.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/open.c b/fs/open.c
index b5b80469b93d..e8767318fd03 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -494,7 +494,8 @@ int ksys_chroot(const char __user *filename)
 	if (error)
 		goto out;
 
-	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
+	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR |
+			MAY_CHROOT);
 	if (error)
 		goto dput_and_out;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f7fdfe93e25d..fa6c9d0c152b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -99,6 +99,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define MAY_CHDIR		0x00000040
 /* called from RCU mode, don't block */
 #define MAY_NOT_BLOCK		0x00000080
+#define MAY_CHROOT		0x00000100
 
 /*
  * flags in file.f_mode.  Note that FMODE_READ and FMODE_WRITE must correspond
-- 
2.20.1

^ permalink raw reply related

* [PATCH bpf-next v9 00/10] Landlock LSM: Toward unprivileged sandboxing
From: Mickaël Salaün @ 2019-06-25 21:52 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mickaël Salaün, Aleksa Sarai, Alexander Viro,
	Alexei Starovoitov, Andrew Morton, Andy Lutomirski,
	Arnaldo Carvalho de Melo, Casey Schaufler, Daniel Borkmann,
	David Drysdale, David S . Miller, Eric W . Biederman,
	James Morris, Jann Horn, John Johansen, Jonathan Corbet,
	Kees Cook, Michael Kerrisk

Hi,

This ninth series upgrade Landlock as a stackable LSM [4] and elide the
file system path evaluation from the previous series [1] to make this
review process easier.  This patch series is almost half the size of the
previous one.

Landlock is a low-level framework to build custom access-control systems
or safe endpoint security agents.  There is two types of Landlock hooks:
FS_WALK and FS_PICK.  Each of them accepts a dedicated eBPF program,
called a Landlock program.  The set of actions on a file is well defined
(e.g.  read, write, ioctl, append, lock, mount...) taking inspiration
from the major Linux LSMs and some other access-controls like Capsicum.

The example patch show how a file system access control can be built
based on a list of denied files and directories.  From a security point
of view, it may be preferable to use a whitelist instead of a blacklist,
but this series only enable to match a specific list of files.  Bringing
back a way to evaluate a path is planned for a future dedicated series,
once this base Landlock framework is merged.  I may take inspiration
from the LOOKUP_BENEATH approach [5], but from an eBPF point of view.

The documentation patch contains some kernel documentation and
explanations on how to use Landlock.  The compiled documentation and
some talks can be found here: https://landlock.io
This patch series can be found in a Git repository here:
https://github.com/landlock-lsm/linux/commits/landlock-v9

There is still some minor issues with this patch series but it is enough
to get a deep review.

This is the first step of the roadmap discussed at LPC [2].  While the
intended final goal is to allow unprivileged users to use Landlock, this
series allows only a process with global CAP_SYS_ADMIN to load and
enforce a rule.  This may help to get feedback and avoid unexpected
behaviors.

This series can be applied on top of bpf-next, commit 88091ff56b71
("selftests, bpf: Add test for veth native XDP").  This can be tested
with CONFIG_SECCOMP_FILTER and CONFIG_SECURITY_LANDLOCK.  I would really
appreciate constructive comments on the design and the code.


# Landlock LSM

The goal of this new Linux Security Module (LSM) called Landlock is to
allow any process, including unprivileged ones, to create powerful
security sandboxes comparable to XNU Sandbox or OpenBSD Pledge (which
could be implemented with Landlock).  This kind of sandbox is expected
to help mitigate the security impact of bugs or unexpected/malicious
behaviors in user-space applications.

The approach taken is to add the minimum amount of code while still
allowing the user-space application to create quite complex access
rules.  A dedicated security policy language such as the one used by
SELinux, AppArmor and other major LSMs involves a lot of code and is
usually permitted to only a trusted user (i.e. root).  On the contrary,
eBPF programs already exist and are designed to be safely loaded by
unprivileged user-space.

This design does not seem too intrusive but is flexible enough to allow
a powerful sandbox mechanism accessible by any process on Linux. The use
of seccomp and Landlock is more suitable with the help of a user-space
library (e.g.  libseccomp) that could help to specify a high-level
language to express a security policy instead of raw eBPF programs.
Moreover, thanks to the LLVM front-end, it is quite easy to write an
eBPF program with a subset of the C language.


# Frequently asked questions

## Why is seccomp-bpf not enough?

A seccomp filter can access only raw syscall arguments (i.e. the
register values) which means that it is not possible to filter according
to the value pointed to by an argument, such as a file pathname. As an
embryonic Landlock version demonstrated, filtering at the syscall level
is complicated (e.g. need to take care of race conditions). This is
mainly because the access control checkpoints of the kernel are not at
this high-level but more underneath, at the LSM-hook level. The LSM
hooks are designed to handle this kind of checks.  Landlock abstracts
this approach to leverage the ability of unprivileged users to limit
themselves.

Cf. section "What it isn't?" in Documentation/prctl/seccomp_filter.txt


## Why use the seccomp(2) syscall?

Landlock use the same semantic as seccomp to apply access rule
restrictions. It add a new layer of security for the current process
which is inherited by its children. It makes sense to use an unique
access-restricting syscall (that should be allowed by seccomp filters)
which can only drop privileges. Moreover, a Landlock rule could come
from outside a process (e.g.  passed through a UNIX socket). It is then
useful to differentiate the creation/load of Landlock eBPF programs via
bpf(2), from rule enforcement via seccomp(2).


## Why a new LSM? Are SELinux, AppArmor, Smack and Tomoyo not good
   enough?

The current access control LSMs are fine for their purpose which is to
give the *root* the ability to enforce a security policy for the
*system*. What is missing is a way to enforce a security policy for any
application by its developer and *unprivileged user* as seccomp can do
for raw syscall filtering.

Differences from other (access control) LSMs:
* not only dedicated to administrators (i.e. no_new_priv);
* limited kernel attack surface (e.g. policy parsing);
* constrained policy rules (no DoS: deterministic execution time);
* do not leak more information than the loader process can legitimately
  have access to (minimize metadata inference).


# Changes since v8

* fit with the new LSM stacking framework (security blobs were tested
  but are not use in this series because of the code reduction)
* remove the Landlock program chaining and the file path evaluation
  feature to get a minimum viable product and ease the review
* replace the example with a simple blacklist policy
* rebase on bpf-next


# Changes since v7

* major revamp of the file system enforcement:
  * new eBPF map dedicated to tie an inode with an arbitrary 64-bits
    value, which can be used to tag files
  * three new Landlock hooks: FS_WALK, FS_PICK and FS_GET
  * add the ability to chain Landlock programs
  * add a new eBPF map type to compare inodes
  * don't use macros anymore
* replace subtype fields:
  * triggers: fine-grained bitfiel of actions on which a Landlock
    program may be called (if it comes from a sandbox process)
  * previous: a parent chained program
* upstreamed patches:
  * commit 369130b63178 ("selftests: Enhance kselftest_harness.h to
    print which assert failed")


# Changes since v6

* upstreamed patches:
  * commit 752ba56fb130 ("bpf: Extend check_uarg_tail_zero() checks")
  * commit 0b40808a1084 ("selftests: Make test_harness.h more generally
    available") and related ones
  * commit 3bb857e47e49 ("LSM: Enable multiple calls to
    security_add_hooks() for the same LSM")
* simplify the landlock_context (remove syscall_* fields) and add three
  FS sub-events: IOCTL, LOCK, FCNTL
* minimize the number of callable BPF functions from a Landlock rule
* do not split put_seccomp_filter() with put_seccomp()
* rename Landlock version to Landlock ABI
* miscellaneous fixes
* rebase on net-next


# Changes since v5

* eBPF program subtype:
  * use a prog_subtype pointer instead of inlining it into bpf_attr
  * enable a future-proof behavior (reject unhandled data/size)
  * add tests
* use a simple rule hierarchy (similar to seccomp-bpf)
* add a ptrace scope protection
* add more tests
* add more documentation
* rename some files
* miscellaneous fixes
* rebase on net-next


# Changes since v4

* upstreamed patches:
  * commit d498f8719a09 ("bpf: Rebuild bpf.o for any dependency update")
  * commit a734fb5d6006 ("samples/bpf: Reset global variables") and
    related ones
  * commit f4874d01beba ("bpf: Use bpf_create_map() from the library")
    and related ones
  * commit d02d8986a768 ("bpf: Always test unprivileged programs")
  * commit 640eb7e7b524 ("fs: Constify path_is_under()'s arguments")
  * commit 535e7b4b5ef2 ("bpf: Use u64_to_user_ptr()")
* revamp Landlock to not expose an LSM hook interface but wrap and
  abstract them with Landlock events (currently one for all filesystem
  related operations: LANDLOCK_SUBTYPE_EVENT_FS)
* wrap all filesystem kernel objects through the same FS handle (struct
  landlock_handle_fs): struct file, struct inode, struct path and struct
  dentry
* a rule don't return an errno code but only a boolean to allow or deny
  an access request
* handle all filesystem related LSM hooks
* add some tests and a sample:
  * BPF context tests
  * Landlock sandboxing tests and sample
  * write Landlock rules in C and compile them with LLVM
* change field names of eBPF program subtype
* remove arraymap of handles for now (will be replaced with a revamped
  map)
* remove cgroup handling for now
* add user and kernel documentation
* rebase on net-next


# Changes since v3

* upstreamed patch:
  * commit 1955351da41c ("bpf: Set register type according to
    is_valid_access()")
* use abstract LSM hook arguments with custom types (e.g.
  *_LANDLOCK_ARG_FS for struct file, struct inode and struct path)
* add more LSM hooks to support full filesystem access control
* improve the sandbox example
* fix races and RCU issues:
  * eBPF program execution and eBPF helpers
  * revamp the arraymap of handles to cleanly deal with update/delete
* eBPF program subtype for Landlock:
  * remove the "origin" field
  * add an "option" field
* rebase onto Daniel Mack's patches v7 [3]
* remove merged commit 1955351da41c ("bpf: Set register type according
  to is_valid_access()")
* fix spelling mistakes
* cleanup some type and variable names
* split patches
* for now, remove cgroup delegation handling for unprivileged user
* remove extra access check for cgroup_get_from_fd()
* remove unused example code dealing with skb
* remove seccomp-bpf link:
  * no more seccomp cookie
  * for now, it is no more possible to check the current syscall
    properties


# Changes since v2

* revamp cgroup handling:
  * use Daniel Mack's patches "Add eBPF hooks for cgroups" v5
  * remove bpf_landlock_cmp_cgroup_beneath()
  * make BPF_PROG_ATTACH usable with delegated cgroups
  * add a new CGRP_NO_NEW_PRIVS flag for safe cgroups
  * handle Landlock sandboxing for cgroups hierarchy
  * allow unprivileged processes to attach Landlock eBPF program to
    cgroups
* add subtype to eBPF programs:
  * replace Landlock hook identification by custom eBPF program types
    with a dedicated subtype field
  * manage fine-grained privileged Landlock programs
  * register Landlock programs for dedicated trigger origins (e.g.
    syscall, return from seccomp filter and/or interruption)
* performance and memory optimizations: use an array to access Landlock
  hooks directly but do not duplicated it for each thread
  (seccomp-based)
* allow running Landlock programs without seccomp filter
* fix seccomp-related issues
* remove extra errno bounding check for Landlock programs
* add some examples for optional eBPF functions or context access
  (network related) according to security checks to allow more features
  for privileged programs (e.g. Checmate)


# Changes since v1

* focus on the LSM hooks, not the syscalls:
  * much more simple implementation
  * does not need audit cache tricks to avoid race conditions
  * more simple to use and more generic because using the LSM hook
    abstraction directly
  * more efficient because only checking in LSM hooks
  * architecture agnostic
* switch from cBPF to eBPF:
  * new eBPF program types dedicated to Landlock
  * custom functions used by the eBPF program
  * gain some new features (e.g. 10 registers, can load values of
    different size, LLVM translator) but only a few functions allowed
    and a dedicated map type
  * new context: LSM hook ID, cookie and LSM hook arguments
  * need to set the sysctl kernel.unprivileged_bpf_disable to 0 (default
    value) to be able to load hook filters as unprivileged users
* smaller and simpler:
  * no more checker groups but dedicated arraymap of handles
  * simpler userland structs thanks to eBPF functions
* distinctive name: Landlock


[1] https://lore.kernel.org/lkml/20180227004121.3633-1-mic@digikod.net/
[2] https://lore.kernel.org/lkml/5828776A.1010104@digikod.net/
[3] https://lore.kernel.org/netdev/1477390454-12553-1-git-send-email-daniel@zonque.org/
[4] https://lore.kernel.org/lkml/50db058a-7dde-441b-a7f9-f6837fe8b69f@schaufler-ca.com/
[5] https://lore.kernel.org/lkml/20190520133305.11925-1-cyphar@cyphar.com/

Regards,

Mickaël Salaün (10):
  fs,security: Add a new file access type: MAY_CHROOT
  bpf: Add eBPF program subtype and is_valid_subtype() verifier
  bpf,landlock: Define an eBPF program type for Landlock hooks
  seccomp,landlock: Enforce Landlock programs per process hierarchy
  bpf,landlock: Add a new map type: inode
  landlock: Handle filesystem access control
  landlock: Add ptrace restrictions
  bpf: Add a Landlock sandbox example
  bpf,landlock: Add tests for Landlock
  landlock: Add user and kernel documentation for Landlock

 Documentation/security/index.rst              |   1 +
 Documentation/security/landlock/index.rst     |  20 +
 Documentation/security/landlock/kernel.rst    |  99 +++
 Documentation/security/landlock/user.rst      | 148 +++++
 MAINTAINERS                                   |  13 +
 fs/open.c                                     |   3 +-
 include/linux/bpf.h                           |  17 +
 include/linux/bpf_types.h                     |   6 +
 include/linux/fs.h                            |   1 +
 include/linux/landlock.h                      |  34 ++
 include/linux/lsm_hooks.h                     |   1 +
 include/linux/seccomp.h                       |   5 +
 include/uapi/linux/bpf.h                      |  22 +-
 include/uapi/linux/landlock.h                 | 109 ++++
 include/uapi/linux/seccomp.h                  |   1 +
 kernel/bpf/Makefile                           |   3 +
 kernel/bpf/core.c                             |   3 +
 kernel/bpf/inodemap.c                         | 315 ++++++++++
 kernel/bpf/syscall.c                          |  59 +-
 kernel/bpf/verifier.c                         |  25 +
 kernel/fork.c                                 |   8 +-
 kernel/seccomp.c                              |   4 +
 net/core/filter.c                             |  25 +-
 samples/bpf/.gitignore                        |   1 +
 samples/bpf/Makefile                          |   3 +
 samples/bpf/bpf_load.c                        |  76 ++-
 samples/bpf/bpf_load.h                        |   7 +
 samples/bpf/landlock1.h                       |   8 +
 samples/bpf/landlock1_kern.c                  | 104 ++++
 samples/bpf/landlock1_user.c                  | 157 +++++
 security/Kconfig                              |   1 +
 security/Makefile                             |   2 +
 security/landlock/Kconfig                     |  18 +
 security/landlock/Makefile                    |   5 +
 security/landlock/common.h                    |  81 +++
 security/landlock/enforce.c                   | 272 +++++++++
 security/landlock/enforce.h                   |  18 +
 security/landlock/enforce_seccomp.c           |  92 +++
 security/landlock/hooks.c                     |  95 +++
 security/landlock/hooks.h                     |  31 +
 security/landlock/hooks_fs.c                  | 568 ++++++++++++++++++
 security/landlock/hooks_fs.h                  |  31 +
 security/landlock/hooks_ptrace.c              | 121 ++++
 security/landlock/hooks_ptrace.h              |   8 +
 security/landlock/init.c                      | 159 +++++
 security/security.c                           |  15 +
 tools/include/uapi/linux/bpf.h                |  22 +-
 tools/include/uapi/linux/landlock.h           | 109 ++++
 tools/lib/bpf/bpf.c                           |  10 +-
 tools/lib/bpf/bpf.h                           |   2 +
 tools/lib/bpf/libbpf.c                        |   1 +
 tools/lib/bpf/libbpf.map                      |   1 +
 tools/lib/bpf/libbpf_probes.c                 |   2 +
 tools/testing/selftests/Makefile              |   1 +
 tools/testing/selftests/bpf/bpf_helpers.h     |   2 +
 tools/testing/selftests/bpf/test_verifier.c   |  27 +-
 .../testing/selftests/bpf/verifier/landlock.c |  35 ++
 .../testing/selftests/bpf/verifier/subtype.c  |  20 +
 tools/testing/selftests/landlock/.gitignore   |   4 +
 tools/testing/selftests/landlock/Makefile     |  39 ++
 tools/testing/selftests/landlock/test.h       |  48 ++
 tools/testing/selftests/landlock/test_base.c  |  24 +
 tools/testing/selftests/landlock/test_fs.c    | 257 ++++++++
 .../testing/selftests/landlock/test_ptrace.c  | 154 +++++
 64 files changed, 3528 insertions(+), 25 deletions(-)
 create mode 100644 Documentation/security/landlock/index.rst
 create mode 100644 Documentation/security/landlock/kernel.rst
 create mode 100644 Documentation/security/landlock/user.rst
 create mode 100644 include/linux/landlock.h
 create mode 100644 include/uapi/linux/landlock.h
 create mode 100644 kernel/bpf/inodemap.c
 create mode 100644 samples/bpf/landlock1.h
 create mode 100644 samples/bpf/landlock1_kern.c
 create mode 100644 samples/bpf/landlock1_user.c
 create mode 100644 security/landlock/Kconfig
 create mode 100644 security/landlock/Makefile
 create mode 100644 security/landlock/common.h
 create mode 100644 security/landlock/enforce.c
 create mode 100644 security/landlock/enforce.h
 create mode 100644 security/landlock/enforce_seccomp.c
 create mode 100644 security/landlock/hooks.c
 create mode 100644 security/landlock/hooks.h
 create mode 100644 security/landlock/hooks_fs.c
 create mode 100644 security/landlock/hooks_fs.h
 create mode 100644 security/landlock/hooks_ptrace.c
 create mode 100644 security/landlock/hooks_ptrace.h
 create mode 100644 security/landlock/init.c
 create mode 100644 tools/include/uapi/linux/landlock.h
 create mode 100644 tools/testing/selftests/bpf/verifier/landlock.c
 create mode 100644 tools/testing/selftests/bpf/verifier/subtype.c
 create mode 100644 tools/testing/selftests/landlock/.gitignore
 create mode 100644 tools/testing/selftests/landlock/Makefile
 create mode 100644 tools/testing/selftests/landlock/test.h
 create mode 100644 tools/testing/selftests/landlock/test_base.c
 create mode 100644 tools/testing/selftests/landlock/test_fs.c
 create mode 100644 tools/testing/selftests/landlock/test_ptrace.c

-- 
2.20.1

^ permalink raw reply

* Re: Detecting the availability of VSYSCALL
From: Andy Lutomirski @ 2019-06-25 21:49 UTC (permalink / raw)
  To: Florian Weimer
  Cc: Andy Lutomirski, Thomas Gleixner, Linux API, Kernel Hardening,
	linux-x86_64, linux-arch, Kees Cook, Carlos O'Donell, X86 ML
In-Reply-To: <87a7e5v1d9.fsf@oldenburg2.str.redhat.com>

On Tue, Jun 25, 2019 at 1:47 PM Florian Weimer <fweimer@redhat.com> wrote:
>
> * Andy Lutomirski:
>
> >> We want binaries that run fast on VSYSCALL kernels, but can fall back to
> >> full system calls on kernels that do not have them (instead of
> >> crashing).
> >
> > Define "VSYSCALL kernels."  On any remotely recent kernel (*all* new
> > kernels and all kernels for the last several years that haven't
> > specifically requested vsyscall=native), using vsyscalls is much, much
> > slower than just doing syscalls.  I know a way you can tell whether
> > vsyscalls are fast, but it's unreliable, and I'm disinclined to
> > suggest it.  There are also at least two pending patch series that
> > will interfere.
>
> The fast path is for the benefit of the 2.6.32-based kernel in Red Hat
> Enterprise Linux 6.  It doesn't have the vsyscall emulation code yet, I
> think.
>
> My hope is to produce (statically linked) binaries that run as fast on
> that kernel as they run today, but can gracefully fall back to something
> else on kernels without vsyscall support.
>
> >> We could parse the vDSO and prefer the functions found there, but this
> >> is for the statically linked case.  We currently do not have a (minimal)
> >> dynamic loader there in that version of the code base, so that doesn't
> >> really work for us.
> >
> > Is anything preventing you from adding a vDSO parser?  I wrote one
> > just for this type of use:
> >
> > $ wc -l tools/testing/selftests/vDSO/parse_vdso.c
> > 269 tools/testing/selftests/vDSO/parse_vdso.c
> >
> > (289 lines includes quite a bit of comment.)
>
> I'm worried that if I use a custom parser and the binaries start
> crashing again because something changed in the kernel (within the scope
> permitted by the ELF specification), the kernel won't be fixed.
>
> That is, we'd be in exactly the same situation as today.

With my maintainer hat on, the kernel won't do that.  Obviously a
review of my parser would be appreciated, but I consider it to be
fully supported, just like glibc and musl's parsers are fully
supported.  Sadly, I *also* consider the version Go forked for a while
(now fixed) to be supported.  Sigh.

^ permalink raw reply

* Re: Detecting the availability of VSYSCALL
From: Florian Weimer @ 2019-06-25 20:47 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Thomas Gleixner, Linux API, Kernel Hardening, linux-x86_64,
	linux-arch, Kees Cook, Carlos O'Donell, X86 ML
In-Reply-To: <CALCETrVh1f5wJNMbMoVqY=bq-7G=uQ84BUkepf5RksA3vUopNQ@mail.gmail.com>

* Andy Lutomirski:

>> We want binaries that run fast on VSYSCALL kernels, but can fall back to
>> full system calls on kernels that do not have them (instead of
>> crashing).
>
> Define "VSYSCALL kernels."  On any remotely recent kernel (*all* new
> kernels and all kernels for the last several years that haven't
> specifically requested vsyscall=native), using vsyscalls is much, much
> slower than just doing syscalls.  I know a way you can tell whether
> vsyscalls are fast, but it's unreliable, and I'm disinclined to
> suggest it.  There are also at least two pending patch series that
> will interfere.

The fast path is for the benefit of the 2.6.32-based kernel in Red Hat
Enterprise Linux 6.  It doesn't have the vsyscall emulation code yet, I
think.

My hope is to produce (statically linked) binaries that run as fast on
that kernel as they run today, but can gracefully fall back to something
else on kernels without vsyscall support.

>> We could parse the vDSO and prefer the functions found there, but this
>> is for the statically linked case.  We currently do not have a (minimal)
>> dynamic loader there in that version of the code base, so that doesn't
>> really work for us.
>
> Is anything preventing you from adding a vDSO parser?  I wrote one
> just for this type of use:
>
> $ wc -l tools/testing/selftests/vDSO/parse_vdso.c
> 269 tools/testing/selftests/vDSO/parse_vdso.c
>
> (289 lines includes quite a bit of comment.)

I'm worried that if I use a custom parser and the binaries start
crashing again because something changed in the kernel (within the scope
permitted by the ELF specification), the kernel won't be fixed.

That is, we'd be in exactly the same situation as today.

Thanks,
Florian

^ permalink raw reply

* Re: Detecting the availability of VSYSCALL
From: Andy Lutomirski @ 2019-06-25 20:13 UTC (permalink / raw)
  To: Kees Cook
  Cc: Florian Weimer, Linux API, Kernel Hardening, linux-x86_64,
	linux-arch, Andy Lutomirski, Carlos O'Donell
In-Reply-To: <201906251131.419D8ACB@keescook>

On Tue, Jun 25, 2019 at 1:08 PM Kees Cook <keescook@chromium.org> wrote:
>
> On Tue, Jun 25, 2019 at 05:15:27PM +0200, Florian Weimer wrote:
> > Should we try mapping something at the magic address (without MAP_FIXED)
> > and see if we get back a different address?  Something in the auxiliary
> > vector would work for us, too, but nothing seems to exists there
> > unfortunately.
>
> It seems like mmap() won't even work because it's in the high memory
> area. I can't map something a page under the vsyscall page either, so I
> can't distinguish it with mmap, mprotect, madvise, or msync. :(
>

I keep contemplating making munmap() work on it.  That would nicely
answer the question: if munmap() fails, it's not there, and, if
munmap() succeeds, it's not there :)

^ permalink raw reply

* Re: Detecting the availability of VSYSCALL
From: Andy Lutomirski @ 2019-06-25 20:11 UTC (permalink / raw)
  To: Florian Weimer
  Cc: Thomas Gleixner, Linux API, Kernel Hardening, linux-x86_64,
	linux-arch, Andy Lutomirski, Kees Cook, Carlos O'Donell,
	X86 ML
In-Reply-To: <87lfxpy614.fsf@oldenburg2.str.redhat.com>

On Tue, Jun 25, 2019 at 9:39 AM Florian Weimer <fweimer@redhat.com> wrote:
>
> * Thomas Gleixner:
>
> > On Tue, 25 Jun 2019, Florian Weimer wrote:
> >> We're trying to create portable binaries which use VSYSCALL on older
> >> kernels (to avoid performance regressions), but gracefully degrade to
> >> full system calls on kernels which do not have VSYSCALL support compiled
> >> in (or disabled at boot).
> >>
> >> For technical reasons, we cannot use vDSO fallback.  Trying vDSO first
> >> and only then use VSYSCALL is the way this has been tackled in the past,
> >> which is why this userspace ABI breakage goes generally unnoticed.  But
> >> we don't have a dynamic linker in our scenario.
> >
> > I'm not following. On newer kernels which usually have vsyscall disabled
> > you need to use real syscalls anyway, so why are you so worried about
> > performance on older kernels. That doesn't make sense.
>
> We want binaries that run fast on VSYSCALL kernels, but can fall back to
> full system calls on kernels that do not have them (instead of
> crashing).

Define "VSYSCALL kernels."  On any remotely recent kernel (*all* new
kernels and all kernels for the last several years that haven't
specifically requested vsyscall=native), using vsyscalls is much, much
slower than just doing syscalls.  I know a way you can tell whether
vsyscalls are fast, but it's unreliable, and I'm disinclined to
suggest it.  There are also at least two pending patch series that
will interfere.

>
> We could parse the vDSO and prefer the functions found there, but this
> is for the statically linked case.  We currently do not have a (minimal)
> dynamic loader there in that version of the code base, so that doesn't
> really work for us.

Is anything preventing you from adding a vDSO parser?  I wrote one
just for this type of use:

$ wc -l tools/testing/selftests/vDSO/parse_vdso.c
269 tools/testing/selftests/vDSO/parse_vdso.c

(289 lines includes quite a bit of comment.)


$ head -n8 tools/testing/selftests/vDSO/parse_vdso.c
/*
 * parse_vdso.c: Linux reference vDSO parser
 * Written by Andrew Lutomirski, 2011-2014.
 *
 * This code is meant to be linked in to various programs that run on Linux.
 * As such, it is available with as few restrictions as possible.  This file
 * is licensed under the Creative Commons Zero License, version 1.0,
 * available at http://creativecommons.org/publicdomain/zero/1.0/legalcode

If this license is too restrictive for you, I could probably be
convinced to relicense it, I'd be surprised :)  In hindsight, I kind
of wish I'd used MIT instead, since the Go runtime took advantage of
the CC0 license to import it without attribution *and* break it quite
badly in the process.

IMO the correct solution is to parse the vDSO and, if that fails, to
use plain syscalls as a fallback.  You should not ship anything that
uses a vsyscall under any circumstances, unless you need the last
ounce of performance on that one ancient version of OpenSuSE that
crashes if the vDSO is enabled.

^ permalink raw reply

* Re: Detecting the availability of VSYSCALL
From: Kees Cook @ 2019-06-25 20:08 UTC (permalink / raw)
  To: Florian Weimer
  Cc: linux-api, kernel-hardening, linux-x86_64, linux-arch,
	Andy Lutomirski, Carlos O'Donell
In-Reply-To: <87v9wty9v4.fsf@oldenburg2.str.redhat.com>

On Tue, Jun 25, 2019 at 05:15:27PM +0200, Florian Weimer wrote:
> Should we try mapping something at the magic address (without MAP_FIXED)
> and see if we get back a different address?  Something in the auxiliary
> vector would work for us, too, but nothing seems to exists there
> unfortunately.

It seems like mmap() won't even work because it's in the high memory
area. I can't map something a page under the vsyscall page either, so I
can't distinguish it with mmap, mprotect, madvise, or msync. :(

-- 
Kees Cook

^ permalink raw reply

* Re: [PATCH v5 16/16] f2fs: add fs-verity support
From: Eric Biggers @ 2019-06-25 17:52 UTC (permalink / raw)
  To: Chao Yu
  Cc: Theodore Y . Ts'o, Darrick J . Wong, linux-api, Dave Chinner,
	linux-f2fs-devel, linux-fscrypt, linux-fsdevel, Jaegeuk Kim,
	linux-integrity, linux-ext4, Linus Torvalds, Christoph Hellwig,
	Victor Hsieh
In-Reply-To: <90495fb1-72eb-ca42-8457-ef8e969eda51@huawei.com>

Hi Chao, thanks for the review.

On Tue, Jun 25, 2019 at 03:55:57PM +0800, Chao Yu wrote:
> Hi Eric,
> 
> On 2019/6/21 4:50, Eric Biggers wrote:
> > +static int f2fs_begin_enable_verity(struct file *filp)
> > +{
> > +	struct inode *inode = file_inode(filp);
> > +	int err;
> > +
> 
> I think we'd better add condition here (under inode lock) to disallow enabling
> verity on atomic/volatile inode, as we may fail to write merkle tree data due to
> atomic/volatile inode's special writeback method.
> 

Yes, I'll add the following:

	if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
		return -EOPNOTSUPP;

> > +	err = f2fs_convert_inline_inode(inode);
> > +	if (err)
> > +		return err;
> > +
> > +	err = dquot_initialize(inode);
> > +	if (err)
> > +		return err;
> 
> We can get rid of dquot_initialize() here, since f2fs_file_open() ->
> dquot_file_open() should has initialized quota entry previously, right?

We still need it because dquot_file_open() only calls dquot_initialize() if the
file is being opened for writing.  But here the file descriptor is readonly.
I'll add a comment explaining this here and in the ext4 equivalent.

- Eric

^ permalink raw reply

* Re: Detecting the availability of VSYSCALL
From: Florian Weimer @ 2019-06-25 16:38 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: linux-api, kernel-hardening, linux-x86_64, linux-arch,
	Andy Lutomirski, Kees Cook, Carlos O'Donell, x86
In-Reply-To: <alpine.DEB.2.21.1906251824500.32342@nanos.tec.linutronix.de>

* Thomas Gleixner:

> On Tue, 25 Jun 2019, Florian Weimer wrote:
>> We're trying to create portable binaries which use VSYSCALL on older
>> kernels (to avoid performance regressions), but gracefully degrade to
>> full system calls on kernels which do not have VSYSCALL support compiled
>> in (or disabled at boot).
>>
>> For technical reasons, we cannot use vDSO fallback.  Trying vDSO first
>> and only then use VSYSCALL is the way this has been tackled in the past,
>> which is why this userspace ABI breakage goes generally unnoticed.  But
>> we don't have a dynamic linker in our scenario.
>
> I'm not following. On newer kernels which usually have vsyscall disabled
> you need to use real syscalls anyway, so why are you so worried about
> performance on older kernels. That doesn't make sense.

We want binaries that run fast on VSYSCALL kernels, but can fall back to
full system calls on kernels that do not have them (instead of
crashing).

We could parse the vDSO and prefer the functions found there, but this
is for the statically linked case.  We currently do not have a (minimal)
dynamic loader there in that version of the code base, so that doesn't
really work for us.

>> Is there any reliable way to detect that VSYSCALL is unavailable,
>> without resorting to parsing /proc/self/maps or opening file
>> descriptors?
>
> Not that I'm aware of except
>
>     sigaction(SIG_SEGV,....)
>
> /me hides

I know people do this for SIGILL to probe for CPU features, but yeah,
let's just not go there. 8-p

Thanks,
Florian

^ permalink raw reply

* Re: Detecting the availability of VSYSCALL
From: Thomas Gleixner @ 2019-06-25 16:30 UTC (permalink / raw)
  To: Florian Weimer
  Cc: linux-api, kernel-hardening, linux-x86_64, linux-arch,
	Andy Lutomirski, Kees Cook, Carlos O'Donell, x86
In-Reply-To: <87v9wty9v4.fsf@oldenburg2.str.redhat.com>

On Tue, 25 Jun 2019, Florian Weimer wrote:
> We're trying to create portable binaries which use VSYSCALL on older
> kernels (to avoid performance regressions), but gracefully degrade to
> full system calls on kernels which do not have VSYSCALL support compiled
> in (or disabled at boot).
>
> For technical reasons, we cannot use vDSO fallback.  Trying vDSO first
> and only then use VSYSCALL is the way this has been tackled in the past,
> which is why this userspace ABI breakage goes generally unnoticed.  But
> we don't have a dynamic linker in our scenario.

I'm not following. On newer kernels which usually have vsyscall disabled
you need to use real syscalls anyway, so why are you so worried about
performance on older kernels. That doesn't make sense.

> Is there any reliable way to detect that VSYSCALL is unavailable,
> without resorting to parsing /proc/self/maps or opening file
> descriptors?

Not that I'm aware of except

    sigaction(SIG_SEGV,....)

/me hides
 
> Should we try mapping something at the magic address (without MAP_FIXED)
> and see if we get back a different address?  Something in the auxiliary
> vector would work for us, too, but nothing seems to exists there
> unfortunately.

Would, but there is no such thing.

Thanks,

	tglx

^ permalink raw reply

* Detecting the availability of VSYSCALL
From: Florian Weimer @ 2019-06-25 15:15 UTC (permalink / raw)
  To: linux-api, kernel-hardening, linux-x86_64, linux-arch
  Cc: Andy Lutomirski, Kees Cook, Carlos O'Donell

We're trying to create portable binaries which use VSYSCALL on older
kernels (to avoid performance regressions), but gracefully degrade to
full system calls on kernels which do not have VSYSCALL support compiled
in (or disabled at boot).

For technical reasons, we cannot use vDSO fallback.  Trying vDSO first
and only then use VSYSCALL is the way this has been tackled in the past,
which is why this userspace ABI breakage goes generally unnoticed.  But
we don't have a dynamic linker in our scenario.

Is there any reliable way to detect that VSYSCALL is unavailable,
without resorting to parsing /proc/self/maps or opening file
descriptors?

Should we try mapping something at the magic address (without MAP_FIXED)
and see if we get back a different address?  Something in the auxiliary
vector would work for us, too, but nothing seems to exists there
unfortunately.

Thanks,
Florian

^ permalink raw reply

* Re: [PATCH V34 28/29] efi: Restrict efivar_ssdt_load when the kernel is locked down
From: Ard Biesheuvel @ 2019-06-25 15:00 UTC (permalink / raw)
  To: Matthew Garrett
  Cc: James Morris, linux-security-module, Linux Kernel Mailing List,
	linux-api, Matthew Garrett, linux-efi
In-Reply-To: <20190622000358.19895-29-matthewgarrett@google.com>

On Sat, 22 Jun 2019 at 02:05, Matthew Garrett <matthewgarrett@google.com> wrote:
>
> efivar_ssdt_load allows the kernel to import arbitrary ACPI code from an
> EFI variable, which gives arbitrary code execution in ring 0. Prevent
> that when the kernel is locked down.
>
> Signed-off-by: Matthew Garrett <mjg59@google.com>
> Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> Cc: linux-efi@vger.kernel.org

Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

> ---
>  drivers/firmware/efi/efi.c | 6 ++++++
>  1 file changed, 6 insertions(+)
>
> diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
> index 55b77c576c42..9f92a013ab27 100644
> --- a/drivers/firmware/efi/efi.c
> +++ b/drivers/firmware/efi/efi.c
> @@ -31,6 +31,7 @@
>  #include <linux/acpi.h>
>  #include <linux/ucs2_string.h>
>  #include <linux/memblock.h>
> +#include <linux/security.h>
>
>  #include <asm/early_ioremap.h>
>
> @@ -242,6 +243,11 @@ static void generic_ops_unregister(void)
>  static char efivar_ssdt[EFIVAR_SSDT_NAME_MAX] __initdata;
>  static int __init efivar_ssdt_setup(char *str)
>  {
> +       int ret = security_locked_down(LOCKDOWN_ACPI_TABLES);
> +
> +       if (ret)
> +               return ret;
> +
>         if (strlen(str) < sizeof(efivar_ssdt))
>                 memcpy(efivar_ssdt, str, strlen(str));
>         else
> --
> 2.22.0.410.gd8fdbe21b5-goog
>

^ permalink raw reply

* Re: [RFC PATCH 1/1] Revert "rseq/selftests: arm: use udf instruction for RSEQ_SIG"
From: Mathieu Desnoyers @ 2019-06-25 14:08 UTC (permalink / raw)
  To: Will Deacon
  Cc: shuah, linux-kernel, Peter Zijlstra, Thomas Gleixner,
	Joel Fernandes, Catalin Marinas, Dave Watson, Andi Kleen,
	linux-kselftest, H. Peter Anvin, Chris Lameter, Russell King,
	Michael Kerrisk, Paul E . McKenney, Paul Turner, Boqun Feng,
	Josh Triplett, rostedt, Ben Maurer, linux-api, Andy
In-Reply-To: <20190625091507.GA13263@fuggles.cambridge.arm.com>

----- On Jun 25, 2019, at 5:15 AM, Will Deacon will.deacon@arm.com wrote:

> On Mon, Jun 24, 2019 at 02:20:26PM -0400, Mathieu Desnoyers wrote:
>> ----- On Jun 24, 2019, at 1:24 PM, Will Deacon will.deacon@arm.com wrote:
>> 
>> > On Mon, Jun 17, 2019 at 05:23:04PM +0200, Mathieu Desnoyers wrote:
>> >> This reverts commit 2b845d4b4acd9422bbb668989db8dc36dfc8f438.
>> >> 
>> >> That commit introduces build issues for programs compiled in Thumb mode.
>> >> Rather than try to be clever and emit a valid trap instruction on arm32,
>> >> which requires special care about big/little endian handling on that
>> >> architecture, just emit plain data. Data in the instruction stream is
>> >> technically expected on arm32: this is how literal pools are
>> >> implemented. Reverting to the prior behavior does exactly that.
>> >> 
>> >> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
>> >> CC: Peter Zijlstra <peterz@infradead.org>
>> >> CC: Thomas Gleixner <tglx@linutronix.de>
>> >> CC: Joel Fernandes <joelaf@google.com>
>> >> CC: Catalin Marinas <catalin.marinas@arm.com>
>> >> CC: Dave Watson <davejwatson@fb.com>
>> >> CC: Will Deacon <will.deacon@arm.com>
>> >> CC: Shuah Khan <shuah@kernel.org>
>> >> CC: Andi Kleen <andi@firstfloor.org>
>> >> CC: linux-kselftest@vger.kernel.org
>> >> CC: "H . Peter Anvin" <hpa@zytor.com>
>> >> CC: Chris Lameter <cl@linux.com>
>> >> CC: Russell King <linux@arm.linux.org.uk>
>> >> CC: Michael Kerrisk <mtk.manpages@gmail.com>
>> >> CC: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
>> >> CC: Paul Turner <pjt@google.com>
>> >> CC: Boqun Feng <boqun.feng@gmail.com>
>> >> CC: Josh Triplett <josh@joshtriplett.org>
>> >> CC: Steven Rostedt <rostedt@goodmis.org>
>> >> CC: Ben Maurer <bmaurer@fb.com>
>> >> CC: linux-api@vger.kernel.org
>> >> CC: Andy Lutomirski <luto@amacapital.net>
>> >> CC: Andrew Morton <akpm@linux-foundation.org>
>> >> CC: Linus Torvalds <torvalds@linux-foundation.org>
>> >> CC: Carlos O'Donell <carlos@redhat.com>
>> >> CC: Florian Weimer <fweimer@redhat.com>
>> >> ---
>> >>  tools/testing/selftests/rseq/rseq-arm.h | 52 ++-------------------------------
>> >>  1 file changed, 2 insertions(+), 50 deletions(-)
>> >> 
>> >> diff --git a/tools/testing/selftests/rseq/rseq-arm.h
>> >> b/tools/testing/selftests/rseq/rseq-arm.h
>> >> index 84f28f147fb6..5f262c54364f 100644
>> >> --- a/tools/testing/selftests/rseq/rseq-arm.h
>> >> +++ b/tools/testing/selftests/rseq/rseq-arm.h
>> >> @@ -5,54 +5,7 @@
>> >>   * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
>> >>   */
>> >>  
>> >> -/*
>> >> - * RSEQ_SIG uses the udf A32 instruction with an uncommon immediate operand
>> >> - * value 0x5de3. This traps if user-space reaches this instruction by mistake,
>> >> - * and the uncommon operand ensures the kernel does not move the instruction
>> >> - * pointer to attacker-controlled code on rseq abort.
>> >> - *
>> >> - * The instruction pattern in the A32 instruction set is:
>> >> - *
>> >> - * e7f5def3    udf    #24035    ; 0x5de3
>> >> - *
>> >> - * This translates to the following instruction pattern in the T16 instruction
>> >> - * set:
>> >> - *
>> >> - * little endian:
>> >> - * def3        udf    #243      ; 0xf3
>> >> - * e7f5        b.n    <7f5>
>> >> - *
>> >> - * pre-ARMv6 big endian code:
>> >> - * e7f5        b.n    <7f5>
>> >> - * def3        udf    #243      ; 0xf3
>> >> - *
>> >> - * ARMv6+ -mbig-endian generates mixed endianness code vs data: little-endian
>> >> - * code and big-endian data. Ensure the RSEQ_SIG data signature matches code
>> >> - * endianness. Prior to ARMv6, -mbig-endian generates big-endian code and data
>> >> - * (which match), so there is no need to reverse the endianness of the data
>> >> - * representation of the signature. However, the choice between BE32 and BE8
>> >> - * is done by the linker, so we cannot know whether code and data endianness
>> >> - * will be mixed before the linker is invoked.
>> >> - */
>> >> -
>> >> -#define RSEQ_SIG_CODE	0xe7f5def3
>> >> -
>> >> -#ifndef __ASSEMBLER__
>> >> -
>> >> -#define RSEQ_SIG_DATA							\
>> >> -	({								\
>> >> -		int sig;						\
>> >> -		asm volatile ("b 2f\n\t"				\
>> >> -			      "1: .inst " __rseq_str(RSEQ_SIG_CODE) "\n\t" \
>> >> -			      "2:\n\t"					\
>> >> -			      "ldr %[sig], 1b\n\t"			\
>> >> -			      : [sig] "=r" (sig));			\
>> >> -		sig;							\
>> >> -	})
>> >> -
>> >> -#define RSEQ_SIG	RSEQ_SIG_DATA
>> >> -
>> >> -#endif
>> >> +#define RSEQ_SIG	0x53053053
>> > 
>> > I don't get why you're reverting back to this old signature value, when the
>> > one we came up with will work well when interpreted as an instruction in the
>> > *vast* majority of scenarios that people care about (A32/T32 little-endian).
>> > I think you might be under-estimating just how dead things like BE32 really
>> > are.
>> 
>> My issue is that the current .instr approach is broken for programs or functions
>> built in Thumb mode, and I received no feedback on the solutions I proposed for
>> those issues, which led me to propose a patch reverting to a simple .word.
> 
> I understand why you're moving from .inst to .word, but I don't understand
> why that necessitates a change in the value. Why not .word 0xe7f5def3 ? You
> could also flip the bytes around in case of big-endian, which would keep the
> instruction coding clean for BE8.

As long as we state and document that this should not be expected to generate
valid instructions on big endian prior to ARMv6, I'm OK with that approach, e.g.:

/*
 * - ARM little endian
 *
 * RSEQ_SIG uses the udf A32 instruction with an uncommon immediate operand
 * value 0x5de3. This traps if user-space reaches this instruction by mistake,
 * and the uncommon operand ensures the kernel does not move the instruction
 * pointer to attacker-controlled code on rseq abort.
 *
 * The instruction pattern in the A32 instruction set is:
 *
 * e7f5def3    udf    #24035    ; 0x5de3
 *
 * This translates to the following instruction pattern in the T16 instruction
 * set:
 *
 * little endian:
 * def3        udf    #243      ; 0xf3
 * e7f5        b.n    <7f5>
 *
 * - ARMv6+ big endian:
 *
 * ARMv6+ -mbig-endian generates mixed endianness code vs data: little-endian
 * code and big-endian data. The data value of the signature needs to have its
 * byte order reversed to generate the trap instruction:
 *
 * Data: 0xf3def5e7
 *
 * Translates to this A32 instruction pattern:
 *
 * e7f5def3    udf    #24035    ; 0x5de3
 *
 * Translates to this T16 instruction pattern:
 *
 * def3        udf    #243      ; 0xf3
 * e7f5        b.n    <7f5>
 *
 * - Prior to ARMv6 big endian:
 *
 * Prior to ARMv6, -mbig-endian generates big-endian code and data (which match),
 * so the endianness of the data representation of the signature should not be
 * reversed. However, the choice between BE32 and BE8 is done by the linker,
 * so we cannot know whether code and data endianness will be mixed before the
 * linker is invoked. So rather than try to play tricks with the linker, the rseq
 * signature is simply data (not a trap instruction) prior to ARMv6 on big endian.
 * This is why the signature is expressed as data (.word) rather than as instruction
 * (.inst) in assembler.
 */

#ifdef __ARMEB__
#define RSEQ_SIG    0xf3def5e7      /* udf    #24035    ; 0x5de3 (ARMv6+) */
#else
#define RSEQ_SIG    0xe7f5def3      /* udf    #24035    ; 0x5de3 */
#endif

> 
>> > That said, when you ran into .inst.n/.inst.w issues, did you try something
>> > along the lines of the WASM() macro we use in arch/arm/, which adds the ".w"
>> > suffix when targetting Thumb?
>> 
>> AFAIU, the WASM macros depend on CONFIG_THUMB2_KERNEL, which may be fine within
>> the kernel, but for user-space things are a bit more complex.
>> 
>> A compile-unit can be compiled as thumb, which will set a compiler define
>> which we could use to detect thumb mode. However, unfortunately, a single
>> function can also be compiled with an attribute selecting thumb mode, which
>> AFAIU does not influence the preprocessor defines.
> 
> Thanks, I hadn't considered that case. I don't know the right way to handle
> that in the toolchain, so using .word is probably the best bet in the
> absence of any better suggestions from the tools folks.

Emitting a no-op within an excluded section, and using the size of that no-op
to restore the original mode is the best way I found, but I find it rather
tricky and bug-prone, so I would rather prefer the .word approach.

Thoughts ?

Thanks,

Mathieu

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com

^ permalink raw reply

* Re: [PATCH 05/25] vfs: Implement parameter value retrieval with fsinfo() [ver #14]
From: Christian Brauner @ 2019-06-25  9:44 UTC (permalink / raw)
  To: David Howells
  Cc: viro, raven, mszeredi, linux-api, linux-fsdevel, linux-kernel
In-Reply-To: <156138537329.25627.5525420330768463735.stgit@warthog.procyon.org.uk>

On Mon, Jun 24, 2019 at 03:09:33PM +0100, David Howells wrote:
> Implement parameter value retrieval with fsinfo() - akin to parsing
> /proc/mounts.
> 
> This allows all the parameters to be retrieved in one go with:
> 
> 	struct fsinfo_params params = {
> 		.request	= FSINFO_ATTR_PARAMETER,
> 	};
> 
> Each parameter comes as a pair of blobs with a length tacked on the front
> rather than using separators, since any printable character that could be
> used as a separator can be found in some value somewhere (including comma).
> In fact, cifs allows the separator to be set using the "sep=" option in
> parameter parsing.
> 
> The length on the front of each blob is 1-3 bytes long.  Each byte has a
> flag in bit 7 that's set if there are more bytes and clear on the last
> byte; bits 0-6 should be shifted and OR'd into the length count.  The bytes
> are most-significant first.
> 
> For example, 0x83 0xf5 0x06 is the length (0x03<<14 | 0x75<<7 | 0x06).

Ok, but that is such a royal pain for userspace. Shouldn't we export a
uapi helper that they can use to parse out the length or even iterate
the string or something?

> 
> As mentioned, each parameter comes as a pair of blobs in key, value order.
> The value has length zero if not present.  So, for example:
> 
> 	\x08compress\x04zlib
> 
> from btrfs would be equivalent to "compress=zlib" and:
> 
> 	\x02ro\x00\x06noexec\x00
> 
> would be equivalent to "ro,noexec".
> 
> The test-fsinfo sample program is modified to dump the parameters.
> 
> Signed-off-by: David Howells <dhowells@redhat.com>
> ---
> 
>  fs/fsinfo.c                 |  122 +++++++++++++++++++++++++++++++++++++++++++
>  include/linux/fsinfo.h      |    4 +
>  include/uapi/linux/fsinfo.h |    1 
>  samples/vfs/test-fsinfo.c   |   38 +++++++++++++
>  4 files changed, 165 insertions(+)
> 
> diff --git a/fs/fsinfo.c b/fs/fsinfo.c
> index 9e2a25510b88..3b35cedab0df 100644
> --- a/fs/fsinfo.c
> +++ b/fs/fsinfo.c
> @@ -296,6 +296,32 @@ static int fsinfo_generic_param_enum(struct file_system_type *f,
>  	return sizeof(*p);
>  }
>  
> +void fsinfo_note_sb_params(struct fsinfo_kparams *params, unsigned int s_flags)
> +{
> +	if (s_flags & SB_DIRSYNC)
> +		fsinfo_note_param(params, "dirsync", NULL);
> +	if (s_flags & SB_LAZYTIME)
> +		fsinfo_note_param(params, "lazytime", NULL);
> +	if (s_flags & SB_MANDLOCK)
> +		fsinfo_note_param(params, "mand", NULL);
> +	if (s_flags & SB_POSIXACL)
> +		fsinfo_note_param(params, "posixacl", NULL);
> +	if (s_flags & SB_RDONLY)
> +		fsinfo_note_param(params, "ro", NULL);
> +	else
> +		fsinfo_note_param(params, "rw", NULL);
> +	if (s_flags & SB_SYNCHRONOUS)
> +		fsinfo_note_param(params, "sync", NULL);
> +}
> +EXPORT_SYMBOL(fsinfo_note_sb_params);
> +
> +static int fsinfo_generic_parameters(struct path *path,
> +				     struct fsinfo_kparams *params)
> +{
> +	fsinfo_note_sb_params(params, READ_ONCE(path->dentry->d_sb->s_flags));
> +	return params->usage;
> +}
> +
>  /*
>   * Implement some queries generically from stuff in the superblock.
>   */
> @@ -304,6 +330,7 @@ int generic_fsinfo(struct path *path, struct fsinfo_kparams *params)
>  	struct file_system_type *fs = path->dentry->d_sb->s_type;
>  
>  #define _gen(X, Y) FSINFO_ATTR_##X: return fsinfo_generic_##Y(path, params->buffer)
> +#define _genp(X, Y) FSINFO_ATTR_##X: return fsinfo_generic_##Y(path, params)
>  #define _genf(X, Y) FSINFO_ATTR_##X: return fsinfo_generic_##Y(fs, params)
>  
>  	switch (params->request) {
> @@ -319,6 +346,7 @@ int generic_fsinfo(struct path *path, struct fsinfo_kparams *params)
>  	case _genf(PARAM_DESCRIPTION,	param_description);
>  	case _genf(PARAM_SPECIFICATION,	param_specification);
>  	case _genf(PARAM_ENUM,		param_enum);
> +	case _genp(PARAMETERS,		parameters);
>  	default:
>  		return -EOPNOTSUPP;
>  	}
> @@ -358,8 +386,16 @@ static int vfs_fsinfo(struct path *path, struct fsinfo_kparams *params)
>  		return fsinfo(path, params);
>  
>  	while (!signal_pending(current)) {
> +		if (params->request == FSINFO_ATTR_PARAMETERS) {
> +			if (down_read_killable(&dentry->d_sb->s_umount) < 0)
> +				return -ERESTARTSYS;
> +		}
> +
>  		params->usage = 0;
>  		ret = fsinfo(path, params);
> +		if (params->request == FSINFO_ATTR_PARAMETERS)
> +			up_read(&dentry->d_sb->s_umount);
> +
>  		if (ret <= (int)params->buf_size)
>  			return ret; /* Error or it fitted */
>  		kvfree(params->buffer);
> @@ -529,6 +565,7 @@ static const struct fsinfo_attr_info fsinfo_buffer_info[FSINFO_ATTR__NR] = {
>  	FSINFO_STRUCT		(PARAM_DESCRIPTION,	param_description),
>  	FSINFO_STRUCT_N		(PARAM_SPECIFICATION,	param_specification),
>  	FSINFO_STRUCT_N		(PARAM_ENUM,		param_enum),
> +	FSINFO_OPAQUE		(PARAMETERS,		-),
>  };
>  
>  /**
> @@ -676,3 +713,88 @@ SYSCALL_DEFINE5(fsinfo,
>  error:
>  	return ret;
>  }
> +
> +/*
> + * Store a parameter into the user's parameter buffer.  The key is prefixed by
> + * a single byte length (1-127) and the value by one (0-0x7f) or two bytes
> + * (0x80-0x3fff) or three bytes (0x4000-0x1fffff).
> + *
> + * Note that we must always make the size determination, even if the buffer is
> + * already full, so that we can tell the caller how much buffer we actually
> + * need.
> + */
> +static void __fsinfo_note_param(struct fsinfo_kparams *params, const char *key,
> +				const char *val, unsigned int vlen)
> +{
> +	char *p;
> +	unsigned int usage;
> +	int klen, total, vmeta;
> +	u8 x;
> +
> +	klen = strlen(key);
> +	BUG_ON(klen < 1 || klen > 127);
> +	BUG_ON(vlen > (1 << 21) - 1);
> +	BUG_ON(vlen > 0 && !val);
> +
> +	vmeta = (vlen <= 127) ? 1 : (vlen <= 127 * 127) ? 2 : 3;
> +
> +	total = 1 + klen + vmeta + vlen;
> +
> +	usage = params->usage;
> +	params->usage = usage + total;
> +	if (!params->buffer || params->usage > params->buf_size)
> +		return;
> +
> +	p = params->buffer + usage;
> +	*p++ = klen;
> +	p = memcpy(p, key, klen);
> +	p += klen;
> +
> +	/* The more significant groups of 7 bits in the size are included in
> +	 * most->least order with 0x80 OR'd in.  The least significant 7 bits
> +	 * are last with the top bit clear.
> +	 */
> +	x = vlen >> 14;
> +	if (x & 0x7f)
> +		*p++ = 0x80 | x;
> +
> +	x = vlen >> 7;
> +	if (x & 0x7f)
> +		*p++ = 0x80 | x;
> +
> +	*p++ = vlen & 0x7f;
> +	memcpy(p, val, vlen);
> +}
> +
> +/**
> + * fsinfo_note_param - Store a parameter for FSINFO_ATTR_PARAMETERS
> + * @params: The parameter buffer
> + * @key: The parameter's key
> + * @val: The parameter's value (or NULL)
> + */
> +void fsinfo_note_param(struct fsinfo_kparams *params, const char *key,
> +		       const char *val)
> +{
> +	__fsinfo_note_param(params, key, val, val ? strlen(val) : 0);
> +}
> +EXPORT_SYMBOL(fsinfo_note_param);
> +
> +/**
> + * fsinfo_note_paramf - Store a formatted parameter for FSINFO_ATTR_PARAMETERS
> + * @params: The parameter buffer
> + * @key: The parameter's key
> + * @val_fmt: Format string for the parameter's value
> + */
> +void fsinfo_note_paramf(struct fsinfo_kparams *params, const char *key,
> +			const char *val_fmt, ...)
> +{
> +	va_list va;
> +	int n;
> +
> +	va_start(va, val_fmt);
> +	n = vsnprintf(params->scratch_buffer, 4096, val_fmt, va);
> +	va_end(va);
> +
> +	__fsinfo_note_param(params, key, params->scratch_buffer, n);
> +}
> +EXPORT_SYMBOL(fsinfo_note_paramf);
> diff --git a/include/linux/fsinfo.h b/include/linux/fsinfo.h
> index e17e4f0bae18..731931afbf1c 100644
> --- a/include/linux/fsinfo.h
> +++ b/include/linux/fsinfo.h
> @@ -29,6 +29,10 @@ struct fsinfo_kparams {
>  };
>  
>  extern int generic_fsinfo(struct path *, struct fsinfo_kparams *);
> +extern void fsinfo_note_sb_params(struct fsinfo_kparams *, unsigned int);
> +extern void fsinfo_note_param(struct fsinfo_kparams *, const char *, const char *);
> +extern void fsinfo_note_paramf(struct fsinfo_kparams *, const char *, const char *, ...)
> +	__printf(3, 4);
>  
>  static inline void fsinfo_set_cap(struct fsinfo_capabilities *c,
>  				  enum fsinfo_capability cap)
> diff --git a/include/uapi/linux/fsinfo.h b/include/uapi/linux/fsinfo.h
> index 9d929d2f7eee..475cd1c97b12 100644
> --- a/include/uapi/linux/fsinfo.h
> +++ b/include/uapi/linux/fsinfo.h
> @@ -30,6 +30,7 @@ enum fsinfo_attribute {
>  	FSINFO_ATTR_PARAM_DESCRIPTION	= 12,	/* General fs parameter description */
>  	FSINFO_ATTR_PARAM_SPECIFICATION	= 13,	/* Nth parameter specification */
>  	FSINFO_ATTR_PARAM_ENUM		= 14,	/* Nth enum-to-val */
> +	FSINFO_ATTR_PARAMETERS		= 15,	/* Mount parameters (large string) */
>  	FSINFO_ATTR__NR
>  };
>  
> diff --git a/samples/vfs/test-fsinfo.c b/samples/vfs/test-fsinfo.c
> index 3c6ea3a5c157..8cf5b02e333a 100644
> --- a/samples/vfs/test-fsinfo.c
> +++ b/samples/vfs/test-fsinfo.c
> @@ -81,6 +81,7 @@ static const struct fsinfo_attr_info fsinfo_buffer_info[FSINFO_ATTR__NR] = {
>  	FSINFO_STRUCT		(PARAM_DESCRIPTION,	param_description),
>  	FSINFO_STRUCT_N		(PARAM_SPECIFICATION,	param_specification),
>  	FSINFO_STRUCT_N		(PARAM_ENUM,		param_enum),
> +	FSINFO_OVERLARGE	(PARAMETERS,		-),
>  };
>  
>  #define FSINFO_NAME(X,Y) [FSINFO_ATTR_##X] = #Y
> @@ -100,6 +101,7 @@ static const char *fsinfo_attr_names[FSINFO_ATTR__NR] = {
>  	FSINFO_NAME		(PARAM_DESCRIPTION,	param_description),
>  	FSINFO_NAME		(PARAM_SPECIFICATION,	param_specification),
>  	FSINFO_NAME		(PARAM_ENUM,		param_enum),
> +	FSINFO_NAME		(PARAMETERS,		parameters),
>  };
>  
>  union reply {
> @@ -352,6 +354,34 @@ static void dump_fsinfo(enum fsinfo_attribute attr,
>  	dumper(r, size);
>  }
>  
> +static void dump_params(struct fsinfo_attr_info about, union reply *r, int size)
> +{
> +	int len;
> +	char *p = r->buffer, *e = p + size;
> +	bool is_key = true;
> +
> +	while (p < e) {
> +		len = 0;
> +		while (p[0] & 0x80) {
> +			len <<= 7;
> +			len |= *p++ & 0x7f;
> +		}
> +
> +		len <<= 7;
> +		len |= *p++;
> +		if (len > e - p)
> +			break;
> +		if (is_key || len)
> +			printf("%s%*.*s", is_key ? "[PARM] " : "= ", len, len, p);
> +		if (is_key)
> +			putchar(' ');
> +		else
> +			putchar('\n');
> +		p += len;
> +		is_key = !is_key;
> +	}
> +}
> +
>  /*
>   * Try one subinstance of an attribute.
>   */
> @@ -427,6 +457,12 @@ static int try_one(const char *file, struct fsinfo_params *params, bool raw)
>  		return 0;
>  	}
>  
> +	switch (params->request) {
> +	case FSINFO_ATTR_PARAMETERS:
> +		if (ret == 0)
> +			return 0;
> +	}
> +
>  	switch (about.flags & (__FSINFO_N | __FSINFO_NM)) {
>  	case 0:
>  		printf("\e[33m%s\e[m: ",
> @@ -469,6 +505,8 @@ static int try_one(const char *file, struct fsinfo_params *params, bool raw)
>  		return 0;
>  
>  	case __FSINFO_OVER:
> +		if (params->request == FSINFO_ATTR_PARAMETERS)
> +			dump_params(about, r, ret);
>  		return 0;
>  
>  	case __FSINFO_STRUCT_ARRAY:
> 

^ permalink raw reply

* Re: [PATCH 04/25] vfs: Allow fsinfo() to be used to query an fs parameter description [ver #14]
From: Christian Brauner @ 2019-06-25  9:40 UTC (permalink / raw)
  To: David Howells
  Cc: viro, raven, mszeredi, linux-api, linux-fsdevel, linux-kernel
In-Reply-To: <156138536153.25627.3397429813740738568.stgit@warthog.procyon.org.uk>

On Mon, Jun 24, 2019 at 03:09:21PM +0100, David Howells wrote:
> Provide fsinfo() attributes that can be used to query a filesystem
> parameter description.  To do this, fsinfo() can be called on an

Can you give a usecase for this?
Wouldn't it be more helpful if fsinfo() when used to query a filesystem
without a created or attached superblock could be used to query options
the underlying fs supports so that users know what they can fsconfig()?
(Maybe that's not possible because you'd already need to have a
superblock created or attached in this case.)

> fs_context that doesn't yet have a superblock created and attached.
> 
> It can be obtained by doing, for example:
> 
> 	fd = fsopen("ext4", 0);
> 
> 	struct fsinfo_param_name name;
> 	struct fsinfo_params params = {
> 		.at_flags = AT_FSINFO_FROM_FSOPEN,
> 		.request = fsinfo_attr_param_name,
> 		.Nth	 = 3,
> 	};
> 	fsinfo(fd, NULL, &params, &name, sizeof(name));
> 
> to query the 4th parameter name in the name to parameter ID mapping table.
> 
> Signed-off-by: David Howells <dhowells@redhat.com>
> ---
> 
>  fs/fsinfo.c                 |  101 +++++++++++++++++++++++++++++++
>  include/uapi/linux/fsinfo.h |   58 ++++++++++++++++++
>  samples/vfs/Makefile        |    2 +
>  samples/vfs/test-fs-query.c |  138 +++++++++++++++++++++++++++++++++++++++++++
>  samples/vfs/test-fsinfo.c   |   14 ++++
>  5 files changed, 311 insertions(+), 2 deletions(-)
>  create mode 100644 samples/vfs/test-fs-query.c
> 
> diff --git a/fs/fsinfo.c b/fs/fsinfo.c
> index c24701f994d1..9e2a25510b88 100644
> --- a/fs/fsinfo.c
> +++ b/fs/fsinfo.c
> @@ -9,6 +9,7 @@
>  #include <linux/uaccess.h>
>  #include <linux/fsinfo.h>
>  #include <linux/fs_context.h>
> +#include <linux/fs_parser.h>
>  #include <uapi/linux/mount.h>
>  #include "internal.h"
>  
> @@ -223,12 +224,87 @@ static int fsinfo_generic_name_encoding(struct path *path, char *buf)
>  	return sizeof(encoding) - 1;
>  }
>  
> +static int fsinfo_generic_param_description(struct file_system_type *f,
> +					    struct fsinfo_kparams *params)
> +{
> +	const struct fs_parameter_description *desc = f->parameters;
> +	const struct fs_parameter_spec *s;
> +	const struct fs_parameter_enum *e;

nit: *s can probably be moved inside the first if() and *e into the second
if().

> +	struct fsinfo_param_description *p = params->buffer;
> +
> +	if (desc && desc->specs) {
> +		for (s = desc->specs; s->name; s++) {}
> +		p->nr_params = s - desc->specs;
> +		if (desc->enums) {
> +			for (e = desc->enums; e->name[0]; e++) {}
> +			p->nr_enum_names = e - desc->enums;
> +		}
> +	}
> +
> +	return sizeof(*p);
> +}
> +
> +static int fsinfo_generic_param_specification(struct file_system_type *f,
> +					      struct fsinfo_kparams *params)
> +{
> +	const struct fs_parameter_description *desc = f->parameters;
> +	const struct fs_parameter_spec *s;
> +	struct fsinfo_param_specification *p = params->buffer;
> +	unsigned int nth = params->Nth;
> +
> +	if (!desc || !desc->specs)
> +		return -ENODATA;
> +
> +	for (s = desc->specs; s->name; s++) {
> +		if (nth == 0)
> +			goto found;
> +		nth--;
> +	}
> +
> +	return -ENODATA;
> +
> +found:
> +	p->type = s->type;
> +	p->flags = s->flags;
> +	p->opt = s->opt;
> +	strlcpy(p->name, s->name, sizeof(p->name));
> +	return sizeof(*p);
> +}
> +
> +static int fsinfo_generic_param_enum(struct file_system_type *f,
> +				     struct fsinfo_kparams *params)
> +{
> +	const struct fs_parameter_description *desc = f->parameters;
> +	const struct fs_parameter_enum *e;
> +	struct fsinfo_param_enum *p = params->buffer;
> +	unsigned int nth = params->Nth;
> +
> +	if (!desc || !desc->enums)
> +		return -ENODATA;
> +
> +	for (e = desc->enums; e->name; e++) {
> +		if (nth == 0)
> +			goto found;
> +		nth--;
> +	}
> +
> +	return -ENODATA;
> +
> +found:
> +	p->opt = e->opt;
> +	strlcpy(p->name, e->name, sizeof(p->name));
> +	return sizeof(*p);
> +}
> +
>  /*
>   * Implement some queries generically from stuff in the superblock.
>   */
>  int generic_fsinfo(struct path *path, struct fsinfo_kparams *params)
>  {
> +	struct file_system_type *fs = path->dentry->d_sb->s_type;
> +
>  #define _gen(X, Y) FSINFO_ATTR_##X: return fsinfo_generic_##Y(path, params->buffer)
> +#define _genf(X, Y) FSINFO_ATTR_##X: return fsinfo_generic_##Y(fs, params)
>  
>  	switch (params->request) {
>  	case _gen(STATFS,		statfs);
> @@ -240,6 +316,9 @@ int generic_fsinfo(struct path *path, struct fsinfo_kparams *params)
>  	case _gen(VOLUME_UUID,		volume_uuid);
>  	case _gen(VOLUME_ID,		volume_id);
>  	case _gen(NAME_ENCODING,	name_encoding);
> +	case _genf(PARAM_DESCRIPTION,	param_description);
> +	case _genf(PARAM_SPECIFICATION,	param_specification);
> +	case _genf(PARAM_ENUM,		param_enum);
>  	default:
>  		return -EOPNOTSUPP;
>  	}
> @@ -342,10 +421,11 @@ static int vfs_fsinfo_fd(unsigned int fd, struct fsinfo_kparams *params)
>  }
>  
>  /*
> - * Allow access to an fs_context object as created by fsopen() or fspick().
> + * Allow an fs_context object as created by fsopen() or fspick() to be queried.
>   */
>  static int vfs_fsinfo_fscontext(int fd, struct fsinfo_kparams *params)
>  {
> +	struct file_system_type *fs;
>  	struct fs_context *fc;
>  	struct fd f = fdget(fd);
>  	int ret;
> @@ -354,12 +434,26 @@ static int vfs_fsinfo_fscontext(int fd, struct fsinfo_kparams *params)
>  		return -EBADF;
>  
>  	ret = -EINVAL;
> -	if (f.file->f_op == &fscontext_fops)
> +	if (f.file->f_op != &fscontext_fops)

Here you're fixing the error I pointed out in the other patch (I believe
[03/<max>].). Probably best to fix it in the original patch. ;)

>  		goto out_f;
> +	fc = f.file->private_data;
> +	fs = fc->fs_type;
> +
>  	ret = -EOPNOTSUPP;
>  	if (fc->ops == &legacy_fs_context_ops)
>  		goto out_f;
>  
> +	/* Filesystem parameter query is static information and doesn't need a
> +	 * lock to read it, nor even a dentry or superblock.
> +	 */
> +	switch (params->request) {
> +	case _genf(PARAM_DESCRIPTION,	param_description);
> +	case _genf(PARAM_SPECIFICATION,	param_specification);
> +	case _genf(PARAM_ENUM,		param_enum);
> +	default:
> +		break;
> +	}
> +
>  	ret = mutex_lock_interruptible(&fc->uapi_mutex);
>  	if (ret == 0) {
>  		ret = -EIO;
> @@ -432,6 +526,9 @@ static const struct fsinfo_attr_info fsinfo_buffer_info[FSINFO_ATTR__NR] = {
>  	FSINFO_STRING		(VOLUME_NAME,		-),
>  	FSINFO_STRING		(NAME_ENCODING,		-),
>  	FSINFO_STRING		(NAME_CODEPAGE,		-),
> +	FSINFO_STRUCT		(PARAM_DESCRIPTION,	param_description),
> +	FSINFO_STRUCT_N		(PARAM_SPECIFICATION,	param_specification),
> +	FSINFO_STRUCT_N		(PARAM_ENUM,		param_enum),
>  };
>  
>  /**
> diff --git a/include/uapi/linux/fsinfo.h b/include/uapi/linux/fsinfo.h
> index a7a7c731d992..9d929d2f7eee 100644
> --- a/include/uapi/linux/fsinfo.h
> +++ b/include/uapi/linux/fsinfo.h
> @@ -27,6 +27,9 @@ enum fsinfo_attribute {
>  	FSINFO_ATTR_VOLUME_NAME		= 9,	/* Volume name (string) */
>  	FSINFO_ATTR_NAME_ENCODING	= 10,	/* Filename encoding (string) */
>  	FSINFO_ATTR_NAME_CODEPAGE	= 11,	/* Filename codepage (string) */
> +	FSINFO_ATTR_PARAM_DESCRIPTION	= 12,	/* General fs parameter description */
> +	FSINFO_ATTR_PARAM_SPECIFICATION	= 13,	/* Nth parameter specification */
> +	FSINFO_ATTR_PARAM_ENUM		= 14,	/* Nth enum-to-val */
>  	FSINFO_ATTR__NR
>  };
>  
> @@ -216,4 +219,59 @@ struct fsinfo_fsinfo {
>  	__u32	max_cap;	/* Number of supported capabilities (fsinfo_cap__nr) */
>  };
>  
> +/*
> + * Information struct for fsinfo(fsinfo_attr_param_description).
> + *
> + * Query the parameter set for a filesystem.
> + */
> +struct fsinfo_param_description {
> +	__u32		nr_params;		/* Number of individual parameters */
> +	__u32		nr_enum_names;		/* Number of enum names  */
> +};
> +
> +/*
> + * Information struct for fsinfo(fsinfo_attr_param_specification).
> + *
> + * Query the specification of the Nth filesystem parameter.
> + */
> +struct fsinfo_param_specification {
> +	__u32		type;		/* enum fsinfo_param_specification_type */
> +	__u32		flags;		/* Qualifiers */
> +	__u32		opt;		/* Corresponding params have same ID here */
> +	char		name[240];
> +};
> +
> +enum fsinfo_param_specification_type {
> +	FSINFO_PARAM_SPEC_NOT_DEFINED		= 0,
> +	FSINFO_PARAM_SPEC_IS_FLAG		= 1,
> +	FSINFO_PARAM_SPEC_IS_BOOL		= 2,
> +	FSINFO_PARAM_SPEC_IS_U32		= 3,
> +	FSINFO_PARAM_SPEC_IS_U32_OCTAL		= 4,
> +	FSINFO_PARAM_SPEC_IS_U32_HEX		= 5,
> +	FSINFO_PARAM_SPEC_IS_S32		= 6,
> +	FSINFO_PARAM_SPEC_IS_U64		= 7,
> +	FSINFO_PARAM_SPEC_IS_ENUM		= 8,
> +	FSINFO_PARAM_SPEC_IS_STRING		= 9,
> +	FSINFO_PARAM_SPEC_IS_BLOB		= 10,
> +	FSINFO_PARAM_SPEC_IS_BLOCKDEV		= 11,
> +	FSINFO_PARAM_SPEC_IS_PATH		= 12,
> +	FSINFO_PARAM_SPEC_IS_FD			= 13,
> +	NR__FSINFO_PARAM_SPEC
> +};
> +
> +#define FSINFO_PARAM_SPEC_VALUE_IS_OPTIONAL	0X00000001
> +#define FSINFO_PARAM_SPEC_PREFIX_NO_IS_NEG	0X00000002
> +#define FSINFO_PARAM_SPEC_EMPTY_STRING_IS_NEG	0X00000004
> +#define FSINFO_PARAM_SPEC_DEPRECATED		0X00000008
> +
> +/*
> + * Information struct for fsinfo(fsinfo_attr_param_enum).
> + *
> + * Query the Nth filesystem enum parameter value name.
> + */
> +struct fsinfo_param_enum {
> +	__u32		opt;		/* ->opt of the relevant parameter specification */
> +	char		name[252];	/* Name of the enum value */
> +};
> +
>  #endif /* _UAPI_LINUX_FSINFO_H */
> diff --git a/samples/vfs/Makefile b/samples/vfs/Makefile
> index d3cc8e9a4fd8..3c542d3b9479 100644
> --- a/samples/vfs/Makefile
> +++ b/samples/vfs/Makefile
> @@ -1,6 +1,7 @@
>  # List of programs to build
>  hostprogs-y := \
>  	test-fsinfo \
> +	test-fs-query \
>  	test-fsmount \
>  	test-statx
>  
> @@ -10,5 +11,6 @@ always := $(hostprogs-y)
>  HOSTCFLAGS_test-fsinfo.o += -I$(objtree)/usr/include
>  HOSTLDLIBS_test-fsinfo += -lm
>  
> +HOSTCFLAGS_test-fs-query.o += -I$(objtree)/usr/include
>  HOSTCFLAGS_test-fsmount.o += -I$(objtree)/usr/include
>  HOSTCFLAGS_test-statx.o += -I$(objtree)/usr/include
> diff --git a/samples/vfs/test-fs-query.c b/samples/vfs/test-fs-query.c
> new file mode 100644
> index 000000000000..7572411ddb7e
> --- /dev/null
> +++ b/samples/vfs/test-fs-query.c
> @@ -0,0 +1,138 @@
> +/* Test using the fsinfo() system call to query mount parameters.
> + *
> + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
> + * Written by David Howells (dhowells@redhat.com)
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public Licence
> + * as published by the Free Software Foundation; either version
> + * 2 of the Licence, or (at your option) any later version.
> + */
> +
> +#define _GNU_SOURCE
> +#define _ATFILE_SOURCE
> +#include <stdbool.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <stdint.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <ctype.h>
> +#include <errno.h>
> +#include <time.h>
> +#include <math.h>
> +#include <sys/syscall.h>
> +#include <linux/fsinfo.h>
> +#include <linux/fcntl.h>
> +#include <sys/stat.h>
> +
> +#ifndef __NR_fsopen
> +#define __NR_fsopen -1
> +#endif
> +#ifndef __NR_fsinfo
> +#define __NR_fsinfo -1
> +#endif
> +
> +static int fsopen(const char *fs_name, unsigned int flags)
> +{
> +	return syscall(__NR_fsopen, fs_name, flags);
> +}
> +
> +static ssize_t fsinfo(int dfd, const char *filename, struct fsinfo_params *params,
> +		      void *buffer, size_t buf_size)
> +{
> +	return syscall(__NR_fsinfo, dfd, filename, params, buffer, buf_size);
> +}
> +
> +static const char *param_types[NR__FSINFO_PARAM_SPEC] = {
> +	[FSINFO_PARAM_SPEC_NOT_DEFINED]		= "?undef",
> +	[FSINFO_PARAM_SPEC_IS_FLAG]		= "flag",
> +	[FSINFO_PARAM_SPEC_IS_BOOL]		= "bool",
> +	[FSINFO_PARAM_SPEC_IS_U32]		= "u32",
> +	[FSINFO_PARAM_SPEC_IS_U32_OCTAL]	= "octal",
> +	[FSINFO_PARAM_SPEC_IS_U32_HEX]		= "hex",
> +	[FSINFO_PARAM_SPEC_IS_S32]		= "s32",
> +	[FSINFO_PARAM_SPEC_IS_U64]		= "u64",
> +	[FSINFO_PARAM_SPEC_IS_ENUM]		= "enum",
> +	[FSINFO_PARAM_SPEC_IS_STRING]		= "string",
> +	[FSINFO_PARAM_SPEC_IS_BLOB]		= "binary",
> +	[FSINFO_PARAM_SPEC_IS_BLOCKDEV]		= "blockdev",
> +	[FSINFO_PARAM_SPEC_IS_PATH]		= "path",
> +	[FSINFO_PARAM_SPEC_IS_FD]		= "fd",
> +};
> +
> +/*
> + *
> + */
> +int main(int argc, char **argv)
> +{
> +	struct fsinfo_param_description desc;
> +	struct fsinfo_param_specification spec;
> +	struct fsinfo_param_enum enum_name;
> +
> +	struct fsinfo_params params = {
> +		.at_flags = AT_FSINFO_FROM_FSOPEN,
> +	};
> +	int fd;
> +
> +	if (argc != 2) {
> +		printf("Format: test-fs-query <fs_name>\n");
> +		exit(2);
> +	}
> +
> +	fd = fsopen(argv[1], 0);
> +	if (fd == -1) {
> +		perror(argv[1]);
> +		exit(1);
> +	}
> +
> +	params.request = FSINFO_ATTR_PARAM_DESCRIPTION;
> +	if (fsinfo(fd, NULL, &params, &desc, sizeof(desc)) == -1) {
> +		perror("fsinfo/desc");
> +		exit(1);
> +	}
> +
> +	printf("Filesystem %s has %u parameters\n", argv[1], desc.nr_params);
> +
> +	params.request = FSINFO_ATTR_PARAM_SPECIFICATION;
> +	for (params.Nth = 0; params.Nth < desc.nr_params; params.Nth++) {
> +		char type[32];
> +
> +		if (fsinfo(fd, NULL, &params, &spec, sizeof(spec)) == -1) {
> +			if (errno == ENODATA)
> +				break;
> +			perror("fsinfo/spec");
> +			exit(1);
> +		}
> +
> +		if (spec.type < NR__FSINFO_PARAM_SPEC)
> +			strcpy(type, param_types[spec.type]);
> +		else
> +			sprintf(type, "?%u", spec.type);
> +
> +		printf("- PARAM[%3u] %-20s %3u %s%s%s%s%s\n",
> +		       params.Nth,
> +		       spec.name,
> +		       spec.opt,
> +		       type,
> +		       spec.flags & FSINFO_PARAM_SPEC_VALUE_IS_OPTIONAL ? ",opt" : "",
> +		       spec.flags & FSINFO_PARAM_SPEC_PREFIX_NO_IS_NEG ? ",neg-no" : "",
> +		       spec.flags & FSINFO_PARAM_SPEC_EMPTY_STRING_IS_NEG ? ",neg-empty" : "",
> +		       spec.flags & FSINFO_PARAM_SPEC_DEPRECATED ? ",dep" : "");
> +	}
> +
> +	printf("Filesystem has %u enumeration values\n", desc.nr_enum_names);
> +
> +	params.request = FSINFO_ATTR_PARAM_ENUM;
> +	for (params.Nth = 0; params.Nth < desc.nr_enum_names; params.Nth++) {
> +		if (fsinfo(fd, NULL, &params, &enum_name, sizeof(enum_name)) == -1) {
> +			if (errno == ENODATA)
> +				break;
> +			perror("fsinfo/enum");
> +			exit(1);
> +		}
> +		printf("- ENUM[%3u] %3u: %s\n",
> +		       params.Nth, enum_name.opt, enum_name.name);
> +	}
> +	return 0;
> +}
> diff --git a/samples/vfs/test-fsinfo.c b/samples/vfs/test-fsinfo.c
> index 8cce1986df7e..3c6ea3a5c157 100644
> --- a/samples/vfs/test-fsinfo.c
> +++ b/samples/vfs/test-fsinfo.c
> @@ -78,6 +78,9 @@ static const struct fsinfo_attr_info fsinfo_buffer_info[FSINFO_ATTR__NR] = {
>  	FSINFO_STRING		(VOLUME_NAME,		volume_name),
>  	FSINFO_STRING		(NAME_ENCODING,		name_encoding),
>  	FSINFO_STRING		(NAME_CODEPAGE,		name_codepage),
> +	FSINFO_STRUCT		(PARAM_DESCRIPTION,	param_description),
> +	FSINFO_STRUCT_N		(PARAM_SPECIFICATION,	param_specification),
> +	FSINFO_STRUCT_N		(PARAM_ENUM,		param_enum),
>  };
>  
>  #define FSINFO_NAME(X,Y) [FSINFO_ATTR_##X] = #Y
> @@ -94,6 +97,9 @@ static const char *fsinfo_attr_names[FSINFO_ATTR__NR] = {
>  	FSINFO_NAME		(VOLUME_NAME,		volume_name),
>  	FSINFO_NAME		(NAME_ENCODING,		name_encoding),
>  	FSINFO_NAME		(NAME_CODEPAGE,		name_codepage),
> +	FSINFO_NAME		(PARAM_DESCRIPTION,	param_description),
> +	FSINFO_NAME		(PARAM_SPECIFICATION,	param_specification),
> +	FSINFO_NAME		(PARAM_ENUM,		param_enum),
>  };
>  
>  union reply {
> @@ -514,6 +520,14 @@ int main(int argc, char **argv)
>  	}
>  
>  	for (attr = 0; attr <= FSINFO_ATTR__NR; attr++) {
> +		switch (attr) {
> +		case FSINFO_ATTR_PARAM_DESCRIPTION:
> +		case FSINFO_ATTR_PARAM_SPECIFICATION:
> +		case FSINFO_ATTR_PARAM_ENUM:
> +			/* See test-fs-query.c instead */
> +			continue;
> +		}
> +
>  		Nth = 0;
>  		do {
>  			Mth = 0;
> 

^ permalink raw reply

* Re: [PATCH 02/25] fsinfo: Add syscalls to other arches [ver #14]
From: Christian Brauner @ 2019-06-25  9:31 UTC (permalink / raw)
  To: David Howells
  Cc: viro, raven, mszeredi, linux-api, linux-fsdevel, linux-kernel
In-Reply-To: <156138534520.25627.6299627770679918852.stgit@warthog.procyon.org.uk>

On Mon, Jun 24, 2019 at 03:09:05PM +0100, David Howells wrote:
> Add the fsinfo syscall to the other arches.
> 
> Signed-off-by: David Howells <dhowells@redhat.com>
> ---
> 
>  arch/alpha/kernel/syscalls/syscall.tbl      |    1 +
>  arch/arm/tools/syscall.tbl                  |    1 +
>  arch/arm64/include/asm/unistd.h             |    2 +-

I think you missed

arch/arm64/include/asm/unistd32.h

?

>  arch/ia64/kernel/syscalls/syscall.tbl       |    1 +
>  arch/m68k/kernel/syscalls/syscall.tbl       |    1 +
>  arch/microblaze/kernel/syscalls/syscall.tbl |    1 +
>  arch/mips/kernel/syscalls/syscall_n32.tbl   |    1 +
>  arch/mips/kernel/syscalls/syscall_n64.tbl   |    1 +
>  arch/mips/kernel/syscalls/syscall_o32.tbl   |    1 +
>  arch/parisc/kernel/syscalls/syscall.tbl     |    1 +
>  arch/powerpc/kernel/syscalls/syscall.tbl    |    1 +
>  arch/s390/kernel/syscalls/syscall.tbl       |    1 +
>  arch/sh/kernel/syscalls/syscall.tbl         |    1 +
>  arch/sparc/kernel/syscalls/syscall.tbl      |    1 +
>  arch/xtensa/kernel/syscalls/syscall.tbl     |    1 +
>  15 files changed, 15 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
> index 9e7704e44f6d..624d01c3c8eb 100644
> --- a/arch/alpha/kernel/syscalls/syscall.tbl
> +++ b/arch/alpha/kernel/syscalls/syscall.tbl
> @@ -473,3 +473,4 @@
>  541	common	fsconfig			sys_fsconfig
>  542	common	fsmount				sys_fsmount
>  543	common	fspick				sys_fspick
> +544	common	fsinfo				sys_fsinfo
> diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
> index aaf479a9e92d..ad608b49808c 100644
> --- a/arch/arm/tools/syscall.tbl
> +++ b/arch/arm/tools/syscall.tbl
> @@ -447,3 +447,4 @@
>  431	common	fsconfig			sys_fsconfig
>  432	common	fsmount				sys_fsmount
>  433	common	fspick				sys_fspick
> +434	common	fsinfo				sys_fsinfo
> diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
> index 70e6882853c0..e8f7d95a1481 100644
> --- a/arch/arm64/include/asm/unistd.h
> +++ b/arch/arm64/include/asm/unistd.h
> @@ -44,7 +44,7 @@
>  #define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE + 5)
>  #define __ARM_NR_COMPAT_END		(__ARM_NR_COMPAT_BASE + 0x800)
>  
> -#define __NR_compat_syscalls		434
> +#define __NR_compat_syscalls		435
>  #endif
>  
>  #define __ARCH_WANT_SYS_CLONE
> diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
> index e01df3f2f80d..68314763ad16 100644
> --- a/arch/ia64/kernel/syscalls/syscall.tbl
> +++ b/arch/ia64/kernel/syscalls/syscall.tbl
> @@ -354,3 +354,4 @@
>  431	common	fsconfig			sys_fsconfig
>  432	common	fsmount				sys_fsmount
>  433	common	fspick				sys_fspick
> +434	common	fsinfo				sys_fsinfo
> diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
> index 7e3d0734b2f3..ee73a7534b1b 100644
> --- a/arch/m68k/kernel/syscalls/syscall.tbl
> +++ b/arch/m68k/kernel/syscalls/syscall.tbl
> @@ -433,3 +433,4 @@
>  431	common	fsconfig			sys_fsconfig
>  432	common	fsmount				sys_fsmount
>  433	common	fspick				sys_fspick
> +434	common	fsinfo				sys_fsinfo
> diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
> index 26339e417695..7bc067f4b713 100644
> --- a/arch/microblaze/kernel/syscalls/syscall.tbl
> +++ b/arch/microblaze/kernel/syscalls/syscall.tbl
> @@ -439,3 +439,4 @@
>  431	common	fsconfig			sys_fsconfig
>  432	common	fsmount				sys_fsmount
>  433	common	fspick				sys_fspick
> +434	common	fsinfo				sys_fsinfo
> diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
> index 0e2dd68ade57..29b76bd67cc0 100644
> --- a/arch/mips/kernel/syscalls/syscall_n32.tbl
> +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
> @@ -372,3 +372,4 @@
>  431	n32	fsconfig			sys_fsconfig
>  432	n32	fsmount				sys_fsmount
>  433	n32	fspick				sys_fspick
> +434	n32	fsinfo				sys_fsinfo
> diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
> index 5eebfa0d155c..349fb30bb8b5 100644
> --- a/arch/mips/kernel/syscalls/syscall_n64.tbl
> +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
> @@ -348,3 +348,4 @@
>  431	n64	fsconfig			sys_fsconfig
>  432	n64	fsmount				sys_fsmount
>  433	n64	fspick				sys_fspick
> +434	n64	fsinfo				sys_fsinfo
> diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
> index 3cc1374e02d0..71057426b503 100644
> --- a/arch/mips/kernel/syscalls/syscall_o32.tbl
> +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
> @@ -421,3 +421,4 @@
>  431	o32	fsconfig			sys_fsconfig
>  432	o32	fsmount				sys_fsmount
>  433	o32	fspick				sys_fspick
> +434	o32	fsinfo				sys_fsinfo
> diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
> index c9e377d59232..32cff48a1ebd 100644
> --- a/arch/parisc/kernel/syscalls/syscall.tbl
> +++ b/arch/parisc/kernel/syscalls/syscall.tbl
> @@ -430,3 +430,4 @@
>  431	common	fsconfig			sys_fsconfig
>  432	common	fsmount				sys_fsmount
>  433	common	fspick				sys_fspick
> +434	common	fsinfo				sys_fsinfo
> diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
> index 103655d84b4b..e5755eb6fb84 100644
> --- a/arch/powerpc/kernel/syscalls/syscall.tbl
> +++ b/arch/powerpc/kernel/syscalls/syscall.tbl
> @@ -515,3 +515,4 @@
>  431	common	fsconfig			sys_fsconfig
>  432	common	fsmount				sys_fsmount
>  433	common	fspick				sys_fspick
> +434	common	fsinfo				sys_fsinfo
> diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
> index e822b2964a83..bcd54116e107 100644
> --- a/arch/s390/kernel/syscalls/syscall.tbl
> +++ b/arch/s390/kernel/syscalls/syscall.tbl
> @@ -436,3 +436,4 @@
>  431  common	fsconfig		sys_fsconfig			sys_fsconfig
>  432  common	fsmount			sys_fsmount			sys_fsmount
>  433  common	fspick			sys_fspick			sys_fspick
> +434	common	fsinfo			sys_fsinfo			sys_fsinfo
> diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
> index 016a727d4357..0320a5c63cbd 100644
> --- a/arch/sh/kernel/syscalls/syscall.tbl
> +++ b/arch/sh/kernel/syscalls/syscall.tbl
> @@ -436,3 +436,4 @@
>  431	common	fsconfig			sys_fsconfig
>  432	common	fsmount				sys_fsmount
>  433	common	fspick				sys_fspick
> +434	common	fsinfo				sys_fsinfo
> diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
> index e047480b1605..f81b1f9402bd 100644
> --- a/arch/sparc/kernel/syscalls/syscall.tbl
> +++ b/arch/sparc/kernel/syscalls/syscall.tbl
> @@ -479,3 +479,4 @@
>  431	common	fsconfig			sys_fsconfig
>  432	common	fsmount				sys_fsmount
>  433	common	fspick				sys_fspick
> +434	common	fsinfo				sys_fsinfo
> diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
> index 5fa0ee1c8e00..729795148850 100644
> --- a/arch/xtensa/kernel/syscalls/syscall.tbl
> +++ b/arch/xtensa/kernel/syscalls/syscall.tbl
> @@ -404,3 +404,4 @@
>  431	common	fsconfig			sys_fsconfig
>  432	common	fsmount				sys_fsmount
>  433	common	fspick				sys_fspick
> +434	common	fsinfo				sys_fsinfo
> 

^ permalink raw reply

* Re: [PATCH 03/25] vfs: Allow fsinfo() to query what's in an fs_context [ver #14]
From: Christian Brauner @ 2019-06-25  9:27 UTC (permalink / raw)
  To: David Howells
  Cc: viro, raven, mszeredi, linux-api, linux-fsdevel, linux-kernel
In-Reply-To: <156138535407.25627.15015993364565647650.stgit@warthog.procyon.org.uk>

On Mon, Jun 24, 2019 at 03:09:14PM +0100, David Howells wrote:
> Allow fsinfo() to be used to query the filesystem attached to an fs_context
> once a superblock has been created or if it comes from fspick().
> 
> The caller must specify AT_FSINFO_FROM_FSOPEN in the parameters and must

Yeah, I like that better than how it was before.

> supply the fd from fsopen() as dfd and must set filename to NULL.
> 
> This is done with something like:
> 
> 	fd = fsopen("ext4", 0);
> 	...
> 	struct fsinfo_params params = {
> 		.at_flags = AT_FSINFO_FROM_FSOPEN;
> 		...
> 	};
> 	fsinfo(fd, NULL, &params, ...);
> 
> Signed-off-by: David Howells <dhowells@redhat.com>
> ---
> 
>  fs/fsinfo.c                |   46 +++++++++++++++++++++++++++++++++++++++++++-
>  fs/statfs.c                |    2 +-
>  include/uapi/linux/fcntl.h |    2 ++
>  3 files changed, 48 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/fsinfo.c b/fs/fsinfo.c
> index 49b46f96dda3..c24701f994d1 100644
> --- a/fs/fsinfo.c
> +++ b/fs/fsinfo.c
> @@ -8,6 +8,7 @@
>  #include <linux/security.h>
>  #include <linux/uaccess.h>
>  #include <linux/fsinfo.h>
> +#include <linux/fs_context.h>
>  #include <uapi/linux/mount.h>
>  #include "internal.h"
>  
> @@ -340,6 +341,42 @@ static int vfs_fsinfo_fd(unsigned int fd, struct fsinfo_kparams *params)
>  	return ret;
>  }
>  
> +/*
> + * Allow access to an fs_context object as created by fsopen() or fspick().
> + */
> +static int vfs_fsinfo_fscontext(int fd, struct fsinfo_kparams *params)
> +{
> +	struct fs_context *fc;
> +	struct fd f = fdget(fd);
> +	int ret;
> +
> +	if (!f.file)
> +		return -EBADF;
> +
> +	ret = -EINVAL;
> +	if (f.file->f_op == &fscontext_fops)

Don't you mean != ?

if (f.file->f_op != &fscontext_fops)

> +		goto out_f;
> +	ret = -EOPNOTSUPP;
> +	if (fc->ops == &legacy_fs_context_ops)
> +		goto out_f;
> +
> +	ret = mutex_lock_interruptible(&fc->uapi_mutex);
> +	if (ret == 0) {
> +		ret = -EIO;

Why EIO when there's no root dentry?

> +		if (fc->root) {
> +			struct path path = { .dentry = fc->root };
> +
> +			ret = vfs_fsinfo(&path, params);
> +		}
> +
> +		mutex_unlock(&fc->uapi_mutex);
> +	}
> +
> +out_f:
> +	fdput(f);
> +	return ret;
> +}
> +
>  /*
>   * Return buffer information by requestable attribute.
>   *
> @@ -445,6 +482,9 @@ SYSCALL_DEFINE5(fsinfo,
>  		params.request = user_params.request;
>  		params.Nth = user_params.Nth;
>  		params.Mth = user_params.Mth;
> +

[1]:

> +		if ((params.at_flags & AT_FSINFO_FROM_FSOPEN) && filename)
> +			return -EINVAL;
>  	} else {
>  		params.request = FSINFO_ATTR_STATFS;
>  	}
> @@ -453,6 +493,8 @@ SYSCALL_DEFINE5(fsinfo,
>  		user_buf_size = 0;
>  		user_buffer = NULL;
>  	}
> +	if ((params.at_flags & AT_FSINFO_FROM_FSOPEN) && filename)
> +		return -EINVAL;

Sorry, why is this checked twice (see [1])? Or is the diff just
misleading here?

>  
>  	/* Allocate an appropriately-sized buffer.  We will truncate the
>  	 * contents when we write the contents back to userspace.
> @@ -500,7 +542,9 @@ SYSCALL_DEFINE5(fsinfo,
>  	if (!params.buffer)
>  		goto error_scratch;
>  
> -	if (filename)
> +	if (params.at_flags & AT_FSINFO_FROM_FSOPEN)
> +		ret = vfs_fsinfo_fscontext(dfd, &params);
> +	else if (filename)
>  		ret = vfs_fsinfo_path(dfd, filename, &params);
>  	else
>  		ret = vfs_fsinfo_fd(dfd, &params);
> diff --git a/fs/statfs.c b/fs/statfs.c
> index eea7af6f2f22..b9b63d9f4f24 100644
> --- a/fs/statfs.c
> +++ b/fs/statfs.c
> @@ -86,7 +86,7 @@ int vfs_statfs(const struct path *path, struct kstatfs *buf)
>  	int error;
>  
>  	error = statfs_by_dentry(path->dentry, buf);
> -	if (!error)
> +	if (!error && path->mnt)
>  		buf->f_flags = calculate_f_flags(path->mnt);
>  	return error;
>  }
> diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
> index 1d338357df8a..6a2402a8fa30 100644
> --- a/include/uapi/linux/fcntl.h
> +++ b/include/uapi/linux/fcntl.h
> @@ -91,6 +91,8 @@
>  #define AT_STATX_FORCE_SYNC	0x2000	/* - Force the attributes to be sync'd with the server */
>  #define AT_STATX_DONT_SYNC	0x4000	/* - Don't sync attributes with the server */
>  
> +#define AT_FSINFO_FROM_FSOPEN	0x2000	/* Examine the fs_context attached to dfd by fsopen() */
> +
>  #define AT_RECURSIVE		0x8000	/* Apply to the entire subtree */
>  
>  
> 

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox