Linux Security Modules development

Linux Security Modules development
 help / color / mirror / Atom feed

* [PATCH bpf-next RFC v1 2/5] tools: sync bpf.h header
From: Kumar Kartikeya Dwivedi @ 2021-08-21 18:48 UTC (permalink / raw)
  To: bpf
  Cc: Kumar Kartikeya Dwivedi, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Martin KaFai Lau, KP Singh, Spencer Baugh,
	Andy Lutomirski, Pavel Emelyanov, Alexander Mihalicyn,
	Andrei Vagin, linux-security-module
In-Reply-To: <20210821184824.2052643-1-memxor@gmail.com>

Update the bpf.h UAPI header with changes made from file local storage
additions.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
---
 tools/include/uapi/linux/bpf.h | 39 ++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c4f7892edb2b..d4bf4e4d56b5 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -906,6 +906,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_RINGBUF,
 	BPF_MAP_TYPE_INODE_STORAGE,
 	BPF_MAP_TYPE_TASK_STORAGE,
+	BPF_MAP_TYPE_FILE_STORAGE,
 };
 
 /* Note that tracing related programs such as
@@ -4871,6 +4872,42 @@ union bpf_attr {
  * 	Return
  *		Value specified by user at BPF link creation/attachment time
  *		or 0, if it was not specified.
+ *
+ * void *bpf_file_storage_get(struct bpf_map *map, void *file, void *value, u64 flags)
+ *	Description
+ *		Get a bpf_local_storage from a *file*.
+ *
+ *		Logically, it could be thought of as getting the value from
+ *		a *map* with *file* as the **key**.  From this
+ *		perspective,  the usage is not much different from
+ *		**bpf_map_lookup_elem**\ (*map*, **&**\ *file*) except this
+ *		helper enforces the key must be an file and the map must also
+ *		be a **BPF_MAP_TYPE_FILE_STORAGE**.
+ *
+ *		Underneath, the value is stored locally at *file* instead of
+ *		the *map*.  The *map* is used as the bpf-local-storage
+ *		"type". The bpf-local-storage "type" (i.e. the *map*) is
+ *		searched against all bpf_local_storage residing at *file*.
+ *
+ *		An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
+ *		used such that a new bpf_local_storage will be
+ *		created if one does not exist.  *value* can be used
+ *		together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
+ *		the initial value of a bpf_local_storage.  If *value* is
+ *		**NULL**, the new bpf_local_storage will be zero initialized.
+ *	Return
+ *		A bpf_local_storage pointer is returned on success.
+ *
+ *		**NULL** if not found or there was an error in adding
+ *		a new bpf_local_storage.
+ *
+ * int bpf_file_storage_delete(struct bpf_map *map, void *file)
+ *	Description
+ *		Delete a bpf_local_storage from a *file*.
+ *	Return
+ *		0 on success.
+ *
+ *		**-ENOENT** if the bpf_local_storage cannot be found.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5048,6 +5085,8 @@ union bpf_attr {
 	FN(timer_cancel),		\
 	FN(get_func_ip),		\
 	FN(get_attach_cookie),		\
+	FN(file_storage_get),		\
+	FN(file_storage_delete),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
2.33.0


^ permalink raw reply related

* [PATCH bpf-next RFC v1 1/5] bpf: Implement file local storage
From: Kumar Kartikeya Dwivedi @ 2021-08-21 18:48 UTC (permalink / raw)
  To: bpf
  Cc: Kumar Kartikeya Dwivedi, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Martin KaFai Lau, KP Singh, Spencer Baugh,
	Andy Lutomirski, Pavel Emelyanov, Alexander Mihalicyn,
	Andrei Vagin, linux-security-module
In-Reply-To: <20210821184824.2052643-1-memxor@gmail.com>

This map is useful in general to tie data associated with a open file
(not fd) from eBPF programs, such that data is released when the file
goes away (e.g. checkpoint/restore usecase).

Another usecase is implementing Capsicum [0] style capability sandbox in
userspace using eBPF LSM, enforcing rights at the file level using this
mechanism.

The implementation is exactly similar to bpf_inode_storage, with some
minor changes where required.

[0]: https://www.usenix.org/legacy/event/sec10/tech/full_papers/Watson.pdf

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
---
 include/linux/bpf_lsm.h       |  21 +++
 include/linux/bpf_types.h     |   1 +
 include/uapi/linux/bpf.h      |  39 ++++++
 kernel/bpf/Makefile           |   2 +-
 kernel/bpf/bpf_file_storage.c | 244 ++++++++++++++++++++++++++++++++++
 kernel/bpf/bpf_lsm.c          |   4 +
 kernel/bpf/syscall.c          |   3 +-
 kernel/bpf/verifier.c         |  10 ++
 security/bpf/hooks.c          |   2 +
 9 files changed, 324 insertions(+), 2 deletions(-)
 create mode 100644 kernel/bpf/bpf_file_storage.c

diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index 479c101546ad..5901a39cd5ac 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -42,6 +42,18 @@ extern const struct bpf_func_proto bpf_inode_storage_get_proto;
 extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
 void bpf_inode_storage_free(struct inode *inode);
 
+static inline struct bpf_storage_blob *bpf_file(const struct file *file)
+{
+	if (unlikely(!file->f_security))
+		return NULL;
+
+	return file->f_security + bpf_lsm_blob_sizes.lbs_file;
+}
+
+extern const struct bpf_func_proto bpf_file_storage_get_proto;
+extern const struct bpf_func_proto bpf_file_storage_delete_proto;
+void bpf_file_storage_free(struct file *file);
+
 #else /* !CONFIG_BPF_LSM */
 
 static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id)
@@ -65,6 +77,15 @@ static inline void bpf_inode_storage_free(struct inode *inode)
 {
 }
 
+static inline struct bpf_storage_blob *bpf_file(const struct file *file)
+{
+	return NULL;
+}
+
+static inline void bpf_file_storage_free(struct file *file)
+{
+}
+
 #endif /* CONFIG_BPF_LSM */
 
 #endif /* _LINUX_BPF_LSM_H */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 9c81724e4b98..c68cc6d9e7da 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -107,6 +107,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
 #ifdef CONFIG_BPF_LSM
 BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_FILE_STORAGE, file_storage_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c4f7892edb2b..d4bf4e4d56b5 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -906,6 +906,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_RINGBUF,
 	BPF_MAP_TYPE_INODE_STORAGE,
 	BPF_MAP_TYPE_TASK_STORAGE,
+	BPF_MAP_TYPE_FILE_STORAGE,
 };
 
 /* Note that tracing related programs such as
@@ -4871,6 +4872,42 @@ union bpf_attr {
  * 	Return
  *		Value specified by user at BPF link creation/attachment time
  *		or 0, if it was not specified.
+ *
+ * void *bpf_file_storage_get(struct bpf_map *map, void *file, void *value, u64 flags)
+ *	Description
+ *		Get a bpf_local_storage from a *file*.
+ *
+ *		Logically, it could be thought of as getting the value from
+ *		a *map* with *file* as the **key**.  From this
+ *		perspective,  the usage is not much different from
+ *		**bpf_map_lookup_elem**\ (*map*, **&**\ *file*) except this
+ *		helper enforces the key must be an file and the map must also
+ *		be a **BPF_MAP_TYPE_FILE_STORAGE**.
+ *
+ *		Underneath, the value is stored locally at *file* instead of
+ *		the *map*.  The *map* is used as the bpf-local-storage
+ *		"type". The bpf-local-storage "type" (i.e. the *map*) is
+ *		searched against all bpf_local_storage residing at *file*.
+ *
+ *		An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
+ *		used such that a new bpf_local_storage will be
+ *		created if one does not exist.  *value* can be used
+ *		together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
+ *		the initial value of a bpf_local_storage.  If *value* is
+ *		**NULL**, the new bpf_local_storage will be zero initialized.
+ *	Return
+ *		A bpf_local_storage pointer is returned on success.
+ *
+ *		**NULL** if not found or there was an error in adding
+ *		a new bpf_local_storage.
+ *
+ * int bpf_file_storage_delete(struct bpf_map *map, void *file)
+ *	Description
+ *		Delete a bpf_local_storage from a *file*.
+ *	Return
+ *		0 on success.
+ *
+ *		**-ENOENT** if the bpf_local_storage cannot be found.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5048,6 +5085,8 @@ union bpf_attr {
 	FN(timer_cancel),		\
 	FN(get_func_ip),		\
 	FN(get_attach_cookie),		\
+	FN(file_storage_get),		\
+	FN(file_storage_delete),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 7f33098ca63f..98a18e402a0a 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_i
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
 obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
-obj-${CONFIG_BPF_LSM}	  += bpf_inode_storage.o
+obj-${CONFIG_BPF_LSM}	  += bpf_inode_storage.o bpf_file_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 obj-$(CONFIG_BPF_JIT) += trampoline.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o
diff --git a/kernel/bpf/bpf_file_storage.c b/kernel/bpf/bpf_file_storage.c
new file mode 100644
index 000000000000..c826bc0405c4
--- /dev/null
+++ b/kernel/bpf/bpf_file_storage.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/rculist.h>
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <linux/types.h>
+#include <linux/filter.h>
+#include <linux/spinlock.h>
+#include <linux/bpf.h>
+#include <linux/bpf_local_storage.h>
+#include <uapi/linux/btf.h>
+#include <linux/bpf_lsm.h>
+#include <linux/btf_ids.h>
+
+DEFINE_BPF_STORAGE_CACHE(file_cache);
+
+static struct bpf_local_storage __rcu **file_storage_ptr(void *owner)
+{
+	struct bpf_storage_blob *bsb;
+	struct file *file = owner;
+
+	bsb = bpf_file(file);
+	if (!bsb)
+		return NULL;
+	return &bsb->storage;
+}
+
+static struct bpf_local_storage_data *
+file_storage_lookup(struct file *file, struct bpf_map *map, bool cacheit_lockit)
+{
+	struct bpf_local_storage *file_storage;
+	struct bpf_local_storage_map *smap;
+	struct bpf_storage_blob *bsb;
+
+	bsb = bpf_file(file);
+	if (!bsb)
+		return NULL;
+
+	file_storage = rcu_dereference(bsb->storage);
+	if (!file_storage)
+		return NULL;
+
+	smap = (struct bpf_local_storage_map *)map;
+	return bpf_local_storage_lookup(file_storage, smap, cacheit_lockit);
+}
+
+void bpf_file_storage_free(struct file *file)
+{
+	struct bpf_local_storage *local_storage;
+	struct bpf_local_storage_elem *selem;
+	bool free_file_storage = false;
+	struct bpf_storage_blob *bsb;
+	struct hlist_node *n;
+
+	bsb = bpf_file(file);
+	if (!bsb)
+		return;
+
+	rcu_read_lock();
+
+	local_storage = rcu_dereference(bsb->storage);
+	if (!local_storage) {
+		rcu_read_unlock();
+		return;
+	}
+
+	raw_spin_lock_bh(&local_storage->lock);
+	hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
+		bpf_selem_unlink_map(selem);
+		free_file_storage = bpf_selem_unlink_storage_nolock(local_storage,
+								    selem, false);
+	}
+	raw_spin_unlock_bh(&local_storage->lock);
+	rcu_read_unlock();
+
+	if (free_file_storage)
+		kfree_rcu(local_storage, rcu);
+}
+
+static void *bpf_fd_file_storage_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_local_storage_data *sdata;
+	struct file *file;
+	int fd;
+
+	fd = *(int *)key;
+	file = fget_raw(fd);
+	if (!file)
+		return ERR_PTR(-EBADF);
+
+	sdata = file_storage_lookup(file, map, true);
+	fput(file);
+	return sdata ? sdata->data : NULL;
+}
+
+static int bpf_fd_file_storage_update_elem(struct bpf_map *map, void *key,
+					   void *value, u64 map_flags)
+{
+	struct bpf_local_storage_data *sdata;
+	struct file *file;
+	int fd;
+
+	fd = *(int *)key;
+	file = fget_raw(fd);
+	if (!file)
+		return -EBADF;
+	if (!file_storage_ptr(file)) {
+		fput(file);
+		return -EBADF;
+	}
+
+	sdata = bpf_local_storage_update(file,
+					 (struct bpf_local_storage_map *)map,
+					 value, map_flags);
+	fput(file);
+	return PTR_ERR_OR_ZERO(sdata);
+}
+
+static int file_storage_delete(struct file *file, struct bpf_map *map)
+{
+	struct bpf_local_storage_data *sdata;
+
+	sdata = file_storage_lookup(file, map, false);
+	if (!sdata)
+		return -ENOENT;
+
+	bpf_selem_unlink(SELEM(sdata));
+
+	return 0;
+}
+
+static int bpf_fd_file_storage_delete_elem(struct bpf_map *map, void *key)
+{
+	struct file *file;
+	int fd, err;
+
+	fd = *(int *)key;
+	file = fget_raw(fd);
+	if (!file)
+		return -EBADF;
+
+	err = file_storage_delete(file, map);
+	fput(file);
+	return err;
+}
+
+BPF_CALL_4(bpf_file_storage_get, struct bpf_map *, map, struct file *, file,
+	   void *, value, u64, flags)
+{
+	struct bpf_local_storage_data *sdata;
+
+	if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
+		return (unsigned long)NULL;
+
+	if (!file || !file_storage_ptr(file))
+		return (unsigned long)NULL;
+
+	sdata = file_storage_lookup(file, map, true);
+	if (sdata)
+		return (unsigned long)sdata->data;
+
+	if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
+		sdata = bpf_local_storage_update(
+			file, (struct bpf_local_storage_map *)map, value,
+			BPF_NOEXIST);
+		return IS_ERR(sdata) ? (unsigned long)NULL :
+					     (unsigned long)sdata->data;
+	}
+
+	return (unsigned long)NULL;
+}
+
+BPF_CALL_2(bpf_file_storage_delete, struct bpf_map *, map, struct file *, file)
+{
+	if (!file)
+		return -EINVAL;
+
+	return file_storage_delete(file, map);
+}
+
+static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+	return -ENOTSUPP;
+}
+
+static struct bpf_map *file_storage_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_local_storage_map *smap;
+
+	smap = bpf_local_storage_map_alloc(attr);
+	if (IS_ERR(smap))
+		return ERR_CAST(smap);
+
+	smap->cache_idx = bpf_local_storage_cache_idx_get(&file_cache);
+	return &smap->map;
+}
+
+static void file_storage_map_free(struct bpf_map *map)
+{
+	struct bpf_local_storage_map *smap;
+
+	smap = (struct bpf_local_storage_map *)map;
+	bpf_local_storage_cache_idx_free(&file_cache, smap->cache_idx);
+	bpf_local_storage_map_free(smap, NULL);
+}
+
+static int file_storage_map_btf_id;
+
+const struct bpf_map_ops file_storage_map_ops = {
+	.map_meta_equal        = bpf_map_meta_equal,
+	.map_alloc_check       = bpf_local_storage_map_alloc_check,
+	.map_alloc             = file_storage_map_alloc,
+	.map_free              = file_storage_map_free,
+	.map_get_next_key      = notsupp_get_next_key,
+	.map_lookup_elem       = bpf_fd_file_storage_lookup_elem,
+	.map_update_elem       = bpf_fd_file_storage_update_elem,
+	.map_delete_elem       = bpf_fd_file_storage_delete_elem,
+	.map_check_btf         = bpf_local_storage_map_check_btf,
+	.map_btf_name          = "bpf_local_storage_map",
+	.map_btf_id            = &file_storage_map_btf_id,
+	.map_owner_storage_ptr = file_storage_ptr,
+};
+
+BTF_ID_LIST_SINGLE(bpf_file_storage_btf_ids, struct, file)
+
+const struct bpf_func_proto bpf_file_storage_get_proto = {
+	.func        = bpf_file_storage_get,
+	.gpl_only    = false,
+	.ret_type    = RET_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg1_type   = ARG_CONST_MAP_PTR,
+	.arg2_type   = ARG_PTR_TO_BTF_ID,
+	.arg2_btf_id = &bpf_file_storage_btf_ids[0],
+	.arg3_type   = ARG_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg4_type   = ARG_ANYTHING,
+};
+
+const struct bpf_func_proto bpf_file_storage_delete_proto = {
+	.func        = bpf_file_storage_delete,
+	.gpl_only    = false,
+	.ret_type    = RET_INTEGER,
+	.arg1_type   = ARG_CONST_MAP_PTR,
+	.arg2_type   = ARG_PTR_TO_BTF_ID,
+	.arg2_btf_id = &bpf_file_storage_btf_ids[0],
+};
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 06062370c3b8..48c2022fd958 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -121,6 +121,10 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_bprm_opts_set_proto;
 	case BPF_FUNC_ima_inode_hash:
 		return prog->aux->sleepable ? &bpf_ima_inode_hash_proto : NULL;
+	case BPF_FUNC_file_storage_get:
+		return &bpf_file_storage_get_proto;
+	case BPF_FUNC_file_storage_delete:
+		return &bpf_file_storage_delete_proto;
 	default:
 		return tracing_prog_func_proto(func_id, prog);
 	}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4e50c0bfdb7d..946a85945776 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -783,7 +783,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 		    map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
 		    map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
 		    map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
-		    map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
+		    map->map_type != BPF_MAP_TYPE_TASK_STORAGE &&
+		    map->map_type != BPF_MAP_TYPE_FILE_STORAGE)
 			return -ENOTSUPP;
 		if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
 		    map->value_size) {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f5a0077c9981..dc615658c92d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5390,6 +5390,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    func_id != BPF_FUNC_task_storage_delete)
 			goto error;
 		break;
+	case BPF_MAP_TYPE_FILE_STORAGE:
+		if (func_id != BPF_FUNC_file_storage_get &&
+		    func_id != BPF_FUNC_file_storage_delete)
+			goto error;
+		break;
 	default:
 		break;
 	}
@@ -5473,6 +5478,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
 			goto error;
 		break;
+	case BPF_FUNC_file_storage_get:
+	case BPF_FUNC_file_storage_delete:
+		if (map->map_type != BPF_MAP_TYPE_FILE_STORAGE)
+			goto error;
+		break;
 	default:
 		break;
 	}
diff --git a/security/bpf/hooks.c b/security/bpf/hooks.c
index e5971fa74fd7..faa70467db4d 100644
--- a/security/bpf/hooks.c
+++ b/security/bpf/hooks.c
@@ -13,6 +13,7 @@ static struct security_hook_list bpf_lsm_hooks[] __lsm_ro_after_init = {
 	#undef LSM_HOOK
 	LSM_HOOK_INIT(inode_free_security, bpf_inode_storage_free),
 	LSM_HOOK_INIT(task_free, bpf_task_storage_free),
+	LSM_HOOK_INIT(file_free_security, bpf_file_storage_free),
 };
 
 static int __init bpf_lsm_init(void)
@@ -25,6 +26,7 @@ static int __init bpf_lsm_init(void)
 struct lsm_blob_sizes bpf_lsm_blob_sizes __lsm_ro_after_init = {
 	.lbs_inode = sizeof(struct bpf_storage_blob),
 	.lbs_task = sizeof(struct bpf_storage_blob),
+	.lbs_file = sizeof(struct bpf_storage_blob),
 };
 
 DEFINE_LSM(bpf) = {
-- 
2.33.0


^ permalink raw reply related

* [PATCH bpf-next RFC v1 0/5] Implement file local storage
From: Kumar Kartikeya Dwivedi @ 2021-08-21 18:48 UTC (permalink / raw)
  To: bpf
  Cc: Kumar Kartikeya Dwivedi, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Martin KaFai Lau, KP Singh, Spencer Baugh,
	Andy Lutomirski, Pavel Emelyanov, Alexander Mihalicyn,
	Andrei Vagin, linux-security-module

This series implements a file local storage map for eBPF LSM programs. This
allows to tie lifetime of data in the map to an open file description (in POSIX
parlance). Like other local storage map types, lifetime of data is tied to the
struct file instance.

The main purpose is a general purpose map keyed by fd where the open file
underlying the fd (struct file *) serves as the key into the map. It is possible
to use struct file * from kernelspace, but sharing update access with userspace
means userspace has no way except kcmp-aring with another known fd with a key.
This is pretty wasteful.

It can also be used to treat the map as a set of files that have been added to
it, such that multiples sets can be looked up for matching purposes in O(1)
instead of O(n) using kcmp(2) from userspace (for same struct file *).

There are multiple other usecases served by this map. One of the motivating ones
is the ability to now implement a Capsicum [0] style capability based sandbox
using eBPF LSM, but the actual mechanism is much more generic and allows
applications to enforce rights of their own per open file that they delegate to
other users by conventional fd-passing on UNIX (dup/fork/SCM_RIGHTS).

The implementation is exactly the same as bpf_inode_storage, except some
modifications to use struct file * as the key instead of struct inode *.

[0]: https://www.usenix.org/legacy/event/sec10/tech/full_papers/Watson.pdf

Kumar Kartikeya Dwivedi (5):
  bpf: Implement file local storage
  tools: sync bpf.h header
  libbpf: Add bpf_probe_map_type support for file local storage
  tools: bpf: update bpftool for file_storage map
  tools: testing: Add selftest for file local storage map

 include/linux/bpf_lsm.h                       |  21 ++
 include/linux/bpf_types.h                     |   1 +
 include/uapi/linux/bpf.h                      |  39 +++
 kernel/bpf/Makefile                           |   2 +-
 kernel/bpf/bpf_file_storage.c                 | 244 ++++++++++++++++++
 kernel/bpf/bpf_lsm.c                          |   4 +
 kernel/bpf/syscall.c                          |   3 +-
 kernel/bpf/verifier.c                         |  10 +
 security/bpf/hooks.c                          |   2 +
 .../bpf/bpftool/Documentation/bpftool-map.rst |   2 +-
 tools/bpf/bpftool/bash-completion/bpftool     |   3 +-
 tools/bpf/bpftool/map.c                       |   3 +-
 tools/include/uapi/linux/bpf.h                |  39 +++
 tools/lib/bpf/libbpf_probes.c                 |   1 +
 .../bpf/prog_tests/test_local_storage.c       |  51 ++++
 .../selftests/bpf/progs/local_storage.c       |  23 ++
 16 files changed, 443 insertions(+), 5 deletions(-)
 create mode 100644 kernel/bpf/bpf_file_storage.c

-- 
2.33.0

^ permalink raw reply

* [PATCH v5 1/1] NAX LSM: Add initial support
From: Igor Zhbanov @ 2021-08-21  9:47 UTC (permalink / raw)
  To: linux-integrity, linux-security-module, Mimi Zohar, THOBY Simon,
	linux-kernel
In-Reply-To: <281927a3-7d3e-7aac-509d-9d3b1609b02b@gmail.com>

Add initial support for NAX (No Anonymous Execution), which is a Linux
Security Module that extends DAC by making impossible to make anonymous
and modified pages executable for privileged processes.

Intercepts anonymous executable pages created with mmap() and mprotect()
system calls.

Log violations (in non-quiet mode) and block the action or kill the
offending process, depending on the enabled settings.

See Documentation/admin-guide/LSM/NAX.rst.

Signed-off-by: Igor Zhbanov <izh1979@gmail.com>
---
 Documentation/admin-guide/LSM/NAX.rst         |  72 +++
 Documentation/admin-guide/LSM/index.rst       |   1 +
 .../admin-guide/kernel-parameters.rst         |   1 +
 .../admin-guide/kernel-parameters.txt         |  32 ++
 security/Kconfig                              |  11 +-
 security/Makefile                             |   2 +
 security/nax/Kconfig                          | 113 +++++
 security/nax/Makefile                         |   4 +
 security/nax/nax-lsm.c                        | 472 ++++++++++++++++++
 9 files changed, 703 insertions(+), 5 deletions(-)
 create mode 100644 Documentation/admin-guide/LSM/NAX.rst
 create mode 100644 security/nax/Kconfig
 create mode 100644 security/nax/Makefile
 create mode 100644 security/nax/nax-lsm.c

diff --git a/Documentation/admin-guide/LSM/NAX.rst b/Documentation/admin-guide/LSM/NAX.rst
new file mode 100644
index 000000000000..da54b3be4cda
--- /dev/null
+++ b/Documentation/admin-guide/LSM/NAX.rst
@@ -0,0 +1,72 @@
+=======
+NAX LSM
+=======
+
+:Author: Igor Zhbanov
+
+NAX (No Anonymous Execution) is a Linux Security Module that extends DAC
+by making impossible to make anonymous and modified pages executable for
+processes. The module intercepts anonymous executable pages created with
+mmap() and mprotect() system calls.
+
+To select it at boot time, add ``nax`` to ``security`` kernel command-line
+parameter.
+
+The following sysctl parameters are available:
+
+* ``kernel.nax.check_all``:
+ - 0: Check all processes.
+ - 1: Check only privileged processes. The privileged process is a process
+      for which any of the following is true:
+      - ``uid  == 0``
+      - ``euid == 0``
+      - ``suid == 0``
+      - ``cap_effective`` has any capability except for the ones allowed
+        in ``kernel.nax.allowed_caps``
+      - ``cap_permitted`` has any capability except for the ones allowed
+        in ``kernel.nax.allowed_caps``
+
+ Checking of uid/euid/suid is important because a process may call seteuid(0)
+ to gain privileges (if SECURE_NO_SETUID_FIXUP secure bit is not set).
+
+* ``kernel.nax.allowed_caps``:
+
+ Hexadecimal number representing the set of capabilities a non-root
+ process can possess without being considered "privileged" by NAX LSM.
+
+ For the meaning of the capabilities bits and their value, please check
+ ``include/uapi/linux/capability.h`` and ``capabilities(7)`` manual page.
+
+ For example, ``CAP_SYS_PTRACE`` has a number 19. Therefore, to add it to
+ allowed capabilities list, we need to set 19'th bit (2^19 or 1 << 19)
+ or 80000 in hexadecimal form. Capabilities can be bitwise ORed.
+
+* ``kernel.nax.mode``:
+
+ - 0: Only log errors (when enabled by ``kernel.nax.quiet``) (default mode)
+ - 1: Forbid unsafe pages mappings (and log when enabled)
+ - 2: Kill the violating process (and log when enabled)
+
+* ``kernel.nax.quiet``:
+
+ - 0: Log violations (default)
+ - 1: Be quiet
+
+* ``kernel.nax.locked``:
+
+ - 0: Changing of the module's sysctl parameters is allowed
+ - 1: Further changing of the module's sysctl parameters is forbidden
+
+ Setting this parameter to ``1`` after initial setup during the system boot
+ will prevent the module disabling at the later time.
+
+There are matching kernel command-line parameters (with the same values):
+
+- ``nax_allowed_caps``
+- ``nax_check_all``
+- ``nax_mode``
+- ``nax_quiet``
+- ``nax_locked``
+
+The ``nax_locked`` command-line parameter must be specified last to avoid
+premature setting locking.
diff --git a/Documentation/admin-guide/LSM/index.rst b/Documentation/admin-guide/LSM/index.rst
index a6ba95fbaa9f..e9df7fc9a461 100644
--- a/Documentation/admin-guide/LSM/index.rst
+++ b/Documentation/admin-guide/LSM/index.rst
@@ -42,6 +42,7 @@ subdirectories.
 
    apparmor
    LoadPin
+   NAX
    SELinux
    Smack
    tomoyo
diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst
index 01ba293a2d70..f4e91dc729f0 100644
--- a/Documentation/admin-guide/kernel-parameters.rst
+++ b/Documentation/admin-guide/kernel-parameters.rst
@@ -136,6 +136,7 @@ parameter is applicable::
 	MOUSE	Appropriate mouse support is enabled.
 	MSI	Message Signaled Interrupts (PCI).
 	MTD	MTD (Memory Technology Device) support is enabled.
+	NAX	NAX support is enabled.
 	NET	Appropriate network support is enabled.
 	NUMA	NUMA support is enabled.
 	NFS	Appropriate NFS support is enabled.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index bdb22006f713..10ed55e28d49 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3100,6 +3100,38 @@
 
 	n2=		[NET] SDL Inc. RISCom/N2 synchronous serial card
 
+	nax_allowed_caps= [NAX] Hexadecimal number representing the set of
+			capabilities a non-root process can possess without
+			being considered "privileged" by NAX LSM.
+
+			For the meaning of the capabilities bits and their
+			value, please check include/uapi/linux/capability.h
+			and capabilities(7) manual page.
+
+			For example, `CAP_SYS_PTRACE` has a number 19.
+			Therefore, to add it to allowed capabilities list,
+			we need to set 19'th bit (2^19 or 1 << 19) or 80000
+			in hexadecimal form. Capabilities can be bitwise ORed.
+
+	nax_check_all=	[NAX] NAX LSM processes checking mode:
+			0 - Check only privileged processes (default).
+			1 - Check all processes.
+
+	nax_locked=	[NAX] NAX LSM settings' locking mode:
+			0 - Changing NAX sysctl parameters is allowed.
+			1 - Changing NAX sysctl parameters is forbidden until
+			    reboot.
+
+	nax_mode=	[NAX] NAX LSM violation reaction mode:
+			0 - Only log errors (when not in quiet mode; default).
+			1 - Forbid unsafe pages mappings (and log when
+			    enabled).
+			2 - Kill the violating process (and log when enabled).
+
+	nax_quiet=	[NAX] NAX LSM log verbosity:
+			0 - Log messages to syslog.
+			1 - Be quiet.
+
 	netdev=		[NET] Network devices parameters
 			Format: <irq>,<io>,<mem_start>,<mem_end>,<name>
 			Note that mem_start is often overloaded to mean
diff --git a/security/Kconfig b/security/Kconfig
index 0ced7fd33e4d..771419647ae1 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -239,6 +239,7 @@ source "security/yama/Kconfig"
 source "security/safesetid/Kconfig"
 source "security/lockdown/Kconfig"
 source "security/landlock/Kconfig"
+source "security/nax/Kconfig"
 
 source "security/integrity/Kconfig"
 
@@ -278,11 +279,11 @@ endchoice
 
 config LSM
 	string "Ordered list of enabled LSMs"
-	default "landlock,lockdown,yama,loadpin,safesetid,integrity,smack,selinux,tomoyo,apparmor,bpf" if DEFAULT_SECURITY_SMACK
-	default "landlock,lockdown,yama,loadpin,safesetid,integrity,apparmor,selinux,smack,tomoyo,bpf" if DEFAULT_SECURITY_APPARMOR
-	default "landlock,lockdown,yama,loadpin,safesetid,integrity,tomoyo,bpf" if DEFAULT_SECURITY_TOMOYO
-	default "landlock,lockdown,yama,loadpin,safesetid,integrity,bpf" if DEFAULT_SECURITY_DAC
-	default "landlock,lockdown,yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor,bpf"
+	default "nax,landlock,lockdown,yama,loadpin,safesetid,integrity,smack,selinux,tomoyo,apparmor,bpf" if DEFAULT_SECURITY_SMACK
+	default "nax,landlock,lockdown,yama,loadpin,safesetid,integrity,apparmor,selinux,smack,tomoyo,bpf" if DEFAULT_SECURITY_APPARMOR
+	default "nax,landlock,lockdown,yama,loadpin,safesetid,integrity,tomoyo,bpf" if DEFAULT_SECURITY_TOMOYO
+	default "nax,landlock,lockdown,yama,loadpin,safesetid,integrity,bpf" if DEFAULT_SECURITY_DAC
+	default "nax,landlock,lockdown,yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor,bpf"
 	help
 	  A comma-separated list of LSMs, in initialization order.
 	  Any LSMs left off this list will be ignored. This can be
diff --git a/security/Makefile b/security/Makefile
index 47e432900e24..5c261bbf4659 100644
--- a/security/Makefile
+++ b/security/Makefile
@@ -14,6 +14,7 @@ subdir-$(CONFIG_SECURITY_SAFESETID)    += safesetid
 subdir-$(CONFIG_SECURITY_LOCKDOWN_LSM)	+= lockdown
 subdir-$(CONFIG_BPF_LSM)		+= bpf
 subdir-$(CONFIG_SECURITY_LANDLOCK)	+= landlock
+subdir-$(CONFIG_SECURITY_NAX)		+= nax
 
 # always enable default capabilities
 obj-y					+= commoncap.o
@@ -34,6 +35,7 @@ obj-$(CONFIG_SECURITY_LOCKDOWN_LSM)	+= lockdown/
 obj-$(CONFIG_CGROUPS)			+= device_cgroup.o
 obj-$(CONFIG_BPF_LSM)			+= bpf/
 obj-$(CONFIG_SECURITY_LANDLOCK)		+= landlock/
+obj-$(CONFIG_SECURITY_NAX)		+= nax/
 
 # Object integrity file lists
 subdir-$(CONFIG_INTEGRITY)		+= integrity
diff --git a/security/nax/Kconfig b/security/nax/Kconfig
new file mode 100644
index 000000000000..71fc0175cfb2
--- /dev/null
+++ b/security/nax/Kconfig
@@ -0,0 +1,113 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config SECURITY_NAX
+	bool "NAX support"
+	depends on SECURITY
+	help
+	  This selects NAX (No Anonymous Execution), which extends DAC
+	  support with additional system-wide security settings beyond
+	  regular Linux discretionary access controls. Currently, the only
+	  available behavior is restricting the execution of anonymous and
+	  modified pages.
+
+	  The module can restrict either privileged or all processes,
+	  depending on the settings. It is possible to configure action,
+	  performed when the violation is detected (log, log + block,
+	  log + kill).
+
+	  Further information can be found in
+	  Documentation/admin-guide/LSM/NAX.rst.
+
+	  If you are unsure how to answer this question, answer N.
+
+choice
+	prompt "NAX violation action mode"
+	default SECURITY_NAX_MODE_LOG
+	depends on SECURITY_NAX
+	help
+	  Select the NAX violation action mode.
+
+	  In the default permissive mode the violations are only logged
+	  (if logging is not suppressed). In the enforcing mode the violations
+	  are prohibited. And in the kill mode the process is terminated.
+
+	  The value can be overridden at boot time with the kernel command-line
+	  parameter "nax_mode=" (0, 1, 2) or "kernel.nax.mode=" (0, 1, 2)
+	  sysctl parameter (if the settings are not locked).
+
+	config SECURITY_NAX_MODE_LOG
+		bool "Permissive mode"
+		help
+		  In this mode violations are only logged (if logging is not
+		  suppressed by the "kernel.nax.quiet" parameter). The
+		  violating system call will not be prohibited.
+	config SECURITY_NAX_MODE_ENFORCING
+		bool "Enforcing mode"
+		help
+		  In this mode violations are prohibited and logged (if
+		  logging is not suppressed by the "kernel.nax.quiet"
+		  parameter). The violating system call will return -EACCES
+		  error.
+	config SECURITY_NAX_MODE_KILL
+		bool "Kill mode"
+		help
+		  In this mode the violating process is terminated on the
+		  first violation system call. The violation event is logged
+		  (if logging is not suppressed by the "kernel.nax.quiet"
+		  parameter).
+endchoice
+
+config SECURITY_NAX_MODE
+	int
+	depends on SECURITY_NAX
+	default 0 if SECURITY_NAX_MODE_LOG
+	default 1 if SECURITY_NAX_MODE_ENFORCING
+	default 2 if SECURITY_NAX_MODE_KILL
+
+config SECURITY_NAX_CHECK_ALL
+	bool "Check all processes"
+	depends on SECURITY_NAX
+	help
+	  If selected, NAX will check all processes. If not selected, NAX
+	  will check only privileged processes (which is determined either
+	  by having zero uid, euid, suid or fsuid; or by possessing
+	  capabilities outside of allowed set).
+
+	  The value can also be overridden at boot time with the kernel
+	  command-line parameter "nax_check_all=" (0, 1) or
+	  "kernel.nax.check_all=" (0, 1) sysctl parameter (if the settings
+	  are not locked).
+
+config SECURITY_NAX_ALLOWED_CAPS
+	hex "Process capabilities ignored by NAX"
+	default 0x0
+	range 0x0 0xffffffffffff
+	depends on SECURITY_NAX
+	help
+	  Hexadecimal number representing the set of capabilities
+	  a non-root process can possess without being considered
+	  "privileged" by NAX LSM.
+
+	  The value can be overridden at boot time with the command-line
+	  parameter "nax_allowed_caps=" or "kernel.nax.allowed_caps=" sysctl
+	  parameter (if the settings are not locked).
+
+config SECURITY_NAX_QUIET
+	bool "Silence NAX messages"
+	depends on SECURITY_NAX
+	help
+	  If selected, NAX will not print violations.
+
+	  The value can be overridden at boot with the command-line
+	  parameter "nax_quiet=" (0, 1) or "kernel.nax.quiet=" (0, 1) sysctl
+	  parameter (if the settings are not locked).
+
+config SECURITY_NAX_LOCKED
+	bool "Lock NAX settings"
+	depends on SECURITY_NAX
+	help
+	  Prevent any update to the settings of the NAX LSM. This applies to
+	  both sysctl writes and the kernel command line.
+
+	  If not selected, it can be enabled at boot time with the kernel
+	  command-line parameter "nax_locked=1" or "kernel.nax_locked=1"
+	  sysctl parameter (if the settings are not locked).
diff --git a/security/nax/Makefile b/security/nax/Makefile
new file mode 100644
index 000000000000..9c3372210c77
--- /dev/null
+++ b/security/nax/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_SECURITY_NAX) := nax.o
+
+nax-y := nax-lsm.o
diff --git a/security/nax/nax-lsm.c b/security/nax/nax-lsm.c
new file mode 100644
index 000000000000..1bfdc65ed650
--- /dev/null
+++ b/security/nax/nax-lsm.c
@@ -0,0 +1,472 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2016-2021 Open Mobile Platform LLC.
+ *
+ * Written by: Igor Zhbanov <i.zhbanov@omp.ru, izh1979@gmail.com>
+ *
+ * NAX (No Anonymous Execution) Linux Security Module
+ * This module prevents execution of the code in anonymous or modified pages.
+ * For more details, see Documentation/admin-guide/LSM/NAX.rst and
+ * Documentation/admin-guide/kernel-parameters.rst
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) "NAX: " fmt
+
+#include <linux/capability.h>
+#include <linux/cred.h>
+#include <linux/ctype.h>
+#include <linux/lsm_hooks.h>
+#include <linux/mman.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/securebits.h>
+#include <linux/security.h>
+#include <linux/spinlock.h>
+#include <linux/sysctl.h>
+#include <linux/uidgid.h>
+
+#define NAX_MODE_PERMISSIVE 0 /* Log only             */
+#define NAX_MODE_ENFORCING  1 /* Enforce and log      */
+#define NAX_MODE_KILL       2 /* Kill process and log */
+
+static int max_mode = NAX_MODE_KILL;
+
+static int mode      = CONFIG_SECURITY_NAX_MODE,
+	   quiet     = IS_ENABLED(CONFIG_SECURITY_NAX_QUIET),
+	   locked    = IS_ENABLED(CONFIG_SECURITY_NAX_LOCKED),
+	   check_all = IS_ENABLED(CONFIG_SECURITY_NAX_CHECK_ALL);
+
+#define ALLOWED_CAPS_HEX_LEN (_KERNEL_CAPABILITY_U32S * 8)
+
+static char allowed_caps_hex[ALLOWED_CAPS_HEX_LEN + 1];
+static kernel_cap_t __rcu *allowed_caps;
+DEFINE_SPINLOCK(allowed_caps_mutex);
+
+static bool
+is_interesting_process(void)
+{
+	bool ret = false;
+	const struct cred *cred;
+	kuid_t root_uid;
+	kernel_cap_t *caps;
+
+	if (check_all)
+		return true;
+
+	cred = current_cred();
+	root_uid = make_kuid(cred->user_ns, 0);
+
+	rcu_read_lock();
+	caps = rcu_dereference(allowed_caps);
+	/*
+	 * We count a process as interesting if it any of its uid/euid/suid
+	 * is zero (because it may call seteuid(0) to gain privileges) or
+	 * it has any not allowed capability (even in a user namespace)
+	 */
+	if ((!issecure(SECURE_NO_SETUID_FIXUP) &&
+	     (uid_eq(cred->uid,  root_uid) ||
+	      uid_eq(cred->euid, root_uid) ||
+	      uid_eq(cred->suid, root_uid))) ||
+	    (!cap_issubset(cred->cap_effective, *caps)) ||
+	    (!cap_issubset(cred->cap_permitted, *caps)))
+		ret = true;
+
+	rcu_read_unlock();
+	return ret;
+}
+
+static void
+log_warn(const char *reason)
+{
+	if (quiet)
+		return;
+
+	pr_warn_ratelimited("%s: pid=%d, uid=%u, comm=\"%s\"\n",
+			    reason, current->pid,
+			    from_kuid(&init_user_ns, current_cred()->uid),
+				      current->comm);
+}
+
+static void
+kill_current_task(void)
+{
+	pr_warn("Killing pid=%d, uid=%u, comm=\"%s\"\n",
+		current->pid, from_kuid(&init_user_ns, current_cred()->uid),
+		current->comm);
+	force_sig(SIGKILL);
+}
+
+static int
+nax_mmap_file(struct file *file, unsigned long reqprot,
+	      unsigned long prot, unsigned long flags)
+{
+	int ret = 0;
+
+	if (mode == NAX_MODE_PERMISSIVE && quiet)
+		return 0; /* Skip further checks in this case */
+
+	if (!(prot & PROT_EXEC)) /* Not executable memory */
+		return 0;
+
+	if (!is_interesting_process())
+		return 0; /* Not interesting processes can do anything */
+
+	if (!file) { /* Anonymous executable memory */
+		log_warn("MMAP_ANON_EXEC");
+		ret = -EACCES;
+	} else if (prot & PROT_WRITE) { /* Mapping file RWX */
+		log_warn("MMAP_FILE_WRITE_EXEC");
+		ret = -EACCES;
+	}
+
+	if (ret && mode == NAX_MODE_KILL)
+		kill_current_task();
+
+	return (mode != NAX_MODE_PERMISSIVE) ? ret : 0;
+}
+
+static int
+nax_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
+		  unsigned long prot)
+{
+	int ret = 0;
+
+	if (mode == NAX_MODE_PERMISSIVE && quiet)
+		return 0; /* Skip further checks in this case */
+
+	if (!(prot & PROT_EXEC)) /* Not executable memory */
+		return 0;
+
+	if (!is_interesting_process())
+		return 0; /* Not interesting processes can do anything */
+
+	if (!(vma->vm_flags & VM_EXEC)) {
+		if (vma->vm_start >= vma->vm_mm->start_brk &&
+		    vma->vm_end   <= vma->vm_mm->brk) {
+			log_warn("MPROTECT_EXEC_HEAP");
+			ret = -EACCES;
+		} else if (!vma->vm_file &&
+			   ((vma->vm_start <= vma->vm_mm->start_stack &&
+			     vma->vm_end   >= vma->vm_mm->start_stack) ||
+			    vma_is_stack_for_current(vma))) {
+			log_warn("MPROTECT_EXEC_STACK");
+			ret = -EACCES;
+		} else if (vma->vm_file && vma->anon_vma) {
+			/*
+			 * We are making executable a file mapping that has
+			 * had some COW done. Since pages might have been
+			 * written, check ability to execute the possibly
+			 * modified content. This typically should only
+			 * occur for text relocations.
+			 */
+			log_warn("MPROTECT_EXEC_MODIFIED");
+			ret = -EACCES;
+		}
+	}
+
+	if (!ret) {
+		if (!vma->vm_file) { /* Anonymous executable memory */
+			log_warn("MPROTECT_ANON_EXEC");
+			ret = -EACCES;
+		} else if (prot & PROT_WRITE) { /* Remapping file as RWX */
+			log_warn("MPROTECT_FILE_WRITE_EXEC");
+			ret = -EACCES;
+		}
+	}
+
+	if (ret && mode == NAX_MODE_KILL)
+		kill_current_task();
+
+	return (mode != NAX_MODE_PERMISSIVE) ? ret : 0;
+}
+
+static struct security_hook_list nax_hooks[] __lsm_ro_after_init = {
+	LSM_HOOK_INIT(mmap_file, nax_mmap_file),
+	LSM_HOOK_INIT(file_mprotect, nax_file_mprotect),
+};
+
+static void
+update_allowed_caps(kernel_cap_t *caps)
+{
+	kernel_cap_t *old_caps;
+
+	*caps = cap_intersect(*caps, CAP_FULL_SET); /* Drop unsupported */
+	spin_lock(&allowed_caps_mutex);
+	old_caps = rcu_dereference_protected(allowed_caps,
+					     lockdep_is_held(&allowed_caps_mutex));
+	rcu_assign_pointer(allowed_caps, caps);
+	spin_unlock(&allowed_caps_mutex);
+	synchronize_rcu();
+	kfree(old_caps);
+}
+
+static int
+set_default_allowed_caps(void)
+{
+	size_t i;
+	kernel_cap_t *caps;
+
+	caps = kmalloc(sizeof(*caps), GFP_KERNEL);
+	if (!caps)
+		return -ENOMEM;
+
+	CAP_FOR_EACH_U32(i)
+		caps->cap[i] = (CONFIG_SECURITY_NAX_ALLOWED_CAPS >> (i * 8)) &
+			       0xff;
+
+	update_allowed_caps(caps);
+	return 0;
+}
+
+static int
+parse_and_set_caps(char *str)
+{
+	size_t len, i;
+	kernel_cap_t *caps;
+
+	/* len is guaranteed not to exceed ALLOWED_CAPS_HEX_LEN */
+	len = strlen(str);
+	for (i = 0; i < len; i++)
+		if (!isxdigit(str[i]))
+			return -EINVAL;
+
+	caps = kmalloc(sizeof(*caps), GFP_KERNEL);
+	if (!caps)
+		return -ENOMEM;
+
+	CAP_FOR_EACH_U32(i) {
+		unsigned long l;
+
+		if (kstrtoul(str + (len >= 8 ? len - 8 : 0), 16, &l))
+			return -EINVAL;
+
+		caps->cap[i] = l;
+		if (len < 8)
+			break;
+
+		len -= 8;
+		str[len] = '\0';
+	}
+
+	update_allowed_caps(caps);
+	return 0;
+}
+
+#ifdef CONFIG_SYSCTL
+
+static int
+nax_dointvec_minmax(struct ctl_table *table, int write,
+		    void *buffer, size_t *lenp, loff_t *ppos)
+{
+	if (write && (!capable(CAP_SYS_ADMIN) || locked))
+		return -EPERM;
+
+	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+}
+
+static int
+nax_dostring(struct ctl_table *table, int write, void *buffer,
+	     size_t *lenp, loff_t *ppos)
+{
+	int ret;
+
+	if (write) { /* A user is setting the allowed capabilities */
+		int error;
+		char *buf = (char *)buffer;
+		size_t len = *lenp;
+
+		if (!capable(CAP_SYS_ADMIN) || locked)
+			return -EPERM;
+
+		/* Do not allow trailing garbage or excessive length */
+		if (len == ALLOWED_CAPS_HEX_LEN + 1) {
+			if (buf[--len] != '\n')
+				return -EINVAL;
+		} else if (len > ALLOWED_CAPS_HEX_LEN || len <= 0) {
+			return -EINVAL;
+		}
+
+		error = proc_dostring(table, write, buffer, lenp, ppos);
+		if (error)
+			return error;
+
+		ret = parse_and_set_caps(allowed_caps_hex);
+	} else { /* A user is getting the allowed capabilities */
+		unsigned int i;
+		kernel_cap_t *caps;
+
+		rcu_read_lock();
+		caps = rcu_dereference(allowed_caps);
+		CAP_FOR_EACH_U32(i)
+			snprintf(allowed_caps_hex + i * 8, 9, "%08x",
+				 caps->cap[CAP_LAST_U32 - i]);
+
+		rcu_read_unlock();
+		ret = proc_dostring(table, write, buffer, lenp, ppos);
+	}
+
+	return ret;
+}
+
+struct ctl_path nax_sysctl_path[] = {
+	{ .procname = "kernel" },
+	{ .procname = "nax"    },
+	{ }
+};
+
+static struct ctl_table nax_sysctl_table[] = {
+	{
+		.procname     = "allowed_caps",
+		.data         = allowed_caps_hex,
+		.maxlen       = ALLOWED_CAPS_HEX_LEN + 1,
+		.mode         = 0644,
+		.proc_handler = nax_dostring,
+	}, {
+		.procname     = "check_all",
+		.data         = &check_all,
+		.maxlen       = sizeof(int),
+		.mode         = 0644,
+		.proc_handler = nax_dointvec_minmax,
+		.extra1       = SYSCTL_ZERO,
+		.extra2       = SYSCTL_ONE,
+	}, {
+		.procname     = "locked",
+		.data         = &locked,
+		.maxlen       = sizeof(int),
+		.mode         = 0644,
+		.proc_handler = nax_dointvec_minmax,
+		.extra1       = SYSCTL_ZERO,
+		.extra2       = SYSCTL_ONE,
+	}, {
+		.procname     = "mode",
+		.data         = &mode,
+		.maxlen       = sizeof(int),
+		.mode         = 0644,
+		.proc_handler = nax_dointvec_minmax,
+		.extra1       = SYSCTL_ZERO,
+		.extra2       = &max_mode,
+	}, {
+		.procname     = "quiet",
+		.data         = &quiet,
+		.maxlen       = sizeof(int),
+		.mode         = 0644,
+		.proc_handler = nax_dointvec_minmax,
+		.extra1       = SYSCTL_ZERO,
+		.extra2       = SYSCTL_ONE,
+	},
+	{ }
+};
+
+static void __init
+nax_init_sysctl(void)
+{
+	if (!register_sysctl_paths(nax_sysctl_path, nax_sysctl_table))
+		panic("NAX: sysctl registration failed.\n");
+}
+
+#else /* !CONFIG_SYSCTL */
+
+static inline void
+nax_init_sysctl(void)
+{
+
+}
+
+#endif /* !CONFIG_SYSCTL */
+
+static int __init setup_allowed_caps(char *str)
+{
+	if (locked)
+		return 1;
+
+	/* Do not allow trailing garbage or excessive length */
+	if (strlen(str) > ALLOWED_CAPS_HEX_LEN) {
+		pr_err("Invalid 'nax_allowed_caps' parameter value (%s)\n",
+		       str);
+		return 1;
+	}
+
+	strscpy(allowed_caps_hex, str, sizeof(allowed_caps_hex));
+	if (parse_and_set_caps(allowed_caps_hex))
+		pr_err("Invalid 'nax_allowed_caps' parameter value (%s)\n",
+		       str);
+
+	return 1;
+}
+__setup("nax_allowed_caps=", setup_allowed_caps);
+
+static int __init setup_check_all(char *str)
+{
+	unsigned long val;
+
+	if (!locked && !kstrtoul(str, 0, &val))
+		check_all = val ? 1 : 0;
+
+	return 1;
+}
+__setup("nax_quiet=", setup_check_all);
+
+static int __init setup_locked(char *str)
+{
+	unsigned long val;
+
+	if (!locked && !kstrtoul(str, 0, &val))
+		locked = val ? 1 : 0;
+
+	return 1;
+}
+__setup("nax_locked=", setup_locked);
+
+static int __init setup_mode(char *str)
+{
+	unsigned long val;
+
+	if (!locked && !kstrtoul(str, 0, &val)) {
+		if (val > max_mode) {
+			pr_err("Invalid 'nax_mode' parameter value (%s)\n",
+			       str);
+			val = max_mode;
+		}
+
+		mode = val;
+	}
+
+	return 1;
+}
+__setup("nax_mode=", setup_mode);
+
+static int __init setup_quiet(char *str)
+{
+	unsigned long val;
+
+	if (!locked && !kstrtoul(str, 0, &val))
+		quiet = val ? 1 : 0;
+
+	return 1;
+}
+__setup("nax_quiet=", setup_quiet);
+
+static __init int
+nax_init(void)
+{
+	int rc;
+
+	pr_info("Starting.\n");
+	rc = set_default_allowed_caps();
+	if (rc < 0)
+		return rc;
+
+	security_add_hooks(nax_hooks, ARRAY_SIZE(nax_hooks), "nax");
+	nax_init_sysctl();
+
+	return 0;
+}
+
+DEFINE_LSM(nax) = {
+	.name = "nax",
+	.init = nax_init,
+};
-- 
2.26.2



^ permalink raw reply related

* [PATCH v5 0/1] NAX (No Anonymous Execution) LSM
From: Igor Zhbanov @ 2021-08-21  9:46 UTC (permalink / raw)
  To: linux-integrity, linux-security-module, Mimi Zohar, THOBY Simon,
	linux-kernel

[Overview]

Fileless malware attacks are becoming more and more popular, and even
ready-to-use frameworks are available [1], [2], [3]. They are based on
running of the malware code from anonymous executable memory pages (which
are not backed by an executable file or a library on a filesystem.) This
allows effectively hiding malware presence in a system, making filesystem
integrity checking tools unable to detect the intrusion.

Typically, the malware first needs to intercept the execution flow (e.g.,
by the means of ROP-based exploit). Then it needs to download the main
part (in the form of normal executable or library) from its server,
because it is hard to implement the entire exploit in ROP-based form.
There are a number of security mechanisms that can ensure the integrity
of the file-system, but we need to ensure the integrity of the code in
memory too, to be sure, that only authorized code is running in the
system.

The proposed LSM is preventing the creation of anonymous executable pages
for the processes. The LSM intercepts mmap() and mprotect() system calls
and handles it similarly to SELinux handlers.

The module allows to block the violating system call or to kill the
violating process, depending on the settings, along with rate-limited
logging.

Currently, the module restricts ether all processes or only the
privileged processes, depending on the settings. The privileged process
is a process for which any of the following is true:
+ uid   == 0 && !issecure(SECURE_NOROOT)
+ euid  == 0 && !issecure(SECURE_NOROOT)
+ suid  == 0 && !issecure(SECURE_NOROOT)
+ cap_effective has any capability except of kernel.nax.allowed_caps
+ cap_permitted has any capability except of kernel.nax.allowed_caps

Checking of uid/euid/suid is important because a process may call
seteuid(0) to gain privileges (if SECURE_NO_SETUID_FIXUP secure bit
is not set).

The sysctl parameter kernel.nax.allowed_caps allows to define safe
capabilities set for the privileged processes.

[JIT]

Because of blocked anonymous code execution, JIT-compiled code, some
interpreters (which are using JIT) and libffi-based projects can be
broken.

Our observation shows that such processes are typically running by a
user, so they will not be privileged, so they will be allowed to use
anonymous executable pages.

But for small embedded set-ups it could be possible to get rid of such
processes at all, so the module could be enabled without further
restrictions to protect both privileged and non-privileged processes.

In addition, libffi can be modified not to use anonymous executable
pages.

[Similar implementations]

Although SELinux could be used to enable similar functionality, this LSM
is simpler. It could be used in set-ups, where SELinux would be overkill.

There is also SARA LSM module, which solves similar task, but it is more
complex.

[Cooperation with other security mechanisms]

NAX LSM is more useful in conjunction with IMA. IMA would be responsible
for integrity checking of file-based executables and libraries, and
NAX LSM would be responsible for preventing of anonymous code execution.

Alternatively, NAX LSM can be used with read-only root file system,
protected by dm-verity/fs-verity.

[TODO]
- Implement xattrs support for marking privileged binaries on a per-file
  basis and protect them with EVM.
- Store NAX attributes in the per-task LSM blob to implement special
  launchers for the privileged processes, so all of the children processes
  of such a launcher would be allowed to have anonymous executable pages
  (but not to grandchildren).

[Links]

[1] https://blog.fbkcs.ru/elf-in-memory-execution/
[2] https://magisterquis.github.io/2018/03/31/in-memory-only-elf-execution.html
[3] https://www.prodefence.org/fireelf-fileless-linux-malware-framework/

[Credits]

Thanks to Mimi Zohar for consulting and to Simon Thoby and Randy Dunlap for
thorough review.

[Changelog]

V5
- Move max_mode out of #ifdef scope to fix build.

V4
- Fix indentation issues and typos in Kconfig.

V3
- Fix memory leak in allowed_caps assigning code.
- Protect allowed_caps updating with a spinlock.
- Fix Kconfig options description.
- Add example for allowed_caps value.
- Fix typo in documentation.

V2
- Fixed typo in Kconfig.
- Fixed "cap_effective" and "cap_permitted" parameters description in NAX.rst.
- Added "nax_allowed_caps" setup parameter. Factored out capabilities parsing
  logic.
- Added parameter for checking all processes (not only privileged).
- Added Kconfig parameter for setting allowed capabilities.
- Updated nax_file_mprotect() to avoid calling of nax_mmap_file() to avoid
  duplicated checks.
- Protect allowed_caps with RCU.
- Fixed all errors and most warning found by checkpatch.pl.
- Updated the module documentation. Added description of the boot parameters to
  kernel-parameters.
- Updated commit message.

V1:
- Initial implementation.

Igor Zhbanov (1):
  NAX LSM: Add initial support

 Documentation/admin-guide/LSM/NAX.rst         |  72 +++
 Documentation/admin-guide/LSM/index.rst       |   1 +
 .../admin-guide/kernel-parameters.rst         |   1 +
 .../admin-guide/kernel-parameters.txt         |  32 ++
 security/Kconfig                              |  11 +-
 security/Makefile                             |   2 +
 security/nax/Kconfig                          | 113 +++++
 security/nax/Makefile                         |   4 +
 security/nax/nax-lsm.c                        | 472 ++++++++++++++++++
 9 files changed, 703 insertions(+), 5 deletions(-)
 create mode 100644 Documentation/admin-guide/LSM/NAX.rst
 create mode 100644 security/nax/Kconfig
 create mode 100644 security/nax/Makefile
 create mode 100644 security/nax/nax-lsm.c

-- 
2.26.2

Igor Zhbanov (1):
  NAX LSM: Add initial support

 Documentation/admin-guide/LSM/NAX.rst         |  72 +++
 Documentation/admin-guide/LSM/index.rst       |   1 +
 .../admin-guide/kernel-parameters.rst         |   1 +
 .../admin-guide/kernel-parameters.txt         |  32 ++
 security/Kconfig                              |  11 +-
 security/Makefile                             |   2 +
 security/nax/Kconfig                          | 113 +++++
 security/nax/Makefile                         |   4 +
 security/nax/nax-lsm.c                        | 472 ++++++++++++++++++
 9 files changed, 703 insertions(+), 5 deletions(-)
 create mode 100644 Documentation/admin-guide/LSM/NAX.rst
 create mode 100644 security/nax/Kconfig
 create mode 100644 security/nax/Makefile
 create mode 100644 security/nax/nax-lsm.c

base-commit: 7c60610d476766e128cc4284bb6349732cbd6606
-- 
2.26.2

^ permalink raw reply

* Re: [PATCH v4 1/1] NAX LSM: Add initial support
From: kernel test robot @ 2021-08-21  5:22 UTC (permalink / raw)
  To: Igor Zhbanov, linux-integrity, linux-security-module, Mimi Zohar,
	THOBY Simon, linux-kernel
  Cc: kbuild-all
In-Reply-To: <cddb2e03-794d-b126-10aa-3670607bf477@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 2518 bytes --]

Hi Igor,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linux/master]
[also build test ERROR on linus/master security/next-testing v5.14-rc6 next-20210820]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Igor-Zhbanov/NAX-No-Anonymous-Execution-LSM/20210820-231531
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 349a2d52ffe59b7a0c5876fa7ee9f3eaf188b830
config: microblaze-buildonly-randconfig-r003-20210821 (attached as .config)
compiler: microblaze-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/6c93ee3871fae69975f9fc3c41c0f65743ea7ac8
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Igor-Zhbanov/NAX-No-Anonymous-Execution-LSM/20210820-231531
        git checkout 6c93ee3871fae69975f9fc3c41c0f65743ea7ac8
        # save the attached .config to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=microblaze SHELL=/bin/bash security/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   security/nax/nax-lsm.c: In function 'setup_mode':
>> security/nax/nax-lsm.c:429:27: error: 'max_mode' undeclared (first use in this function)
     429 |                 if (val > max_mode) {
         |                           ^~~~~~~~
   security/nax/nax-lsm.c:429:27: note: each undeclared identifier is reported only once for each function it appears in


vim +/max_mode +429 security/nax/nax-lsm.c

   423	
   424	static int __init setup_mode(char *str)
   425	{
   426		unsigned long val;
   427	
   428		if (!locked && !kstrtoul(str, 0, &val)) {
 > 429			if (val > max_mode) {
   430				pr_err("Invalid 'nax_mode' parameter value (%s)\n",
   431				       str);
   432				val = max_mode;
   433			}
   434	
   435			mode = val;
   436		}
   437	
   438		return 1;
   439	}
   440	__setup("nax_mode=", setup_mode);
   441	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 31432 bytes --]

^ permalink raw reply

* Re: [PATCH v28 22/25] Audit: Add record for multiple process LSM attributes
From: Casey Schaufler @ 2021-08-20 23:48 UTC (permalink / raw)
  To: Paul Moore
  Cc: casey.schaufler, James Morris, linux-security-module, selinux,
	linux-audit, keescook, john.johansen, penguin-kernel,
	Stephen Smalley, Casey Schaufler
In-Reply-To: <93d97b1e-d3ea-0fe0-f0c2-62db09d01889@schaufler-ca.com>

On 8/20/2021 12:17 PM, Casey Schaufler wrote:
> On 8/20/2021 12:06 PM, Paul Moore wrote:
>> On Thu, Aug 19, 2021 at 6:41 PM Casey Schaufler <casey@schaufler-ca.com> wrote:
>>> On 8/18/2021 5:56 PM, Casey Schaufler wrote:
>>>> On 8/18/2021 5:47 PM, Paul Moore wrote:
>>>>> ...
>>>>> I just spent a few minutes tracing the code paths up from audit
>>>>> through netlink and then through the socket layer and I'm not seeing
>>>>> anything obvious where the path differs from any other syscall;
>>>>> current->audit_context *should* be valid just like any other syscall.
>>>>> However, I do have to ask, are you only seeing these audit records
>>>>> with a current->audit_context equal to NULL during early boot?
>>>> Nope. Sorry.
>>> It looks as if all of the NULL audit_context cases are for either
>>> auditd or systemd. Given what the events are, this isn't especially
>>> surprising.
>> I think we may be back to the "early boot" theory.
>>
>> Unless you explicitly enable audit on the kernel cmdline, e.g.
>> "audit=1", processes started before userspace enables audit will not
>> have a properly allocated audit_context; see the "if
>> (likely(!audit_ever_enabled))" check at the top of audit_alloc() for
>> the reason why.

I found a hack-around that no one will like. I changed that check to be

(likely(!audit_ever_enabled) && !lsm_multiple_contexts())

It probably introduces a memory leak and/or performance degradation,
but it has the desired affect.


>>
>> I could be wrong here, but I suspect if you add "audit=1" to your
>> kernel command line those remaining cases of NULL audit_contexts will
>> resolve themselves.  If not, we still have work to do ... well, I mean
>> we still have (different) work to do even if this solves the mystery,
>> it's just that we can now explain what you are seeing :)
> Yup, adding "audit=1" to the command line appears to have gotten
> systemd an audit context. It looks like user space enabling audit
> doesn't assign an audit context to the existing systemd process.
>


^ permalink raw reply

* Re: [PATCH 0/4] KEYS: trusted: Introduce support for NXP CAAM-based trusted keys
From: Tim Harvey @ 2021-08-20 21:19 UTC (permalink / raw)
  To: Ahmad Fatoum
  Cc: David Gstir, Aymen Sghaier, Mimi Zohar, Jan Luebbe, keyrings,
	Steffen Trumtrar, linux-security-module, Udit Agarwal, Herbert Xu,
	Horia Geantă, Richard Weinberger, James Morris, Eric Biggers,
	Serge E. Hallyn, Sumit Garg, James Bottomley, Franck LENORMAND,
	David Howells, open list, Jarkko Sakkinen, linux-crypto,
	Sascha Hauer, linux-integrity, David S. Miller
In-Reply-To: <9200d46d-94a2-befd-e9b0-93036e56eb8a@pengutronix.de>

On Fri, Aug 20, 2021 at 1:36 PM Ahmad Fatoum <a.fatoum@pengutronix.de> wrote:
>
> On 20.08.21 22:20, Tim Harvey wrote:
> > On Fri, Aug 20, 2021 at 9:20 AM Ahmad Fatoum <a.fatoum@pengutronix.de> wrote:
> >> On 20.08.21 17:39, Tim Harvey wrote:
> >>> Thanks for your work!
> >>>
> >>> I've been asked to integrate the capability of using CAAM to
> >>> blob/deblob data to an older 5.4 kernel such as NXP's downstream
> >>> vendor kernel does [1] and I'm trying to understand how your series
> >>> works. I'm not at all familiar with the Linux Key Management API's or
> >>> trusted keys. Can you provide an example of how this can be used for
> >>> such a thing?
> >>
> >> Here's an example with dm-crypt:
> >>
> >>   https://lore.kernel.org/linux-integrity/5d44e50e-4309-830b-79f6-f5d888b1ef69@pengutronix.de/
> >>
> >> dm-crypt is a bit special at the moment, because it has direct support for
> >> trusted keys. For interfacing with other parts of the kernel like ecryptfs
> >> or EVM, you have to create encrypted keys rooted to the trusted keys and use
> >> those. The kernel documentation has an example:
> >>
> >>   https://www.kernel.org/doc/html/v5.13/security/keys/trusted-encrypted.html
> >>
> >> If you backport this series, you can include the typo fix spotted by David.
> >>
> >> I'll send out a revised series, but given that a regression fix I want to
> >> rebase on hasn't been picked up for 3 weeks now, I am not in a hurry.
> >>
> > Thanks for the reference.
> >
> > I'm still trying to understand the keyctl integration with caam. For
> > the 'data' param to keyctl you are using tings like 'new <len>' and
> > 'load <data>'. Where are these 'commands' identified?
>
> Search for match_table_t in security/keys/trusted-keys/trusted_core.c
>
> > I may still be missing something. I'm using 4.14-rc6 with your series
> > and seeing the following:
>
> That's an odd version to backport stuff to..
>
> > # cat /proc/cmdline
> > trusted.source=caam
> > # keyctl add trusted mykey 'new 32' @s)# create new trusted key named
> > 'mykey' of 32 bytes in the session keyring
> > 480104283
> > # keyctl print 480104283 # dump the key
> > keyctl_read_alloc: Unknown error 126
> > ^^^ not clear what this is
>
> Not sure what returns -ENOKEY for you. I haven't been using trusted
> keys on v4.14, but you can try tracing the keyctl syscall.

yikes... that would be painful. I typo'd and meant 5.14-rc6 :) I'm
working with mainline first to make sure I understand everything. If I
backport this it would be to 5.4 but that looks to be extremely
painful. It looks like there was a lot of activity around trusted keys
in 5.13.

It works for a user keyring but not a session keyring... does that
explain anything?
# keyctl add trusted mykey 'new 32' @u
941210782
# keyctl print 941210782
83b7845cb45216496aead9ee2c6a406f587d64aad47bddc539d8947a247e618798d9306b36398b5dc2722a4c3f220a3a763ee175f6bd64758fdd49ca4db597e8ce328121b60edbba9b8d8d55056be896
# keyctl add trusted mykey 'new 32' @s
310571960
# keyctl print 310571960
keyctl_read_alloc: Unknown error 126

Sorry, I'm still trying to wrap my head around the differences in
keyrings and trusted vs user keys.

Tim

^ permalink raw reply

* Re: [PATCH 0/4] KEYS: trusted: Introduce support for NXP CAAM-based trusted keys
From: Ahmad Fatoum @ 2021-08-20 20:36 UTC (permalink / raw)
  To: Tim Harvey
  Cc: David Gstir, Aymen Sghaier, Mimi Zohar, Jan Luebbe, keyrings,
	Steffen Trumtrar, linux-security-module, Udit Agarwal, Herbert Xu,
	Horia Geantă, Richard Weinberger, James Morris, Eric Biggers,
	Serge E. Hallyn, Sumit Garg, James Bottomley, Franck LENORMAND,
	David Howells, open list, Jarkko Sakkinen, linux-crypto,
	Sascha Hauer, linux-integrity, David S. Miller
In-Reply-To: <CAJ+vNU225mgHHg00r67f1L6bEub+_h55hCBAMhCq2rd8kWU-qg@mail.gmail.com>

On 20.08.21 22:20, Tim Harvey wrote:
> On Fri, Aug 20, 2021 at 9:20 AM Ahmad Fatoum <a.fatoum@pengutronix.de> wrote:
>> On 20.08.21 17:39, Tim Harvey wrote:
>>> Thanks for your work!
>>>
>>> I've been asked to integrate the capability of using CAAM to
>>> blob/deblob data to an older 5.4 kernel such as NXP's downstream
>>> vendor kernel does [1] and I'm trying to understand how your series
>>> works. I'm not at all familiar with the Linux Key Management API's or
>>> trusted keys. Can you provide an example of how this can be used for
>>> such a thing?
>>
>> Here's an example with dm-crypt:
>>
>>   https://lore.kernel.org/linux-integrity/5d44e50e-4309-830b-79f6-f5d888b1ef69@pengutronix.de/
>>
>> dm-crypt is a bit special at the moment, because it has direct support for
>> trusted keys. For interfacing with other parts of the kernel like ecryptfs
>> or EVM, you have to create encrypted keys rooted to the trusted keys and use
>> those. The kernel documentation has an example:
>>
>>   https://www.kernel.org/doc/html/v5.13/security/keys/trusted-encrypted.html
>>
>> If you backport this series, you can include the typo fix spotted by David.
>>
>> I'll send out a revised series, but given that a regression fix I want to
>> rebase on hasn't been picked up for 3 weeks now, I am not in a hurry.
>>
> Thanks for the reference.
> 
> I'm still trying to understand the keyctl integration with caam. For
> the 'data' param to keyctl you are using tings like 'new <len>' and
> 'load <data>'. Where are these 'commands' identified?

Search for match_table_t in security/keys/trusted-keys/trusted_core.c

> I may still be missing something. I'm using 4.14-rc6 with your series
> and seeing the following:

That's an odd version to backport stuff to..

> # cat /proc/cmdline
> trusted.source=caam
> # keyctl add trusted mykey 'new 32' @s)# create new trusted key named
> 'mykey' of 32 bytes in the session keyring
> 480104283
> # keyctl print 480104283 # dump the key
> keyctl_read_alloc: Unknown error 126
> ^^^ not clear what this is

Not sure what returns -ENOKEY for you. I haven't been using trusted
keys on v4.14, but you can try tracing the keyctl syscall.

Cheers,
Ahmad

> 
> Best regards,
> 
> Tim
> 


-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

^ permalink raw reply

* Re: [PATCH 0/4] KEYS: trusted: Introduce support for NXP CAAM-based trusted keys
From: Tim Harvey @ 2021-08-20 20:20 UTC (permalink / raw)
  To: Ahmad Fatoum
  Cc: David Gstir, Aymen Sghaier, Mimi Zohar, Jan Luebbe, keyrings,
	Steffen Trumtrar, linux-security-module, Udit Agarwal, Herbert Xu,
	Horia Geantă, Richard Weinberger, James Morris, Eric Biggers,
	Serge E. Hallyn, Sumit Garg, James Bottomley, Franck LENORMAND,
	David Howells, open list, Jarkko Sakkinen, linux-crypto,
	Sascha Hauer, linux-integrity, David S. Miller
In-Reply-To: <2b48a848-d70b-9c43-5ca0-9ab72622ed12@pengutronix.de>

On Fri, Aug 20, 2021 at 9:20 AM Ahmad Fatoum <a.fatoum@pengutronix.de> wrote:
>
> Hello Tim,
>
> On 20.08.21 17:39, Tim Harvey wrote:
> > On Wed, Jul 21, 2021 at 9:49 AM Ahmad Fatoum <a.fatoum@pengutronix.de> wrote:
> >>
> >> Series applies on top of
> >> https://lore.kernel.org/linux-integrity/20210721160258.7024-1-a.fatoum@pengutronix.de/T/#u
> >>
> >> v2 -> v3:
> >>  - Split off first Kconfig preparation patch. It fixes a regression,
> >>    so sent that out, so it can be applied separately (Sumit)
> >>  - Split off second key import patch. I'll send that out separately
> >>    as it's a development aid and not required within the CAAM series
> >>  - add MAINTAINERS entry
> >>
> >> v1 -> v2:
> >>  - Added new commit to make trusted key Kconfig option independent
> >>    of TPM and added new Kconfig file for trusted keys
> >>  - Add new commit for importing existing key material
> >>  - Allow users to force use of kernel RNG (Jarkko)
> >>  - Enforce maximum keymod size (Horia)
> >>  - Use append_seq_(in|out)_ptr_intlen instead of append_seq_(in|out)_ptr
> >>    (Horia)
> >>  - Make blobifier handle private to CAAM glue code file (Horia)
> >>  - Extend trusted keys documentation for CAAM
> >>  - Rebased and updated original cover letter:
> >>
> >> The Cryptographic Acceleration and Assurance Module (CAAM) is an IP core
> >> built into many newer i.MX and QorIQ SoCs by NXP.
> >>
> >> Its blob mechanism can AES encrypt/decrypt user data using a unique
> >> never-disclosed device-specific key.
> >>
> >> There has been multiple discussions on how to represent this within the kernel:
> >>
> >> The Cryptographic Acceleration and Assurance Module (CAAM) is an IP core
> >> built into many newer i.MX and QorIQ SoCs by NXP.
> >>
> >> Its blob mechanism can AES encrypt/decrypt user data using a unique
> >> never-disclosed device-specific key. There has been multiple
> >> discussions on how to represent this within the kernel:
> >>
> >>  - [RFC] crypto: caam - add red blobifier
> >>    Steffen implemented[1] a PoC sysfs driver to start a discussion on how to
> >>    best integrate the blob mechanism.
> >>    Mimi suggested that it could be used to implement trusted keys.
> >>    Trusted keys back then were a TPM-only feature.
> >>
> >>  - security/keys/secure_key: Adds the secure key support based on CAAM.
> >>    Udit added[2] a new "secure" key type with the CAAM as backend. The key
> >>    material stays within the kernel only.
> >>    Mimi and James agreed that this needs a generic interface, not specific
> >>    to CAAM. Mimi suggested trusted keys. Jan noted that this could serve as
> >>    basis for TEE-backed keys.
> >>
> >>  - [RFC] drivers: crypto: caam: key: Add caam_tk key type
> >>    Franck added[3] a new "caam_tk" key type based on Udit's work. This time
> >>    it uses CAAM "black blobs" instead of "red blobs", so key material stays
> >>    within the CAAM and isn't exposed to kernel in plaintext.
> >>    James voiced the opinion that there should be just one user-facing generic
> >>    wrap/unwrap key type with multiple possible handlers.
> >>    David suggested trusted keys.
> >>
> >>  - Introduce TEE based Trusted Keys support
> >>    Sumit reworked[4] trusted keys to support multiple possible backends with
> >>    one chosen at boot time and added a new TEE backend along with TPM.
> >>    This now sits in Jarkko's master branch to be sent out for v5.13
> >>
> >> This patch series builds on top of Sumit's rework to have the CAAM as yet another
> >> trusted key backend.
> >>
> >> The CAAM bits are based on Steffen's initial patch from 2015. His work had been
> >> used in the field for some years now, so I preferred not to deviate too much from it.
> >>
> >> This series has been tested with dmcrypt[5] on an i.MX6DL.
> >>
> >> Looking forward to your feedback.
> >>
> >> Cheers,
> >> Ahmad
> >>
> >>  [1]: https://lore.kernel.org/linux-crypto/1447082306-19946-2-git-send-email-s.trumtrar@pengutronix.de/
> >>  [2]: https://lore.kernel.org/linux-integrity/20180723111432.26830-1-udit.agarwal@nxp.com/
> >>  [3]: https://lore.kernel.org/lkml/1551456599-10603-2-git-send-email-franck.lenormand@nxp.com/
> >>  [4]: https://lore.kernel.org/lkml/1604419306-26105-1-git-send-email-sumit.garg@linaro.org/
> >>  [5]: https://lore.kernel.org/linux-integrity/20210122084321.24012-2-a.fatoum@pengutronix.de/
> >>
> >> ---
> >> To: Jarkko Sakkinen <jarkko@kernel.org>
> >> To: "Horia Geantă" <horia.geanta@nxp.com>
> >> To: Mimi Zohar <zohar@linux.ibm.com>
> >> To: Aymen Sghaier <aymen.sghaier@nxp.com>
> >> To: Herbert Xu <herbert@gondor.apana.org.au>
> >> To: "David S. Miller" <davem@davemloft.net>
> >> To: James Bottomley <jejb@linux.ibm.com>
> >> Cc: David Howells <dhowells@redhat.com>
> >> Cc: James Morris <jmorris@namei.org>
> >> Cc: "Serge E. Hallyn" <serge@hallyn.com>
> >> Cc: Steffen Trumtrar <s.trumtrar@pengutronix.de>
> >> Cc: Udit Agarwal <udit.agarwal@nxp.com>
> >> Cc: Jan Luebbe <j.luebbe@pengutronix.de>
> >> Cc: David Gstir <david@sigma-star.at>
> >> Cc: Eric Biggers <ebiggers@kernel.org>
> >> Cc: Richard Weinberger <richard@nod.at>
> >> Cc: Franck LENORMAND <franck.lenormand@nxp.com>
> >> Cc: Sumit Garg <sumit.garg@linaro.org>
> >> Cc: linux-integrity@vger.kernel.org
> >> Cc: keyrings@vger.kernel.org
> >> Cc: linux-crypto@vger.kernel.org
> >> Cc: linux-kernel@vger.kernel.org
> >> Cc: linux-security-module@vger.kernel.org
> >>
> >> Ahmad Fatoum (4):
> >>   KEYS: trusted: allow users to use kernel RNG for key material
> >>   KEYS: trusted: allow trust sources to use kernel RNG for key material
> >>   crypto: caam - add in-kernel interface for blob generator
> >>   KEYS: trusted: Introduce support for NXP CAAM-based trusted keys
> >>
> >>  Documentation/admin-guide/kernel-parameters.txt   |   8 +-
> >>  Documentation/security/keys/trusted-encrypted.rst |  60 +++-
> >>  MAINTAINERS                                       |   9 +-
> >>  drivers/crypto/caam/Kconfig                       |   3 +-
> >>  drivers/crypto/caam/Makefile                      |   1 +-
> >>  drivers/crypto/caam/blob_gen.c                    | 230 +++++++++++++++-
> >>  include/keys/trusted-type.h                       |   2 +-
> >>  include/keys/trusted_caam.h                       |  11 +-
> >>  include/soc/fsl/caam-blob.h                       |  56 ++++-
> >>  security/keys/trusted-keys/Kconfig                |  11 +-
> >>  security/keys/trusted-keys/Makefile               |   2 +-
> >>  security/keys/trusted-keys/trusted_caam.c         |  74 +++++-
> >>  security/keys/trusted-keys/trusted_core.c         |  23 +-
> >>  13 files changed, 477 insertions(+), 13 deletions(-)
> >>  create mode 100644 drivers/crypto/caam/blob_gen.c
> >>  create mode 100644 include/keys/trusted_caam.h
> >>  create mode 100644 include/soc/fsl/caam-blob.h
> >>  create mode 100644 security/keys/trusted-keys/trusted_caam.c
> >>
> >> base-commit: 97408d81ed533b953326c580ff2c3f1948b3fcee
> >> --
> >> git-series 0.9.1
> >
> > Ahmad,
> >
> > Thanks for your work!
> >
> > I've been asked to integrate the capability of using CAAM to
> > blob/deblob data to an older 5.4 kernel such as NXP's downstream
> > vendor kernel does [1] and I'm trying to understand how your series
> > works. I'm not at all familiar with the Linux Key Management API's or
> > trusted keys. Can you provide an example of how this can be used for
> > such a thing?
>
> Here's an example with dm-crypt:
>
>   https://lore.kernel.org/linux-integrity/5d44e50e-4309-830b-79f6-f5d888b1ef69@pengutronix.de/
>
> dm-crypt is a bit special at the moment, because it has direct support for
> trusted keys. For interfacing with other parts of the kernel like ecryptfs
> or EVM, you have to create encrypted keys rooted to the trusted keys and use
> those. The kernel documentation has an example:
>
>   https://www.kernel.org/doc/html/v5.13/security/keys/trusted-encrypted.html
>
> If you backport this series, you can include the typo fix spotted by David.
>
> I'll send out a revised series, but given that a regression fix I want to
> rebase on hasn't been picked up for 3 weeks now, I am not in a hurry.
>

Ahmad,

Thanks for the reference.

I'm still trying to understand the keyctl integration with caam. For
the 'data' param to keyctl you are using tings like 'new <len>' and
'load <data>'. Where are these 'commands' identified?

I may still be missing something. I'm using 4.14-rc6 with your series
and seeing the following:
# cat /proc/cmdline
trusted.source=caam
# keyctl add trusted mykey 'new 32' @s)# create new trusted key named
'mykey' of 32 bytes in the session keyring
480104283
# keyctl print 480104283 # dump the key
keyctl_read_alloc: Unknown error 126
^^^ not clear what this is

Best regards,

Tim

^ permalink raw reply

* Re: [PATCH v28 22/25] Audit: Add record for multiple process LSM attributes
From: Casey Schaufler @ 2021-08-20 19:17 UTC (permalink / raw)
  To: Paul Moore
  Cc: casey.schaufler, James Morris, linux-security-module, selinux,
	linux-audit, keescook, john.johansen, penguin-kernel,
	Stephen Smalley, Casey Schaufler
In-Reply-To: <CAHC9VhSsJoEc=EDkUCrHr5Uid9DhsoininpvPVt+Ab6RsqieOQ@mail.gmail.com>

On 8/20/2021 12:06 PM, Paul Moore wrote:
> On Thu, Aug 19, 2021 at 6:41 PM Casey Schaufler <casey@schaufler-ca.com> wrote:
>> On 8/18/2021 5:56 PM, Casey Schaufler wrote:
>>> On 8/18/2021 5:47 PM, Paul Moore wrote:
>>>> ...
>>>> I just spent a few minutes tracing the code paths up from audit
>>>> through netlink and then through the socket layer and I'm not seeing
>>>> anything obvious where the path differs from any other syscall;
>>>> current->audit_context *should* be valid just like any other syscall.
>>>> However, I do have to ask, are you only seeing these audit records
>>>> with a current->audit_context equal to NULL during early boot?
>>> Nope. Sorry.
>> It looks as if all of the NULL audit_context cases are for either
>> auditd or systemd. Given what the events are, this isn't especially
>> surprising.
> I think we may be back to the "early boot" theory.
>
> Unless you explicitly enable audit on the kernel cmdline, e.g.
> "audit=1", processes started before userspace enables audit will not
> have a properly allocated audit_context; see the "if
> (likely(!audit_ever_enabled))" check at the top of audit_alloc() for
> the reason why.
>
> I could be wrong here, but I suspect if you add "audit=1" to your
> kernel command line those remaining cases of NULL audit_contexts will
> resolve themselves.  If not, we still have work to do ... well, I mean
> we still have (different) work to do even if this solves the mystery,
> it's just that we can now explain what you are seeing :)

Yup, adding "audit=1" to the command line appears to have gotten
systemd an audit context. It looks like user space enabling audit
doesn't assign an audit context to the existing systemd process.



^ permalink raw reply

* Re: [PATCH v28 22/25] Audit: Add record for multiple process LSM attributes
From: Paul Moore @ 2021-08-20 19:06 UTC (permalink / raw)
  To: Casey Schaufler
  Cc: casey.schaufler, James Morris, linux-security-module, selinux,
	linux-audit, keescook, john.johansen, penguin-kernel,
	Stephen Smalley
In-Reply-To: <6f219a4d-8686-e35a-6801-eb66f98c8032@schaufler-ca.com>

On Thu, Aug 19, 2021 at 6:41 PM Casey Schaufler <casey@schaufler-ca.com> wrote:
> On 8/18/2021 5:56 PM, Casey Schaufler wrote:
> > On 8/18/2021 5:47 PM, Paul Moore wrote:
> >> ...
> >> I just spent a few minutes tracing the code paths up from audit
> >> through netlink and then through the socket layer and I'm not seeing
> >> anything obvious where the path differs from any other syscall;
> >> current->audit_context *should* be valid just like any other syscall.
> >> However, I do have to ask, are you only seeing these audit records
> >> with a current->audit_context equal to NULL during early boot?
> >
> > Nope. Sorry.
>
> It looks as if all of the NULL audit_context cases are for either
> auditd or systemd. Given what the events are, this isn't especially
> surprising.

I think we may be back to the "early boot" theory.

Unless you explicitly enable audit on the kernel cmdline, e.g.
"audit=1", processes started before userspace enables audit will not
have a properly allocated audit_context; see the "if
(likely(!audit_ever_enabled))" check at the top of audit_alloc() for
the reason why.

I could be wrong here, but I suspect if you add "audit=1" to your
kernel command line those remaining cases of NULL audit_contexts will
resolve themselves.  If not, we still have work to do ... well, I mean
we still have (different) work to do even if this solves the mystery,
it's just that we can now explain what you are seeing :)

-- 
paul moore
www.paul-moore.com

^ permalink raw reply

* Re: [PATCH 3/3] virt: Add sev_secret module to expose confidential computing secrets
From: Dov Murik @ 2021-08-20 18:36 UTC (permalink / raw)
  To: Andrew Scull, Ard Biesheuvel
  Cc: linux-efi, Borislav Petkov, Ashish Kalra, Brijesh Singh,
	Tom Lendacky, James Morris, Serge E. Hallyn, Andi Kleen,
	Dr. David Alan Gilbert, James Bottomley, Tobin Feldman-Fitzthum,
	Jim Cadden, linux-coco, linux-security-module,
	Linux Kernel Mailing List, Dov Murik
In-Reply-To: <CADcWuH0mP+e6GxkUGN3ni_Yu0z8YTn-mo677obH+p-OFCL+wOQ@mail.gmail.com>



On 19/08/2021 16:02, Andrew Scull wrote:
> On Mon, 16 Aug 2021 at 10:57, Ard Biesheuvel <ardb@kernel.org> wrote:
>>
>> On Fri, 13 Aug 2021 at 15:05, Andrew Scull <ascull@google.com> wrote:
>>>
>>> On Mon, Aug 09, 2021 at 07:01:57PM +0000, Dov Murik wrote:

[...]

>>>
>>>> +static int sev_secret_unlink(struct inode *dir, struct dentry *dentry)
>>>> +{
>>>> +     struct sev_secret *s = sev_secret_get();
>>>> +     struct inode *inode = d_inode(dentry);
>>>> +     struct secret_entry *e = (struct secret_entry *)inode->i_private;
>>>> +     int i;
>>>> +
>>>> +     if (e) {
>>>> +             /* Zero out the secret data */
>>>> +             memzero_explicit(e->data, secret_entry_data_len(e));
>>>
>>> Would there be a benefit in flushing these zeros?
>>>
>>
>> Do you mean cache clean+invalidate? Better to be precise here.
> 
> At least a clean, to have the zeros written back to memory from the
> cache, in order to overwrite the secret.
> 

I agree, but not sure how to implement this:

I see there's an arch_wb_cache_pmem exported function which internally
(in arch/x86/lib/usercopy_64.c) calls clean_cache_range which seems to
do what we want (assume the secret can be longer than the cache line).

But arch_wb_cache_pmem is declared in include/linux/libnvdimm.h and
guarded with #ifdef CONFIG_ARCH_HAS_PMEM_API -- both seem not related to
what I'm trying to do.

I see there's an exported clflush_cache_range for x86 -- but that's a
clean+flush if I understand correctly.

Suggestions on how to approach? I can copy the clean_cache_range
implementation into the sev_secret module but hopefully there's a better
way to reuse.  Maybe export clean_cache_range in x86?

Since this is for SEV the solution can be x86-specific, but if there's a
generic way I guess it's better (I think all of sev_secret module
doesn't have x86-specific stuff).

-Dov


>>
>>>> +             e->guid = NULL_GUID;
>>>> +     }
>>>> +
>>>> +     inode->i_private = NULL;
>>>> +
>>>> +     for (i = 0; i < SEV_SECRET_NUM_FILES; i++)
>>>> +             if (s->fs_files[i] == dentry)
>>>> +                     s->fs_files[i] = NULL;
>>>> +
>>>> +     /*
>>>> +      * securityfs_remove tries to lock the directory's inode, but we reach
>>>> +      * the unlink callback when it's already locked
>>>> +      */
>>>> +     inode_unlock(dir);
>>>> +     securityfs_remove(dentry);
>>>> +     inode_lock(dir);
>>>> +
>>>> +     return 0;
>>>> +}

^ permalink raw reply

* Re: [PATCH] ima: fix infinite loop within "ima_match_policy" function.
From: liqiong @ 2021-08-20 17:53 UTC (permalink / raw)
  To: THOBY Simon, zohar@linux.ibm.com
  Cc: dmitry.kasatkin@gmail.com, jmorris@namei.org, serge@hallyn.com,
	linux-integrity@vger.kernel.org,
	linux-security-module@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <1f631c3d-5dce-e477-bfb3-05aa38836442@viveris.fr>

Hi Simon,

On 2021/8/20 21:23, THOBY Simon wrote:
> Hi Liqiong,
>
> On 8/20/21 12:15 PM, 李力琼 wrote:
>> Hi, Simon:
>>
>> This solution is better then rwsem, a temp "ima_rules" variable should
>> can fix. I also have a another idea, with a little trick, default list
>> can traverse to the new list, so we don't need care about the read side.
>>
>> here is the patch:
>>
>> @@ -918,8 +918,21 @@ void ima_update_policy(void)
>>          list_splice_tail_init_rcu(&ima_temp_rules, policy, synchronize_rcu);
>>
>>          if (ima_rules != policy) {
>> +               struct list_head *prev_rules = ima_rules;
>> +               struct list_head *first = ima_rules->next;
>>                  ima_policy_flag = 0;
>> +
>> +               /*
>> +                * Make the previous list can traverse to new list,
>> +                * that is tricky, or there is a deadly loop whithin
>> +                * "list_for_each_entry_rcu(entry, ima_rules, list)"
>> +                *
>> +                * After update "ima_rules", restore the previous list.
>> +                */
> I think this could be rephrased to be a tad clearer, I am not quite sure
> how I must interpret the first sentence of the comment.
I got it,  how about this:
  /*
   * The previous list has to traverse to new list,
   * Or there may be a deadly loop within
   * "list_for_each_entry_rcu(entry, ima_rules, list)"
   *
   * That is tricky, after updated "ima_rules", restore the previous list.
   */
>
>
>> +               prev_rules->next = policy->next;
>>                  ima_rules = policy;
>> +               syncchronize_rcu();
> I'm a bit puzzled as you seem to imply in the mail this patch was tested,
> but there is no 'syncchronize_rcu' (with two 'c') symbol in the kernel.
> Was that a copy/paste error? Or maybe you forgot the 'not' in "This
> patch has been tested"? These errors happen, and I am myself quite an
> expert in doing them :)


Sorry for the mistake, I copy/paste the patch and delete/edit some lines,
have reviewed before sending, but not found. I have made a case to reproduce
the error, dumping "ima_rules" and every item address of list in the error
situaiton, I can watchthe "ima_rules" change, old list traversing to the 
new list.
And I have been doing a reboot test which found this bug. This patch 
seems to work fine.


>
>> +               prev_rules->next = first;
>>
>>
>> The side effect is the "ima_default_rules" will be changed a little while.
>> But it make sense, the process should be checked again by the new policy.
>>
>> This patch has been tested, if will do, I can resubmit this patch.>
>> How about this ?
>
> Correct me if I'm wrong, here is how I think I understand you patch.
> We start with a situation like that (step 0):
> ima_rules --> List entry 0 (head node) = ima_default_rules <-> List entry 1 <-> List entry 2 <-> ... <-> List entry 0
>
> Then we decide to update the policy for the first time, so
> 'ima_rules [&ima_default_rules] != policy [&ima_policy_rules]'.
> We enter the condition.
> First we copy the current value of ima_rules (&ima_default_rules)
> to a temporary variable 'prev_rules'. We also create a pointer dubbed
> 'first' to the entry 1 in the default list (step 1):
> prev_rules -------------
>                         \/
> ima_rules --> List entry 0 (head node) = ima_default_rules <-> List entry 1 <-> List entry 2 <-> ... <-> List entry 0
>                                                                     /\
> first --------------------------------------------------------------
>
>
> Then we update prev_rules->next to point to policy->next (step 2):
> List entry 1 <-> List entry 2 <-> ... -> List entry 0
>   /\
> first
> 	(notice that list entry 0 no longer points backwards to 'list entry 1',
> 	but I don't think there is any reverse iteration in IMA, so it should be
> 	safe)
>
> prev_rules -------------
>                         \/
> ima_rules --> List entry 0 (head node) = ima_default_rules
>                         |
>                         |
>                         -------------------------------------------
>                                                                   \/
> policy --> policy entry 0' (head node) = ima_policy_rules <-> policy entry 1' <-> policy entry 2' <-> .... <-> policy entry 0'
>
>
> We then update ima_rules to point to ima_policy_rules (step 3):
> List entry 1 <-> List entry 2 <-> ... -> List entry 0
>   /\
> first
>
> prev_rules -------------
>                         \/
> ima_rules     List entry 0 (head node) = ima_default_rules
>       |                 |
>       |                 |
>       |                 ------------------------------------------
>       ---------------                                            |
>                     \/                                           \/
> policy --> policy entry 0' (head node) = ima_policy_rules <-> policy entry 1' <-> policy entry 2' <-> .... <-> policy entry 0'
>                                                                     /\
> first --------------------------------------------------------------
>
> Then we run synchronize_rcu() to wait for any RCU reader to exit their loops (step 4).
>
> Finally we update prev_rules->next to point back to the ima policy and fix the loop (step 5):
>
> List entry 1 <-> List entry 2 <-> ... -> List entry 0
>   /\
> first
>
> prev_rules ---> List entry 0 (head node) = ima_default_rules <-> List entry 1 <-> List entry 2 <-> ... <-> List entry 0
>                                                                       /\
>                                                                   first (now useless)
> ima_rules
>       |
>       |
>       |
>       ---------------
>                     \/
> policy --> policy entry 0' (head node) = ima_policy_rules <-> policy entry 1' <-> policy entry 2' <-> .... <-> policy entry 0'
>
> The goal is that readers should still be able to loop
> (forward, as we saw that backward looping is temporarily broken)
> while in steps 0-4.


Yes, It's the workflow.


> I'm not completely sure what would happen to a client that started iterating
> over ima_rules right after step 2.
>
> Wouldn't they be able to start looping through the new policy
> as 'List entry 0 (head node) = ima_default_rules' points to ima_policy_rules?
> And if they, wouldn't they loop until the write to 'ima_rule' at step 3 (admittedly
> very shortly thereafter) completed?
> And would the compiler be allowed to optimize the read to 'ima_rules' in the
> list_for_each_entry() loop, thereby never reloading the new value for
> 'ima_rules', and thus looping forever, just what we are trying to avoid?


Yes,  "ima_rules" cache not update in time, It's a risk. I am not sure 
if "WRITE_ONCE"
can do this trick. How about:
     WRITE_ONCE(prev_rules->next, policy->next);
     WRITE_ONCE(ima_rules, policy);


If can't fix the cache issue, maybe the "ima_rules_tmp" solution is the 
best way.
I will test it.


> Overall, I'm tempted to say this is perhaps a bit too complex (at least,
> my head tells me it is, but that may very well be because I'm terrible
> at concurrency issues).
>
> Honestly, in this case I think awaiting input from more experienced
> kernel devs than I is the best path forward :-)
>
>> ----------
>> Regards,
>> liqiong
>>
> Thanks,
> Simon

^ permalink raw reply

* Re: [PATCH 0/4] KEYS: trusted: Introduce support for NXP CAAM-based trusted keys
From: Ahmad Fatoum @ 2021-08-20 16:25 UTC (permalink / raw)
  To: David Gstir
  Cc: Jarkko Sakkinen, Horia Geantă, Mimi Zohar, Aymen Sghaier,
	Herbert Xu, David S. Miller, James Bottomley, Jan Luebbe,
	Udit Agarwal, Sumit Garg, Eric Biggers, Franck LENORMAND,
	Richard Weinberger, James Morris, linux-kernel, David Howells,
	linux-security-module, keyrings, linux-crypto, kernel,
	linux-integrity, Steffen Trumtrar, Serge E. Hallyn
In-Reply-To: <74737543-4A73-49F8-92F7-F7FFE64A00DB@sigma-star.at>

Hello David,

On 10.08.21 13:28, David Gstir wrote:
> Hi Ahmad,
> 
>> On 09.08.2021, at 12:16, Ahmad Fatoum <a.fatoum@pengutronix.de> wrote:
> 
> [...]
> 
>> If it interests you, I described[2] my CAAM+ubifs+fscrypt use case in the
>> discussion thread on my fscrypt-trusted-keys v1. Jan, a colleague of mine, held a
>> talk[3] on the different solutions for authenticated and encrypted storage, which
>> you may want to check out.
>>
>> I'd really appreciate feedback here on the the CAAM parts of this series, so this can
>> eventually go mainline.
> 
> Since you mention the fscrypt trusted-keys use case:
> 
> I noticed that the key length for trusted-keys is limited to
> 256 - 1024bit keys. fscrypt does however also support keys
> with e.g. 128bit keys (AES-128-CBC-ESSIV, AES-128-CTS-CBC).
> AFAIK, CAAM and TEE key blobs would also support key lengths outside the 256 - 1024bit range.
> 
> Wouldn’t it make sense to align the supported key lengths?
> I.e. extend the range of supported key lengths for trusted keys.
> Or is there a specific reason why key lengths below 256bit are
> not supported by trusted-keys?

No idea. I would suggest staying clear about arguing in its favor though
until CAAM and DCP are merged. My parallel fscrypt endeavors seem to have
only diverted maintainer attention. ;-)

Cheers,
Ahmad

> 
> Cheers,
> David
> 
> 
> 


-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

^ permalink raw reply

* Re: [PATCH 0/4] KEYS: trusted: Introduce support for NXP CAAM-based trusted keys
From: Ahmad Fatoum @ 2021-08-20 16:19 UTC (permalink / raw)
  To: Tim Harvey
  Cc: David Gstir, Aymen Sghaier, Mimi Zohar, Jan Luebbe, keyrings,
	Steffen Trumtrar, linux-security-module, Udit Agarwal, Herbert Xu,
	Horia Geantă, Richard Weinberger, James Morris, Eric Biggers,
	Serge E. Hallyn, Sumit Garg, James Bottomley, Franck LENORMAND,
	David Howells, open list, Jarkko Sakkinen, linux-crypto,
	Sascha Hauer, linux-integrity, David S. Miller
In-Reply-To: <CAJ+vNU23cXPmiqKcKH_WAgD-ea+=pEJzGK+q7zOy=v2o0XU7kA@mail.gmail.com>

Hello Tim,

On 20.08.21 17:39, Tim Harvey wrote:
> On Wed, Jul 21, 2021 at 9:49 AM Ahmad Fatoum <a.fatoum@pengutronix.de> wrote:
>>
>> Series applies on top of
>> https://lore.kernel.org/linux-integrity/20210721160258.7024-1-a.fatoum@pengutronix.de/T/#u
>>
>> v2 -> v3:
>>  - Split off first Kconfig preparation patch. It fixes a regression,
>>    so sent that out, so it can be applied separately (Sumit)
>>  - Split off second key import patch. I'll send that out separately
>>    as it's a development aid and not required within the CAAM series
>>  - add MAINTAINERS entry
>>
>> v1 -> v2:
>>  - Added new commit to make trusted key Kconfig option independent
>>    of TPM and added new Kconfig file for trusted keys
>>  - Add new commit for importing existing key material
>>  - Allow users to force use of kernel RNG (Jarkko)
>>  - Enforce maximum keymod size (Horia)
>>  - Use append_seq_(in|out)_ptr_intlen instead of append_seq_(in|out)_ptr
>>    (Horia)
>>  - Make blobifier handle private to CAAM glue code file (Horia)
>>  - Extend trusted keys documentation for CAAM
>>  - Rebased and updated original cover letter:
>>
>> The Cryptographic Acceleration and Assurance Module (CAAM) is an IP core
>> built into many newer i.MX and QorIQ SoCs by NXP.
>>
>> Its blob mechanism can AES encrypt/decrypt user data using a unique
>> never-disclosed device-specific key.
>>
>> There has been multiple discussions on how to represent this within the kernel:
>>
>> The Cryptographic Acceleration and Assurance Module (CAAM) is an IP core
>> built into many newer i.MX and QorIQ SoCs by NXP.
>>
>> Its blob mechanism can AES encrypt/decrypt user data using a unique
>> never-disclosed device-specific key. There has been multiple
>> discussions on how to represent this within the kernel:
>>
>>  - [RFC] crypto: caam - add red blobifier
>>    Steffen implemented[1] a PoC sysfs driver to start a discussion on how to
>>    best integrate the blob mechanism.
>>    Mimi suggested that it could be used to implement trusted keys.
>>    Trusted keys back then were a TPM-only feature.
>>
>>  - security/keys/secure_key: Adds the secure key support based on CAAM.
>>    Udit added[2] a new "secure" key type with the CAAM as backend. The key
>>    material stays within the kernel only.
>>    Mimi and James agreed that this needs a generic interface, not specific
>>    to CAAM. Mimi suggested trusted keys. Jan noted that this could serve as
>>    basis for TEE-backed keys.
>>
>>  - [RFC] drivers: crypto: caam: key: Add caam_tk key type
>>    Franck added[3] a new "caam_tk" key type based on Udit's work. This time
>>    it uses CAAM "black blobs" instead of "red blobs", so key material stays
>>    within the CAAM and isn't exposed to kernel in plaintext.
>>    James voiced the opinion that there should be just one user-facing generic
>>    wrap/unwrap key type with multiple possible handlers.
>>    David suggested trusted keys.
>>
>>  - Introduce TEE based Trusted Keys support
>>    Sumit reworked[4] trusted keys to support multiple possible backends with
>>    one chosen at boot time and added a new TEE backend along with TPM.
>>    This now sits in Jarkko's master branch to be sent out for v5.13
>>
>> This patch series builds on top of Sumit's rework to have the CAAM as yet another
>> trusted key backend.
>>
>> The CAAM bits are based on Steffen's initial patch from 2015. His work had been
>> used in the field for some years now, so I preferred not to deviate too much from it.
>>
>> This series has been tested with dmcrypt[5] on an i.MX6DL.
>>
>> Looking forward to your feedback.
>>
>> Cheers,
>> Ahmad
>>
>>  [1]: https://lore.kernel.org/linux-crypto/1447082306-19946-2-git-send-email-s.trumtrar@pengutronix.de/
>>  [2]: https://lore.kernel.org/linux-integrity/20180723111432.26830-1-udit.agarwal@nxp.com/
>>  [3]: https://lore.kernel.org/lkml/1551456599-10603-2-git-send-email-franck.lenormand@nxp.com/
>>  [4]: https://lore.kernel.org/lkml/1604419306-26105-1-git-send-email-sumit.garg@linaro.org/
>>  [5]: https://lore.kernel.org/linux-integrity/20210122084321.24012-2-a.fatoum@pengutronix.de/
>>
>> ---
>> To: Jarkko Sakkinen <jarkko@kernel.org>
>> To: "Horia Geantă" <horia.geanta@nxp.com>
>> To: Mimi Zohar <zohar@linux.ibm.com>
>> To: Aymen Sghaier <aymen.sghaier@nxp.com>
>> To: Herbert Xu <herbert@gondor.apana.org.au>
>> To: "David S. Miller" <davem@davemloft.net>
>> To: James Bottomley <jejb@linux.ibm.com>
>> Cc: David Howells <dhowells@redhat.com>
>> Cc: James Morris <jmorris@namei.org>
>> Cc: "Serge E. Hallyn" <serge@hallyn.com>
>> Cc: Steffen Trumtrar <s.trumtrar@pengutronix.de>
>> Cc: Udit Agarwal <udit.agarwal@nxp.com>
>> Cc: Jan Luebbe <j.luebbe@pengutronix.de>
>> Cc: David Gstir <david@sigma-star.at>
>> Cc: Eric Biggers <ebiggers@kernel.org>
>> Cc: Richard Weinberger <richard@nod.at>
>> Cc: Franck LENORMAND <franck.lenormand@nxp.com>
>> Cc: Sumit Garg <sumit.garg@linaro.org>
>> Cc: linux-integrity@vger.kernel.org
>> Cc: keyrings@vger.kernel.org
>> Cc: linux-crypto@vger.kernel.org
>> Cc: linux-kernel@vger.kernel.org
>> Cc: linux-security-module@vger.kernel.org
>>
>> Ahmad Fatoum (4):
>>   KEYS: trusted: allow users to use kernel RNG for key material
>>   KEYS: trusted: allow trust sources to use kernel RNG for key material
>>   crypto: caam - add in-kernel interface for blob generator
>>   KEYS: trusted: Introduce support for NXP CAAM-based trusted keys
>>
>>  Documentation/admin-guide/kernel-parameters.txt   |   8 +-
>>  Documentation/security/keys/trusted-encrypted.rst |  60 +++-
>>  MAINTAINERS                                       |   9 +-
>>  drivers/crypto/caam/Kconfig                       |   3 +-
>>  drivers/crypto/caam/Makefile                      |   1 +-
>>  drivers/crypto/caam/blob_gen.c                    | 230 +++++++++++++++-
>>  include/keys/trusted-type.h                       |   2 +-
>>  include/keys/trusted_caam.h                       |  11 +-
>>  include/soc/fsl/caam-blob.h                       |  56 ++++-
>>  security/keys/trusted-keys/Kconfig                |  11 +-
>>  security/keys/trusted-keys/Makefile               |   2 +-
>>  security/keys/trusted-keys/trusted_caam.c         |  74 +++++-
>>  security/keys/trusted-keys/trusted_core.c         |  23 +-
>>  13 files changed, 477 insertions(+), 13 deletions(-)
>>  create mode 100644 drivers/crypto/caam/blob_gen.c
>>  create mode 100644 include/keys/trusted_caam.h
>>  create mode 100644 include/soc/fsl/caam-blob.h
>>  create mode 100644 security/keys/trusted-keys/trusted_caam.c
>>
>> base-commit: 97408d81ed533b953326c580ff2c3f1948b3fcee
>> --
>> git-series 0.9.1
> 
> Ahmad,
> 
> Thanks for your work!
> 
> I've been asked to integrate the capability of using CAAM to
> blob/deblob data to an older 5.4 kernel such as NXP's downstream
> vendor kernel does [1] and I'm trying to understand how your series
> works. I'm not at all familiar with the Linux Key Management API's or
> trusted keys. Can you provide an example of how this can be used for
> such a thing?

Here's an example with dm-crypt:

  https://lore.kernel.org/linux-integrity/5d44e50e-4309-830b-79f6-f5d888b1ef69@pengutronix.de/

dm-crypt is a bit special at the moment, because it has direct support for
trusted keys. For interfacing with other parts of the kernel like ecryptfs
or EVM, you have to create encrypted keys rooted to the trusted keys and use
those. The kernel documentation has an example:

  https://www.kernel.org/doc/html/v5.13/security/keys/trusted-encrypted.html

If you backport this series, you can include the typo fix spotted by David.

I'll send out a revised series, but given that a regression fix I want to
rebase on hasn't been picked up for 3 weeks now, I am not in a hurry.

Cheers,
Ahmad

> 
> Best regards,
> 
> Tim
> [1] https://source.codeaurora.org/external/imxsupport/imx_sec_apps/tree/demo-caam-blobs/README.txt
> 
> 


-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

^ permalink raw reply

* Re: [PATCH] ima: fix infinite loop within "ima_match_policy" function.
From: Mimi Zohar @ 2021-08-20 15:48 UTC (permalink / raw)
  To: THOBY Simon, 李力琼
  Cc: dmitry.kasatkin@gmail.com, jmorris@namei.org, serge@hallyn.com,
	linux-integrity@vger.kernel.org,
	linux-security-module@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <1f631c3d-5dce-e477-bfb3-05aa38836442@viveris.fr>

On Fri, 2021-08-20 at 13:23 +0000, THOBY Simon wrote:
> Hi Liqiong,
> 
> On 8/20/21 12:15 PM, 李力琼 wrote:
> > Hi, Simon:
> > 
> > This solution is better then rwsem, a temp "ima_rules" variable should 
> > can fix. I also have a another idea, with a little trick, default list
> > can traverse to the new list, so we don't need care about the read side. 
> > 
> > here is the patch:
> > 
> > @@ -918,8 +918,21 @@ void ima_update_policy(void)
> >         list_splice_tail_init_rcu(&ima_temp_rules, policy, synchronize_rcu);
> > 
> >         if (ima_rules != policy) {
> > +               struct list_head *prev_rules = ima_rules;
> > +               struct list_head *first = ima_rules->next;
> >                 ima_policy_flag = 0;
> > +
> > +               /*
> > +                * Make the previous list can traverse to new list,
> > +                * that is tricky, or there is a deadly loop whithin
> > +                * "list_for_each_entry_rcu(entry, ima_rules, list)"
> > +                *
> > +                * After update "ima_rules", restore the previous list.
> > +                */
> 
> I think this could be rephrased to be a tad clearer, I am not quite sure
> how I must interpret the first sentence of the comment.
> 
> 
> > +               prev_rules->next = policy->next;
> >                 ima_rules = policy;
> > +               syncchronize_rcu();
> 
> I'm a bit puzzled as you seem to imply in the mail this patch was tested,
> but there is no 'syncchronize_rcu' (with two 'c') symbol in the kernel.
> Was that a copy/paste error? Or maybe you forgot the 'not' in "This
> patch has been tested"? These errors happen, and I am myself quite an
> expert in doing them :)
> 
> > +               prev_rules->next = first;
> > 
> > 
> > The side effect is the "ima_default_rules" will be changed a little while.
> > But it make sense, the process should be checked again by the new policy.
> > 
> > This patch has been tested, if will do, I can resubmit this patch.> 
> > How about this ?
> 
> 
> Correct me if I'm wrong, here is how I think I understand you patch.
> We start with a situation like that (step 0):
> ima_rules --> List entry 0 (head node) = ima_default_rules <-> List entry 1 <-> List entry 2 <-> ... <-> List entry 0
> 
> Then we decide to update the policy for the first time, so
> 'ima_rules [&ima_default_rules] != policy [&ima_policy_rules]'.
> We enter the condition.
> First we copy the current value of ima_rules (&ima_default_rules)
> to a temporary variable 'prev_rules'. We also create a pointer dubbed
> 'first' to the entry 1 in the default list (step 1):
> prev_rules -------------
>                        \/
> ima_rules --> List entry 0 (head node) = ima_default_rules <-> List entry 1 <-> List entry 2 <-> ... <-> List entry 0
>                                                                    /\
> first --------------------------------------------------------------
> 
> 
> Then we update prev_rules->next to point to policy->next (step 2):
> List entry 1 <-> List entry 2 <-> ... -> List entry 0
>  /\
> first
> 	(notice that list entry 0 no longer points backwards to 'list entry 1',
> 	but I don't think there is any reverse iteration in IMA, so it should be
> 	safe)
> 
> prev_rules -------------
>                        \/
> ima_rules --> List entry 0 (head node) = ima_default_rules   
>                        |
>                        |
>                        -------------------------------------------
>                                                                  \/
> policy --> policy entry 0' (head node) = ima_policy_rules <-> policy entry 1' <-> policy entry 2' <-> .... <-> policy entry 0'
> 
> 
> We then update ima_rules to point to ima_policy_rules (step 3):
> List entry 1 <-> List entry 2 <-> ... -> List entry 0
>  /\
> first
> 
> prev_rules -------------
>                        \/
> ima_rules     List entry 0 (head node) = ima_default_rules   
>      |                 |
>      |                 |
>      |                 ------------------------------------------
>      ---------------                                            |
>                    \/                                           \/
> policy --> policy entry 0' (head node) = ima_policy_rules <-> policy entry 1' <-> policy entry 2' <-> .... <-> policy entry 0'
>                                                                    /\
> first --------------------------------------------------------------
> 
> Then we run synchronize_rcu() to wait for any RCU reader to exit their loops (step 4).
> 
> Finally we update prev_rules->next to point back to the ima policy and fix the loop (step 5):
> 
> List entry 1 <-> List entry 2 <-> ... -> List entry 0
>  /\
> first
> 
> prev_rules ---> List entry 0 (head node) = ima_default_rules <-> List entry 1 <-> List entry 2 <-> ... <-> List entry 0
>                                                                      /\
>                                                                  first (now useless)
> ima_rules        
>      |
>      |
>      |
>      ---------------
>                    \/
> policy --> policy entry 0' (head node) = ima_policy_rules <-> policy entry 1' <-> policy entry 2' <-> .... <-> policy entry 0'
> 
> The goal is that readers should still be able to loop
> (forward, as we saw that backward looping is temporarily broken)
> while in steps 0-4.
> 
> I'm not completely sure what would happen to a client that started iterating
> over ima_rules right after step 2.
> 
> Wouldn't they be able to start looping through the new policy
> as 'List entry 0 (head node) = ima_default_rules' points to ima_policy_rules?
> And if they, wouldn't they loop until the write to 'ima_rule' at step 3 (admittedly
> very shortly thereafter) completed?
> And would the compiler be allowed to optimize the read to 'ima_rules' in the
> list_for_each_entry() loop, thereby never reloading the new value for
> 'ima_rules', and thus looping forever, just what we are trying to avoid?
> 
> Overall, I'm tempted to say this is perhaps a bit too complex (at least,
> my head tells me it is, but that may very well be because I'm terrible
> at concurrency issues).
> 
> Honestly, in this case I think awaiting input from more experienced
> kernel devs than I is the best path forward :-)

I'm far from an expert on RCU locking, but __list_splice_init_rcu()
provides an example of how to make sure there aren't any readers
traversing the list, before two lists are spliced together.   In our
case, after there aren't any readers, instead of splicing two lists
together, it should be safe to point to the new list.

thanks,

Mimi


^ permalink raw reply

* Re: [PATCH 0/4] KEYS: trusted: Introduce support for NXP CAAM-based trusted keys
From: Tim Harvey @ 2021-08-20 15:39 UTC (permalink / raw)
  To: Ahmad Fatoum
  Cc: Jarkko Sakkinen, Horia Geantă, Mimi Zohar, Aymen Sghaier,
	Herbert Xu, David S. Miller, James Bottomley, Sascha Hauer,
	David Howells, James Morris, Serge E. Hallyn, Steffen Trumtrar,
	Udit Agarwal, Jan Luebbe, David Gstir, Eric Biggers,
	Richard Weinberger, Franck LENORMAND, Sumit Garg, linux-integrity,
	keyrings, linux-crypto, open list, linux-security-module
In-Reply-To: <cover.9fc9298fd9d63553491871d043a18affc2dbc8a8.1626885907.git-series.a.fatoum@pengutronix.de>

On Wed, Jul 21, 2021 at 9:49 AM Ahmad Fatoum <a.fatoum@pengutronix.de> wrote:
>
> Series applies on top of
> https://lore.kernel.org/linux-integrity/20210721160258.7024-1-a.fatoum@pengutronix.de/T/#u
>
> v2 -> v3:
>  - Split off first Kconfig preparation patch. It fixes a regression,
>    so sent that out, so it can be applied separately (Sumit)
>  - Split off second key import patch. I'll send that out separately
>    as it's a development aid and not required within the CAAM series
>  - add MAINTAINERS entry
>
> v1 -> v2:
>  - Added new commit to make trusted key Kconfig option independent
>    of TPM and added new Kconfig file for trusted keys
>  - Add new commit for importing existing key material
>  - Allow users to force use of kernel RNG (Jarkko)
>  - Enforce maximum keymod size (Horia)
>  - Use append_seq_(in|out)_ptr_intlen instead of append_seq_(in|out)_ptr
>    (Horia)
>  - Make blobifier handle private to CAAM glue code file (Horia)
>  - Extend trusted keys documentation for CAAM
>  - Rebased and updated original cover letter:
>
> The Cryptographic Acceleration and Assurance Module (CAAM) is an IP core
> built into many newer i.MX and QorIQ SoCs by NXP.
>
> Its blob mechanism can AES encrypt/decrypt user data using a unique
> never-disclosed device-specific key.
>
> There has been multiple discussions on how to represent this within the kernel:
>
> The Cryptographic Acceleration and Assurance Module (CAAM) is an IP core
> built into many newer i.MX and QorIQ SoCs by NXP.
>
> Its blob mechanism can AES encrypt/decrypt user data using a unique
> never-disclosed device-specific key. There has been multiple
> discussions on how to represent this within the kernel:
>
>  - [RFC] crypto: caam - add red blobifier
>    Steffen implemented[1] a PoC sysfs driver to start a discussion on how to
>    best integrate the blob mechanism.
>    Mimi suggested that it could be used to implement trusted keys.
>    Trusted keys back then were a TPM-only feature.
>
>  - security/keys/secure_key: Adds the secure key support based on CAAM.
>    Udit added[2] a new "secure" key type with the CAAM as backend. The key
>    material stays within the kernel only.
>    Mimi and James agreed that this needs a generic interface, not specific
>    to CAAM. Mimi suggested trusted keys. Jan noted that this could serve as
>    basis for TEE-backed keys.
>
>  - [RFC] drivers: crypto: caam: key: Add caam_tk key type
>    Franck added[3] a new "caam_tk" key type based on Udit's work. This time
>    it uses CAAM "black blobs" instead of "red blobs", so key material stays
>    within the CAAM and isn't exposed to kernel in plaintext.
>    James voiced the opinion that there should be just one user-facing generic
>    wrap/unwrap key type with multiple possible handlers.
>    David suggested trusted keys.
>
>  - Introduce TEE based Trusted Keys support
>    Sumit reworked[4] trusted keys to support multiple possible backends with
>    one chosen at boot time and added a new TEE backend along with TPM.
>    This now sits in Jarkko's master branch to be sent out for v5.13
>
> This patch series builds on top of Sumit's rework to have the CAAM as yet another
> trusted key backend.
>
> The CAAM bits are based on Steffen's initial patch from 2015. His work had been
> used in the field for some years now, so I preferred not to deviate too much from it.
>
> This series has been tested with dmcrypt[5] on an i.MX6DL.
>
> Looking forward to your feedback.
>
> Cheers,
> Ahmad
>
>  [1]: https://lore.kernel.org/linux-crypto/1447082306-19946-2-git-send-email-s.trumtrar@pengutronix.de/
>  [2]: https://lore.kernel.org/linux-integrity/20180723111432.26830-1-udit.agarwal@nxp.com/
>  [3]: https://lore.kernel.org/lkml/1551456599-10603-2-git-send-email-franck.lenormand@nxp.com/
>  [4]: https://lore.kernel.org/lkml/1604419306-26105-1-git-send-email-sumit.garg@linaro.org/
>  [5]: https://lore.kernel.org/linux-integrity/20210122084321.24012-2-a.fatoum@pengutronix.de/
>
> ---
> To: Jarkko Sakkinen <jarkko@kernel.org>
> To: "Horia Geantă" <horia.geanta@nxp.com>
> To: Mimi Zohar <zohar@linux.ibm.com>
> To: Aymen Sghaier <aymen.sghaier@nxp.com>
> To: Herbert Xu <herbert@gondor.apana.org.au>
> To: "David S. Miller" <davem@davemloft.net>
> To: James Bottomley <jejb@linux.ibm.com>
> Cc: David Howells <dhowells@redhat.com>
> Cc: James Morris <jmorris@namei.org>
> Cc: "Serge E. Hallyn" <serge@hallyn.com>
> Cc: Steffen Trumtrar <s.trumtrar@pengutronix.de>
> Cc: Udit Agarwal <udit.agarwal@nxp.com>
> Cc: Jan Luebbe <j.luebbe@pengutronix.de>
> Cc: David Gstir <david@sigma-star.at>
> Cc: Eric Biggers <ebiggers@kernel.org>
> Cc: Richard Weinberger <richard@nod.at>
> Cc: Franck LENORMAND <franck.lenormand@nxp.com>
> Cc: Sumit Garg <sumit.garg@linaro.org>
> Cc: linux-integrity@vger.kernel.org
> Cc: keyrings@vger.kernel.org
> Cc: linux-crypto@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Cc: linux-security-module@vger.kernel.org
>
> Ahmad Fatoum (4):
>   KEYS: trusted: allow users to use kernel RNG for key material
>   KEYS: trusted: allow trust sources to use kernel RNG for key material
>   crypto: caam - add in-kernel interface for blob generator
>   KEYS: trusted: Introduce support for NXP CAAM-based trusted keys
>
>  Documentation/admin-guide/kernel-parameters.txt   |   8 +-
>  Documentation/security/keys/trusted-encrypted.rst |  60 +++-
>  MAINTAINERS                                       |   9 +-
>  drivers/crypto/caam/Kconfig                       |   3 +-
>  drivers/crypto/caam/Makefile                      |   1 +-
>  drivers/crypto/caam/blob_gen.c                    | 230 +++++++++++++++-
>  include/keys/trusted-type.h                       |   2 +-
>  include/keys/trusted_caam.h                       |  11 +-
>  include/soc/fsl/caam-blob.h                       |  56 ++++-
>  security/keys/trusted-keys/Kconfig                |  11 +-
>  security/keys/trusted-keys/Makefile               |   2 +-
>  security/keys/trusted-keys/trusted_caam.c         |  74 +++++-
>  security/keys/trusted-keys/trusted_core.c         |  23 +-
>  13 files changed, 477 insertions(+), 13 deletions(-)
>  create mode 100644 drivers/crypto/caam/blob_gen.c
>  create mode 100644 include/keys/trusted_caam.h
>  create mode 100644 include/soc/fsl/caam-blob.h
>  create mode 100644 security/keys/trusted-keys/trusted_caam.c
>
> base-commit: 97408d81ed533b953326c580ff2c3f1948b3fcee
> --
> git-series 0.9.1

Ahmad,

Thanks for your work!

I've been asked to integrate the capability of using CAAM to
blob/deblob data to an older 5.4 kernel such as NXP's downstream
vendor kernel does [1] and I'm trying to understand how your series
works. I'm not at all familiar with the Linux Key Management API's or
trusted keys. Can you provide an example of how this can be used for
such a thing?

Best regards,

Tim
[1] https://source.codeaurora.org/external/imxsupport/imx_sec_apps/tree/demo-caam-blobs/README.txt

^ permalink raw reply

* [PATCH v4 1/1] NAX LSM: Add initial support
From: Igor Zhbanov @ 2021-08-20 15:14 UTC (permalink / raw)
  To: linux-integrity, linux-security-module, Mimi Zohar, THOBY Simon,
	linux-kernel
In-Reply-To: <57e58b7f-e601-e9c7-5adf-1d189ba4982d@gmail.com>

Add initial support for NAX (No Anonymous Execution), which is a Linux
Security Module that extends DAC by making impossible to make anonymous
and modified pages executable for privileged processes.

Intercepts anonymous executable pages created with mmap() and mprotect()
system calls.

Log violations (in non-quiet mode) and block the action or kill the
offending process, depending on the enabled settings.

See Documentation/admin-guide/LSM/NAX.rst.

Signed-off-by: Igor Zhbanov <izh1979@gmail.com>
---
 Documentation/admin-guide/LSM/NAX.rst         |  72 +++
 Documentation/admin-guide/LSM/index.rst       |   1 +
 .../admin-guide/kernel-parameters.rst         |   1 +
 .../admin-guide/kernel-parameters.txt         |  32 ++
 security/Kconfig                              |  11 +-
 security/Makefile                             |   2 +
 security/nax/Kconfig                          | 113 +++++
 security/nax/Makefile                         |   4 +
 security/nax/nax-lsm.c                        | 472 ++++++++++++++++++
 9 files changed, 703 insertions(+), 5 deletions(-)
 create mode 100644 Documentation/admin-guide/LSM/NAX.rst
 create mode 100644 security/nax/Kconfig
 create mode 100644 security/nax/Makefile
 create mode 100644 security/nax/nax-lsm.c

diff --git a/Documentation/admin-guide/LSM/NAX.rst b/Documentation/admin-guide/LSM/NAX.rst
new file mode 100644
index 000000000000..da54b3be4cda
--- /dev/null
+++ b/Documentation/admin-guide/LSM/NAX.rst
@@ -0,0 +1,72 @@
+=======
+NAX LSM
+=======
+
+:Author: Igor Zhbanov
+
+NAX (No Anonymous Execution) is a Linux Security Module that extends DAC
+by making impossible to make anonymous and modified pages executable for
+processes. The module intercepts anonymous executable pages created with
+mmap() and mprotect() system calls.
+
+To select it at boot time, add ``nax`` to ``security`` kernel command-line
+parameter.
+
+The following sysctl parameters are available:
+
+* ``kernel.nax.check_all``:
+ - 0: Check all processes.
+ - 1: Check only privileged processes. The privileged process is a process
+      for which any of the following is true:
+      - ``uid  == 0``
+      - ``euid == 0``
+      - ``suid == 0``
+      - ``cap_effective`` has any capability except for the ones allowed
+        in ``kernel.nax.allowed_caps``
+      - ``cap_permitted`` has any capability except for the ones allowed
+        in ``kernel.nax.allowed_caps``
+
+ Checking of uid/euid/suid is important because a process may call seteuid(0)
+ to gain privileges (if SECURE_NO_SETUID_FIXUP secure bit is not set).
+
+* ``kernel.nax.allowed_caps``:
+
+ Hexadecimal number representing the set of capabilities a non-root
+ process can possess without being considered "privileged" by NAX LSM.
+
+ For the meaning of the capabilities bits and their value, please check
+ ``include/uapi/linux/capability.h`` and ``capabilities(7)`` manual page.
+
+ For example, ``CAP_SYS_PTRACE`` has a number 19. Therefore, to add it to
+ allowed capabilities list, we need to set 19'th bit (2^19 or 1 << 19)
+ or 80000 in hexadecimal form. Capabilities can be bitwise ORed.
+
+* ``kernel.nax.mode``:
+
+ - 0: Only log errors (when enabled by ``kernel.nax.quiet``) (default mode)
+ - 1: Forbid unsafe pages mappings (and log when enabled)
+ - 2: Kill the violating process (and log when enabled)
+
+* ``kernel.nax.quiet``:
+
+ - 0: Log violations (default)
+ - 1: Be quiet
+
+* ``kernel.nax.locked``:
+
+ - 0: Changing of the module's sysctl parameters is allowed
+ - 1: Further changing of the module's sysctl parameters is forbidden
+
+ Setting this parameter to ``1`` after initial setup during the system boot
+ will prevent the module disabling at the later time.
+
+There are matching kernel command-line parameters (with the same values):
+
+- ``nax_allowed_caps``
+- ``nax_check_all``
+- ``nax_mode``
+- ``nax_quiet``
+- ``nax_locked``
+
+The ``nax_locked`` command-line parameter must be specified last to avoid
+premature setting locking.
diff --git a/Documentation/admin-guide/LSM/index.rst b/Documentation/admin-guide/LSM/index.rst
index a6ba95fbaa9f..e9df7fc9a461 100644
--- a/Documentation/admin-guide/LSM/index.rst
+++ b/Documentation/admin-guide/LSM/index.rst
@@ -42,6 +42,7 @@ subdirectories.
 
    apparmor
    LoadPin
+   NAX
    SELinux
    Smack
    tomoyo
diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst
index 01ba293a2d70..f4e91dc729f0 100644
--- a/Documentation/admin-guide/kernel-parameters.rst
+++ b/Documentation/admin-guide/kernel-parameters.rst
@@ -136,6 +136,7 @@ parameter is applicable::
 	MOUSE	Appropriate mouse support is enabled.
 	MSI	Message Signaled Interrupts (PCI).
 	MTD	MTD (Memory Technology Device) support is enabled.
+	NAX	NAX support is enabled.
 	NET	Appropriate network support is enabled.
 	NUMA	NUMA support is enabled.
 	NFS	Appropriate NFS support is enabled.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index bdb22006f713..10ed55e28d49 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3100,6 +3100,38 @@
 
 	n2=		[NET] SDL Inc. RISCom/N2 synchronous serial card
 
+	nax_allowed_caps= [NAX] Hexadecimal number representing the set of
+			capabilities a non-root process can possess without
+			being considered "privileged" by NAX LSM.
+
+			For the meaning of the capabilities bits and their
+			value, please check include/uapi/linux/capability.h
+			and capabilities(7) manual page.
+
+			For example, `CAP_SYS_PTRACE` has a number 19.
+			Therefore, to add it to allowed capabilities list,
+			we need to set 19'th bit (2^19 or 1 << 19) or 80000
+			in hexadecimal form. Capabilities can be bitwise ORed.
+
+	nax_check_all=	[NAX] NAX LSM processes checking mode:
+			0 - Check only privileged processes (default).
+			1 - Check all processes.
+
+	nax_locked=	[NAX] NAX LSM settings' locking mode:
+			0 - Changing NAX sysctl parameters is allowed.
+			1 - Changing NAX sysctl parameters is forbidden until
+			    reboot.
+
+	nax_mode=	[NAX] NAX LSM violation reaction mode:
+			0 - Only log errors (when not in quiet mode; default).
+			1 - Forbid unsafe pages mappings (and log when
+			    enabled).
+			2 - Kill the violating process (and log when enabled).
+
+	nax_quiet=	[NAX] NAX LSM log verbosity:
+			0 - Log messages to syslog.
+			1 - Be quiet.
+
 	netdev=		[NET] Network devices parameters
 			Format: <irq>,<io>,<mem_start>,<mem_end>,<name>
 			Note that mem_start is often overloaded to mean
diff --git a/security/Kconfig b/security/Kconfig
index 0ced7fd33e4d..771419647ae1 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -239,6 +239,7 @@ source "security/yama/Kconfig"
 source "security/safesetid/Kconfig"
 source "security/lockdown/Kconfig"
 source "security/landlock/Kconfig"
+source "security/nax/Kconfig"
 
 source "security/integrity/Kconfig"
 
@@ -278,11 +279,11 @@ endchoice
 
 config LSM
 	string "Ordered list of enabled LSMs"
-	default "landlock,lockdown,yama,loadpin,safesetid,integrity,smack,selinux,tomoyo,apparmor,bpf" if DEFAULT_SECURITY_SMACK
-	default "landlock,lockdown,yama,loadpin,safesetid,integrity,apparmor,selinux,smack,tomoyo,bpf" if DEFAULT_SECURITY_APPARMOR
-	default "landlock,lockdown,yama,loadpin,safesetid,integrity,tomoyo,bpf" if DEFAULT_SECURITY_TOMOYO
-	default "landlock,lockdown,yama,loadpin,safesetid,integrity,bpf" if DEFAULT_SECURITY_DAC
-	default "landlock,lockdown,yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor,bpf"
+	default "nax,landlock,lockdown,yama,loadpin,safesetid,integrity,smack,selinux,tomoyo,apparmor,bpf" if DEFAULT_SECURITY_SMACK
+	default "nax,landlock,lockdown,yama,loadpin,safesetid,integrity,apparmor,selinux,smack,tomoyo,bpf" if DEFAULT_SECURITY_APPARMOR
+	default "nax,landlock,lockdown,yama,loadpin,safesetid,integrity,tomoyo,bpf" if DEFAULT_SECURITY_TOMOYO
+	default "nax,landlock,lockdown,yama,loadpin,safesetid,integrity,bpf" if DEFAULT_SECURITY_DAC
+	default "nax,landlock,lockdown,yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor,bpf"
 	help
 	  A comma-separated list of LSMs, in initialization order.
 	  Any LSMs left off this list will be ignored. This can be
diff --git a/security/Makefile b/security/Makefile
index 47e432900e24..5c261bbf4659 100644
--- a/security/Makefile
+++ b/security/Makefile
@@ -14,6 +14,7 @@ subdir-$(CONFIG_SECURITY_SAFESETID)    += safesetid
 subdir-$(CONFIG_SECURITY_LOCKDOWN_LSM)	+= lockdown
 subdir-$(CONFIG_BPF_LSM)		+= bpf
 subdir-$(CONFIG_SECURITY_LANDLOCK)	+= landlock
+subdir-$(CONFIG_SECURITY_NAX)		+= nax
 
 # always enable default capabilities
 obj-y					+= commoncap.o
@@ -34,6 +35,7 @@ obj-$(CONFIG_SECURITY_LOCKDOWN_LSM)	+= lockdown/
 obj-$(CONFIG_CGROUPS)			+= device_cgroup.o
 obj-$(CONFIG_BPF_LSM)			+= bpf/
 obj-$(CONFIG_SECURITY_LANDLOCK)		+= landlock/
+obj-$(CONFIG_SECURITY_NAX)		+= nax/
 
 # Object integrity file lists
 subdir-$(CONFIG_INTEGRITY)		+= integrity
diff --git a/security/nax/Kconfig b/security/nax/Kconfig
new file mode 100644
index 000000000000..71fc0175cfb2
--- /dev/null
+++ b/security/nax/Kconfig
@@ -0,0 +1,113 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config SECURITY_NAX
+	bool "NAX support"
+	depends on SECURITY
+	help
+	  This selects NAX (No Anonymous Execution), which extends DAC
+	  support with additional system-wide security settings beyond
+	  regular Linux discretionary access controls. Currently, the only
+	  available behavior is restricting the execution of anonymous and
+	  modified pages.
+
+	  The module can restrict either privileged or all processes,
+	  depending on the settings. It is possible to configure action,
+	  performed when the violation is detected (log, log + block,
+	  log + kill).
+
+	  Further information can be found in
+	  Documentation/admin-guide/LSM/NAX.rst.
+
+	  If you are unsure how to answer this question, answer N.
+
+choice
+	prompt "NAX violation action mode"
+	default SECURITY_NAX_MODE_LOG
+	depends on SECURITY_NAX
+	help
+	  Select the NAX violation action mode.
+
+	  In the default permissive mode the violations are only logged
+	  (if logging is not suppressed). In the enforcing mode the violations
+	  are prohibited. And in the kill mode the process is terminated.
+
+	  The value can be overridden at boot time with the kernel command-line
+	  parameter "nax_mode=" (0, 1, 2) or "kernel.nax.mode=" (0, 1, 2)
+	  sysctl parameter (if the settings are not locked).
+
+	config SECURITY_NAX_MODE_LOG
+		bool "Permissive mode"
+		help
+		  In this mode violations are only logged (if logging is not
+		  suppressed by the "kernel.nax.quiet" parameter). The
+		  violating system call will not be prohibited.
+	config SECURITY_NAX_MODE_ENFORCING
+		bool "Enforcing mode"
+		help
+		  In this mode violations are prohibited and logged (if
+		  logging is not suppressed by the "kernel.nax.quiet"
+		  parameter). The violating system call will return -EACCES
+		  error.
+	config SECURITY_NAX_MODE_KILL
+		bool "Kill mode"
+		help
+		  In this mode the violating process is terminated on the
+		  first violation system call. The violation event is logged
+		  (if logging is not suppressed by the "kernel.nax.quiet"
+		  parameter).
+endchoice
+
+config SECURITY_NAX_MODE
+	int
+	depends on SECURITY_NAX
+	default 0 if SECURITY_NAX_MODE_LOG
+	default 1 if SECURITY_NAX_MODE_ENFORCING
+	default 2 if SECURITY_NAX_MODE_KILL
+
+config SECURITY_NAX_CHECK_ALL
+	bool "Check all processes"
+	depends on SECURITY_NAX
+	help
+	  If selected, NAX will check all processes. If not selected, NAX
+	  will check only privileged processes (which is determined either
+	  by having zero uid, euid, suid or fsuid; or by possessing
+	  capabilities outside of allowed set).
+
+	  The value can also be overridden at boot time with the kernel
+	  command-line parameter "nax_check_all=" (0, 1) or
+	  "kernel.nax.check_all=" (0, 1) sysctl parameter (if the settings
+	  are not locked).
+
+config SECURITY_NAX_ALLOWED_CAPS
+	hex "Process capabilities ignored by NAX"
+	default 0x0
+	range 0x0 0xffffffffffff
+	depends on SECURITY_NAX
+	help
+	  Hexadecimal number representing the set of capabilities
+	  a non-root process can possess without being considered
+	  "privileged" by NAX LSM.
+
+	  The value can be overridden at boot time with the command-line
+	  parameter "nax_allowed_caps=" or "kernel.nax.allowed_caps=" sysctl
+	  parameter (if the settings are not locked).
+
+config SECURITY_NAX_QUIET
+	bool "Silence NAX messages"
+	depends on SECURITY_NAX
+	help
+	  If selected, NAX will not print violations.
+
+	  The value can be overridden at boot with the command-line
+	  parameter "nax_quiet=" (0, 1) or "kernel.nax.quiet=" (0, 1) sysctl
+	  parameter (if the settings are not locked).
+
+config SECURITY_NAX_LOCKED
+	bool "Lock NAX settings"
+	depends on SECURITY_NAX
+	help
+	  Prevent any update to the settings of the NAX LSM. This applies to
+	  both sysctl writes and the kernel command line.
+
+	  If not selected, it can be enabled at boot time with the kernel
+	  command-line parameter "nax_locked=1" or "kernel.nax_locked=1"
+	  sysctl parameter (if the settings are not locked).
diff --git a/security/nax/Makefile b/security/nax/Makefile
new file mode 100644
index 000000000000..9c3372210c77
--- /dev/null
+++ b/security/nax/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_SECURITY_NAX) := nax.o
+
+nax-y := nax-lsm.o
diff --git a/security/nax/nax-lsm.c b/security/nax/nax-lsm.c
new file mode 100644
index 000000000000..5ff3ba12079d
--- /dev/null
+++ b/security/nax/nax-lsm.c
@@ -0,0 +1,472 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2016-2021 Open Mobile Platform LLC.
+ *
+ * Written by: Igor Zhbanov <i.zhbanov@omp.ru, izh1979@gmail.com>
+ *
+ * NAX (No Anonymous Execution) Linux Security Module
+ * This module prevents execution of the code in anonymous or modified pages.
+ * For more details, see Documentation/admin-guide/LSM/NAX.rst and
+ * Documentation/admin-guide/kernel-parameters.rst
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) "NAX: " fmt
+
+#include <linux/capability.h>
+#include <linux/cred.h>
+#include <linux/ctype.h>
+#include <linux/lsm_hooks.h>
+#include <linux/mman.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/securebits.h>
+#include <linux/security.h>
+#include <linux/spinlock.h>
+#include <linux/sysctl.h>
+#include <linux/uidgid.h>
+
+#define NAX_MODE_PERMISSIVE 0 /* Log only             */
+#define NAX_MODE_ENFORCING  1 /* Enforce and log      */
+#define NAX_MODE_KILL       2 /* Kill process and log */
+
+static int mode      = CONFIG_SECURITY_NAX_MODE,
+	   quiet     = IS_ENABLED(CONFIG_SECURITY_NAX_QUIET),
+	   locked    = IS_ENABLED(CONFIG_SECURITY_NAX_LOCKED),
+	   check_all = IS_ENABLED(CONFIG_SECURITY_NAX_CHECK_ALL);
+
+#define ALLOWED_CAPS_HEX_LEN (_KERNEL_CAPABILITY_U32S * 8)
+
+static char allowed_caps_hex[ALLOWED_CAPS_HEX_LEN + 1];
+static kernel_cap_t __rcu *allowed_caps;
+DEFINE_SPINLOCK(allowed_caps_mutex);
+
+static bool
+is_interesting_process(void)
+{
+	bool ret = false;
+	const struct cred *cred;
+	kuid_t root_uid;
+	kernel_cap_t *caps;
+
+	if (check_all)
+		return true;
+
+	cred = current_cred();
+	root_uid = make_kuid(cred->user_ns, 0);
+
+	rcu_read_lock();
+	caps = rcu_dereference(allowed_caps);
+	/*
+	 * We count a process as interesting if it any of its uid/euid/suid
+	 * is zero (because it may call seteuid(0) to gain privileges) or
+	 * it has any not allowed capability (even in a user namespace)
+	 */
+	if ((!issecure(SECURE_NO_SETUID_FIXUP) &&
+	     (uid_eq(cred->uid,  root_uid) ||
+	      uid_eq(cred->euid, root_uid) ||
+	      uid_eq(cred->suid, root_uid))) ||
+	    (!cap_issubset(cred->cap_effective, *caps)) ||
+	    (!cap_issubset(cred->cap_permitted, *caps)))
+		ret = true;
+
+	rcu_read_unlock();
+	return ret;
+}
+
+static void
+log_warn(const char *reason)
+{
+	if (quiet)
+		return;
+
+	pr_warn_ratelimited("%s: pid=%d, uid=%u, comm=\"%s\"\n",
+			    reason, current->pid,
+			    from_kuid(&init_user_ns, current_cred()->uid),
+				      current->comm);
+}
+
+static void
+kill_current_task(void)
+{
+	pr_warn("Killing pid=%d, uid=%u, comm=\"%s\"\n",
+		current->pid, from_kuid(&init_user_ns, current_cred()->uid),
+		current->comm);
+	force_sig(SIGKILL);
+}
+
+static int
+nax_mmap_file(struct file *file, unsigned long reqprot,
+	      unsigned long prot, unsigned long flags)
+{
+	int ret = 0;
+
+	if (mode == NAX_MODE_PERMISSIVE && quiet)
+		return 0; /* Skip further checks in this case */
+
+	if (!(prot & PROT_EXEC)) /* Not executable memory */
+		return 0;
+
+	if (!is_interesting_process())
+		return 0; /* Not interesting processes can do anything */
+
+	if (!file) { /* Anonymous executable memory */
+		log_warn("MMAP_ANON_EXEC");
+		ret = -EACCES;
+	} else if (prot & PROT_WRITE) { /* Mapping file RWX */
+		log_warn("MMAP_FILE_WRITE_EXEC");
+		ret = -EACCES;
+	}
+
+	if (ret && mode == NAX_MODE_KILL)
+		kill_current_task();
+
+	return (mode != NAX_MODE_PERMISSIVE) ? ret : 0;
+}
+
+static int
+nax_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
+		  unsigned long prot)
+{
+	int ret = 0;
+
+	if (mode == NAX_MODE_PERMISSIVE && quiet)
+		return 0; /* Skip further checks in this case */
+
+	if (!(prot & PROT_EXEC)) /* Not executable memory */
+		return 0;
+
+	if (!is_interesting_process())
+		return 0; /* Not interesting processes can do anything */
+
+	if (!(vma->vm_flags & VM_EXEC)) {
+		if (vma->vm_start >= vma->vm_mm->start_brk &&
+		    vma->vm_end   <= vma->vm_mm->brk) {
+			log_warn("MPROTECT_EXEC_HEAP");
+			ret = -EACCES;
+		} else if (!vma->vm_file &&
+			   ((vma->vm_start <= vma->vm_mm->start_stack &&
+			     vma->vm_end   >= vma->vm_mm->start_stack) ||
+			    vma_is_stack_for_current(vma))) {
+			log_warn("MPROTECT_EXEC_STACK");
+			ret = -EACCES;
+		} else if (vma->vm_file && vma->anon_vma) {
+			/*
+			 * We are making executable a file mapping that has
+			 * had some COW done. Since pages might have been
+			 * written, check ability to execute the possibly
+			 * modified content. This typically should only
+			 * occur for text relocations.
+			 */
+			log_warn("MPROTECT_EXEC_MODIFIED");
+			ret = -EACCES;
+		}
+	}
+
+	if (!ret) {
+		if (!vma->vm_file) { /* Anonymous executable memory */
+			log_warn("MPROTECT_ANON_EXEC");
+			ret = -EACCES;
+		} else if (prot & PROT_WRITE) { /* Remapping file as RWX */
+			log_warn("MPROTECT_FILE_WRITE_EXEC");
+			ret = -EACCES;
+		}
+	}
+
+	if (ret && mode == NAX_MODE_KILL)
+		kill_current_task();
+
+	return (mode != NAX_MODE_PERMISSIVE) ? ret : 0;
+}
+
+static struct security_hook_list nax_hooks[] __lsm_ro_after_init = {
+	LSM_HOOK_INIT(mmap_file, nax_mmap_file),
+	LSM_HOOK_INIT(file_mprotect, nax_file_mprotect),
+};
+
+static void
+update_allowed_caps(kernel_cap_t *caps)
+{
+	kernel_cap_t *old_caps;
+
+	*caps = cap_intersect(*caps, CAP_FULL_SET); /* Drop unsupported */
+	spin_lock(&allowed_caps_mutex);
+	old_caps = rcu_dereference_protected(allowed_caps,
+					     lockdep_is_held(&allowed_caps_mutex));
+	rcu_assign_pointer(allowed_caps, caps);
+	spin_unlock(&allowed_caps_mutex);
+	synchronize_rcu();
+	kfree(old_caps);
+}
+
+static int
+set_default_allowed_caps(void)
+{
+	size_t i;
+	kernel_cap_t *caps;
+
+	caps = kmalloc(sizeof(*caps), GFP_KERNEL);
+	if (!caps)
+		return -ENOMEM;
+
+	CAP_FOR_EACH_U32(i)
+		caps->cap[i] = (CONFIG_SECURITY_NAX_ALLOWED_CAPS >> (i * 8)) &
+			       0xff;
+
+	update_allowed_caps(caps);
+	return 0;
+}
+
+static int
+parse_and_set_caps(char *str)
+{
+	size_t len, i;
+	kernel_cap_t *caps;
+
+	/* len is guaranteed not to exceed ALLOWED_CAPS_HEX_LEN */
+	len = strlen(str);
+	for (i = 0; i < len; i++)
+		if (!isxdigit(str[i]))
+			return -EINVAL;
+
+	caps = kmalloc(sizeof(*caps), GFP_KERNEL);
+	if (!caps)
+		return -ENOMEM;
+
+	CAP_FOR_EACH_U32(i) {
+		unsigned long l;
+
+		if (kstrtoul(str + (len >= 8 ? len - 8 : 0), 16, &l))
+			return -EINVAL;
+
+		caps->cap[i] = l;
+		if (len < 8)
+			break;
+
+		len -= 8;
+		str[len] = '\0';
+	}
+
+	update_allowed_caps(caps);
+	return 0;
+}
+
+#ifdef CONFIG_SYSCTL
+
+static int
+nax_dointvec_minmax(struct ctl_table *table, int write,
+		    void *buffer, size_t *lenp, loff_t *ppos)
+{
+	if (write && (!capable(CAP_SYS_ADMIN) || locked))
+		return -EPERM;
+
+	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+}
+
+static int
+nax_dostring(struct ctl_table *table, int write, void *buffer,
+	     size_t *lenp, loff_t *ppos)
+{
+	int ret;
+
+	if (write) { /* A user is setting the allowed capabilities */
+		int error;
+		char *buf = (char *)buffer;
+		size_t len = *lenp;
+
+		if (!capable(CAP_SYS_ADMIN) || locked)
+			return -EPERM;
+
+		/* Do not allow trailing garbage or excessive length */
+		if (len == ALLOWED_CAPS_HEX_LEN + 1) {
+			if (buf[--len] != '\n')
+				return -EINVAL;
+		} else if (len > ALLOWED_CAPS_HEX_LEN || len <= 0) {
+			return -EINVAL;
+		}
+
+		error = proc_dostring(table, write, buffer, lenp, ppos);
+		if (error)
+			return error;
+
+		ret = parse_and_set_caps(allowed_caps_hex);
+	} else { /* A user is getting the allowed capabilities */
+		unsigned int i;
+		kernel_cap_t *caps;
+
+		rcu_read_lock();
+		caps = rcu_dereference(allowed_caps);
+		CAP_FOR_EACH_U32(i)
+			snprintf(allowed_caps_hex + i * 8, 9, "%08x",
+				 caps->cap[CAP_LAST_U32 - i]);
+
+		rcu_read_unlock();
+		ret = proc_dostring(table, write, buffer, lenp, ppos);
+	}
+
+	return ret;
+}
+
+struct ctl_path nax_sysctl_path[] = {
+	{ .procname = "kernel" },
+	{ .procname = "nax"    },
+	{ }
+};
+
+static int max_mode = NAX_MODE_KILL;
+
+static struct ctl_table nax_sysctl_table[] = {
+	{
+		.procname     = "allowed_caps",
+		.data         = allowed_caps_hex,
+		.maxlen       = ALLOWED_CAPS_HEX_LEN + 1,
+		.mode         = 0644,
+		.proc_handler = nax_dostring,
+	}, {
+		.procname     = "check_all",
+		.data         = &check_all,
+		.maxlen       = sizeof(int),
+		.mode         = 0644,
+		.proc_handler = nax_dointvec_minmax,
+		.extra1       = SYSCTL_ZERO,
+		.extra2       = SYSCTL_ONE,
+	}, {
+		.procname     = "locked",
+		.data         = &locked,
+		.maxlen       = sizeof(int),
+		.mode         = 0644,
+		.proc_handler = nax_dointvec_minmax,
+		.extra1       = SYSCTL_ZERO,
+		.extra2       = SYSCTL_ONE,
+	}, {
+		.procname     = "mode",
+		.data         = &mode,
+		.maxlen       = sizeof(int),
+		.mode         = 0644,
+		.proc_handler = nax_dointvec_minmax,
+		.extra1       = SYSCTL_ZERO,
+		.extra2       = &max_mode,
+	}, {
+		.procname     = "quiet",
+		.data         = &quiet,
+		.maxlen       = sizeof(int),
+		.mode         = 0644,
+		.proc_handler = nax_dointvec_minmax,
+		.extra1       = SYSCTL_ZERO,
+		.extra2       = SYSCTL_ONE,
+	},
+	{ }
+};
+
+static void __init
+nax_init_sysctl(void)
+{
+	if (!register_sysctl_paths(nax_sysctl_path, nax_sysctl_table))
+		panic("NAX: sysctl registration failed.\n");
+}
+
+#else /* !CONFIG_SYSCTL */
+
+static inline void
+nax_init_sysctl(void)
+{
+
+}
+
+#endif /* !CONFIG_SYSCTL */
+
+static int __init setup_allowed_caps(char *str)
+{
+	if (locked)
+		return 1;
+
+	/* Do not allow trailing garbage or excessive length */
+	if (strlen(str) > ALLOWED_CAPS_HEX_LEN) {
+		pr_err("Invalid 'nax_allowed_caps' parameter value (%s)\n",
+		       str);
+		return 1;
+	}
+
+	strscpy(allowed_caps_hex, str, sizeof(allowed_caps_hex));
+	if (parse_and_set_caps(allowed_caps_hex))
+		pr_err("Invalid 'nax_allowed_caps' parameter value (%s)\n",
+		       str);
+
+	return 1;
+}
+__setup("nax_allowed_caps=", setup_allowed_caps);
+
+static int __init setup_check_all(char *str)
+{
+	unsigned long val;
+
+	if (!locked && !kstrtoul(str, 0, &val))
+		check_all = val ? 1 : 0;
+
+	return 1;
+}
+__setup("nax_quiet=", setup_check_all);
+
+static int __init setup_locked(char *str)
+{
+	unsigned long val;
+
+	if (!locked && !kstrtoul(str, 0, &val))
+		locked = val ? 1 : 0;
+
+	return 1;
+}
+__setup("nax_locked=", setup_locked);
+
+static int __init setup_mode(char *str)
+{
+	unsigned long val;
+
+	if (!locked && !kstrtoul(str, 0, &val)) {
+		if (val > max_mode) {
+			pr_err("Invalid 'nax_mode' parameter value (%s)\n",
+			       str);
+			val = max_mode;
+		}
+
+		mode = val;
+	}
+
+	return 1;
+}
+__setup("nax_mode=", setup_mode);
+
+static int __init setup_quiet(char *str)
+{
+	unsigned long val;
+
+	if (!locked && !kstrtoul(str, 0, &val))
+		quiet = val ? 1 : 0;
+
+	return 1;
+}
+__setup("nax_quiet=", setup_quiet);
+
+static __init int
+nax_init(void)
+{
+	int rc;
+
+	pr_info("Starting.\n");
+	rc = set_default_allowed_caps();
+	if (rc < 0)
+		return rc;
+
+	security_add_hooks(nax_hooks, ARRAY_SIZE(nax_hooks), "nax");
+	nax_init_sysctl();
+
+	return 0;
+}
+
+DEFINE_LSM(nax) = {
+	.name = "nax",
+	.init = nax_init,
+};
-- 
2.26.2



^ permalink raw reply related

* [PATCH v4 0/1] NAX (No Anonymous Execution) LSM
From: Igor Zhbanov @ 2021-08-20 15:13 UTC (permalink / raw)
  To: linux-integrity, linux-security-module, Mimi Zohar, THOBY Simon,
	linux-kernel

[Overview]

Fileless malware attacks are becoming more and more popular, and even
ready-to-use frameworks are available [1], [2], [3]. They are based on
running of the malware code from anonymous executable memory pages (which
are not backed by an executable file or a library on a filesystem.) This
allows effectively hiding malware presence in a system, making filesystem
integrity checking tools unable to detect the intrusion.

Typically, the malware first needs to intercept the execution flow (e.g.,
by the means of ROP-based exploit). Then it needs to download the main
part (in the form of normal executable or library) from its server,
because it is hard to implement the entire exploit in ROP-based form.
There are a number of security mechanisms that can ensure the integrity
of the file-system, but we need to ensure the integrity of the code in
memory too, to be sure, that only authorized code is running in the
system.

The proposed LSM is preventing the creation of anonymous executable pages
for the processes. The LSM intercepts mmap() and mprotect() system calls
and handles it similarly to SELinux handlers.

The module allows to block the violating system call or to kill the
violating process, depending on the settings, along with rate-limited
logging.

Currently, the module restricts ether all processes or only the
privileged processes, depending on the settings. The privileged process
is a process for which any of the following is true:
+ uid   == 0 && !issecure(SECURE_NOROOT)
+ euid  == 0 && !issecure(SECURE_NOROOT)
+ suid  == 0 && !issecure(SECURE_NOROOT)
+ cap_effective has any capability except of kernel.nax.allowed_caps
+ cap_permitted has any capability except of kernel.nax.allowed_caps

Checking of uid/euid/suid is important because a process may call
seteuid(0) to gain privileges (if SECURE_NO_SETUID_FIXUP secure bit
is not set).

The sysctl parameter kernel.nax.allowed_caps allows to define safe
capabilities set for the privileged processes.

[JIT]

Because of blocked anonymous code execution, JIT-compiled code, some
interpreters (which are using JIT) and libffi-based projects can be
broken.

Our observation shows that such processes are typically running by a
user, so they will not be privileged, so they will be allowed to use
anonymous executable pages.

But for small embedded set-ups it could be possible to get rid of such
processes at all, so the module could be enabled without further
restrictions to protect both privileged and non-privileged processes.

In addition, libffi can be modified not to use anonymous executable
pages.

[Similar implementations]

Although SELinux could be used to enable similar functionality, this LSM
is simpler. It could be used in set-ups, where SELinux would be overkill.

There is also SARA LSM module, which solves similar task, but it is more
complex.

[Cooperation with other security mechanisms]

NAX LSM is more useful in conjunction with IMA. IMA would be responsible
for integrity checking of file-based executables and libraries, and
NAX LSM would be responsible for preventing of anonymous code execution.

Alternatively, NAX LSM can be used with read-only root file system,
protected by dm-verity/fs-verity.

[TODO]
- Implement xattrs support for marking privileged binaries on a per-file
  basis and protect them with EVM.
- Store NAX attributes in the per-task LSM blob to implement special
  launchers for the privileged processes, so all of the children processes
  of such a launcher would be allowed to have anonymous executable pages
  (but not to grandchildren).

[Links]

[1] https://blog.fbkcs.ru/elf-in-memory-execution/
[2] https://magisterquis.github.io/2018/03/31/in-memory-only-elf-execution.html
[3] https://www.prodefence.org/fireelf-fileless-linux-malware-framework/

[Credits]

Thanks to Mimi Zohar for consulting and to Simon Thoby and Randy Dunlap for
thorough review.

[Changelog]

V4
- Fix indentation issues and typos in Kconfig.

V3
- Fix memory leak in allowed_caps assigning code.
- Protect allowed_caps updating with a spinlock.
- Fix Kconfig options description.
- Add example for allowed_caps value.
- Fix typo in documentation.

V2
- Fixed typo in Kconfig.
- Fixed "cap_effective" and "cap_permitted" parameters description in NAX.rst.
- Added "nax_allowed_caps" setup parameter. Factored out capabilities parsing
  logic.
- Added parameter for checking all processes (not only privileged).
- Added Kconfig parameter for setting allowed capabilities.
- Updated nax_file_mprotect() to avoid calling of nax_mmap_file() to avoid
  duplicated checks.
- Protect allowed_caps with RCU.
- Fixed all errors and most warning found by checkpatch.pl.
- Updated the module documentation. Added description of the boot parameters to
  kernel-parameters.
- Updated commit message.

V1:
- Initial implementation.

Igor Zhbanov (1):
  NAX LSM: Add initial support

 Documentation/admin-guide/LSM/NAX.rst         |  72 +++
 Documentation/admin-guide/LSM/index.rst       |   1 +
 .../admin-guide/kernel-parameters.rst         |   1 +
 .../admin-guide/kernel-parameters.txt         |  32 ++
 security/Kconfig                              |  11 +-
 security/Makefile                             |   2 +
 security/nax/Kconfig                          | 113 +++++
 security/nax/Makefile                         |   4 +
 security/nax/nax-lsm.c                        | 472 ++++++++++++++++++
 9 files changed, 703 insertions(+), 5 deletions(-)
 create mode 100644 Documentation/admin-guide/LSM/NAX.rst
 create mode 100644 security/nax/Kconfig
 create mode 100644 security/nax/Makefile
 create mode 100644 security/nax/nax-lsm.c

-- 
2.26.2

^ permalink raw reply

* Re: [PATCH v3 0/1] NAX (No Anonymous Execution) LSM
From: THOBY Simon @ 2021-08-20 13:35 UTC (permalink / raw)
  To: Igor Zhbanov, linux-integrity, linux-security-module, Mimi Zohar,
	linux-kernel@vger.kernel.org
In-Reply-To: <adc0e031-f02d-775c-1148-e808013c1b97@gmail.com>

Hi Igor,

On 8/20/21 12:12 AM, Igor Zhbanov wrote:
> [Overview]
> 
> Fileless malware attacks are becoming more and more popular, and even
> ready-to-use frameworks are available [1], [2], [3]. They are based on
> running of the malware code from anonymous executable memory pages (which
> are not backed by an executable file or a library on a filesystem.) This
> allows effectively hiding malware presence in a system, making filesystem
> integrity checking tools unable to detect the intrusion.
> 

[snip]

> 
> [TODO]
> - Implement xattrs support for marking privileged binaries on a per-file
>   basis.

If/when you plan to add that, adding the new xattr to the list of EVM-protected xattrs
may be worth discussing.

> - Store NAX attributes in the per-task LSM blob to implement special
>   launchers for the privileged processes, so all of the children processes
>   of such a launcher would be allowed to have anonymous executable pages
>   (but not to grandchildren).
> 

[snip]

Overall I'm pleased to see this patch and I have no more remarks,
outside of the few points Randy Dunlap raised.

Reviewed-by: THOBY Simon <Simon.THOBY@viveris.fr>

Thanks,
Simon

^ permalink raw reply

* Re: [PATCH] ima: fix infinite loop within "ima_match_policy" function.
From: THOBY Simon @ 2021-08-20 13:23 UTC (permalink / raw)
  To: 李力琼, zohar@linux.ibm.com
  Cc: dmitry.kasatkin@gmail.com, jmorris@namei.org, serge@hallyn.com,
	linux-integrity@vger.kernel.org,
	linux-security-module@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <d385686b-ffa5-5794-2cf2-b87f2a471e78@nfschina.com>

Hi Liqiong,

On 8/20/21 12:15 PM, 李力琼 wrote:
> Hi, Simon:
> 
> This solution is better then rwsem, a temp "ima_rules" variable should 
> can fix. I also have a another idea, with a little trick, default list
> can traverse to the new list, so we don't need care about the read side. 
> 
> here is the patch:
> 
> @@ -918,8 +918,21 @@ void ima_update_policy(void)
>         list_splice_tail_init_rcu(&ima_temp_rules, policy, synchronize_rcu);
> 
>         if (ima_rules != policy) {
> +               struct list_head *prev_rules = ima_rules;
> +               struct list_head *first = ima_rules->next;
>                 ima_policy_flag = 0;
> +
> +               /*
> +                * Make the previous list can traverse to new list,
> +                * that is tricky, or there is a deadly loop whithin
> +                * "list_for_each_entry_rcu(entry, ima_rules, list)"
> +                *
> +                * After update "ima_rules", restore the previous list.
> +                */

I think this could be rephrased to be a tad clearer, I am not quite sure
how I must interpret the first sentence of the comment.


> +               prev_rules->next = policy->next;
>                 ima_rules = policy;
> +               syncchronize_rcu();

I'm a bit puzzled as you seem to imply in the mail this patch was tested,
but there is no 'syncchronize_rcu' (with two 'c') symbol in the kernel.
Was that a copy/paste error? Or maybe you forgot the 'not' in "This
patch has been tested"? These errors happen, and I am myself quite an
expert in doing them :)

> +               prev_rules->next = first;
> 
> 
> The side effect is the "ima_default_rules" will be changed a little while.
> But it make sense, the process should be checked again by the new policy.
> 
> This patch has been tested, if will do, I can resubmit this patch.> 
> How about this ?


Correct me if I'm wrong, here is how I think I understand you patch.
We start with a situation like that (step 0):
ima_rules --> List entry 0 (head node) = ima_default_rules <-> List entry 1 <-> List entry 2 <-> ... <-> List entry 0

Then we decide to update the policy for the first time, so
'ima_rules [&ima_default_rules] != policy [&ima_policy_rules]'.
We enter the condition.
First we copy the current value of ima_rules (&ima_default_rules)
to a temporary variable 'prev_rules'. We also create a pointer dubbed
'first' to the entry 1 in the default list (step 1):
prev_rules -------------
                       \/
ima_rules --> List entry 0 (head node) = ima_default_rules <-> List entry 1 <-> List entry 2 <-> ... <-> List entry 0
                                                                   /\
first --------------------------------------------------------------


Then we update prev_rules->next to point to policy->next (step 2):
List entry 1 <-> List entry 2 <-> ... -> List entry 0
 /\
first
	(notice that list entry 0 no longer points backwards to 'list entry 1',
	but I don't think there is any reverse iteration in IMA, so it should be
	safe)

prev_rules -------------
                       \/
ima_rules --> List entry 0 (head node) = ima_default_rules   
                       |
                       |
                       -------------------------------------------
                                                                 \/
policy --> policy entry 0' (head node) = ima_policy_rules <-> policy entry 1' <-> policy entry 2' <-> .... <-> policy entry 0'


We then update ima_rules to point to ima_policy_rules (step 3):
List entry 1 <-> List entry 2 <-> ... -> List entry 0
 /\
first

prev_rules -------------
                       \/
ima_rules     List entry 0 (head node) = ima_default_rules   
     |                 |
     |                 |
     |                 ------------------------------------------
     ---------------                                            |
                   \/                                           \/
policy --> policy entry 0' (head node) = ima_policy_rules <-> policy entry 1' <-> policy entry 2' <-> .... <-> policy entry 0'
                                                                   /\
first --------------------------------------------------------------

Then we run synchronize_rcu() to wait for any RCU reader to exit their loops (step 4).

Finally we update prev_rules->next to point back to the ima policy and fix the loop (step 5):

List entry 1 <-> List entry 2 <-> ... -> List entry 0
 /\
first

prev_rules ---> List entry 0 (head node) = ima_default_rules <-> List entry 1 <-> List entry 2 <-> ... <-> List entry 0
                                                                     /\
                                                                 first (now useless)
ima_rules        
     |
     |
     |
     ---------------
                   \/
policy --> policy entry 0' (head node) = ima_policy_rules <-> policy entry 1' <-> policy entry 2' <-> .... <-> policy entry 0'

The goal is that readers should still be able to loop
(forward, as we saw that backward looping is temporarily broken)
while in steps 0-4.

I'm not completely sure what would happen to a client that started iterating
over ima_rules right after step 2.

Wouldn't they be able to start looping through the new policy
as 'List entry 0 (head node) = ima_default_rules' points to ima_policy_rules?
And if they, wouldn't they loop until the write to 'ima_rule' at step 3 (admittedly
very shortly thereafter) completed?
And would the compiler be allowed to optimize the read to 'ima_rules' in the
list_for_each_entry() loop, thereby never reloading the new value for
'ima_rules', and thus looping forever, just what we are trying to avoid?

Overall, I'm tempted to say this is perhaps a bit too complex (at least,
my head tells me it is, but that may very well be because I'm terrible
at concurrency issues).

Honestly, in this case I think awaiting input from more experienced
kernel devs than I is the best path forward :-)

> 
> ----------
> Regards,
> liqiong
> 

Thanks,
Simon

^ permalink raw reply

* Re: [PATCH] ima: fix infinite loop within "ima_match_policy" function.
From: 李力琼 @ 2021-08-20 10:15 UTC (permalink / raw)
  To: THOBY Simon, zohar@linux.ibm.com
  Cc: dmitry.kasatkin@gmail.com, jmorris@namei.org, serge@hallyn.com,
	linux-integrity@vger.kernel.org,
	linux-security-module@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <8d17f252-4a93-f430-3f25-e75556ab01e8@viveris.fr>

Hi, Simon:

This solution is better then rwsem, a temp "ima_rules" variable should 
can fix. I also have a another idea, with a little trick, default list
can traverse to the new list, so we don't need care about the read side. 

here is the patch:

@@ -918,8 +918,21 @@ void ima_update_policy(void)
        list_splice_tail_init_rcu(&ima_temp_rules, policy, synchronize_rcu);

        if (ima_rules != policy) {
+               struct list_head *prev_rules = ima_rules;
+               struct list_head *first = ima_rules->next;
                ima_policy_flag = 0;
+
+               /*
+                * Make the previous list can traverse to new list,
+                * that is tricky, or there is a deadly loop whithin
+                * "list_for_each_entry_rcu(entry, ima_rules, list)"
+                *
+                * After update "ima_rules", restore the previous list.
+                */
+               prev_rules->next = policy->next;
                ima_rules = policy;
+               syncchronize_rcu();
+               prev_rules->next = first;


The side effect is the "ima_default_rules" will be changed a little while.
But it make sense, the process should be checked again by the new policy.

This patch has been tested, if will do, I can resubmit this patch.

How about this ?

----------
Regards,
liqiong

在 2021年08月19日 20:58, THOBY Simon 写道:
> Hi Liqiong,
>
> On 8/19/21 12:15 PM, liqiong wrote:
>> When "ima_match_policy" is looping while "ima_update_policy" changs
>> the variable "ima_rules", then "ima_match_policy" may can't exit loop,
>> and kernel keeps printf "rcu_sched detected stall on CPU ...".
>>
>> It occurs at boot phase, systemd-services are being checked within
>> "ima_match_policy,at the same time, the variable "ima_rules"
>> is changed by a service.
> First off, thanks for finding and identifying this nasty bug.
>
>> Signed-off-by: liqiong <liqiong@nfschina.com>
>> ---
>>  security/integrity/ima/ima_policy.c | 5 +++++
>>  1 file changed, 5 insertions(+)
>>
>> diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
>> index fd5d46e511f1..7e71e643457c 100644
>> --- a/security/integrity/ima/ima_policy.c
>> +++ b/security/integrity/ima/ima_policy.c
>> @@ -217,6 +217,7 @@ static LIST_HEAD(ima_default_rules);
>>  static LIST_HEAD(ima_policy_rules);
>>  static LIST_HEAD(ima_temp_rules);
>>  static struct list_head *ima_rules = &ima_default_rules;
>> +static DECLARE_RWSEM(ima_rules_sem);
>>  
>>  static int ima_policy __initdata;
>>  
>> @@ -666,6 +667,7 @@ int ima_match_policy(struct user_namespace *mnt_userns, struct inode *inode,
>>  	if (template_desc && !*template_desc)
>>  		*template_desc = ima_template_desc_current();
>>  
>> +	down_read(&ima_rules_sem);
>>  	rcu_read_lock();
>>  	list_for_each_entry_rcu(entry, ima_rules, list) {
>>  
>> @@ -702,6 +704,7 @@ int ima_match_policy(struct user_namespace *mnt_userns, struct inode *inode,
>>  			break;
>>  	}
>>  	rcu_read_unlock();
>> +	up_read(&ima_rules_sem);
>>  
>>  	return action;
>>  }
>> @@ -919,7 +922,9 @@ void ima_update_policy(void)
>>  
>>  	if (ima_rules != policy) {
>>  		ima_policy_flag = 0;
>> +		down_write(&ima_rules_sem);
>>  		ima_rules = policy;
>> +		up_write(&ima_rules_sem);
>>  
>>  		/*
>>  		 * IMA architecture specific policy rules are specified
>>
> Rather than introducing a new semaphore, I wonder if you couldn't have done something
> like the following?
>
> @@ -674,13 +674,15 @@ int ima_match_policy(struct user_namespace *mnt_userns, struct inode *inode,
>                      const char *func_data, unsigned int *allowed_algos)
>  {
>         struct ima_rule_entry *entry;
> +       struct list_head *ima_rules_tmp;
>         int action = 0, actmask = flags | (flags << 1);
>
>         if (template_desc && !*template_desc)
>                 *template_desc = ima_template_desc_current();
>
>         rcu_read_lock();
> -       list_for_each_entry_rcu(entry, ima_rules, list) {
> +       ima_rules_tmp = rcu_dereference(ima_rules);
> +       list_for_each_entry_rcu(entry, ima_rules_tmp, list) {
>
>                 if (!(entry->action & actmask))
>                         continue;
> @@ -970,7 +972,7 @@ void ima_update_policy(void)
>
>         if (ima_rules != policy) {
>                 ima_policy_flag = 0;
> -               ima_rules = policy;
> +               rcu_assign_pointer(ima_rules, policy);
>
>                 /*
>                  * IMA architecture specific policy rules are specified
>
>
> Also, ima_match_policy is not the only place where we iterate over ima_rules, maybe
> this change should be applied to every function that perform a call the like of
> "list_for_each_entry_rcu(entry, ima_rules_tmp, list)" ?
>
> All that being said, your change is quite small and I have no objection to it,
> I was just wondering whether we could achieve the same effect without locks
> with RCU.
>
> What do you think?
>
> Thanks,
> Simon

-- 
李力琼<liqiong@nfschina.com>  13524287433
上海市浦东新区海科路99号中科院上海高等研究院3号楼3楼


^ permalink raw reply

* Re: [PATCH v28 22/25] Audit: Add record for multiple process LSM attributes
From: Casey Schaufler @ 2021-08-19 22:41 UTC (permalink / raw)
  To: Paul Moore
  Cc: casey.schaufler, James Morris, linux-security-module, selinux,
	linux-audit, keescook, john.johansen, penguin-kernel,
	Stephen Smalley, Casey Schaufler
In-Reply-To: <f3137410-185a-3012-1e38-e05a175495cc@schaufler-ca.com>


On 8/18/2021 5:56 PM, Casey Schaufler wrote:
> On 8/18/2021 5:47 PM, Paul Moore wrote:
>> ...
>> I just spent a few minutes tracing the code paths up from audit
>> through netlink and then through the socket layer and I'm not seeing
>> anything obvious where the path differs from any other syscall;
>> current->audit_context *should* be valid just like any other syscall.
>> However, I do have to ask, are you only seeing these audit records
>> with a current->audit_context equal to NULL during early boot?
> Nope. Sorry.

It looks as if all of the NULL audit_context cases are for either
auditd or systemd. Given what the events are, this isn't especially
surprising.



^ permalink raw reply

* Re: [PATCH v3 1/1] NAX LSM: Add initial support
From: Randy Dunlap @ 2021-08-19 22:29 UTC (permalink / raw)
  To: Igor Zhbanov, linux-integrity, linux-security-module, Mimi Zohar,
	THOBY Simon, linux-kernel
In-Reply-To: <3f7db609-6393-163f-287f-2211ed6239a5@gmail.com>

Hi--

On 8/19/21 3:13 PM, Igor Zhbanov wrote:
> diff --git a/security/nax/Kconfig b/security/nax/Kconfig
> new file mode 100644
> index 000000000000..f0777cc38e17
> --- /dev/null
> +++ b/security/nax/Kconfig
> @@ -0,0 +1,114 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +config SECURITY_NAX
> +	bool "NAX support"
> +	depends on SECURITY
> +	default n

'default n' is the default value and hence it is redundant.
We usually omit it.

> +	help
> +	  This selects NAX (No Anonymous Execution), which extends DAC
> +	  support with additional system-wide security settings beyond
> +	  regular Linux discretionary access controls. Currently, the only
> +	  available behavior is restricting the execution of anonymous and
> +	  modified pages.
> +
> +	  The module can restrict either privileged or all processes,
> +	  depending on the settings. It is possible to configure action,
> +	  performed when the violation is detected (log, log + block,
> +	  log + kill).
> +
> +	  Further information can be found in
> +	  Documentation/admin-guide/LSM/NAX.rst.
> +
> +	  If you are unsure how to answer this question, answer N.
> +
> +choice
> +	prompt "NAX violation action mode"
> +	default SECURITY_NAX_MODE_LOG
> +	depends on SECURITY_NAX
> +	help
> +	  Select the NAX violation action mode.
> +
> +	  In the default permissive mode the violations are only logged
> +	  (if logging is not suppressed). In the enforcing mode the violations
> +	  are prohibited. And in the kill mode the process is terminated.
> +
> +	  The value can be overridden at boot time with the kernel command-line
> +	  parameter "nax_mode=" (0, 1, 2) or "kernel.nax.mode=" (0, 1, 2)
> +	  sysctl parameter (if the settings are not locked).
> +
> +	config SECURITY_NAX_MODE_LOG
> +		bool "Permissive mode"
> +		help
> +		  In this mode violations are only logged (if logging is not
> +		  suppressed by the "kernel.nax.quiet" parameter). The
> +		  violating system call will not be prohibited.
> +	config SECURITY_NAX_MODE_ENFORCING
> +		bool "Enforcing mode"
> +		help
> +		  In this mode violations are prohibited and logged (if
> +		  logging is not suppressed by the "kernel.nax.quiet"
> +		  parameter). The violating system call will return -EACCES
> +		  error.
> +	config SECURITY_NAX_MODE_KILL
> +		bool "Kill mode"
> +		help
> +		  In this mode the violating process is terminated on the
> +		  first violation system call. The violation event is logged
> +		  (if logging is not suppressed by the "kernel.nax.quiet"
> +		  parameter).
> +endchoice
> +
> +config SECURITY_NAX_MODE
> +        int
> +        depends on SECURITY_NAX
> +        default 0 if SECURITY_NAX_MODE_LOG
> +        default 1 if SECURITY_NAX_MODE_ENFORCING
> +        default 2 if SECURITY_NAX_MODE_KILL
> +
> +config SECURITY_NAX_CHECK_ALL
> +	bool "Check all processes"
> +	depends on SECURITY_NAX
> +	help
> +	  If selected, NAX will check all processes. If not selected, NAX
> +	  will check only privileged processes (which is determined either
> +	  by having zero uid, euid, suid or fsuid; or by possessing
> +	  capabilities outside of allowed set).
> +
> +	  The value can also be overridden at boot time with the kernel
> +	  command-line parameter "nax_check_all=" (0, 1) or
> +	  "kernel.nax_check_all=" (0, 1) sysctl parameter (if the settings

	   kernel.nax.check_all ?

> +	  are not locked).
> +
> +config SECURITY_NAX_ALLOWED_CAPS
> +	hex "Process capabilities ignored by NAX"
> +	default 0x0
> +        range 0x0 0xffffffffffff

Indent above line with tab + 2 spaces instead of all spaces.

> +	depends on SECURITY_NAX
> +	help
> +	  Hexadecimal number representing the set of capabilities
> +	  a non-root process can possess without being considered
> +	  "privileged" by NAX LSM.
> +
> +	  The value can be overridden at boot time with the command-line
> +	  parameter "nax_allowed_caps=" or "kernel.nax.allowed_caps=" sysctl
> +	  parameter (if the settings are not locked).
> +
> +config SECURITY_NAX_QUIET
> +	bool "Silence NAX messages"
> +	depends on SECURITY_NAX
> +	help
> +	  If selected, NAX will not print violations.
> +
> +	  The value can be overridden at boot with the command-line
> +	  parameter "nax_quiet=" (0, 1) or "kernel.nax_quiet=" (0, 1) sysctl

	                                    kernel.nax.quiet

> +	  parameter (if the settings are not locked).
> +
> +config SECURITY_NAX_LOCKED
> +	bool "Lock NAX settings"
> +	depends on SECURITY_NAX
> +	help
> +	  Pevent any update to the settings of the NAX LSM. This applies to

	  Prevent

> +	  both sysctl writes and the kernel command line.
> +
> +	  If not selected, it can be enabled at boot time with the kernel
> +	  command-line parameter "nax_locked=1" or "kernel.nax_locked=1"

	                                            kernel.nax.locked

> +	  sysctl parameter (if the settings are not locked).


-- 
~Randy


^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox