Linux Security Modules development

Linux Security Modules development
 help / color / mirror / Atom feed

* [PATCH bpf-next RFC v1 1/5] bpf: Implement file local storage
From: Kumar Kartikeya Dwivedi @ 2021-08-21 18:48 UTC (permalink / raw)
  To: bpf
  Cc: Kumar Kartikeya Dwivedi, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Martin KaFai Lau, KP Singh, Spencer Baugh,
	Andy Lutomirski, Pavel Emelyanov, Alexander Mihalicyn,
	Andrei Vagin, linux-security-module
In-Reply-To: <20210821184824.2052643-1-memxor@gmail.com>

This map is useful in general to tie data associated with a open file
(not fd) from eBPF programs, such that data is released when the file
goes away (e.g. checkpoint/restore usecase).

Another usecase is implementing Capsicum [0] style capability sandbox in
userspace using eBPF LSM, enforcing rights at the file level using this
mechanism.

The implementation is exactly similar to bpf_inode_storage, with some
minor changes where required.

[0]: https://www.usenix.org/legacy/event/sec10/tech/full_papers/Watson.pdf

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
---
 include/linux/bpf_lsm.h       |  21 +++
 include/linux/bpf_types.h     |   1 +
 include/uapi/linux/bpf.h      |  39 ++++++
 kernel/bpf/Makefile           |   2 +-
 kernel/bpf/bpf_file_storage.c | 244 ++++++++++++++++++++++++++++++++++
 kernel/bpf/bpf_lsm.c          |   4 +
 kernel/bpf/syscall.c          |   3 +-
 kernel/bpf/verifier.c         |  10 ++
 security/bpf/hooks.c          |   2 +
 9 files changed, 324 insertions(+), 2 deletions(-)
 create mode 100644 kernel/bpf/bpf_file_storage.c

diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index 479c101546ad..5901a39cd5ac 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -42,6 +42,18 @@ extern const struct bpf_func_proto bpf_inode_storage_get_proto;
 extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
 void bpf_inode_storage_free(struct inode *inode);
 
+static inline struct bpf_storage_blob *bpf_file(const struct file *file)
+{
+	if (unlikely(!file->f_security))
+		return NULL;
+
+	return file->f_security + bpf_lsm_blob_sizes.lbs_file;
+}
+
+extern const struct bpf_func_proto bpf_file_storage_get_proto;
+extern const struct bpf_func_proto bpf_file_storage_delete_proto;
+void bpf_file_storage_free(struct file *file);
+
 #else /* !CONFIG_BPF_LSM */
 
 static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id)
@@ -65,6 +77,15 @@ static inline void bpf_inode_storage_free(struct inode *inode)
 {
 }
 
+static inline struct bpf_storage_blob *bpf_file(const struct file *file)
+{
+	return NULL;
+}
+
+static inline void bpf_file_storage_free(struct file *file)
+{
+}
+
 #endif /* CONFIG_BPF_LSM */
 
 #endif /* _LINUX_BPF_LSM_H */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 9c81724e4b98..c68cc6d9e7da 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -107,6 +107,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
 #ifdef CONFIG_BPF_LSM
 BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_FILE_STORAGE, file_storage_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c4f7892edb2b..d4bf4e4d56b5 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -906,6 +906,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_RINGBUF,
 	BPF_MAP_TYPE_INODE_STORAGE,
 	BPF_MAP_TYPE_TASK_STORAGE,
+	BPF_MAP_TYPE_FILE_STORAGE,
 };
 
 /* Note that tracing related programs such as
@@ -4871,6 +4872,42 @@ union bpf_attr {
  * 	Return
  *		Value specified by user at BPF link creation/attachment time
  *		or 0, if it was not specified.
+ *
+ * void *bpf_file_storage_get(struct bpf_map *map, void *file, void *value, u64 flags)
+ *	Description
+ *		Get a bpf_local_storage from a *file*.
+ *
+ *		Logically, it could be thought of as getting the value from
+ *		a *map* with *file* as the **key**.  From this
+ *		perspective,  the usage is not much different from
+ *		**bpf_map_lookup_elem**\ (*map*, **&**\ *file*) except this
+ *		helper enforces the key must be an file and the map must also
+ *		be a **BPF_MAP_TYPE_FILE_STORAGE**.
+ *
+ *		Underneath, the value is stored locally at *file* instead of
+ *		the *map*.  The *map* is used as the bpf-local-storage
+ *		"type". The bpf-local-storage "type" (i.e. the *map*) is
+ *		searched against all bpf_local_storage residing at *file*.
+ *
+ *		An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
+ *		used such that a new bpf_local_storage will be
+ *		created if one does not exist.  *value* can be used
+ *		together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
+ *		the initial value of a bpf_local_storage.  If *value* is
+ *		**NULL**, the new bpf_local_storage will be zero initialized.
+ *	Return
+ *		A bpf_local_storage pointer is returned on success.
+ *
+ *		**NULL** if not found or there was an error in adding
+ *		a new bpf_local_storage.
+ *
+ * int bpf_file_storage_delete(struct bpf_map *map, void *file)
+ *	Description
+ *		Delete a bpf_local_storage from a *file*.
+ *	Return
+ *		0 on success.
+ *
+ *		**-ENOENT** if the bpf_local_storage cannot be found.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5048,6 +5085,8 @@ union bpf_attr {
 	FN(timer_cancel),		\
 	FN(get_func_ip),		\
 	FN(get_attach_cookie),		\
+	FN(file_storage_get),		\
+	FN(file_storage_delete),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 7f33098ca63f..98a18e402a0a 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_i
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
 obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
-obj-${CONFIG_BPF_LSM}	  += bpf_inode_storage.o
+obj-${CONFIG_BPF_LSM}	  += bpf_inode_storage.o bpf_file_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 obj-$(CONFIG_BPF_JIT) += trampoline.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o
diff --git a/kernel/bpf/bpf_file_storage.c b/kernel/bpf/bpf_file_storage.c
new file mode 100644
index 000000000000..c826bc0405c4
--- /dev/null
+++ b/kernel/bpf/bpf_file_storage.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/rculist.h>
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <linux/types.h>
+#include <linux/filter.h>
+#include <linux/spinlock.h>
+#include <linux/bpf.h>
+#include <linux/bpf_local_storage.h>
+#include <uapi/linux/btf.h>
+#include <linux/bpf_lsm.h>
+#include <linux/btf_ids.h>
+
+DEFINE_BPF_STORAGE_CACHE(file_cache);
+
+static struct bpf_local_storage __rcu **file_storage_ptr(void *owner)
+{
+	struct bpf_storage_blob *bsb;
+	struct file *file = owner;
+
+	bsb = bpf_file(file);
+	if (!bsb)
+		return NULL;
+	return &bsb->storage;
+}
+
+static struct bpf_local_storage_data *
+file_storage_lookup(struct file *file, struct bpf_map *map, bool cacheit_lockit)
+{
+	struct bpf_local_storage *file_storage;
+	struct bpf_local_storage_map *smap;
+	struct bpf_storage_blob *bsb;
+
+	bsb = bpf_file(file);
+	if (!bsb)
+		return NULL;
+
+	file_storage = rcu_dereference(bsb->storage);
+	if (!file_storage)
+		return NULL;
+
+	smap = (struct bpf_local_storage_map *)map;
+	return bpf_local_storage_lookup(file_storage, smap, cacheit_lockit);
+}
+
+void bpf_file_storage_free(struct file *file)
+{
+	struct bpf_local_storage *local_storage;
+	struct bpf_local_storage_elem *selem;
+	bool free_file_storage = false;
+	struct bpf_storage_blob *bsb;
+	struct hlist_node *n;
+
+	bsb = bpf_file(file);
+	if (!bsb)
+		return;
+
+	rcu_read_lock();
+
+	local_storage = rcu_dereference(bsb->storage);
+	if (!local_storage) {
+		rcu_read_unlock();
+		return;
+	}
+
+	raw_spin_lock_bh(&local_storage->lock);
+	hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
+		bpf_selem_unlink_map(selem);
+		free_file_storage = bpf_selem_unlink_storage_nolock(local_storage,
+								    selem, false);
+	}
+	raw_spin_unlock_bh(&local_storage->lock);
+	rcu_read_unlock();
+
+	if (free_file_storage)
+		kfree_rcu(local_storage, rcu);
+}
+
+static void *bpf_fd_file_storage_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_local_storage_data *sdata;
+	struct file *file;
+	int fd;
+
+	fd = *(int *)key;
+	file = fget_raw(fd);
+	if (!file)
+		return ERR_PTR(-EBADF);
+
+	sdata = file_storage_lookup(file, map, true);
+	fput(file);
+	return sdata ? sdata->data : NULL;
+}
+
+static int bpf_fd_file_storage_update_elem(struct bpf_map *map, void *key,
+					   void *value, u64 map_flags)
+{
+	struct bpf_local_storage_data *sdata;
+	struct file *file;
+	int fd;
+
+	fd = *(int *)key;
+	file = fget_raw(fd);
+	if (!file)
+		return -EBADF;
+	if (!file_storage_ptr(file)) {
+		fput(file);
+		return -EBADF;
+	}
+
+	sdata = bpf_local_storage_update(file,
+					 (struct bpf_local_storage_map *)map,
+					 value, map_flags);
+	fput(file);
+	return PTR_ERR_OR_ZERO(sdata);
+}
+
+static int file_storage_delete(struct file *file, struct bpf_map *map)
+{
+	struct bpf_local_storage_data *sdata;
+
+	sdata = file_storage_lookup(file, map, false);
+	if (!sdata)
+		return -ENOENT;
+
+	bpf_selem_unlink(SELEM(sdata));
+
+	return 0;
+}
+
+static int bpf_fd_file_storage_delete_elem(struct bpf_map *map, void *key)
+{
+	struct file *file;
+	int fd, err;
+
+	fd = *(int *)key;
+	file = fget_raw(fd);
+	if (!file)
+		return -EBADF;
+
+	err = file_storage_delete(file, map);
+	fput(file);
+	return err;
+}
+
+BPF_CALL_4(bpf_file_storage_get, struct bpf_map *, map, struct file *, file,
+	   void *, value, u64, flags)
+{
+	struct bpf_local_storage_data *sdata;
+
+	if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
+		return (unsigned long)NULL;
+
+	if (!file || !file_storage_ptr(file))
+		return (unsigned long)NULL;
+
+	sdata = file_storage_lookup(file, map, true);
+	if (sdata)
+		return (unsigned long)sdata->data;
+
+	if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
+		sdata = bpf_local_storage_update(
+			file, (struct bpf_local_storage_map *)map, value,
+			BPF_NOEXIST);
+		return IS_ERR(sdata) ? (unsigned long)NULL :
+					     (unsigned long)sdata->data;
+	}
+
+	return (unsigned long)NULL;
+}
+
+BPF_CALL_2(bpf_file_storage_delete, struct bpf_map *, map, struct file *, file)
+{
+	if (!file)
+		return -EINVAL;
+
+	return file_storage_delete(file, map);
+}
+
+static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+	return -ENOTSUPP;
+}
+
+static struct bpf_map *file_storage_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_local_storage_map *smap;
+
+	smap = bpf_local_storage_map_alloc(attr);
+	if (IS_ERR(smap))
+		return ERR_CAST(smap);
+
+	smap->cache_idx = bpf_local_storage_cache_idx_get(&file_cache);
+	return &smap->map;
+}
+
+static void file_storage_map_free(struct bpf_map *map)
+{
+	struct bpf_local_storage_map *smap;
+
+	smap = (struct bpf_local_storage_map *)map;
+	bpf_local_storage_cache_idx_free(&file_cache, smap->cache_idx);
+	bpf_local_storage_map_free(smap, NULL);
+}
+
+static int file_storage_map_btf_id;
+
+const struct bpf_map_ops file_storage_map_ops = {
+	.map_meta_equal        = bpf_map_meta_equal,
+	.map_alloc_check       = bpf_local_storage_map_alloc_check,
+	.map_alloc             = file_storage_map_alloc,
+	.map_free              = file_storage_map_free,
+	.map_get_next_key      = notsupp_get_next_key,
+	.map_lookup_elem       = bpf_fd_file_storage_lookup_elem,
+	.map_update_elem       = bpf_fd_file_storage_update_elem,
+	.map_delete_elem       = bpf_fd_file_storage_delete_elem,
+	.map_check_btf         = bpf_local_storage_map_check_btf,
+	.map_btf_name          = "bpf_local_storage_map",
+	.map_btf_id            = &file_storage_map_btf_id,
+	.map_owner_storage_ptr = file_storage_ptr,
+};
+
+BTF_ID_LIST_SINGLE(bpf_file_storage_btf_ids, struct, file)
+
+const struct bpf_func_proto bpf_file_storage_get_proto = {
+	.func        = bpf_file_storage_get,
+	.gpl_only    = false,
+	.ret_type    = RET_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg1_type   = ARG_CONST_MAP_PTR,
+	.arg2_type   = ARG_PTR_TO_BTF_ID,
+	.arg2_btf_id = &bpf_file_storage_btf_ids[0],
+	.arg3_type   = ARG_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg4_type   = ARG_ANYTHING,
+};
+
+const struct bpf_func_proto bpf_file_storage_delete_proto = {
+	.func        = bpf_file_storage_delete,
+	.gpl_only    = false,
+	.ret_type    = RET_INTEGER,
+	.arg1_type   = ARG_CONST_MAP_PTR,
+	.arg2_type   = ARG_PTR_TO_BTF_ID,
+	.arg2_btf_id = &bpf_file_storage_btf_ids[0],
+};
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 06062370c3b8..48c2022fd958 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -121,6 +121,10 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_bprm_opts_set_proto;
 	case BPF_FUNC_ima_inode_hash:
 		return prog->aux->sleepable ? &bpf_ima_inode_hash_proto : NULL;
+	case BPF_FUNC_file_storage_get:
+		return &bpf_file_storage_get_proto;
+	case BPF_FUNC_file_storage_delete:
+		return &bpf_file_storage_delete_proto;
 	default:
 		return tracing_prog_func_proto(func_id, prog);
 	}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4e50c0bfdb7d..946a85945776 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -783,7 +783,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 		    map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
 		    map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
 		    map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
-		    map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
+		    map->map_type != BPF_MAP_TYPE_TASK_STORAGE &&
+		    map->map_type != BPF_MAP_TYPE_FILE_STORAGE)
 			return -ENOTSUPP;
 		if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
 		    map->value_size) {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f5a0077c9981..dc615658c92d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5390,6 +5390,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    func_id != BPF_FUNC_task_storage_delete)
 			goto error;
 		break;
+	case BPF_MAP_TYPE_FILE_STORAGE:
+		if (func_id != BPF_FUNC_file_storage_get &&
+		    func_id != BPF_FUNC_file_storage_delete)
+			goto error;
+		break;
 	default:
 		break;
 	}
@@ -5473,6 +5478,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
 			goto error;
 		break;
+	case BPF_FUNC_file_storage_get:
+	case BPF_FUNC_file_storage_delete:
+		if (map->map_type != BPF_MAP_TYPE_FILE_STORAGE)
+			goto error;
+		break;
 	default:
 		break;
 	}
diff --git a/security/bpf/hooks.c b/security/bpf/hooks.c
index e5971fa74fd7..faa70467db4d 100644
--- a/security/bpf/hooks.c
+++ b/security/bpf/hooks.c
@@ -13,6 +13,7 @@ static struct security_hook_list bpf_lsm_hooks[] __lsm_ro_after_init = {
 	#undef LSM_HOOK
 	LSM_HOOK_INIT(inode_free_security, bpf_inode_storage_free),
 	LSM_HOOK_INIT(task_free, bpf_task_storage_free),
+	LSM_HOOK_INIT(file_free_security, bpf_file_storage_free),
 };
 
 static int __init bpf_lsm_init(void)
@@ -25,6 +26,7 @@ static int __init bpf_lsm_init(void)
 struct lsm_blob_sizes bpf_lsm_blob_sizes __lsm_ro_after_init = {
 	.lbs_inode = sizeof(struct bpf_storage_blob),
 	.lbs_task = sizeof(struct bpf_storage_blob),
+	.lbs_file = sizeof(struct bpf_storage_blob),
 };
 
 DEFINE_LSM(bpf) = {
-- 
2.33.0


^ permalink raw reply related

* [PATCH bpf-next RFC v1 2/5] tools: sync bpf.h header
From: Kumar Kartikeya Dwivedi @ 2021-08-21 18:48 UTC (permalink / raw)
  To: bpf
  Cc: Kumar Kartikeya Dwivedi, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Martin KaFai Lau, KP Singh, Spencer Baugh,
	Andy Lutomirski, Pavel Emelyanov, Alexander Mihalicyn,
	Andrei Vagin, linux-security-module
In-Reply-To: <20210821184824.2052643-1-memxor@gmail.com>

Update the bpf.h UAPI header with changes made from file local storage
additions.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
---
 tools/include/uapi/linux/bpf.h | 39 ++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c4f7892edb2b..d4bf4e4d56b5 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -906,6 +906,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_RINGBUF,
 	BPF_MAP_TYPE_INODE_STORAGE,
 	BPF_MAP_TYPE_TASK_STORAGE,
+	BPF_MAP_TYPE_FILE_STORAGE,
 };
 
 /* Note that tracing related programs such as
@@ -4871,6 +4872,42 @@ union bpf_attr {
  * 	Return
  *		Value specified by user at BPF link creation/attachment time
  *		or 0, if it was not specified.
+ *
+ * void *bpf_file_storage_get(struct bpf_map *map, void *file, void *value, u64 flags)
+ *	Description
+ *		Get a bpf_local_storage from a *file*.
+ *
+ *		Logically, it could be thought of as getting the value from
+ *		a *map* with *file* as the **key**.  From this
+ *		perspective,  the usage is not much different from
+ *		**bpf_map_lookup_elem**\ (*map*, **&**\ *file*) except this
+ *		helper enforces the key must be an file and the map must also
+ *		be a **BPF_MAP_TYPE_FILE_STORAGE**.
+ *
+ *		Underneath, the value is stored locally at *file* instead of
+ *		the *map*.  The *map* is used as the bpf-local-storage
+ *		"type". The bpf-local-storage "type" (i.e. the *map*) is
+ *		searched against all bpf_local_storage residing at *file*.
+ *
+ *		An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
+ *		used such that a new bpf_local_storage will be
+ *		created if one does not exist.  *value* can be used
+ *		together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
+ *		the initial value of a bpf_local_storage.  If *value* is
+ *		**NULL**, the new bpf_local_storage will be zero initialized.
+ *	Return
+ *		A bpf_local_storage pointer is returned on success.
+ *
+ *		**NULL** if not found or there was an error in adding
+ *		a new bpf_local_storage.
+ *
+ * int bpf_file_storage_delete(struct bpf_map *map, void *file)
+ *	Description
+ *		Delete a bpf_local_storage from a *file*.
+ *	Return
+ *		0 on success.
+ *
+ *		**-ENOENT** if the bpf_local_storage cannot be found.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5048,6 +5085,8 @@ union bpf_attr {
 	FN(timer_cancel),		\
 	FN(get_func_ip),		\
 	FN(get_attach_cookie),		\
+	FN(file_storage_get),		\
+	FN(file_storage_delete),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
2.33.0


^ permalink raw reply related

* [PATCH bpf-next RFC v1 3/5] libbpf: Add bpf_probe_map_type support for file local storage
From: Kumar Kartikeya Dwivedi @ 2021-08-21 18:48 UTC (permalink / raw)
  To: bpf
  Cc: Kumar Kartikeya Dwivedi, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Martin KaFai Lau, KP Singh, Spencer Baugh,
	Andy Lutomirski, Pavel Emelyanov, Alexander Mihalicyn,
	Andrei Vagin, linux-security-module
In-Reply-To: <20210821184824.2052643-1-memxor@gmail.com>

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
---
 tools/lib/bpf/libbpf_probes.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index cd8c703dde71..a97f2088c53a 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -233,6 +233,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
 	case BPF_MAP_TYPE_SK_STORAGE:
 	case BPF_MAP_TYPE_INODE_STORAGE:
 	case BPF_MAP_TYPE_TASK_STORAGE:
+	case BPF_MAP_TYPE_FILE_STORAGE:
 		btf_key_type_id = 1;
 		btf_value_type_id = 3;
 		value_size = 8;
-- 
2.33.0


^ permalink raw reply related

* [PATCH bpf-next RFC v1 4/5] tools: bpf: update bpftool for file_storage map
From: Kumar Kartikeya Dwivedi @ 2021-08-21 18:48 UTC (permalink / raw)
  To: bpf
  Cc: Kumar Kartikeya Dwivedi, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Martin KaFai Lau, KP Singh, Spencer Baugh,
	Andy Lutomirski, Pavel Emelyanov, Alexander Mihalicyn,
	Andrei Vagin, linux-security-module
In-Reply-To: <20210821184824.2052643-1-memxor@gmail.com>

This updates bpftool to recognise the new file local storage map type.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
---
 tools/bpf/bpftool/Documentation/bpftool-map.rst | 2 +-
 tools/bpf/bpftool/bash-completion/bpftool       | 3 ++-
 tools/bpf/bpftool/map.c                         | 3 ++-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index d0c4abe08aba..aff192eb6e37 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -52,7 +52,7 @@ MAP COMMANDS
 |		| **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
 |		| **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
 |		| **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage**
-		| **task_storage** }
+		| **task_storage** | **file_storage** }
 
 DESCRIPTION
 ===========
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 88e2bcf16cca..e7939e82bda4 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -710,7 +710,8 @@ _bpftool()
                                 hash_of_maps devmap devmap_hash sockmap cpumap \
                                 xskmap sockhash cgroup_storage reuseport_sockarray \
                                 percpu_cgroup_storage queue stack sk_storage \
-                                struct_ops inode_storage task_storage ringbuf'
+                                struct_ops inode_storage task_storage ringbuf \
+                                file_storage'
                             COMPREPLY=( $( compgen -W "$BPFTOOL_MAP_CREATE_TYPES" -- "$cur" ) )
                             return 0
                             ;;
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 407071d54ab1..f3c6ea47f846 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -52,6 +52,7 @@ const char * const map_type_name[] = {
 	[BPF_MAP_TYPE_RINGBUF]			= "ringbuf",
 	[BPF_MAP_TYPE_INODE_STORAGE]		= "inode_storage",
 	[BPF_MAP_TYPE_TASK_STORAGE]		= "task_storage",
+	[BPF_MAP_TYPE_FILE_STORAGE]		= "file_storage",
 };
 
 const size_t map_type_name_size = ARRAY_SIZE(map_type_name);
@@ -1466,7 +1467,7 @@ static int do_help(int argc, char **argv)
 		"                 devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
 		"                 cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
 		"                 queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n"
-		"                 task_storage }\n"
+		"		  task_storage | file_storage }\n"
 		"       " HELP_SPEC_OPTIONS " |\n"
 		"                    {-f|--bpffs} | {-n|--nomount} }\n"
 		"",
-- 
2.33.0


^ permalink raw reply related

* [PATCH bpf-next RFC v1 5/5] tools: testing: Add selftest for file local storage map
From: Kumar Kartikeya Dwivedi @ 2021-08-21 18:48 UTC (permalink / raw)
  To: bpf
  Cc: Kumar Kartikeya Dwivedi, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Martin KaFai Lau, KP Singh, Spencer Baugh,
	Andy Lutomirski, Pavel Emelyanov, Alexander Mihalicyn,
	Andrei Vagin, linux-security-module
In-Reply-To: <20210821184824.2052643-1-memxor@gmail.com>

This adds a test case for verifying that file local storage map works as
intended.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
---
 .../bpf/prog_tests/test_local_storage.c       | 51 +++++++++++++++++++
 .../selftests/bpf/progs/local_storage.c       | 23 +++++++++
 2 files changed, 74 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
index d2c16eaae367..154dee32320c 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
@@ -24,6 +24,7 @@ static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
 static unsigned int duration;
 
 #define TEST_STORAGE_VALUE 0xbeefdead
+#define DUMMY_STORAGE_VALUE 0xdeadbeef
 
 struct storage {
 	void *inode;
@@ -111,6 +112,51 @@ static bool check_syscall_operations(int map_fd, int obj_fd)
 	return true;
 }
 
+int test_file_local_storage(struct bpf_map *map)
+{
+	struct storage ls;
+	int fd, ret;
+
+	fd = open("/dev/null", O_RDONLY);
+	if (!ASSERT_GE(fd, 0, "open(/dev/null)"))
+		return -errno;
+
+	ret = bpf_map_lookup_elem(bpf_map__fd(map), &fd, &ls);
+	if (!ASSERT_OK(ret, "bpf_map_lookup_elem for file local storage"))
+		goto end;
+
+	ASSERT_EQ(ls.value, DUMMY_STORAGE_VALUE, "file local value match");
+
+	ret = bpf_map_delete_elem(bpf_map__fd(map), &fd);
+	if (!ASSERT_OK(ret, "bpf_map_delete_elem for file local storage"))
+		goto end;
+
+	ret = bpf_map_lookup_elem(bpf_map__fd(map), &fd, &ls);
+	if (!ASSERT_EQ(ret, -ENOENT, "bpf_map_lookup_elem should fail"))
+		goto end;
+
+	memset(&ls, 0, sizeof(ls));
+	ls.value = DUMMY_STORAGE_VALUE;
+	ret = bpf_map_update_elem(bpf_map__fd(map), &fd, &ls, BPF_NOEXIST);
+	if (!ASSERT_OK(ret, "bpf_map_update_elem for file local storage"))
+		goto end;
+
+	ret = bpf_map_lookup_elem(bpf_map__fd(map), &fd, &ls);
+	if (!ASSERT_OK(ret, "bpf_map_lookup_elem for file local storage"))
+		goto end;
+
+	close(fd);
+
+	ret = bpf_map_lookup_elem(bpf_map__fd(map), &fd, &ls);
+	if (!ASSERT_EQ(ret, -EBADF, "bpf_map_lookup_elem should fail"))
+		return -EINVAL;
+
+	return 0;
+end:
+	close(fd);
+	return ret;
+}
+
 void test_test_local_storage(void)
 {
 	char tmp_dir_path[] = "/tmp/local_storageXXXXXX";
@@ -167,6 +213,11 @@ void test_test_local_storage(void)
 	/* Set the process being monitored to be the current process */
 	skel->bss->monitored_pid = getpid();
 
+	/* Test file local storage */
+	err = test_file_local_storage(skel->maps.file_storage_map);
+	if (!ASSERT_OK(err, "test_file_local_storage"))
+		goto close_prog_rmdir;
+
 	/* Move copy_of_rm to a new location so that it triggers the
 	 * inode_rename LSM hook with a new_dentry that has a NULL inode ptr.
 	 */
diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c
index 95868bc7ada9..68561812f454 100644
--- a/tools/testing/selftests/bpf/progs/local_storage.c
+++ b/tools/testing/selftests/bpf/progs/local_storage.c
@@ -44,6 +44,13 @@ struct {
 	__type(value, struct local_storage);
 } task_storage_map SEC(".maps");
 
+struct {
+	__uint(type, BPF_MAP_TYPE_FILE_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct local_storage);
+} file_storage_map SEC(".maps");
+
 SEC("lsm/inode_unlink")
 int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
 {
@@ -181,3 +188,19 @@ void BPF_PROG(exec, struct linux_binprm *bprm)
 	storage->value = DUMMY_STORAGE_VALUE;
 	bpf_spin_unlock(&storage->lock);
 }
+
+SEC("lsm/file_open")
+int BPF_PROG(file_open, struct file *file)
+{
+	__u32 pid = bpf_get_current_pid_tgid() >> 32;
+	struct local_storage *storage;
+
+	if (pid != monitored_pid)
+		return 0;
+
+	storage = bpf_file_storage_get(&file_storage_map, file, 0,
+				       BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (storage)
+		storage->value = DUMMY_STORAGE_VALUE;
+	return 0;
+}
-- 
2.33.0


^ permalink raw reply related

* Re: [PATCH] ima: fix infinite loop within "ima_match_policy" function.
From: 李力琼 @ 2021-08-23  3:04 UTC (permalink / raw)
  To: Mimi Zohar, THOBY Simon
  Cc: dmitry.kasatkin@gmail.com, jmorris@namei.org, serge@hallyn.com,
	linux-integrity@vger.kernel.org,
	linux-security-module@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <96037695de6125c701889c168550def278adfd4b.camel@linux.ibm.com>

Hi Mimi :

The situation is a little different,'list_splice_init_rcu'
don't change the list head. If "ima_rules" being changed,
readers may can't reload the new value in time for cpu cache
or compiler optimization. Defining "ima_rules" as a volatile 
variable can fix, but It is inefficient.

Maybe using a temporary ima_rules variable for every 
"list_for_each_entry_rcu(entry, ima_rules, list)" loop is 
a better solution to fix the "endless loop" bug. 

Regards,

liqiong

在 2021年08月20日 23:48, Mimi Zohar 写道:
> On Fri, 2021-08-20 at 13:23 +0000, THOBY Simon wrote:
>> Hi Liqiong,
>>
>> On 8/20/21 12:15 PM, 李力琼 wrote:
>>> Hi, Simon:
>>>
>>> This solution is better then rwsem, a temp "ima_rules" variable should 
>>> can fix. I also have a another idea, with a little trick, default list
>>> can traverse to the new list, so we don't need care about the read side. 
>>>
>>> here is the patch:
>>>
>>> @@ -918,8 +918,21 @@ void ima_update_policy(void)
>>>         list_splice_tail_init_rcu(&ima_temp_rules, policy, synchronize_rcu);
>>>
>>>         if (ima_rules != policy) {
>>> +               struct list_head *prev_rules = ima_rules;
>>> +               struct list_head *first = ima_rules->next;
>>>                 ima_policy_flag = 0;
>>> +
>>> +               /*
>>> +                * Make the previous list can traverse to new list,
>>> +                * that is tricky, or there is a deadly loop whithin
>>> +                * "list_for_each_entry_rcu(entry, ima_rules, list)"
>>> +                *
>>> +                * After update "ima_rules", restore the previous list.
>>> +                */
>> I think this could be rephrased to be a tad clearer, I am not quite sure
>> how I must interpret the first sentence of the comment.
>>
>>
>>> +               prev_rules->next = policy->next;
>>>                 ima_rules = policy;
>>> +               syncchronize_rcu();
>> I'm a bit puzzled as you seem to imply in the mail this patch was tested,
>> but there is no 'syncchronize_rcu' (with two 'c') symbol in the kernel.
>> Was that a copy/paste error? Or maybe you forgot the 'not' in "This
>> patch has been tested"? These errors happen, and I am myself quite an
>> expert in doing them :)
>>
>>> +               prev_rules->next = first;
>>>
>>>
>>> The side effect is the "ima_default_rules" will be changed a little while.
>>> But it make sense, the process should be checked again by the new policy.
>>>
>>> This patch has been tested, if will do, I can resubmit this patch.> 
>>> How about this ?
>> least
>>
>> Correct me if I'm wrong, here is how I think I understand you patch.
>> We start with a situation like that (step 0):
>> ima_rules --> List entry 0 (head node) = ima_default_rules <-> List entry 1 <-> List entry 2 <-> ... <-> List entry 0
>>
>> Then we decide to update the policy for the first time, so
>> 'ima_rules [&ima_default_rules] != policy [&ima_policy_rules]'.
>> We enter the condition.
>> First we copy the current value of ima_rules (&ima_default_rules)
>> to a temporary variable 'prev_rules'. We also create a pointer dubbed
>> 'first' to the entry 1 in the default list (step 1):
>> prev_rules -------------
>>                        \/
>> ima_rules --> List entry 0 (head node) = ima_default_rules <-> List entry 1 <-> List entry 2 <-> ... <-> List entry 0
>>                                                                    /\
>> first --------------------------------------------------------------
>>
>>
>> Then we update prev_rules->next to point to policy->next (step 2):
>> List entry 1 <-> List entry 2 <-> ... -> List entry 0
>>  /\
>> first
>> 	(notice that list entry 0 no longer points backwards to 'list entry 1',
>> 	but I don't think there is any reverse iteration in IMA, so it should be
>> 	safe)
>>
>> prev_rules -------------
>>                        \/
>> ima_rules --> List entry 0 (head node) = ima_default_rules   
>>                        |
>>                        |
>>                        -------------------------------------------
>>                                                                  \/
>> policy --> policy entry 0' (head node) = ima_policy_rules <-> policy entry 1' <-> policy entry 2' <-> .... <-> policy entry 0'
>>
>>
>> We then update ima_rules to point to ima_policy_rules (step 3):
>> List entry 1 <-> List entry 2 <-> ... -> List entry 0
>>  /\
>> first
>>
>> prev_rules -------------
>>                        \/
>> ima_rules     List entry 0 (head node) = ima_default_rules   
>>      |                 |
>>      |                 |
>>      |                 ------------------------------------------
>>      ---------------                                            |
>>                    \/                                           \/
>> policy --> policy entry 0' (head node) = ima_policy_rules <-> policy entry 1' <-> policy entry 2' <-> .... <-> policy entry 0'
>>                                                   synchronize_rcu                 /\
>> first --------------------------------------------------------------
>>
>> Then we run synchronize_rcu() to wait for any RCU reader to exit their loops (step 4).
>>
>> Finally we update prev_rules->next to point back to the ima policy and fix the loop (step 5):
>>
>> List entry 1 <-> List entry 2 <-> ... -> List entry 0
>>  /\
>> first
>>
>> prev_rules ---> List entry 0 (head node) = ima_default_rules <-> List entry 1 <-> List entry 2 <-> ... <-> List entry 0
>>                                                                      /\
>>                                                                  first (now useless)
>> ima_rules        
>>      |
>>      |
>>      |
>>      ---------------
>>                    \/
>> policy --> policy entry 0' (head node) = ima_policy_rules <-> policy entry 1' <-> policy entry 2' <-> .... <-> policy entry 0'
>>
>> The goal is that readers should still be able to loop
>> (forward, as we saw that backward looping is temporarily broken)
>> while in steps 0-4.
>>
>> I'm not completely sure what would happen to a client that started iterating
>> over ima_rules right after step 2.
>>
>> Wouldn't they be able to start looping through the new policy
>> as 'List entry 0 (head node) = ima_default_rules' points to ima_policy_rules?
>> And if they, wouldn't they loop until the write to 'ima_rule' at step 3 (admittedly
>> very shortly thereafter) completed?
>> And would the compiler be allowed to optimize the read to 'ima_rules' in the
>> list_for_each_entry() loop, thereby never reloading the new value for
>> 'ima_rules', and thus looping forever, just what we are trying to avoid?
>>
>> Overall, I'm tempted to say this is perhaps a bit too complex (at least,
>> my head tells me it is, but that may very well be because I'm terrible
>> at concurrency issues).
>>
>> Honestly, in this case I think awaiting input from more experienced
>> kernel devs than I is the best path forward :-)
> I'm far from an expert on RCU locking, but __list_splice_init_rcu()
> provides an example of how to make sure there aren't any readers
> traversing the list, before two lists are spliced together.   In our
> case, after there aren't any readers, instead of splicing two lists
> together, it should be safe to point to the new list.
>
> thanks,
>
> Mimi
>

^ permalink raw reply

* Re: [PATCH] ima: fix infinite loop within "ima_match_policy" function.
From: THOBY Simon @ 2021-08-23  7:13 UTC (permalink / raw)
  To: liqiong, zohar@linux.ibm.com
  Cc: dmitry.kasatkin@gmail.com, jmorris@namei.org, serge@hallyn.com,
	linux-integrity@vger.kernel.org,
	linux-security-module@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <6d60893c-63dc-394f-d43c-9ecab7b6d06e@nfschina.com>

Hi Liqiong,

On 8/20/21 7:53 PM, liqiong wrote:
> Hi Simon,
> 
> On 2021/8/20 21:23, THOBY Simon wrote:
>> Hi Liqiong,
>>
>> On 8/20/21 12:15 PM, 李力琼 wrote:
>>> Hi, Simon:
>>>
>>> This solution is better then rwsem, a temp "ima_rules" variable should
>>> can fix. I also have a another idea, with a little trick, default list
>>> can traverse to the new list, so we don't need care about the read side.
>>>
>>> here is the patch:
>>>
>>> @@ -918,8 +918,21 @@ void ima_update_policy(void)
>>>          list_splice_tail_init_rcu(&ima_temp_rules, policy, synchronize_rcu);
>>>
>>>          if (ima_rules != policy) {
>>> +               struct list_head *prev_rules = ima_rules;
>>> +               struct list_head *first = ima_rules->next;
>>>                  ima_policy_flag = 0;
>>> +
>>> +               /*
>>> +                * Make the previous list can traverse to new list,
>>> +                * that is tricky, or there is a deadly loop whithin
>>> +                * "list_for_each_entry_rcu(entry, ima_rules, list)"
>>> +                *
>>> +                * After update "ima_rules", restore the previous list.
>>> +                */
>> I think this could be rephrased to be a tad clearer, I am not quite sure
>> how I must interpret the first sentence of the comment.
> I got it,  how about this:
>  /*
>   * The previous list has to traverse to new list,
>   * Or there may be a deadly loop within

Maybe 'deadlock' would be clearer than 'deadly loop'?

>   * "list_for_each_entry_rcu(entry, ima_rules, list)"
>   *
>   * That is tricky, after updated "ima_rules", restore the previous list.

Maybe something like "This is tricky, so we restore the previous list (ima_default_rules)
once 'ima_rules' is updated" ?

>   */>>
>>
>>> +               prev_rules->next = policy->next;
>>>                  ima_rules = policy;
>>> +               syncchronize_rcu();
>> I'm a bit puzzled as you seem to imply in the mail this patch was tested,
>> but there is no 'syncchronize_rcu' (with two 'c') symbol in the kernel.
>> Was that a copy/paste error? Or maybe you forgot the 'not' in "This
>> patch has been tested"? These errors happen, and I am myself quite an
>> expert in doing them :)
> 
> 
> Sorry for the mistake, I copy/paste the patch and delete/edit some lines,
> have reviewed before sending, but not found. I have made a case to reproduce
> the error, dumping "ima_rules" and every item address of list in the error
> situaiton, I can watchthe "ima_rules" change, old list traversing to the new list.
> And I have been doing a reboot test which found this bug. This patch seems to work fine.
> 

No worries, i just wanted to make sure I understood you correctly.

> 
>>
>>> +               prev_rules->next = first;
>>>
>>>
>>> The side effect is the "ima_default_rules" will be changed a little while.
>>> But it make sense, the process should be checked again by the new policy.
>>>
>>> This patch has been tested, if will do, I can resubmit this patch.>
>>> How about this ?
>>
>> Correct me if I'm wrong, here is how I think I understand you patch.
>> We start with a situation like that (step 0):
>> ima_rules --> List entry 0 (head node) = ima_default_rules <-> List entry 1 <-> List entry 2 <-> ... <-> List entry 0
>>
>> Then we decide to update the policy for the first time, so
>> 'ima_rules [&ima_default_rules] != policy [&ima_policy_rules]'.
>> We enter the condition.
>> First we copy the current value of ima_rules (&ima_default_rules)
>> to a temporary variable 'prev_rules'. We also create a pointer dubbed
>> 'first' to the entry 1 in the default list (step 1):
>> prev_rules -------------
>>                         \/
>> ima_rules --> List entry 0 (head node) = ima_default_rules <-> List entry 1 <-> List entry 2 <-> ... <-> List entry 0
>>                                                                     /\
>> first --------------------------------------------------------------
>>
>>
>> Then we update prev_rules->next to point to policy->next (step 2):
>> List entry 1 <-> List entry 2 <-> ... -> List entry 0
>>   /\
>> first
>>     (notice that list entry 0 no longer points backwards to 'list entry 1',
>>     but I don't think there is any reverse iteration in IMA, so it should be
>>     safe)
>>
>> prev_rules -------------
>>                         \/
>> ima_rules --> List entry 0 (head node) = ima_default_rules
>>                         |
>>                         |
>>                         -------------------------------------------
>>                                                                   \/
>> policy --> policy entry 0' (head node) = ima_policy_rules <-> policy entry 1' <-> policy entry 2' <-> .... <-> policy entry 0'
>>
>>
>> We then update ima_rules to point to ima_policy_rules (step 3):
>> List entry 1 <-> List entry 2 <-> ... -> List entry 0
>>   /\
>> first
>>
>> prev_rules -------------
>>                         \/
>> ima_rules     List entry 0 (head node) = ima_default_rules
>>       |                 |
>>       |                 |
>>       |                 ------------------------------------------
>>       ---------------                                            |
>>                     \/                                           \/
>> policy --> policy entry 0' (head node) = ima_policy_rules <-> policy entry 1' <-> policy entry 2' <-> .... <-> policy entry 0'
>>                                                                     /\
>> first --------------------------------------------------------------
>>
>> Then we run synchronize_rcu() to wait for any RCU reader to exit their loops (step 4).
>>
>> Finally we update prev_rules->next to point back to the ima policy and fix the loop (step 5):
>>
>> List entry 1 <-> List entry 2 <-> ... -> List entry 0
>>   /\
>> first
>>
>> prev_rules ---> List entry 0 (head node) = ima_default_rules <-> List entry 1 <-> List entry 2 <-> ... <-> List entry 0
>>                                                                       /\
>>                                                                   first (now useless)
>> ima_rules
>>       |
>>       |
>>       |
>>       ---------------
>>                     \/
>> policy --> policy entry 0' (head node) = ima_policy_rules <-> policy entry 1' <-> policy entry 2' <-> .... <-> policy entry 0'
>>
>> The goal is that readers should still be able to loop
>> (forward, as we saw that backward looping is temporarily broken)
>> while in steps 0-4.
> 
> 
> Yes, It's the workflow.
> 
> 
>> I'm not completely sure what would happen to a client that started iterating
>> over ima_rules right after step 2.
>>
>> Wouldn't they be able to start looping through the new policy
>> as 'List entry 0 (head node) = ima_default_rules' points to ima_policy_rules?
>> And if they, wouldn't they loop until the write to 'ima_rule' at step 3 (admittedly
>> very shortly thereafter) completed?
>> And would the compiler be allowed to optimize the read to 'ima_rules' in the
>> list_for_each_entry() loop, thereby never reloading the new value for
>> 'ima_rules', and thus looping forever, just what we are trying to avoid?
> 
> 
> Yes,  "ima_rules" cache not update in time, It's a risk. I am not sure if "WRITE_ONCE"
> can do this trick. How about:
>     WRITE_ONCE(prev_rules->next, policy->next);
>     WRITE_ONCE(ima_rules, policy);

Quite frankly, I don't know. As I said earlier, this is really way above my level.
I'm fine waiting for more experienced opinions on this one.

On the aspect of maintainability, I do think this solution is perhaps too complex
when compared to other solutions like the semaphore you first proposed.
A solution of similar complexity with RCU would be ideal to prevent adding a
semaphore on a read-mostly scenario, but I'm still more confident in the semaphore
than in the solution above, because it is easy to have confidence in the semaphore,
while this patch is not at all obvious to me, and maybe the next person who will
have to edit that piece of code.

> 
> If can't fix the cache issue, maybe the "ima_rules_tmp" solution is the best way.
> I will test it.
> 
> 
>> Overall, I'm tempted to say this is perhaps a bit too complex (at least,
>> my head tells me it is, but that may very well be because I'm terrible
>> at concurrency issues).
>>
>> Honestly, in this case I think awaiting input from more experienced
>> kernel devs than I is the best path forward :-)
>>
>>> ----------
>>> Regards,
>>> liqiong

Thanks,
Simon

^ permalink raw reply

* Re: [PATCH] ima: fix infinite loop within "ima_match_policy" function.
From: 李力琼 @ 2021-08-23  7:51 UTC (permalink / raw)
  To: Mimi Zohar, THOBY Simon
  Cc: dmitry.kasatkin@gmail.com, jmorris@namei.org, serge@hallyn.com,
	linux-integrity@vger.kernel.org,
	linux-security-module@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <f9798484-7090-0ddf-50a6-7c7c5bf0606c@nfschina.com>

Hi Simon :

Using a temporary ima_rules variable is not working for "ima_policy_next". void *ima_policy_next(struct seq_file *m, void *v, loff_t *pos) { struct ima_rule_entry *entry = v; + struct list_head *ima_rules_tmp = rcu_dereference(ima_rules); rcu_read_lock(); entry = list_entry_rcu(entry->list.next, struct ima_rule_entry, list); rcu_read_unlock(); (*pos)++; - return (&entry->list == ima_rules) ? NULL : entry; + return (&entry->list == ima_rules_tmp) ? NULL : entry; }

It seems no way to fix "ima_rules" change within this function, it will alway
return a entry if "ima_rules" being changed.

Regrads,

liqiong

在 2021年08月23日 11:04, 李力琼 写道:
> Hi Mimi :
>
> The situation is a little different,'list_splice_init_rcu'
> don't change the list head. If "ima_rules" being changed,
> readers may can't reload the new value in time for cpu cache
> or compiler optimization. Defining "ima_rules" as a volatile 
> variable can fix, but It is inefficient.
>
> Maybe using a temporary ima_rules variable for every 
> "list_for_each_entry_rcu(entry, ima_rules, list)" loop is 
> a better solution to fix the "endless loop" bug. 
>
> Regards,
>
> liqiong
>
> 在 2021年08月20日 23:48, Mimi Zohar 写道:
>> On Fri, 2021-08-20 at 13:23 +0000, THOBY Simon wrote:
>>> Hi Liqiong,
>>>
>>> On 8/20/21 12:15 PM, 李力琼 wrote:
>>>> Hi, Simon:
>>>>
>>>> This solution is better then rwsem, a temp "ima_rules" variable should 
>>>> can fix. I also have a another idea, with a little trick, default list
>>>> can traverse to the new list, so we don't need care about the read side. 
>>>>
>>>> here is the patch:
>>>>
>>>> @@ -918,8 +918,21 @@ void ima_update_policy(void)
>>>>         list_splice_tail_init_rcu(&ima_temp_rules, policy, synchronize_rcu);
>>>>
>>>>         if (ima_rules != policy) {
>>>> +               struct list_head *prev_rules = ima_rules;
>>>> +               struct list_head *first = ima_rules->next;
>>>>                 ima_policy_flag = 0;
>>>> +
>>>> +               /*
>>>> +                * Make the previous list can traverse to new list,
>>>> +                * that is tricky, or there is a deadly loop whithin
>>>> +                * "list_for_each_entry_rcu(entry, ima_rules, list)"
>>>> +                *
>>>> +                * After update "ima_rules", restore the previous list.
>>>> +                */
>>> I think this could be rephrased to be a tad clearer, I am not quite sure
>>> how I must interpret the first sentence of the comment.
>>>
>>>
>>>> +               prev_rules->next = policy->next;
>>>>                 ima_rules = policy;
>>>> +               syncchronize_rcu();
>>> I'm a bit puzzled as you seem to imply in the mail this patch was tested,
>>> but there is no 'syncchronize_rcu' (with two 'c') symbol in the kernel.
>>> Was that a copy/paste error? Or maybe you forgot the 'not' in "This
>>> patch has been tested"? These errors happen, and I am myself quite an
>>> expert in doing them :)
>>>
>>>> +               prev_rules->next = first;
>>>>
>>>>
>>>> The side effect is the "ima_default_rules" will be changed a little while.
>>>> But it make sense, the process should be checked again by the new policy.
>>>>
>>>> This patch has been tested, if will do, I can resubmit this patch.> 
>>>> How about this ?
>>> least
>>>
>>> Correct me if I'm wrong, here is how I think I understand you patch.
>>> We start with a situation like that (step 0):
>>> ima_rules --> List entry 0 (head node) = ima_default_rules <-> List entry 1 <-> List entry 2 <-> ... <-> List entry 0
>>>
>>> Then we decide to update the policy for the first time, so
>>> 'ima_rules [&ima_default_rules] != policy [&ima_policy_rules]'.
>>> We enter the condition.
>>> First we copy the current value of ima_rules (&ima_default_rules)
>>> to a temporary variable 'prev_rules'. We also create a pointer dubbed
>>> 'first' to the entry 1 in the default list (step 1):
>>> prev_rules -------------
>>>                        \/
>>> ima_rules --> List entry 0 (head node) = ima_default_rules <-> List entry 1 <-> List entry 2 <-> ... <-> List entry 0
>>>                                                                    /\
>>> first --------------------------------------------------------------
>>>
>>>
>>> Then we update prev_rules->next to point to policy->next (step 2):
>>> List entry 1 <-> List entry 2 <-> ... -> List entry 0
>>>  /\
>>> first
>>> 	(notice that list entry 0 no longer points backwards to 'list entry 1',
>>> 	but I don't think there is any reverse iteration in IMA, so it should be
>>> 	safe)
>>>
>>> prev_rules -------------
>>>                        \/
>>> ima_rules --> List entry 0 (head node) = ima_default_rules   
>>>                        |
>>>                        |
>>>                        -------------------------------------------
>>>                                                                  \/
>>> policy --> policy entry 0' (head node) = ima_policy_rules <-> policy entry 1' <-> policy entry 2' <-> .... <-> policy entry 0'
>>>
>>>
>>> We then update ima_rules to point to ima_policy_rules (step 3):
>>> List entry 1 <-> List entry 2 <-> ... -> List entry 0
>>>  /\
>>> first
>>>
>>> prev_rules -------------
>>>                        \/
>>> ima_rules     List entry 0 (head node) = ima_default_rules   
>>>      |                 |
>>>      |                 |
>>>      |                 ------------------------------------------
>>>      ---------------                                            |
>>>                    \/                                           \/
>>> policy --> policy entry 0' (head node) = ima_policy_rules <-> policy entry 1' <-> policy entry 2' <-> .... <-> policy entry 0'
>>>                                                   synchronize_rcu                 /\
>>> first --------------------------------------------------------------
>>>
>>> Then we run synchronize_rcu() to wait for any RCU reader to exit their loops (step 4).
>>>
>>> Finally we update prev_rules->next to point back to the ima policy and fix the loop (step 5):
>>>
>>> List entry 1 <-> List entry 2 <-> ... -> List entry 0
>>>  /\
>>> first
>>>
>>> prev_rules ---> List entry 0 (head node) = ima_default_rules <-> List entry 1 <-> List entry 2 <-> ... <-> List entry 0
>>>                                                                      /\
>>>                                                                  first (now useless)
>>> ima_rules        
>>>      |
>>>      |
>>>      |
>>>      ---------------
>>>                    \/
>>> policy --> policy entry 0' (head node) = ima_policy_rules <-> policy entry 1' <-> policy entry 2' <-> .... <-> policy entry 0'
>>>
>>> The goal is that readers should still be able to loop
>>> (forward, as we saw that backward looping is temporarily broken)
>>> while in steps 0-4.
>>>
>>> I'm not completely sure what would happen to a client that started iterating
>>> over ima_rules right after step 2.
>>>
>>> Wouldn't they be able to start looping through the new policy
>>> as 'List entry 0 (head node) = ima_default_rules' points to ima_policy_rules?
>>> And if they, wouldn't they loop until the write to 'ima_rule' at step 3 (admittedly
>>> very shortly thereafter) completed?
>>> And would the compiler be allowed to optimize the read to 'ima_rules' in the
>>> list_for_each_entry() loop, thereby never reloading the new value for
>>> 'ima_rules', and thus looping forever, just what we are trying to avoid?
>>>
>>> Overall, I'm tempted to say this is perhaps a bit too complex (at least,
>>> my head tells me it is, but that may very well be because I'm terrible
>>> at concurrency issues).
>>>
>>> Honestly, in this case I think awaiting input from more experienced
>>> kernel devs than I is the best path forward :-)
>> I'm far from an expert on RCU locking, but __list_splice_init_rcu()
>> provides an example of how to make sure there aren't any readers
>> traversing the list, before two lists are spliced together.   In our
>> case, after there aren't any readers, instead of splicing two lists
>> together, it should be safe to point to the new list.
>>
>> thanks,
>>
>> Mimi
>>


^ permalink raw reply

* Re: [PATCH] ima: fix infinite loop within "ima_match_policy" function.
From: liqiong @ 2021-08-23  8:06 UTC (permalink / raw)
  To: Mimi Zohar, THOBY Simon
  Cc: dmitry.kasatkin@gmail.com, jmorris@namei.org, serge@hallyn.com,
	linux-integrity@vger.kernel.org,
	linux-security-module@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <f9798484-7090-0ddf-50a6-7c7c5bf0606c@nfschina.com>

Hi Simon :

Using a temporary ima_rules variable is not working for "ima_policy_next". 

 void *ima_policy_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	struct ima_rule_entry *entry = v;
-
+	struct list_head *ima_rules_tmp = rcu_dereference(ima_rules);
 	rcu_read_lock();
 	entry = list_entry_rcu(entry->list.next, struct ima_rule_entry, list);
 	rcu_read_unlock();
 	(*pos)++;
 
-	return (&entry->list == ima_rules) ? NULL : entry;
+	return (&entry->list == ima_rules_tmp) ? NULL : entry;
 }

It seems no way to fix "ima_rules" change within this function, it will alway
return a entry if "ima_rules" being changed.

Regrads,

liqiong



在 2021年08月23日 11:04, 李力琼 写道:
> Hi Mimi :
>
> The situation is a little different,'list_splice_init_rcu'
> don't change the list head. If "ima_rules" being changed,
> readers may can't reload the new value in time for cpu cache
> or compiler optimization. Defining "ima_rules" as a volatile 
> variable can fix, but It is inefficient.
>
> Maybe using a temporary ima_rules variable for every 
> "list_for_each_entry_rcu(entry, ima_rules, list)" loop is 
> a better solution to fix the "endless loop" bug. 
>
> Regards,
>
> liqiong
>
> 在 2021年08月20日 23:48, Mimi Zohar 写道:
>> On Fri, 2021-08-20 at 13:23 +0000, THOBY Simon wrote:
>>> Hi Liqiong,
>>>
>>> On 8/20/21 12:15 PM, 李力琼 wrote:
>>>> Hi, Simon:
>>>>
>>>> This solution is better then rwsem, a temp "ima_rules" variable should 
>>>> can fix. I also have a another idea, with a little trick, default list
>>>> can traverse to the new list, so we don't need care about the read side. 
>>>>
>>>> here is the patch:
>>>>
>>>> @@ -918,8 +918,21 @@ void ima_update_policy(void)
>>>>         list_splice_tail_init_rcu(&ima_temp_rules, policy, synchronize_rcu);
>>>>
>>>>         if (ima_rules != policy) {
>>>> +               struct list_head *prev_rules = ima_rules;
>>>> +               struct list_head *first = ima_rules->next;
>>>>                 ima_policy_flag = 0;
>>>> +
>>>> +               /*
>>>> +                * Make the previous list can traverse to new list,
>>>> +                * that is tricky, or there is a deadly loop whithin
>>>> +                * "list_for_each_entry_rcu(entry, ima_rules, list)"
>>>> +                *
>>>> +                * After update "ima_rules", restore the previous list.
>>>> +                */
>>> I think this could be rephrased to be a tad clearer, I am not quite sure
>>> how I must interpret the first sentence of the comment.
>>>
>>>
>>>> +               prev_rules->next = policy->next;
>>>>                 ima_rules = policy;
>>>> +               syncchronize_rcu();
>>> I'm a bit puzzled as you seem to imply in the mail this patch was tested,
>>> but there is no 'syncchronize_rcu' (with two 'c') symbol in the kernel.
>>> Was that a copy/paste error? Or maybe you forgot the 'not' in "This
>>> patch has been tested"? These errors happen, and I am myself quite an
>>> expert in doing them :)
>>>
>>>> +               prev_rules->next = first;
>>>>
>>>>
>>>> The side effect is the "ima_default_rules" will be changed a little while.
>>>> But it make sense, the process should be checked again by the new policy.
>>>>
>>>> This patch has been tested, if will do, I can resubmit this patch.> 
>>>> How about this ?
>>> least
>>>
>>> Correct me if I'm wrong, here is how I think I understand you patch.
>>> We start with a situation like that (step 0):
>>> ima_rules --> List entry 0 (head node) = ima_default_rules <-> List entry 1 <-> List entry 2 <-> ... <-> List entry 0
>>>
>>> Then we decide to update the policy for the first time, so
>>> 'ima_rules [&ima_default_rules] != policy [&ima_policy_rules]'.
>>> We enter the condition.
>>> First we copy the current value of ima_rules (&ima_default_rules)
>>> to a temporary variable 'prev_rules'. We also create a pointer dubbed
>>> 'first' to the entry 1 in the default list (step 1):
>>> prev_rules -------------
>>>                        \/
>>> ima_rules --> List entry 0 (head node) = ima_default_rules <-> List entry 1 <-> List entry 2 <-> ... <-> List entry 0
>>>                                                                    /\
>>> first --------------------------------------------------------------
>>>
>>>
>>> Then we update prev_rules->next to point to policy->next (step 2):
>>> List entry 1 <-> List entry 2 <-> ... -> List entry 0
>>>  /\
>>> first
>>> 	(notice that list entry 0 no longer points backwards to 'list entry 1',
>>> 	but I don't think there is any reverse iteration in IMA, so it should be
>>> 	safe)
>>>
>>> prev_rules -------------
>>>                        \/
>>> ima_rules --> List entry 0 (head node) = ima_default_rules   
>>>                        |
>>>                        |
>>>                        -------------------------------------------
>>>                                                                  \/
>>> policy --> policy entry 0' (head node) = ima_policy_rules <-> policy entry 1' <-> policy entry 2' <-> .... <-> policy entry 0'
>>>
>>>
>>> We then update ima_rules to point to ima_policy_rules (step 3):
>>> List entry 1 <-> List entry 2 <-> ... -> List entry 0
>>>  /\
>>> first
>>>
>>> prev_rules -------------
>>>                        \/
>>> ima_rules     List entry 0 (head node) = ima_default_rules   
>>>      |                 |
>>>      |                 |
>>>      |                 ------------------------------------------
>>>      ---------------                                            |
>>>                    \/                                           \/
>>> policy --> policy entry 0' (head node) = ima_policy_rules <-> policy entry 1' <-> policy entry 2' <-> .... <-> policy entry 0'
>>>                                                   synchronize_rcu                 /\
>>> first --------------------------------------------------------------
>>>
>>> Then we run synchronize_rcu() to wait for any RCU reader to exit their loops (step 4).
>>>
>>> Finally we update prev_rules->next to point back to the ima policy and fix the loop (step 5):
>>>
>>> List entry 1 <-> List entry 2 <-> ... -> List entry 0
>>>  /\
>>> first
>>>
>>> prev_rules ---> List entry 0 (head node) = ima_default_rules <-> List entry 1 <-> List entry 2 <-> ... <-> List entry 0
>>>                                                                      /\
>>>                                                                  first (now useless)
>>> ima_rules        
>>>      |
>>>      |
>>>      |
>>>      ---------------
>>>                    \/
>>> policy --> policy entry 0' (head node) = ima_policy_rules <-> policy entry 1' <-> policy entry 2' <-> .... <-> policy entry 0'
>>>
>>> The goal is that readers should still be able to loop
>>> (forward, as we saw that backward looping is temporarily broken)
>>> while in steps 0-4.
>>>
>>> I'm not completely sure what would happen to a client that started iterating
>>> over ima_rules right after step 2.
>>>
>>> Wouldn't they be able to start looping through the new policy
>>> as 'List entry 0 (head node) = ima_default_rules' points to ima_policy_rules?
>>> And if they, wouldn't they loop until the write to 'ima_rule' at step 3 (admittedly
>>> very shortly thereafter) completed?
>>> And would the compiler be allowed to optimize the read to 'ima_rules' in the
>>> list_for_each_entry() loop, thereby never reloading the new value for
>>> 'ima_rules', and thus looping forever, just what we are trying to avoid?
>>>
>>> Overall, I'm tempted to say this is perhaps a bit too complex (at least,
>>> my head tells me it is, but that may very well be because I'm terrible
>>> at concurrency issues).
>>>
>>> Honestly, in this case I think awaiting input from more experienced
>>> kernel devs than I is the best path forward :-)
>> I'm far from an expert on RCU locking, but __list_splice_init_rcu()
>> provides an example of how to make sure there aren't any readers
>> traversing the list, before two lists are spliced together.   In our
>> case, after there aren't any readers, instead of splicing two lists
>> together, it should be safe to point to the new list.
>>
>> thanks,
>>
>> Mimi
>>


^ permalink raw reply

* Re: [PATCH] ima: fix infinite loop within "ima_match_policy" function.
From: THOBY Simon @ 2021-08-23  8:14 UTC (permalink / raw)
  To: liqiong, Mimi Zohar
  Cc: dmitry.kasatkin@gmail.com, jmorris@namei.org, serge@hallyn.com,
	linux-integrity@vger.kernel.org,
	linux-security-module@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <fee498ec-087c-b52d-102c-d29d98f9b794@nfschina.com>

Hi Liqiong,

On 8/23/21 10:06 AM, liqiong wrote:
> Hi Simon :
> 
> Using a temporary ima_rules variable is not working for "ima_policy_next". 
> 
>  void *ima_policy_next(struct seq_file *m, void *v, loff_t *pos)
>  {
>  	struct ima_rule_entry *entry = v;
> -
> +	struct list_head *ima_rules_tmp = rcu_dereference(ima_rules);
>  	rcu_read_lock();
>  	entry = list_entry_rcu(entry->list.next, struct ima_rule_entry, list);
>  	rcu_read_unlock();
>  	(*pos)++;
>  
> -	return (&entry->list == ima_rules) ? NULL : entry;
> +	return (&entry->list == ima_rules_tmp) ? NULL : entry;
>  }
> 
> It seems no way to fix "ima_rules" change within this function, it will alway
> return a entry if "ima_rules" being changed.

- I think rcu_dereference() should be called inside the RCU read lock
- Maybe we could cheat with:
	return (&entry->list == &ima_policy_rules || &entry->list == &ima_default_rules) ? NULL : entry;
  as that's the only two rulesets IMA ever use?
  Admittedly, this is not as clean as previously, but it should work too.

The way I see it, the semaphore solution would not work here either,
as ima_policy_next() is called repeatedly as a seq_file
(it is set up in ima_fs.c) and we can't control the locking there:
we cannot lock across the seq_read() call (that cure could end up be
worse than the disease, deadlock-wise), so I fear we cannot protect
against a list update while a user is iterating with a lock.

So in both cases a cheat like "&entry->list == &ima_policy_rules || &entry->list == &ima_default_rules"
maybe need to be considered.

What do you think?


> 
> Regrads,
> 
> liqiong

Thanks,
Simon

^ permalink raw reply

* Re: [PATCH] ima: fix infinite loop within "ima_match_policy" function.
From: Mimi Zohar @ 2021-08-23 11:22 UTC (permalink / raw)
  To: 李力琼, THOBY Simon
  Cc: dmitry.kasatkin@gmail.com, jmorris@namei.org, serge@hallyn.com,
	linux-integrity@vger.kernel.org,
	linux-security-module@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <f9798484-7090-0ddf-50a6-7c7c5bf0606c@nfschina.com>

Hi Liqiong,

On Mon, 2021-08-23 at 11:04 +0800, 李力琼 wrote:
> Hi Mimi :
> 
> The situation is a little different,'list_splice_init_rcu'
> don't change the list head. If "ima_rules" being changed,
> readers may can't reload the new value in time for cpu cache
> or compiler optimization. Defining "ima_rules" as a volatile 
> variable can fix, but It is inefficient.

After looking at list_splice_tail_init_rcu() some more, it is
actually making sure there aren't any readers traversing
"ima_temp_rules", not "ima_policy_rules".   There aren't any readers
traversing "ima_temp_rules". 

> 
> Maybe using a temporary ima_rules variable for every 
> "list_for_each_entry_rcu(entry, ima_rules, list)" loop is 
> a better solution to fix the "endless loop" bug. 

Agreed

thanks,

Mimi



^ permalink raw reply

* Re: [PATCH] ima: fix infinite loop within "ima_match_policy" function.
From: Mimi Zohar @ 2021-08-23 11:57 UTC (permalink / raw)
  To: THOBY Simon, liqiong
  Cc: dmitry.kasatkin@gmail.com, jmorris@namei.org, serge@hallyn.com,
	linux-integrity@vger.kernel.org,
	linux-security-module@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <cf715a40-b255-c688-578c-7f8bcd004ee3@viveris.fr>

On Mon, 2021-08-23 at 08:14 +0000, THOBY Simon wrote:
> Hi Liqiong,
> 
> On 8/23/21 10:06 AM, liqiong wrote:
> > Hi Simon :
> > 
> > Using a temporary ima_rules variable is not working for "ima_policy_next". 
> > 
> >  void *ima_policy_next(struct seq_file *m, void *v, loff_t *pos)
> >  {
> >  	struct ima_rule_entry *entry = v;
> > -
> > +	struct list_head *ima_rules_tmp = rcu_dereference(ima_rules);
> >  	rcu_read_lock();
> >  	entry = list_entry_rcu(entry->list.next, struct ima_rule_entry, list);
> >  	rcu_read_unlock();
> >  	(*pos)++;
> >  
> > -	return (&entry->list == ima_rules) ? NULL : entry;
> > +	return (&entry->list == ima_rules_tmp) ? NULL : entry;
> >  }
> > 
> > It seems no way to fix "ima_rules" change within this function, it will alway
> > return a entry if "ima_rules" being changed.
> 
> - I think rcu_dereference() should be called inside the RCU read lock
> - Maybe we could cheat with:
> 	return (&entry->list == &ima_policy_rules || &entry->list == &ima_default_rules) ? NULL : entry;
>   as that's the only two rulesets IMA ever use?
>   Admittedly, this is not as clean as previously, but it should work too.
> 
> The way I see it, the semaphore solution would not work here either,
> as ima_policy_next() is called repeatedly as a seq_file
> (it is set up in ima_fs.c) and we can't control the locking there:
> we cannot lock across the seq_read() call (that cure could end up be
> worse than the disease, deadlock-wise), so I fear we cannot protect
> against a list update while a user is iterating with a lock.
> 
> So in both cases a cheat like "&entry->list == &ima_policy_rules || &entry->list == &ima_default_rules"
> maybe need to be considered.
> 
> What do you think?

Is this an overall suggestion or limited to just ima_policy_next()?

thanks,

Mimi



^ permalink raw reply

* Re: [PATCH] ima: fix infinite loop within "ima_match_policy" function.
From: THOBY Simon @ 2021-08-23 12:02 UTC (permalink / raw)
  To: Mimi Zohar, liqiong
  Cc: dmitry.kasatkin@gmail.com, jmorris@namei.org, serge@hallyn.com,
	linux-integrity@vger.kernel.org,
	linux-security-module@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <c02ff60205fdb343cb5a2ff0e4384fc7b47635a3.camel@linux.ibm.com>

Hi Mimi,

On 8/23/21 1:57 PM, Mimi Zohar wrote:
> On Mon, 2021-08-23 at 08:14 +0000, THOBY Simon wrote:
>> Hi Liqiong,
>>
>> On 8/23/21 10:06 AM, liqiong wrote:
>>> Hi Simon :
>>>
>>> Using a temporary ima_rules variable is not working for "ima_policy_next". 
>>>
>>>  void *ima_policy_next(struct seq_file *m, void *v, loff_t *pos)
>>>  {
>>>  	struct ima_rule_entry *entry = v;
>>> -
>>> +	struct list_head *ima_rules_tmp = rcu_dereference(ima_rules);
>>>  	rcu_read_lock();
>>>  	entry = list_entry_rcu(entry->list.next, struct ima_rule_entry, list);
>>>  	rcu_read_unlock();
>>>  	(*pos)++;
>>>  
>>> -	return (&entry->list == ima_rules) ? NULL : entry;
>>> +	return (&entry->list == ima_rules_tmp) ? NULL : entry;
>>>  }
>>>
>>> It seems no way to fix "ima_rules" change within this function, it will alway
>>> return a entry if "ima_rules" being changed.
>>
>> - I think rcu_dereference() should be called inside the RCU read lock
>> - Maybe we could cheat with:
>> 	return (&entry->list == &ima_policy_rules || &entry->list == &ima_default_rules) ? NULL : entry;
>>   as that's the only two rulesets IMA ever use?
>>   Admittedly, this is not as clean as previously, but it should work too.
>>
>> The way I see it, the semaphore solution would not work here either,
>> as ima_policy_next() is called repeatedly as a seq_file
>> (it is set up in ima_fs.c) and we can't control the locking there:
>> we cannot lock across the seq_read() call (that cure could end up be
>> worse than the disease, deadlock-wise), so I fear we cannot protect
>> against a list update while a user is iterating with a lock.
>>
>> So in both cases a cheat like "&entry->list == &ima_policy_rules || &entry->list == &ima_default_rules"
>> maybe need to be considered.
>>
>> What do you think?
> 
> Is this an overall suggestion or limited to just ima_policy_next()?

I was thinking only of ima_policy_next(), I don't think (from what I could see in a short glance)
that other functions need such a treatment. The ima_rules_tmp dance is probably safe for the
other uses of ima_rules.

> 
> thanks,
> 
> Mimi
> 
> 

Thanks,
Simon

^ permalink raw reply

* Re: [PATCH] ima: fix infinite loop within "ima_match_policy" function.
From: Mimi Zohar @ 2021-08-23 12:09 UTC (permalink / raw)
  To: THOBY Simon, liqiong
  Cc: dmitry.kasatkin@gmail.com, jmorris@namei.org, serge@hallyn.com,
	linux-integrity@vger.kernel.org,
	linux-security-module@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <a4302c76-5865-a8f5-e754-c5dd04030533@viveris.fr>

Hi Simon,

On Mon, 2021-08-23 at 12:02 +0000, THOBY Simon wrote:
> Hi Mimi,
> 
> On 8/23/21 1:57 PM, Mimi Zohar wrote:
> > On Mon, 2021-08-23 at 08:14 +0000, THOBY Simon wrote:
> >> Hi Liqiong,
> >>
> >> On 8/23/21 10:06 AM, liqiong wrote:
> >>> Hi Simon :
> >>>
> >>> Using a temporary ima_rules variable is not working for "ima_policy_next". 
> >>>
> >>>  void *ima_policy_next(struct seq_file *m, void *v, loff_t *pos)
> >>>  {
> >>>  	struct ima_rule_entry *entry = v;
> >>> -
> >>> +	struct list_head *ima_rules_tmp = rcu_dereference(ima_rules);
> >>>  	rcu_read_lock();
> >>>  	entry = list_entry_rcu(entry->list.next, struct ima_rule_entry, list);
> >>>  	rcu_read_unlock();
> >>>  	(*pos)++;
> >>>  
> >>> -	return (&entry->list == ima_rules) ? NULL : entry;
> >>> +	return (&entry->list == ima_rules_tmp) ? NULL : entry;
> >>>  }
> >>>
> >>> It seems no way to fix "ima_rules" change within this function, it will alway
> >>> return a entry if "ima_rules" being changed.
> >>
> >> - I think rcu_dereference() should be called inside the RCU read lock
> >> - Maybe we could cheat with:
> >> 	return (&entry->list == &ima_policy_rules || &entry->list == &ima_default_rules) ? NULL : entry;
> >>   as that's the only two rulesets IMA ever use?
> >>   Admittedly, this is not as clean as previously, but it should work too.
> >>
> >> The way I see it, the semaphore solution would not work here either,
> >> as ima_policy_next() is called repeatedly as a seq_file
> >> (it is set up in ima_fs.c) and we can't control the locking there:
> >> we cannot lock across the seq_read() call (that cure could end up be
> >> worse than the disease, deadlock-wise), so I fear we cannot protect
> >> against a list update while a user is iterating with a lock.
> >>
> >> So in both cases a cheat like "&entry->list == &ima_policy_rules || &entry->list == &ima_default_rules"
> >> maybe need to be considered.
> >>
> >> What do you think?
> > 
> > Is this an overall suggestion or limited to just ima_policy_next()?
> 
> I was thinking only of ima_policy_next(), I don't think (from what I could see in a short glance)
> that other functions need such a treatment. The ima_rules_tmp dance is probably safe for the
> other uses of ima_rules.

Thanks, just making sure it is limited to here.

Mimi




^ permalink raw reply

* Re: [PATCH] ima: fix infinite loop within "ima_match_policy" function.
From: liqiong @ 2021-08-23 12:56 UTC (permalink / raw)
  To: THOBY Simon, Mimi Zohar
  Cc: dmitry.kasatkin@gmail.com, jmorris@namei.org, serge@hallyn.com,
	linux-integrity@vger.kernel.org,
	linux-security-module@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <cf715a40-b255-c688-578c-7f8bcd004ee3@viveris.fr>

Hi Simon :

在 2021年08月23日 16:14, THOBY Simon 写道:
> Hi Liqiong,
>
> On 8/23/21 10:06 AM, liqiong wrote:
>> Hi Simon :
>>
>> Using a temporary ima_rules variable is not working for "ima_policy_next". 
>>
>>  void *ima_policy_next(struct seq_file *m, void *v, loff_t *pos)
>>  {
>>  	struct ima_rule_entry *entry = v;
>> -
>> +	struct list_head *ima_rules_tmp = rcu_dereference(ima_rules);
>>  	rcu_read_lock();
>>  	entry = list_entry_rcu(entry->list.next, struct ima_rule_entry, list);
>>  	rcu_read_unlock();
>>  	(*pos)++;
>>  
>> -	return (&entry->list == ima_rules) ? NULL : entry;
>> +	return (&entry->list == ima_rules_tmp) ? NULL : entry;
>>  }
>>
>> It seems no way to fix "ima_rules" change within this function, it will alway
>> return a entry if "ima_rules" being changed.
> - I think rcu_dereference() should be called inside the RCU read lock
> - Maybe we could cheat with:
> 	return (&entry->list == &ima_policy_rules || &entry->list == &ima_default_rules) ? NULL : entry;
>   as that's the only two rulesets IMA ever use?
>   Admittedly, this is not as clean as previously, but it should work too.
>
> The way I see it, the semaphore solution would not work here either,
> as ima_policy_next() is called repeatedly as a seq_file
> (it is set up in ima_fs.c) and we can't control the locking there:
> we cannot lock across the seq_read() call (that cure could end up be
> worse than the disease, deadlock-wise), so I fear we cannot protect
> against a list update while a user is iterating with a lock.
>
> So in both cases a cheat like "&entry->list == &ima_policy_rules || &entry->list == &ima_default_rules"
> maybe need to be considered.
>
> What do you think?

Yes,  semaphore solution not work here,  splicing two list is a little complex.
This solution is  simple and clear,  should  work.  I will work on that, test and 
confirm  the patch. 

"rcu_dereference() should be called inside the RCU read lock", I will correct this.

Thanks for your help.


Regrads,

liqiong

>
>
>> Regrads,
>>
>> liqiong
> Thanks,
> Simon


^ permalink raw reply

* Re: [PATCH 0/4] KEYS: trusted: Introduce support for NXP CAAM-based trusted keys
From: Ahmad Fatoum @ 2021-08-23 13:29 UTC (permalink / raw)
  To: Tim Harvey
  Cc: David Gstir, Aymen Sghaier, Mimi Zohar, Jan Luebbe, keyrings,
	Steffen Trumtrar, linux-security-module, Udit Agarwal, Herbert Xu,
	Horia Geantă, Richard Weinberger, James Morris, Eric Biggers,
	Serge E. Hallyn, Sumit Garg, James Bottomley, Franck LENORMAND,
	David Howells, open list, Jarkko Sakkinen, linux-crypto,
	Sascha Hauer, linux-integrity, David S. Miller
In-Reply-To: <CAJ+vNU19z0syr0oHOrSGxL0cVW+Kjv76kmp6uvGc2akHbtX0Nw@mail.gmail.com>

Hello Tim,

On 20.08.21 23:19, Tim Harvey wrote:
> On Fri, Aug 20, 2021 at 1:36 PM Ahmad Fatoum <a.fatoum@pengutronix.de> wrote:
>>
>> On 20.08.21 22:20, Tim Harvey wrote:
>>> On Fri, Aug 20, 2021 at 9:20 AM Ahmad Fatoum <a.fatoum@pengutronix.de> wrote:
>>>> On 20.08.21 17:39, Tim Harvey wrote:
>>>>> Thanks for your work!
>>>>>
>>>>> I've been asked to integrate the capability of using CAAM to
>>>>> blob/deblob data to an older 5.4 kernel such as NXP's downstream
>>>>> vendor kernel does [1] and I'm trying to understand how your series
>>>>> works. I'm not at all familiar with the Linux Key Management API's or
>>>>> trusted keys. Can you provide an example of how this can be used for
>>>>> such a thing?
>>>>
>>>> Here's an example with dm-crypt:
>>>>
>>>>   https://lore.kernel.org/linux-integrity/5d44e50e-4309-830b-79f6-f5d888b1ef69@pengutronix.de/
>>>>
>>>> dm-crypt is a bit special at the moment, because it has direct support for
>>>> trusted keys. For interfacing with other parts of the kernel like ecryptfs
>>>> or EVM, you have to create encrypted keys rooted to the trusted keys and use
>>>> those. The kernel documentation has an example:
>>>>
>>>>   https://www.kernel.org/doc/html/v5.13/security/keys/trusted-encrypted.html
>>>>
>>>> If you backport this series, you can include the typo fix spotted by David.
>>>>
>>>> I'll send out a revised series, but given that a regression fix I want to
>>>> rebase on hasn't been picked up for 3 weeks now, I am not in a hurry.
>>>>
>>> Thanks for the reference.
>>>
>>> I'm still trying to understand the keyctl integration with caam. For
>>> the 'data' param to keyctl you are using tings like 'new <len>' and
>>> 'load <data>'. Where are these 'commands' identified?
>>
>> Search for match_table_t in security/keys/trusted-keys/trusted_core.c
>>
>>> I may still be missing something. I'm using 4.14-rc6 with your series
>>> and seeing the following:
>>
>> That's an odd version to backport stuff to..
>>
>>> # cat /proc/cmdline
>>> trusted.source=caam
>>> # keyctl add trusted mykey 'new 32' @s)# create new trusted key named
>>> 'mykey' of 32 bytes in the session keyring
>>> 480104283
>>> # keyctl print 480104283 # dump the key
>>> keyctl_read_alloc: Unknown error 126
>>> ^^^ not clear what this is
>>
>> Not sure what returns -ENOKEY for you. I haven't been using trusted
>> keys on v4.14, but you can try tracing the keyctl syscall.
> 
> yikes... that would be painful. I typo'd and meant 5.14-rc6 :) 

^^

> I'm working with mainline first to make sure I understand everything. If I
> backport this it would be to 5.4 but that looks to be extremely
> painful. It looks like there was a lot of activity around trusted keys
> in 5.13.

Ye. It used to be limited to TPM before that.

> It works for a user keyring but not a session keyring... does that
> explain anything?
> # keyctl add trusted mykey 'new 32' @u
> 941210782
> # keyctl print 941210782
> 83b7845cb45216496aead9ee2c6a406f587d64aad47bddc539d8947a247e618798d9306b36398b5dc2722a4c3f220a3a763ee175f6bd64758fdd49ca4db597e8ce328121b60edbba9b8d8d55056be896
> # keyctl add trusted mykey 'new 32' @s
> 310571960
> # keyctl print 310571960
> keyctl_read_alloc: Unknown error 126

Both sequences work for me.

My getty is started by systemd. I think systemd allocates a new session
keyring for the getty that's inherited by the shell and the commands I run
it in. If you don't do that, each command will get its own session key.
 
> Sorry, I'm still trying to wrap my head around the differences in
> keyrings and trusted vs user keys.

No problem. HTH.

Cheers,
Ahmad

> 
> Tim
> 


-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

^ permalink raw reply

* Re: [PATCH v4 00/12] Enroll kernel keys thru MOK
From: Jarkko Sakkinen @ 2021-08-23 17:35 UTC (permalink / raw)
  To: Mimi Zohar, Eric Snowberg, keyrings, linux-integrity, dhowells,
	dwmw2, herbert, davem, jmorris, serge
  Cc: keescook, gregkh, torvalds, scott.branden, weiyongjun1, nayna,
	ebiggers, ardb, nramas, lszubowi, linux-kernel, linux-crypto,
	linux-security-module, James.Bottomley, pjones, konrad.wilk,
	Patrick Uiterwijk
In-Reply-To: <f76fcf41728fbdd65f2b3464df0821f248b2cba0.camel@linux.ibm.com>

On Thu, 2021-08-19 at 09:10 -0400, Mimi Zohar wrote:
> On Thu, 2021-08-19 at 14:38 +0300, Jarkko Sakkinen wrote:
> > On Wed, 2021-08-18 at 20:20 -0400, Eric Snowberg wrote:
> > > Many UEFI Linux distributions boot using shim.  The UEFI shim provides
> > > what is called Machine Owner Keys (MOK).  Shim uses both the UEFI Secure
> > > Boot DB and MOK keys to validate the next step in the boot chain.  The
> > > MOK facility can be used to import user generated keys.  These keys can
> > > be used to sign an end-user development kernel build.  When Linux boots,
> > > pre-boot keys (both UEFI Secure Boot DB and MOK keys) get loaded in the
> > > Linux .platform keyring.  
> > > 
> > > Currently, pre-boot keys are not trusted within the Linux trust boundary
> > > [1]. These platform keys can only be used for kexec. If an end-user
> > > wants to use their own key within the Linux trust boundary, they must
> > > either compile it into the kernel themselves or use the insert-sys-cert
> > > script. Both options present a problem. Many end-users do not want to
> > > compile their own kernels. With the insert-sys-cert option, there are
> > > missing upstream changes [2].  Also, with the insert-sys-cert option,
> > > the end-user must re-sign their kernel again with their own key, and
> > > then insert that key into the MOK db. Another problem with
> > > insert-sys-cert is that only a single key can be inserted into a
> > > compressed kernel.
> > > 
> > > Having the ability to insert a key into the Linux trust boundary opens
> > > up various possibilities.  The end-user can use a pre-built kernel and
> > > sign their own kernel modules.  It also opens up the ability for an
> > > end-user to more easily use digital signature based IMA-appraisal.  To
> > > get a key into the ima keyring, it must be signed by a key within the
> > > Linux trust boundary.
> > 
> > As of today, I can use a prebuilt kernel, crate my own MOK key and sign
> > modules. What will be different?
> 
> The UEFI db and MOK keys are being loaded onto the .platform keyring,
> which is suppose to be limited to verifying the kexec kernel image
> signature.  With a downstream patch, kernel modules are being verified
> as well.
> 
> Initially Patrick Uiterwijk's "[PATCH 0/3] Load keys from TPM2 NV Index
> on IMA keyring" patch set attempted to define a new root of trust based
> on a key stored in the TPM.  This patch set is similarly attempting to
> define a new root of trust based on CA keys stored in the MOK db.
> 
> The purpose of this patch set is to define a new, safe trust source
> parallel to the builtin keyring, without relying on a downstream patch.
> With the new root of trust, the end user could sign his own kernel
> modules, sign third party keys, and load keys onto the IMA keyring,
> which can be used for signing the IMA policy and other files.

I can, as of today, generate my own mok key and sign my LKM's, and
kernel will verify my LKM's.

What is different?


/Jarkko

^ permalink raw reply

* Re: [PATCH v4 00/12] Enroll kernel keys thru MOK
From: Jarkko Sakkinen @ 2021-08-23 17:37 UTC (permalink / raw)
  To: Eric Snowberg, Mimi Zohar, David Howells
  Cc: keyrings, linux-integrity, David Woodhouse, Herbert Xu,
	David S . Miller, James Morris, Serge E . Hallyn, keescook,
	gregkh, torvalds, scott.branden, weiyongjun1, nayna, ebiggers,
	ardb, Lakshmi Ramasubramanian, lszubowi, linux-kernel,
	linux-crypto, linux-security-module, James Bottomley, pjones,
	konrad.wilk@oracle.com, Patrick Uiterwijk
In-Reply-To: <91B1FE51-C6FC-4ADF-B05A-B1E59E20132E@oracle.com>

On Thu, 2021-08-19 at 09:23 -0600, Eric Snowberg wrote:
> > On Aug 19, 2021, at 7:10 AM, Mimi Zohar <zohar@linux.ibm.com> wrote:
> > 
> > On Thu, 2021-08-19 at 14:38 +0300, Jarkko Sakkinen wrote:
> > > On Wed, 2021-08-18 at 20:20 -0400, Eric Snowberg wrote:
> > > > Downstream Linux distros try to have a single signed kernel for each
> > > > architecture.  Each end-user may use this kernel in entirely different
> > > > ways.  Some downstream kernels have chosen to always trust platform keys
> > > > within the Linux trust boundary for kernel module signing.  These
> > > > kernels have no way of using digital signature base IMA appraisal.
> > > > 
> > > > This series introduces a new Linux kernel keyring containing the Machine
> > > > Owner Keys (MOK) called .mok. It also adds a new MOK variable to shim.
> > > 
> > > I would name it as ".machine" because it is more "re-usable" name, e.g.
> > > could be used for similar things as MOK. ".mok" is a bad name because
> > > it binds directly to a single piece of user space software.
> > 
> > Nayna previously said,
> >   "I believe the underlying source from where CA keys are loaded might vary 
> >   based on the architecture (".mok" is UEFI specific.). The key part is 
> >   that this new keyring should contain only CA keys which can be later 
> >   used to vouch for user keys loaded onto IMA or secondary keyring at 
> >   runtime. It would be good to have a "ca" in the name, like .xxxx-ca, 
> >   where xxxx can be machine, owner, or system. I prefer .system-ca."
> > 
> > The CA keys on the MOK db is simply the first root of trust being
> > defined, but other roots of trust are sure to follow.  For this reason,
> > I agree naming the new keyring "mok" should be avoided.
> 
> As I said previously, I’m open to renaming, I just would like to have an 
> agreement on the new name before changing everything.  The current proposed 
> names I have heard are “.machine" and ".system-ca".  Is there a preference 
> the maintainers feel is appropriate?  If so, please let me know and I’ll 
> rename it. Thanks.


Just ".system" would be good. It's informative enough.


/Jarkko

^ permalink raw reply

* Re: [PATCH v4 00/12] Enroll kernel keys thru MOK
From: Eric Snowberg @ 2021-08-23 17:48 UTC (permalink / raw)
  To: Jarkko Sakkinen
  Cc: Mimi Zohar, keyrings, linux-integrity, David Howells,
	David Woodhouse, Herbert Xu, David S . Miller, James Morris,
	Serge E . Hallyn, keescook, gregkh, torvalds, scott.branden,
	weiyongjun1, nayna, ebiggers, ardb, Lakshmi Ramasubramanian,
	lszubowi, linux-kernel, linux-crypto, linux-security-module,
	James Bottomley, pjones, konrad.wilk@oracle.com,
	Patrick Uiterwijk
In-Reply-To: <335ba50bcb9069faac135bce77c6f7ba19bd90ca.camel@kernel.org>


> On Aug 23, 2021, at 11:35 AM, Jarkko Sakkinen <jarkko@kernel.org> wrote:
> 
> On Thu, 2021-08-19 at 09:10 -0400, Mimi Zohar wrote:
>> On Thu, 2021-08-19 at 14:38 +0300, Jarkko Sakkinen wrote:
>>> On Wed, 2021-08-18 at 20:20 -0400, Eric Snowberg wrote:
>>>> Many UEFI Linux distributions boot using shim.  The UEFI shim provides
>>>> what is called Machine Owner Keys (MOK).  Shim uses both the UEFI Secure
>>>> Boot DB and MOK keys to validate the next step in the boot chain.  The
>>>> MOK facility can be used to import user generated keys.  These keys can
>>>> be used to sign an end-user development kernel build.  When Linux boots,
>>>> pre-boot keys (both UEFI Secure Boot DB and MOK keys) get loaded in the
>>>> Linux .platform keyring.  
>>>> 
>>>> Currently, pre-boot keys are not trusted within the Linux trust boundary
>>>> [1]. These platform keys can only be used for kexec. If an end-user
>>>> wants to use their own key within the Linux trust boundary, they must
>>>> either compile it into the kernel themselves or use the insert-sys-cert
>>>> script. Both options present a problem. Many end-users do not want to
>>>> compile their own kernels. With the insert-sys-cert option, there are
>>>> missing upstream changes [2].  Also, with the insert-sys-cert option,
>>>> the end-user must re-sign their kernel again with their own key, and
>>>> then insert that key into the MOK db. Another problem with
>>>> insert-sys-cert is that only a single key can be inserted into a
>>>> compressed kernel.
>>>> 
>>>> Having the ability to insert a key into the Linux trust boundary opens
>>>> up various possibilities.  The end-user can use a pre-built kernel and
>>>> sign their own kernel modules.  It also opens up the ability for an
>>>> end-user to more easily use digital signature based IMA-appraisal.  To
>>>> get a key into the ima keyring, it must be signed by a key within the
>>>> Linux trust boundary.
>>> 
>>> As of today, I can use a prebuilt kernel, crate my own MOK key and sign
>>> modules. What will be different?
>> 
>> The UEFI db and MOK keys are being loaded onto the .platform keyring,
>> which is suppose to be limited to verifying the kexec kernel image
>> signature.  With a downstream patch, kernel modules are being verified
>> as well.
>> 
>> Initially Patrick Uiterwijk's "[PATCH 0/3] Load keys from TPM2 NV Index
>> on IMA keyring" patch set attempted to define a new root of trust based
>> on a key stored in the TPM.  This patch set is similarly attempting to
>> define a new root of trust based on CA keys stored in the MOK db.
>> 
>> The purpose of this patch set is to define a new, safe trust source
>> parallel to the builtin keyring, without relying on a downstream patch.
>> With the new root of trust, the end user could sign his own kernel
>> modules, sign third party keys, and load keys onto the IMA keyring,
>> which can be used for signing the IMA policy and other files.
> 
> I can, as of today, generate my own mok key and sign my LKM's, and
> kernel will verify my LKM's.
> 
> What is different?

Are you sure your kernel doesn’t contain some version of the rejected 
patch referenced in the cover letter  [1]?

https://lore.kernel.org/lkml/1556221605.24945.3.camel@HansenPartnership.com/


^ permalink raw reply

* Re: [PATCH 0/4] KEYS: trusted: Introduce support for NXP CAAM-based trusted keys
From: Tim Harvey @ 2021-08-23 17:50 UTC (permalink / raw)
  To: Ahmad Fatoum
  Cc: David Gstir, Aymen Sghaier, Mimi Zohar, Jan Luebbe, keyrings,
	Steffen Trumtrar, linux-security-module, Udit Agarwal, Herbert Xu,
	Horia Geantă, Richard Weinberger, James Morris, Eric Biggers,
	Serge E. Hallyn, Sumit Garg, James Bottomley, Franck LENORMAND,
	David Howells, open list, Jarkko Sakkinen, linux-crypto,
	Sascha Hauer, linux-integrity, David S. Miller
In-Reply-To: <fa530833-2bb9-f8f3-68c6-99423d29e2ca@pengutronix.de>

On Mon, Aug 23, 2021 at 6:29 AM Ahmad Fatoum <a.fatoum@pengutronix.de> wrote:
>
> Hello Tim,
>
> On 20.08.21 23:19, Tim Harvey wrote:
> > On Fri, Aug 20, 2021 at 1:36 PM Ahmad Fatoum <a.fatoum@pengutronix.de> wrote:
> >>
> >> On 20.08.21 22:20, Tim Harvey wrote:
> >>> On Fri, Aug 20, 2021 at 9:20 AM Ahmad Fatoum <a.fatoum@pengutronix.de> wrote:
> >>>> On 20.08.21 17:39, Tim Harvey wrote:
> >>>>> Thanks for your work!
> >>>>>
> >>>>> I've been asked to integrate the capability of using CAAM to
> >>>>> blob/deblob data to an older 5.4 kernel such as NXP's downstream
> >>>>> vendor kernel does [1] and I'm trying to understand how your series
> >>>>> works. I'm not at all familiar with the Linux Key Management API's or
> >>>>> trusted keys. Can you provide an example of how this can be used for
> >>>>> such a thing?
> >>>>
> >>>> Here's an example with dm-crypt:
> >>>>
> >>>>   https://lore.kernel.org/linux-integrity/5d44e50e-4309-830b-79f6-f5d888b1ef69@pengutronix.de/
> >>>>
> >>>> dm-crypt is a bit special at the moment, because it has direct support for
> >>>> trusted keys. For interfacing with other parts of the kernel like ecryptfs
> >>>> or EVM, you have to create encrypted keys rooted to the trusted keys and use
> >>>> those. The kernel documentation has an example:
> >>>>
> >>>>   https://www.kernel.org/doc/html/v5.13/security/keys/trusted-encrypted.html
> >>>>
> >>>> If you backport this series, you can include the typo fix spotted by David.
> >>>>
> >>>> I'll send out a revised series, but given that a regression fix I want to
> >>>> rebase on hasn't been picked up for 3 weeks now, I am not in a hurry.
> >>>>
> >>> Thanks for the reference.
> >>>
> >>> I'm still trying to understand the keyctl integration with caam. For
> >>> the 'data' param to keyctl you are using tings like 'new <len>' and
> >>> 'load <data>'. Where are these 'commands' identified?
> >>
> >> Search for match_table_t in security/keys/trusted-keys/trusted_core.c
> >>
> >>> I may still be missing something. I'm using 4.14-rc6 with your series
> >>> and seeing the following:
> >>
> >> That's an odd version to backport stuff to..
> >>
> >>> # cat /proc/cmdline
> >>> trusted.source=caam
> >>> # keyctl add trusted mykey 'new 32' @s)# create new trusted key named
> >>> 'mykey' of 32 bytes in the session keyring
> >>> 480104283
> >>> # keyctl print 480104283 # dump the key
> >>> keyctl_read_alloc: Unknown error 126
> >>> ^^^ not clear what this is
> >>
> >> Not sure what returns -ENOKEY for you. I haven't been using trusted
> >> keys on v4.14, but you can try tracing the keyctl syscall.
> >
> > yikes... that would be painful. I typo'd and meant 5.14-rc6 :)
>
> ^^
>
> > I'm working with mainline first to make sure I understand everything. If I
> > backport this it would be to 5.4 but that looks to be extremely
> > painful. It looks like there was a lot of activity around trusted keys
> > in 5.13.
>
> Ye. It used to be limited to TPM before that.
>
> > It works for a user keyring but not a session keyring... does that
> > explain anything?
> > # keyctl add trusted mykey 'new 32' @u
> > 941210782
> > # keyctl print 941210782
> > 83b7845cb45216496aead9ee2c6a406f587d64aad47bddc539d8947a247e618798d9306b36398b5dc2722a4c3f220a3a763ee175f6bd64758fdd49ca4db597e8ce328121b60edbba9b8d8d55056be896
> > # keyctl add trusted mykey 'new 32' @s
> > 310571960
> > # keyctl print 310571960
> > keyctl_read_alloc: Unknown error 126
>
> Both sequences work for me.
>
> My getty is started by systemd. I think systemd allocates a new session
> keyring for the getty that's inherited by the shell and the commands I run
> it in. If you don't do that, each command will get its own session key.
>
> > Sorry, I'm still trying to wrap my head around the differences in
> > keyrings and trusted vs user keys.
>
> No problem. HTH.

Ahmad,

Ok that explains it - my testing is using a very basic buildroot
ramdisk rootfs. If I do a 'keyctl new_session' first I can use the
system keyring fine as well.

Thanks - hoping to see this merged soon!

Tim

^ permalink raw reply

* Re: [PATCH v4 00/12] Enroll kernel keys thru MOK
From: Jarkko Sakkinen @ 2021-08-23 17:51 UTC (permalink / raw)
  To: Mimi Zohar, Eric Snowberg, David Howells
  Cc: keyrings, linux-integrity, David Woodhouse, Herbert Xu,
	David S . Miller, James Morris, Serge E . Hallyn, keescook,
	gregkh, torvalds, scott.branden, weiyongjun1, nayna, ebiggers,
	ardb, Lakshmi Ramasubramanian, lszubowi, linux-kernel,
	linux-crypto, linux-security-module, James Bottomley, pjones,
	konrad.wilk@oracle.com, Patrick Uiterwijk
In-Reply-To: <e7e251000432cf7c475e19c56b0f438b92fec16e.camel@linux.ibm.com>

On Thu, 2021-08-19 at 13:32 -0400, Mimi Zohar wrote:
> On Thu, 2021-08-19 at 09:23 -0600, Eric Snowberg wrote:
> > > On Aug 19, 2021, at 7:10 AM, Mimi Zohar <zohar@linux.ibm.com> wrote:
> > > 
> > > On Thu, 2021-08-19 at 14:38 +0300, Jarkko Sakkinen wrote:
> > > > On Wed, 2021-08-18 at 20:20 -0400, Eric Snowberg wrote:
> > > > > Downstream Linux distros try to have a single signed kernel for each
> > > > > architecture.  Each end-user may use this kernel in entirely different
> > > > > ways.  Some downstream kernels have chosen to always trust platform keys
> > > > > within the Linux trust boundary for kernel module signing.  These
> > > > > kernels have no way of using digital signature base IMA appraisal.
> > > > > 
> > > > > This series introduces a new Linux kernel keyring containing the Machine
> > > > > Owner Keys (MOK) called .mok. It also adds a new MOK variable to shim.
> > > > 
> > > > I would name it as ".machine" because it is more "re-usable" name, e.g.
> > > > could be used for similar things as MOK. ".mok" is a bad name because
> > > > it binds directly to a single piece of user space software.
> > > 
> > > Nayna previously said,
> > >   "I believe the underlying source from where CA keys are loaded might vary 
> > >   based on the architecture (".mok" is UEFI specific.). The key part is 
> > >   that this new keyring should contain only CA keys which can be later 
> > >   used to vouch for user keys loaded onto IMA or secondary keyring at 
> > >   runtime. It would be good to have a "ca" in the name, like .xxxx-ca, 
> > >   where xxxx can be machine, owner, or system. I prefer .system-ca."
> > > 
> > > The CA keys on the MOK db is simply the first root of trust being
> > > defined, but other roots of trust are sure to follow.  For this reason,
> > > I agree naming the new keyring "mok" should be avoided.
> > 
> > As I said previously, I’m open to renaming, I just would like to have an 
> > agreement on the new name before changing everything.  The current proposed 
> > names I have heard are “.machine" and ".system-ca".  Is there a preference 
> > the maintainers feel is appropriate?  If so, please let me know and I’ll 
> > rename it. Thanks.
> > 
> 
> Jarkko, I think the emphasis should not be on "machine" from Machine
> Owner Key (MOK), but on "owner".  Whereas Nayna is focusing more on the
> "_ca" aspect of the name.   Perhaps consider naming it
> "system_owner_ca" or something along those lines.

What do you gain such overly long identifier? Makes no sense. What 
is "ca aspect of the name" anyway?

/Jarkko

^ permalink raw reply

* Re: [PATCH 3/3] virt: Add sev_secret module to expose confidential computing secrets
From: Andrew Scull @ 2021-08-23 19:21 UTC (permalink / raw)
  To: Dov Murik
  Cc: Ard Biesheuvel, linux-efi, Borislav Petkov, Ashish Kalra,
	Brijesh Singh, Tom Lendacky, James Morris, Serge E. Hallyn,
	Andi Kleen, Dr. David Alan Gilbert, James Bottomley,
	Tobin Feldman-Fitzthum, Jim Cadden, linux-coco,
	linux-security-module, Linux Kernel Mailing List
In-Reply-To: <b3c65f9d-5fd3-22c5-cd23-481774d92222@linux.ibm.com>

On Fri, 20 Aug 2021 at 19:36, Dov Murik <dovmurik@linux.ibm.com> wrote:
>
>
>
> On 19/08/2021 16:02, Andrew Scull wrote:
> > On Mon, 16 Aug 2021 at 10:57, Ard Biesheuvel <ardb@kernel.org> wrote:
> >>
> >> On Fri, 13 Aug 2021 at 15:05, Andrew Scull <ascull@google.com> wrote:
> >>>
> >>> On Mon, Aug 09, 2021 at 07:01:57PM +0000, Dov Murik wrote:
>
> [...]
>
> >>>
> >>>> +static int sev_secret_unlink(struct inode *dir, struct dentry *dentry)
> >>>> +{
> >>>> +     struct sev_secret *s = sev_secret_get();
> >>>> +     struct inode *inode = d_inode(dentry);
> >>>> +     struct secret_entry *e = (struct secret_entry *)inode->i_private;
> >>>> +     int i;
> >>>> +
> >>>> +     if (e) {
> >>>> +             /* Zero out the secret data */
> >>>> +             memzero_explicit(e->data, secret_entry_data_len(e));
> >>>
> >>> Would there be a benefit in flushing these zeros?
> >>>
> >>
> >> Do you mean cache clean+invalidate? Better to be precise here.
> >
> > At least a clean, to have the zeros written back to memory from the
> > cache, in order to overwrite the secret.
> >
>
> I agree, but not sure how to implement this:
>
> I see there's an arch_wb_cache_pmem exported function which internally
> (in arch/x86/lib/usercopy_64.c) calls clean_cache_range which seems to
> do what we want (assume the secret can be longer than the cache line).
>
> But arch_wb_cache_pmem is declared in include/linux/libnvdimm.h and
> guarded with #ifdef CONFIG_ARCH_HAS_PMEM_API -- both seem not related to
> what I'm trying to do.
>
> I see there's an exported clflush_cache_range for x86 -- but that's a
> clean+flush if I understand correctly.

This would be perfectly correct, the invalidation is just unnecessary.

> Suggestions on how to approach? I can copy the clean_cache_range
> implementation into the sev_secret module but hopefully there's a better
> way to reuse.  Maybe export clean_cache_range in x86?

Exporting sounds much better than duplicating.

It looks like the clean-only instruction was added to x86 more
recently and with persistent memory as the intended application.

d9dc64f30 "x86/asm: Add support for the CLWB instruction" says:

"This should be used in favor of clflushopt or clflush in cases where
you require the cache line to be written to memory but plan to access
the data cache line to be written to memory but plan to access the
data"

I don't expect the secret table would be accessed with such frequency
that it would actually make a difference, but if it's just a quirk of
history that the clean-only version isn't exported, now seems as good
a time as any to change that!

> Since this is for SEV the solution can be x86-specific, but if there's a
> generic way I guess it's better (I think all of sev_secret module
> doesn't have x86-specific stuff).

arch_wb_cache_pmem is the closest to arch agnostic I've seen, but that
has it own problems :/

^ permalink raw reply

* Re: [PATCH v4 00/12] Enroll kernel keys thru MOK
From: Nayna @ 2021-08-23 20:48 UTC (permalink / raw)
  To: Jarkko Sakkinen, Mimi Zohar, Eric Snowberg, David Howells
  Cc: keyrings, linux-integrity, David Woodhouse, Herbert Xu,
	David S . Miller, James Morris, Serge E . Hallyn, keescook,
	gregkh, torvalds, scott.branden, weiyongjun1, nayna, ebiggers,
	ardb, Lakshmi Ramasubramanian, lszubowi, linux-kernel,
	linux-crypto, linux-security-module, James Bottomley, pjones,
	konrad.wilk@oracle.com, Patrick Uiterwijk
In-Reply-To: <cedc77fefdf22b2cec086f3e0dd9cc698db9bca2.camel@kernel.org>


On 8/23/21 1:51 PM, Jarkko Sakkinen wrote:
> On Thu, 2021-08-19 at 13:32 -0400, Mimi Zohar wrote:
>> On Thu, 2021-08-19 at 09:23 -0600, Eric Snowberg wrote:
>>>> On Aug 19, 2021, at 7:10 AM, Mimi Zohar <zohar@linux.ibm.com> wrote:
>>>>
>>>> On Thu, 2021-08-19 at 14:38 +0300, Jarkko Sakkinen wrote:
>>>>> On Wed, 2021-08-18 at 20:20 -0400, Eric Snowberg wrote:
>>>>>> Downstream Linux distros try to have a single signed kernel for each
>>>>>> architecture.  Each end-user may use this kernel in entirely different
>>>>>> ways.  Some downstream kernels have chosen to always trust platform keys
>>>>>> within the Linux trust boundary for kernel module signing.  These
>>>>>> kernels have no way of using digital signature base IMA appraisal.
>>>>>>
>>>>>> This series introduces a new Linux kernel keyring containing the Machine
>>>>>> Owner Keys (MOK) called .mok. It also adds a new MOK variable to shim.
>>>>> I would name it as ".machine" because it is more "re-usable" name, e.g.
>>>>> could be used for similar things as MOK. ".mok" is a bad name because
>>>>> it binds directly to a single piece of user space software.
>>>> Nayna previously said,
>>>>    "I believe the underlying source from where CA keys are loaded might vary
>>>>    based on the architecture (".mok" is UEFI specific.). The key part is
>>>>    that this new keyring should contain only CA keys which can be later
>>>>    used to vouch for user keys loaded onto IMA or secondary keyring at
>>>>    runtime. It would be good to have a "ca" in the name, like .xxxx-ca,
>>>>    where xxxx can be machine, owner, or system. I prefer .system-ca."
>>>>
>>>> The CA keys on the MOK db is simply the first root of trust being
>>>> defined, but other roots of trust are sure to follow.  For this reason,
>>>> I agree naming the new keyring "mok" should be avoided.
>>> As I said previously, I’m open to renaming, I just would like to have an
>>> agreement on the new name before changing everything.  The current proposed
>>> names I have heard are “.machine" and ".system-ca".  Is there a preference
>>> the maintainers feel is appropriate?  If so, please let me know and I’ll
>>> rename it. Thanks.
>>>
>> Jarkko, I think the emphasis should not be on "machine" from Machine
>> Owner Key (MOK), but on "owner".  Whereas Nayna is focusing more on the
>> "_ca" aspect of the name.   Perhaps consider naming it
>> "system_owner_ca" or something along those lines.
> What do you gain such overly long identifier? Makes no sense. What
> is "ca aspect of the name" anyway?

As I mentioned previously, the main usage of this new keyring is that it 
should contain only CA keys which can be later used to vouch for user 
keys loaded onto secondary or IMA keyring at runtime. Having ca in the 
name like .xxxx_ca, would make the keyring name self-describing. Since 
you preferred .system, we can call it .system_ca.

Thanks & Regards,

        - Nayna


^ permalink raw reply

* Re: [PATCH 0/4] KEYS: trusted: Introduce support for NXP CAAM-based trusted keys
From: Ahmad Fatoum @ 2021-08-24  7:33 UTC (permalink / raw)
  To: Tim Harvey
  Cc: David Gstir, Aymen Sghaier, Mimi Zohar, Jan Luebbe, keyrings,
	Steffen Trumtrar, linux-security-module, Udit Agarwal, Herbert Xu,
	Horia Geantă, Richard Weinberger, James Morris, Eric Biggers,
	Serge E. Hallyn, Sumit Garg, James Bottomley, Franck LENORMAND,
	David Howells, open list, Jarkko Sakkinen, linux-crypto,
	Sascha Hauer, linux-integrity, David S. Miller
In-Reply-To: <CAJ+vNU0iRTagc5_qvsG4jvt=B_wruj=1O2ZRixqWek8JTN=aeg@mail.gmail.com>

On 23.08.21 19:50, Tim Harvey wrote:
> On Mon, Aug 23, 2021 at 6:29 AM Ahmad Fatoum <a.fatoum@pengutronix.de> wrote:
>> On 20.08.21 23:19, Tim Harvey wrote:
>>> On Fri, Aug 20, 2021 at 1:36 PM Ahmad Fatoum <a.fatoum@pengutronix.de> wrote:
>>>> On 20.08.21 22:20, Tim Harvey wrote:
>>> It works for a user keyring but not a session keyring... does that
>>> explain anything?
>>> # keyctl add trusted mykey 'new 32' @u
>>> 941210782
>>> # keyctl print 941210782
>>> 83b7845cb45216496aead9ee2c6a406f587d64aad47bddc539d8947a247e618798d9306b36398b5dc2722a4c3f220a3a763ee175f6bd64758fdd49ca4db597e8ce328121b60edbba9b8d8d55056be896
>>> # keyctl add trusted mykey 'new 32' @s
>>> 310571960
>>> # keyctl print 310571960
>>> keyctl_read_alloc: Unknown error 126
>>
>> Both sequences work for me.
>>
>> My getty is started by systemd. I think systemd allocates a new session
>> keyring for the getty that's inherited by the shell and the commands I run
>> it in. If you don't do that, each command will get its own session key.
>>
>>> Sorry, I'm still trying to wrap my head around the differences in
>>> keyrings and trusted vs user keys.
>>
>> No problem. HTH.
> 
> Ahmad,
> 
> Ok that explains it - my testing is using a very basic buildroot
> ramdisk rootfs. If I do a 'keyctl new_session' first I can use the
> system keyring fine as well.

Great. Does this mean I can get your Tested-by: ? :)

> Thanks - hoping to see this merged soon!

You and me both.

Cheers,
Ahmad


> 
> Tim
> 


-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

^ permalink raw reply

* [PATCH] ima: fix deadlock within "ima_match_policy" function.
From: liqiong @ 2021-08-24  8:57 UTC (permalink / raw)
  To: Simon.THOBY, zohar
  Cc: dmitry.kasatkin, jmorris, serge, linux-integrity,
	linux-security-module, linux-kernel, liqiong
In-Reply-To: <20210819101529.28001-1-liqiong@nfschina.com>

When "ima_match_policy" is looping while "ima_update_policy" changs
the variable "ima_rules", then "ima_match_policy" may can't exit
loop, Finally cause RCU CPU Stall Warnings: "rcu_sched detected
stall on CPU ...".

The problem is limited to transitioning from the builtin policy to
the custom policy. Eg. At boot time, systemd-services are being
checked within "ima_match_policy", at the same time, the variable
"ima_rules" is changed by another service.

Signed-off-by: liqiong <liqiong@nfschina.com>
---
 security/integrity/ima/ima_policy.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index fd5d46e511f1..e92b197bfd3c 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -662,12 +662,14 @@ int ima_match_policy(struct user_namespace *mnt_userns, struct inode *inode,
 {
 	struct ima_rule_entry *entry;
 	int action = 0, actmask = flags | (flags << 1);
+	struct list_head *ima_rules_tmp;
 
 	if (template_desc && !*template_desc)
 		*template_desc = ima_template_desc_current();
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(entry, ima_rules, list) {
+	ima_rules_tmp = rcu_dereference(ima_rules);
+	list_for_each_entry_rcu(entry, ima_rules_tmp, list) {
 
 		if (!(entry->action & actmask))
 			continue;
@@ -919,8 +921,8 @@ void ima_update_policy(void)
 
 	if (ima_rules != policy) {
 		ima_policy_flag = 0;
-		ima_rules = policy;
 
+		rcu_assign_pointer(ima_rules, policy);
 		/*
 		 * IMA architecture specific policy rules are specified
 		 * as strings and converted to an array of ima_entry_rules
@@ -1649,9 +1651,11 @@ void *ima_policy_start(struct seq_file *m, loff_t *pos)
 {
 	loff_t l = *pos;
 	struct ima_rule_entry *entry;
+	struct list_head *ima_rules_tmp;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(entry, ima_rules, list) {
+	ima_rules_tmp = rcu_dereference(ima_rules);
+	list_for_each_entry_rcu(entry, ima_rules_tmp, list) {
 		if (!l--) {
 			rcu_read_unlock();
 			return entry;
@@ -1670,7 +1674,8 @@ void *ima_policy_next(struct seq_file *m, void *v, loff_t *pos)
 	rcu_read_unlock();
 	(*pos)++;
 
-	return (&entry->list == ima_rules) ? NULL : entry;
+	return (&entry->list == &ima_default_rules ||
+		&entry->list == &ima_policy_rules) ? NULL : entry;
 }
 
 void ima_policy_stop(struct seq_file *m, void *v)
@@ -1872,6 +1877,7 @@ bool ima_appraise_signature(enum kernel_read_file_id id)
 	struct ima_rule_entry *entry;
 	bool found = false;
 	enum ima_hooks func;
+	struct list_head *ima_rules_tmp;
 
 	if (id >= READING_MAX_ID)
 		return false;
@@ -1879,7 +1885,8 @@ bool ima_appraise_signature(enum kernel_read_file_id id)
 	func = read_idmap[id] ?: FILE_CHECK;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(entry, ima_rules, list) {
+	ima_rules_tmp = rcu_dereference(ima_rules);
+	list_for_each_entry_rcu(entry, ima_rules_tmp, list) {
 		if (entry->action != APPRAISE)
 			continue;
 
-- 
2.11.0


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox