Linux Security Modules development

Linux Security Modules development
 help / color / mirror / Atom feed

* [RFC PATCH 01/30] ima: Introduce ima namespace
From: krzysztof.struczynski @ 2020-08-18 15:20 UTC (permalink / raw)
  To: linux-integrity, linux-kernel, containers, linux-security-module
  Cc: Krzysztof Struczynski, zohar, stefanb, sunyuqiong1988, mkayaalp,
	dmitry.kasatkin, serge, jmorris, christian, silviu.vlasceanu,
	roberto.sassu
In-Reply-To: <20200818152037.11869-1-krzysztof.struczynski@huawei.com>

From: Krzysztof Struczynski <krzysztof.struczynski@huawei.com>

IMA namespace wraps global ima resources in an abstraction, to enable ima
to work with the containers. Currently, ima namespace contains no useful
data but a dummy interface. IMA resources related to different aspects of
IMA, namely IMA-audit, IMA-measurement, IMA-appraisal will be added in the
following patches.

The way how ima namespace is created is analogous to the time namespace:
unshare(CLONE_NEWIMA) system call creates a new ima namespace but doesn't
assign it to the current process. All children of the process will be born
in the new ima namespace, or a process can use setns() system call to join
the new ima namespace. Call to clone3(CLONE_NEWIMA) system call creates a
new namespace, which the new process joins instantly.

This scheme, allows to configure the new ima namespace before any process
appears in it. If user initially unshares the new ima namespace, ima can
be configured using ima entries in the securityfs. If user calls clone3()
system call directly, the new ima namespace can be configured using clone
arguments. To allow this, new securityfs entries have to be added, and
structures clone_args and kernel_clone_args have to be extended.

Early configuration is crucial. The new ima polices must apply to the
first process in the new namespace, and the appraisal key has to be loaded
beforehand.

Add a new CONFIG_IMA_NS option to the kernel configuration, that enables
one to create a new IMA namespace. IMA namespace functionality is disabled
by default.

Signed-off-by: Krzysztof Struczynski <krzysztof.struczynski@huawei.com>
---
 fs/proc/namespaces.c              |   4 +
 include/linux/ima.h               |  57 ++++++++
 include/linux/nsproxy.h           |   3 +
 include/linux/proc_ns.h           |   5 +-
 include/linux/user_namespace.h    |   1 +
 include/uapi/linux/sched.h        |   1 +
 init/Kconfig                      |  12 ++
 kernel/fork.c                     |  24 +++-
 kernel/nsproxy.c                  |  34 ++++-
 kernel/ucount.c                   |   1 +
 security/integrity/ima/Makefile   |   1 +
 security/integrity/ima/ima.h      |  13 ++
 security/integrity/ima/ima_init.c |  13 ++
 security/integrity/ima/ima_ns.c   | 232 ++++++++++++++++++++++++++++++
 14 files changed, 392 insertions(+), 9 deletions(-)
 create mode 100644 security/integrity/ima/ima_ns.c

diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 8e159fc78c0a..117812a59e5d 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -37,6 +37,10 @@ static const struct proc_ns_operations *ns_entries[] = {
 	&timens_operations,
 	&timens_for_children_operations,
 #endif
+#ifdef CONFIG_IMA_NS
+	&imans_operations,
+	&imans_for_children_operations,
+#endif
 };
 
 static const char *proc_ns_get_link(struct dentry *dentry,
diff --git a/include/linux/ima.h b/include/linux/ima.h
index d15100de6cdd..4a9c29d4d056 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -12,6 +12,9 @@
 #include <linux/kexec.h>
 struct linux_binprm;
 
+struct nsproxy;
+struct task_struct;
+
 #ifdef CONFIG_IMA
 extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_file_check(struct file *file, int mask);
@@ -167,4 +170,58 @@ static inline bool ima_appraise_signature(enum kernel_read_file_id func)
 	return false;
 }
 #endif /* CONFIG_IMA_APPRAISE && CONFIG_INTEGRITY_TRUSTED_KEYRING */
+
+struct ima_namespace {
+	struct kref kref;
+	struct ns_common ns;
+	struct ucounts *ucounts;
+	struct user_namespace *user_ns;
+} __randomize_layout;
+
+extern struct ima_namespace init_ima_ns;
+
+#ifdef CONFIG_IMA_NS
+struct ima_namespace *copy_ima_ns(unsigned long flags,
+				  struct user_namespace *user_ns,
+				  struct ima_namespace *old_ns);
+
+void free_ima_ns(struct kref *kref);
+
+int imans_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk);
+
+static inline struct ima_namespace *get_ima_ns(struct ima_namespace *ns)
+{
+	if (ns)
+		kref_get(&ns->kref);
+	return ns;
+}
+static inline void put_ima_ns(struct ima_namespace *ns)
+{
+	if (ns)
+		kref_put(&ns->kref, free_ima_ns);
+}
+
+#else
+static inline struct ima_namespace *copy_ima_ns(unsigned long flags,
+						struct user_namespace *user_ns,
+						struct ima_namespace *old_ns)
+{
+	return old_ns;
+}
+
+static inline int imans_on_fork(struct nsproxy *nsproxy,
+				struct task_struct *tsk)
+{
+	return 0;
+}
+
+static inline struct ima_namespace *get_ima_ns(struct ima_namespace *ns)
+{
+	return ns;
+}
+
+static inline void put_ima_ns(struct ima_namespace *ns)
+{
+}
+#endif /* CONFIG_IMA_NS */
 #endif /* _LINUX_IMA_H */
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index cdb171efc7cb..56216a94009d 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -10,6 +10,7 @@ struct uts_namespace;
 struct ipc_namespace;
 struct pid_namespace;
 struct cgroup_namespace;
+struct ima_namespace;
 struct fs_struct;
 
 /*
@@ -38,6 +39,8 @@ struct nsproxy {
 	struct time_namespace *time_ns;
 	struct time_namespace *time_ns_for_children;
 	struct cgroup_namespace *cgroup_ns;
+	struct ima_namespace *ima_ns;
+	struct ima_namespace *ima_ns_for_children;
 };
 extern struct nsproxy init_nsproxy;
 
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 75807ecef880..93735b7bbb65 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -16,7 +16,7 @@ struct inode;
 struct proc_ns_operations {
 	const char *name;
 	const char *real_ns_name;
-	int type;
+	unsigned long type;
 	struct ns_common *(*get)(struct task_struct *task);
 	void (*put)(struct ns_common *ns);
 	int (*install)(struct nsset *nsset, struct ns_common *ns);
@@ -34,6 +34,8 @@ extern const struct proc_ns_operations mntns_operations;
 extern const struct proc_ns_operations cgroupns_operations;
 extern const struct proc_ns_operations timens_operations;
 extern const struct proc_ns_operations timens_for_children_operations;
+extern const struct proc_ns_operations imans_operations;
+extern const struct proc_ns_operations imans_for_children_operations;
 
 /*
  * We always define these enumerators
@@ -46,6 +48,7 @@ enum {
 	PROC_PID_INIT_INO	= 0xEFFFFFFCU,
 	PROC_CGROUP_INIT_INO	= 0xEFFFFFFBU,
 	PROC_TIME_INIT_INO	= 0xEFFFFFFAU,
+	PROC_IMA_INIT_INO	= 0xEFFFFFF9U,
 };
 
 #ifdef CONFIG_PROC_FS
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 6ef1c7109fc4..d9759c54fead 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -46,6 +46,7 @@ enum ucount_type {
 	UCOUNT_MNT_NAMESPACES,
 	UCOUNT_CGROUP_NAMESPACES,
 	UCOUNT_TIME_NAMESPACES,
+	UCOUNT_IMA_NAMESPACES,
 #ifdef CONFIG_INOTIFY_USER
 	UCOUNT_INOTIFY_INSTANCES,
 	UCOUNT_INOTIFY_WATCHES,
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 3bac0a8ceab2..b30e27efee92 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -36,6 +36,7 @@
 /* Flags for the clone3() syscall. */
 #define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */
 #define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
+#define CLONE_NEWIMA 0x400000000ULL /* New IMA namespace. */
 
 /*
  * cloning flags intersect with CSIGNAL so can be used with unshare and clone3
diff --git a/init/Kconfig b/init/Kconfig
index d6a0b31b13dc..f188b33588a2 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1184,6 +1184,18 @@ config NET_NS
 	  Allow user space to create what appear to be multiple instances
 	  of the network stack.
 
+config IMA_NS
+	bool "IMA namespace"
+	depends on IMA
+	default n
+	help
+	  This allows container engines to use ima namespaces to provide
+	  different IMA policy rules for different containers. Also, it allows
+	  to create what appear to be multiple instances of the IMA measurement
+	  list and other IMA related resources.
+
+	  If unsure, say N.
+
 endif # NAMESPACES
 
 config CHECKPOINT_RESTORE
diff --git a/kernel/fork.c b/kernel/fork.c
index 35e9894d394c..b977fd92fe3f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1876,11 +1876,24 @@ static __latent_entropy struct task_struct *copy_process(
 	}
 
 	/*
-	 * If the new process will be in a different time namespace
-	 * do not allow it to share VM or a thread group with the forking task.
+	 * If the new process will be in a different time namespace or a
+	 * different ima namespace, do not allow it to share VM or a thread
+	 * group with the forking task.
 	 */
 	if (clone_flags & (CLONE_THREAD | CLONE_VM)) {
-		if (nsp->time_ns != nsp->time_ns_for_children)
+		if ((nsp->time_ns != nsp->time_ns_for_children) ||
+		    ((clone_flags & CLONE_NEWIMA) ||
+		     (nsp->ima_ns != nsp->ima_ns_for_children)))
+			return ERR_PTR(-EINVAL);
+	}
+
+	/*
+	 * If the new process will be in a different ima namespace
+	 * do not allow it to share the same file descriptor table.
+	 */
+	if (clone_flags & CLONE_FILES) {
+		if ((clone_flags & CLONE_NEWIMA) ||
+		    (nsp->ima_ns != nsp->ima_ns_for_children))
 			return ERR_PTR(-EINVAL);
 	}
 
@@ -2649,7 +2662,8 @@ static bool clone3_args_valid(struct kernel_clone_args *kargs)
 {
 	/* Verify that no unknown flags are passed along. */
 	if (kargs->flags &
-	    ~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP))
+	    ~(CLONE_LEGACY_FLAGS |
+	      CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP | CLONE_NEWIMA))
 		return false;
 
 	/*
@@ -2796,7 +2810,7 @@ static int check_unshare_flags(unsigned long unshare_flags)
 				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
 				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
 				CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP|
-				CLONE_NEWTIME))
+				CLONE_NEWTIME|CLONE_NEWIMA))
 		return -EINVAL;
 	/*
 	 * Not implemented, but pretend it works if there is nothing
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 12dd41b39a7f..791efffd7a03 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -19,6 +19,7 @@
 #include <net/net_namespace.h>
 #include <linux/ipc_namespace.h>
 #include <linux/time_namespace.h>
+#include <linux/ima.h>
 #include <linux/fs_struct.h>
 #include <linux/proc_fs.h>
 #include <linux/proc_ns.h>
@@ -47,6 +48,10 @@ struct nsproxy init_nsproxy = {
 	.time_ns		= &init_time_ns,
 	.time_ns_for_children	= &init_time_ns,
 #endif
+#ifdef CONFIG_IMA_NS
+	.ima_ns			= &init_ima_ns,
+	.ima_ns_for_children	= &init_ima_ns,
+#endif
 };
 
 static inline struct nsproxy *create_nsproxy(void)
@@ -121,8 +126,19 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
 	}
 	new_nsp->time_ns = get_time_ns(tsk->nsproxy->time_ns);
 
+	new_nsp->ima_ns_for_children = copy_ima_ns(flags, user_ns,
+					tsk->nsproxy->ima_ns_for_children);
+	if (IS_ERR(new_nsp->ima_ns_for_children)) {
+		err = PTR_ERR(new_nsp->ima_ns_for_children);
+		goto out_ima;
+	}
+	new_nsp->ima_ns = get_ima_ns(tsk->nsproxy->ima_ns);
+
 	return new_nsp;
 
+out_ima:
+	put_time_ns(new_nsp->time_ns);
+	put_time_ns(new_nsp->time_ns_for_children);
 out_time:
 	put_net(new_nsp->net_ns);
 out_net:
@@ -157,8 +173,10 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
 
 	if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
 			      CLONE_NEWPID | CLONE_NEWNET |
-			      CLONE_NEWCGROUP | CLONE_NEWTIME)))) {
-		if (likely(old_ns->time_ns_for_children == old_ns->time_ns)) {
+			      CLONE_NEWCGROUP | CLONE_NEWTIME |
+			      CLONE_NEWIMA)))) {
+		if (likely((old_ns->time_ns_for_children == old_ns->time_ns) &&
+			   (old_ns->ima_ns_for_children == old_ns->ima_ns))) {
 			get_nsproxy(old_ns);
 			return 0;
 		}
@@ -186,6 +204,12 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
 		return ret;
 	}
 
+	ret = imans_on_fork(new_ns, tsk);
+	if (ret) {
+		free_nsproxy(new_ns);
+		return ret;
+	}
+
 	tsk->nsproxy = new_ns;
 	return 0;
 }
@@ -204,6 +228,10 @@ void free_nsproxy(struct nsproxy *ns)
 		put_time_ns(ns->time_ns);
 	if (ns->time_ns_for_children)
 		put_time_ns(ns->time_ns_for_children);
+	if (ns->ima_ns)
+		put_ima_ns(ns->ima_ns);
+	if (ns->ima_ns_for_children)
+		put_ima_ns(ns->ima_ns_for_children);
 	put_cgroup_ns(ns->cgroup_ns);
 	put_net(ns->net_ns);
 	kmem_cache_free(nsproxy_cachep, ns);
@@ -221,7 +249,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 
 	if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
 			       CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP |
-			       CLONE_NEWTIME)))
+			       CLONE_NEWTIME | CLONE_NEWIMA)))
 		return 0;
 
 	user_ns = new_cred ? new_cred->user_ns : current_user_ns();
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 11b1596e2542..3f4768d62b8f 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -70,6 +70,7 @@ static struct ctl_table user_table[] = {
 	UCOUNT_ENTRY("max_mnt_namespaces"),
 	UCOUNT_ENTRY("max_cgroup_namespaces"),
 	UCOUNT_ENTRY("max_time_namespaces"),
+	UCOUNT_ENTRY("max_ima_namespaces"),
 #ifdef CONFIG_INOTIFY_USER
 	UCOUNT_ENTRY("max_inotify_instances"),
 	UCOUNT_ENTRY("max_inotify_watches"),
diff --git a/security/integrity/ima/Makefile b/security/integrity/ima/Makefile
index 67dabca670e2..d804d93f1a99 100644
--- a/security/integrity/ima/Makefile
+++ b/security/integrity/ima/Makefile
@@ -14,3 +14,4 @@ ima-$(CONFIG_HAVE_IMA_KEXEC) += ima_kexec.o
 ima-$(CONFIG_IMA_BLACKLIST_KEYRING) += ima_mok.o
 ima-$(CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS) += ima_asymmetric_keys.o
 ima-$(CONFIG_IMA_QUEUE_EARLY_BOOT_KEYS) += ima_queue_keys.o
+ima-$(CONFIG_IMA_NS) += ima_ns.o
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 38043074ce5e..603da5b2db08 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -20,6 +20,7 @@
 #include <linux/hash.h>
 #include <linux/tpm.h>
 #include <linux/audit.h>
+#include <linux/ima.h>
 #include <crypto/hash_info.h>
 
 #include "../integrity.h"
@@ -371,6 +372,18 @@ static inline int ima_read_xattr(struct dentry *dentry,
 
 #endif /* CONFIG_IMA_APPRAISE */
 
+#ifdef CONFIG_IMA_NS
+static inline struct ima_namespace *get_current_ns(void)
+{
+	return current->nsproxy->ima_ns;
+}
+#else
+static inline struct ima_namespace *get_current_ns(void)
+{
+	return &init_ima_ns;
+}
+#endif /* CONFIG_IMA_NS */
+
 #ifdef CONFIG_IMA_APPRAISE_MODSIG
 int ima_read_modsig(enum ima_hooks func, const void *buf, loff_t buf_len,
 		    struct modsig **modsig);
diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c
index 4902fe7bd570..013bbec16849 100644
--- a/security/integrity/ima/ima_init.c
+++ b/security/integrity/ima/ima_init.c
@@ -15,6 +15,9 @@
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
 #include <linux/err.h>
+#include <linux/kref.h>
+#include <linux/proc_ns.h>
+#include <linux/user_namespace.h>
 
 #include "ima.h"
 
@@ -22,6 +25,16 @@
 const char boot_aggregate_name[] = "boot_aggregate";
 struct tpm_chip *ima_tpm_chip;
 
+struct ima_namespace init_ima_ns = {
+	.kref = KREF_INIT(2),
+	.user_ns = &init_user_ns,
+	.ns.inum = PROC_IMA_INIT_INO,
+#ifdef CONFIG_IMA_NS
+	.ns.ops = &imans_operations,
+#endif
+};
+EXPORT_SYMBOL(init_ima_ns);
+
 /* Add the boot aggregate to the IMA measurement list and extend
  * the PCR register.
  *
diff --git a/security/integrity/ima/ima_ns.c b/security/integrity/ima/ima_ns.c
new file mode 100644
index 000000000000..8f5f301406a2
--- /dev/null
+++ b/security/integrity/ima/ima_ns.c
@@ -0,0 +1,232 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019-2020 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Krzysztof Struczynski <krzysztof.struczynski@huawei.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * File: ima_ns.c
+ *      Functions to manage the IMA namespace.
+ */
+
+#include <linux/export.h>
+#include <linux/ima.h>
+#include <linux/kref.h>
+#include <linux/proc_ns.h>
+#include <linux/slab.h>
+#include <linux/user_namespace.h>
+#include <linux/nsproxy.h>
+#include <linux/sched.h>
+
+#include "ima.h"
+
+static struct ucounts *inc_ima_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, current_euid(), UCOUNT_IMA_NAMESPACES);
+}
+
+static void dec_ima_namespaces(struct ucounts *ucounts)
+{
+	return dec_ucount(ucounts, UCOUNT_IMA_NAMESPACES);
+}
+
+static struct ima_namespace *ima_ns_alloc(void)
+{
+	struct ima_namespace *ima_ns;
+
+	ima_ns = kzalloc(sizeof(*ima_ns), GFP_KERNEL);
+	if (!ima_ns)
+		return NULL;
+
+	return ima_ns;
+}
+
+/**
+ * Clone a new ns copying an original ima namespace, setting refcount to 1
+ *
+ * @user_ns:	User namespace that current task runs in
+ * @old_ns:	Old ima namespace to clone
+ * Return:	ERR_PTR(-ENOMEM) on error (failure to kmalloc), new ns otherwise
+ */
+static struct ima_namespace *clone_ima_ns(struct user_namespace *user_ns,
+					   struct ima_namespace *old_ns)
+{
+	struct ima_namespace *ns;
+	struct ucounts *ucounts;
+	int err;
+
+	err = -ENOSPC;
+	ucounts = inc_ima_namespaces(user_ns);
+	if (!ucounts)
+		goto fail;
+
+	err = -ENOMEM;
+	ns = ima_ns_alloc();
+	if (!ns)
+		goto fail_dec;
+
+	kref_init(&ns->kref);
+
+	err = ns_alloc_inum(&ns->ns);
+	if (err)
+		goto fail_free;
+
+	ns->ns.ops = &imans_operations;
+	ns->user_ns = get_user_ns(user_ns);
+	ns->ucounts = ucounts;
+
+	return ns;
+
+fail_free:
+	kfree(ns);
+fail_dec:
+	dec_ima_namespaces(ucounts);
+fail:
+	return ERR_PTR(err);
+}
+
+/**
+ * Copy task's ima namespace, or clone it if flags specifies CLONE_NEWNS.
+ *
+ * @flags:      Cloning flags
+ * @user_ns:	User namespace that current task runs in
+ * @old_ns:	Old ima namespace to clone
+ *
+ * Return: IMA namespace or ERR_PTR.
+ */
+
+struct ima_namespace *copy_ima_ns(unsigned long flags,
+				  struct user_namespace *user_ns,
+				  struct ima_namespace *old_ns)
+{
+	if (!(flags & CLONE_NEWIMA))
+		return get_ima_ns(old_ns);
+
+	return clone_ima_ns(user_ns, old_ns);
+}
+
+static void destroy_ima_ns(struct ima_namespace *ns)
+{
+	dec_ima_namespaces(ns->ucounts);
+	put_user_ns(ns->user_ns);
+	ns_free_inum(&ns->ns);
+	kfree(ns);
+}
+
+void free_ima_ns(struct kref *kref)
+{
+	struct ima_namespace *ns;
+
+	ns = container_of(kref, struct ima_namespace, kref);
+
+	destroy_ima_ns(ns);
+}
+
+static inline struct ima_namespace *to_ima_ns(struct ns_common *ns)
+{
+	return container_of(ns, struct ima_namespace, ns);
+}
+
+static struct ns_common *imans_get(struct task_struct *task)
+{
+	struct ima_namespace *ns = NULL;
+	struct nsproxy *nsproxy;
+
+	task_lock(task);
+	nsproxy = task->nsproxy;
+	if (nsproxy) {
+		ns = nsproxy->ima_ns;
+		get_ima_ns(ns);
+	}
+	task_unlock(task);
+
+	return ns ? &ns->ns : NULL;
+}
+
+static struct ns_common *imans_for_children_get(struct task_struct *task)
+{
+	struct ima_namespace *ns = NULL;
+	struct nsproxy *nsproxy;
+
+	task_lock(task);
+	nsproxy = task->nsproxy;
+	if (nsproxy) {
+		ns = nsproxy->ima_ns_for_children;
+		get_ima_ns(ns);
+	}
+	task_unlock(task);
+
+	return ns ? &ns->ns : NULL;
+}
+
+static void imans_put(struct ns_common *ns)
+{
+	put_ima_ns(to_ima_ns(ns));
+}
+
+static int imans_install(struct nsset *nsset, struct ns_common *new)
+{
+	struct nsproxy *nsproxy = nsset->nsproxy;
+	struct ima_namespace *ns = to_ima_ns(new);
+
+	if (!current_is_single_threaded())
+		return -EUSERS;
+
+	if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
+	    !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
+		return -EPERM;
+
+	get_ima_ns(ns);
+	put_ima_ns(nsproxy->ima_ns);
+	nsproxy->ima_ns = ns;
+
+	get_ima_ns(ns);
+	put_ima_ns(nsproxy->ima_ns_for_children);
+	nsproxy->ima_ns_for_children = ns;
+
+	return 0;
+}
+
+int imans_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk)
+{
+	struct ns_common *nsc = &nsproxy->ima_ns_for_children->ns;
+	struct ima_namespace *ns = to_ima_ns(nsc);
+
+	/* create_new_namespaces() already incremented the ref counter */
+	if (nsproxy->ima_ns == nsproxy->ima_ns_for_children)
+		return 0;
+
+	get_ima_ns(ns);
+	put_ima_ns(nsproxy->ima_ns);
+	nsproxy->ima_ns = ns;
+
+	return 0;
+}
+
+static struct user_namespace *imans_owner(struct ns_common *ns)
+{
+	return to_ima_ns(ns)->user_ns;
+}
+
+const struct proc_ns_operations imans_operations = {
+	.name = "ima",
+	.type = CLONE_NEWIMA,
+	.get = imans_get,
+	.put = imans_put,
+	.install = imans_install,
+	.owner = imans_owner,
+};
+
+const struct proc_ns_operations imans_for_children_operations = {
+	.name = "ima_for_children",
+	.type = CLONE_NEWIMA,
+	.get = imans_for_children_get,
+	.put = imans_put,
+	.install = imans_install,
+	.owner = imans_owner,
+};
+
-- 
2.20.1


^ permalink raw reply related

* [RFC PATCH 00/30] ima: Introduce IMA namespace
From: krzysztof.struczynski @ 2020-08-18 15:20 UTC (permalink / raw)
  To: linux-integrity, linux-kernel, containers, linux-security-module
  Cc: Krzysztof Struczynski, zohar, stefanb, sunyuqiong1988, mkayaalp,
	dmitry.kasatkin, serge, jmorris, christian, silviu.vlasceanu,
	roberto.sassu
In-Reply-To: <N>

From: Krzysztof Struczynski <krzysztof.struczynski@huawei.com>

IMA has not been designed to work with containers. It handles every
process in the same way, and it cannot distinguish if a process belongs to
a container or not.

Containers use namespaces to make it appear to the processes in the
containers that they have their own isolated instance of the global
resource. For IMA as well, it is desirable to let processes in the
containers have IMA functionality independent from other containers:
separate policy rules, measurement list, additional appraisal keys to
verify the container image, separate audit logs.

As previous work done in this area, this patch series introduces the IMA
namespace, which is a separate instance of IMA to handle a subset of
processes that belong to a container.

The IMA namespace is created using clone3() or unshare() system calls. It
is important to configure the namespace before any process appears in it,
so that the new policy rules apply to the very first process in the
namespace. To achieve that, the intermediate namespace ima_ns_for_children
is used. It stores the configuration and becomes active on the next fork
or when the first process enters it using the setns() system call. The
similar process is used for the time namespace.

The IMA namespace can be configured using the new securityfs directory
entries that allow the user to set the policy rules, x509 certificate for
appraisal and pass IMA configuration parameters normally included in the
kernel command line parameters. It is intended to extend the clone_args to
allow configuration from clone3() syscall.

To inform other containers about an action made by a given container, a
linked list of IMA namespaces has been implemented. The state/policy of
those containers is evaluated to see if read-write violations (ToMToU,
open-writer) should be recorded in the respective measurement list. Any
change to the files shared across containers is recorded in the namespaced
inode's integrity cache when the file is freed.

To isolate the measurement list and the appraisal keys, the following
decisions were made:

The measurement list remains global, with the assumption that there is
only one TPM in the system. Each IMA namespace has a unique ID, that
allows to track measurements per IMA namespace. Processes in one
namespace, have access only to the measurements from that namespace. The
exception is made for the initial IMA namespace, whose processes have
access to all entries.

The appraisal keys of all IMA namespaces are stored in the IMA system
keyring. Each key is linked to the respective IMA namespace using the key
domain tag. The process that belongs to one IMA namespace, cannot
add/replace/modify a key that belongs to another IMA namespace.

To give access to the IMA securityfs directory entries to the container's
owner, read and write (when needed) permissions are given to the "other"
users not in the file's group 'o'. The access to the files is controlled
by IMA, and given only to the user that has SYS_ADMIN capabilities in the
user namespace owning the IMA namespace. The processes from one IMA
namespace have access to the data from that namespace only. This mechanism
can be changed in the future. The one alternative is to create per IMA
namespace entries, similar to the AppArmour file system.

This work is inspired by Stefan Berger's, Mehmet Kayaalp's, Yuqiong Sun's
and Mimi Zohar's series of patches:
https://lore.kernel.org/patchwork/cover/899419/

Patches are logically divided into 5 groups. That order is not yet
reflected in the commit order. This will be fixed as soon as possible.

1. Base mechanism for the IMA namespace; patches: 1-3, 6-8, 15

   Add a new IMA namespace. Add a new CLONE_NEWIMA flag. Create and
   configure IMA analogously to the time namespace, using the intermediate
   ima_ns_for_children. Add the IMA namespace to the IMA subsystem API.

   Create a list of active IMA namespaces.

   Add a reader counter to the integrity inode data to detect violations
   correctly.

2. Policy; patches: 4, 5, 9, 18, 20-22

   Replace global policy data with the per IMA namespace policies.

   Record read-write violations (ToMToU, open-writer) across namespaces.
   Record modifications to the files shared across containers when the
   files are freed.

   Set the owning user namespace of the IMA namespace, to the user
   namespace of the first process born into the new IMA namespace.

   Remap IDs in the policy rules, if the rules were loaded before the user
   namespace mapping was defined.

3. IMA-measurement; patches: 10-14, 17, 29

   Link measurement list entries to the respective IMA namespaces using
   the IMA namespace ID. Include the namespace ID in the digest entry
   lookup.

   Add a new measurement list template that includes IMA namespace ID.

   Add a dummy boot aggregate entry for non-root IMA namespaces.

   Show the measurement list data only for the IMA namespace the process
   belongs to, unless it is the root IMA namespace.

4. IMA-appraisal; patches: 23-28

   Modify keyring search mechanism to include the key domain tag in the
   search criteria for both, direct lookup and the iterative search. Allow
   to set the key domain tag separately from the key type using the
   KEY_ALLOC* flags.

   Add the key domain to the IMA namespace, so that the key is linked to
   the namespace.

   Add the key domain tag to the integrity module's API. Use the new API
   to load the IMA namespace's key to the system IMA keyring.

5. Configuration; patches: 16, 19, 30

   Add the new entries in the IMA securityfs directory. Parse and validate
   configuration data. Apply the new configuration when the first process
   is born in the new IMA namespace.

   Extend read/write permissions to the IMA securityfs entries to the
   other users not in the file's group 'o'. Allow access only to the users
   that have the SYS_ADMIN caps in the user namespace owning the given IMA
   namespace.

Krzysztof Struczynski (30):
  ima: Introduce ima namespace
  ima: Add a list of the installed ima namespaces
  ima: Bind ima namespace to the file descriptor
  ima: Add ima policy related data to the ima namespace
  ima: Add methods for parsing ima policy configuration string
  ima: Add ima namespace to the ima subsystem APIs
  ima: Extend the APIs in the integrity subsystem
  ima: Add integrity inode related data to the ima namespace
  ima: Enable per ima namespace policy settings
  ima: Add ima namespace ID to the ima ML related structures
  ima: Keep track of the measurment list per ima namespace
  ima: Check ima namespace ID during digest entry lookup
  ima: Add a new ima template that includes namespace ID
  ima: Add per namespace view of the measurement list
  ima: Add a reader counter to the integrity inode data
  ima: Extend permissions to the ima securityfs entries
  ima: Add the violation counter to the namespace
  ima: Change the owning user namespace of the ima namespace if
    necessary
  ima: Configure the new ima namespace from securityfs
  ima: Parse per ima namespace policy file
  user namespace: Add function that checks if the UID map is defined
  ima: Remap IDs of subject based rules if necessary
  keys: Add domain tag to the keyring search criteria
  keys: Include key domain tag in the iterative search
  keys: Allow to set key domain tag separately from the key type
  ima: Add key domain to the ima namespace
  integrity: Add key domain tag to the search criteria
  ima: Load per ima namespace x509 certificate
  ima: Add dummy boot aggregate to per ima namespace measurement list
  ima: Set ML template per ima namespace

 crypto/asymmetric_keys/asymmetric_type.c     |  20 +-
 fs/file_table.c                              |   6 +-
 fs/proc/namespaces.c                         |   4 +
 include/linux/digsig.h                       |  11 +-
 include/linux/fs.h                           |   3 +
 include/linux/ima.h                          |  99 ++-
 include/linux/integrity.h                    |  31 +
 include/linux/key-type.h                     |   1 +
 include/linux/key.h                          |  27 +-
 include/linux/nsproxy.h                      |   3 +
 include/linux/proc_ns.h                      |   5 +-
 include/linux/user_namespace.h               |   7 +
 include/uapi/linux/sched.h                   |   1 +
 init/Kconfig                                 |  12 +
 kernel/fork.c                                |  24 +-
 kernel/kexec_file.c                          |   7 +
 kernel/nsproxy.c                             |  34 +-
 kernel/ucount.c                              |   1 +
 kernel/user_namespace.c                      |  11 +
 lib/digsig.c                                 |  11 +-
 security/integrity/digsig.c                  |  46 +-
 security/integrity/digsig_asymmetric.c       |  20 +-
 security/integrity/iint.c                    | 126 +++-
 security/integrity/ima/Makefile              |   1 +
 security/integrity/ima/ima.h                 | 134 +++-
 security/integrity/ima/ima_api.c             |  31 +-
 security/integrity/ima/ima_appraise.c        | 105 ++-
 security/integrity/ima/ima_asymmetric_keys.c |  12 +-
 security/integrity/ima/ima_fs.c              | 272 ++++++-
 security/integrity/ima/ima_init.c            |  47 +-
 security/integrity/ima/ima_kexec.c           |   4 +-
 security/integrity/ima/ima_main.c            | 323 +++++++--
 security/integrity/ima/ima_ns.c              | 718 +++++++++++++++++++
 security/integrity/ima/ima_policy.c          | 397 +++++++---
 security/integrity/ima/ima_queue.c           |  69 +-
 security/integrity/ima/ima_queue_keys.c      |  11 +-
 security/integrity/ima/ima_template.c        |  44 +-
 security/integrity/ima/ima_template_lib.c    |  13 +
 security/integrity/ima/ima_template_lib.h    |   2 +
 security/integrity/integrity.h               |  42 +-
 security/keys/key.c                          |  20 +
 security/keys/keyring.c                      |  25 +-
 security/security.c                          |   2 +-
 43 files changed, 2405 insertions(+), 377 deletions(-)
 create mode 100644 security/integrity/ima/ima_ns.c

base-commit: fc80c51fd4b23ec007e88d4c688f2cac1b8648e7
prerequisite-patch-id: 409c4abf4ee18e7d43eda995ff0db7879d3d0f5c
-- 
2.20.1

^ permalink raw reply

* Re: [PATCH bpf-next v8 5/7] bpf: Implement bpf_local_storage for inodes
From: Martin KaFai Lau @ 2020-08-18 15:23 UTC (permalink / raw)
  To: KP Singh
  Cc: linux-kernel, bpf, linux-security-module, Alexei Starovoitov,
	Daniel Borkmann, Paul Turner, Jann Horn, Florent Revest
In-Reply-To: <60344fad-f761-0fee-a6ef-4880c45c3e52@chromium.org>

On Tue, Aug 18, 2020 at 05:10:34PM +0200, KP Singh wrote:
> 
> 
> On 8/18/20 3:27 AM, Martin KaFai Lau wrote:
> > On Mon, Aug 03, 2020 at 06:46:53PM +0200, KP Singh wrote:
> >> From: KP Singh <kpsingh@google.com>
> >>
> >> Similar to bpf_local_storage for sockets, add local storage for inodes.
> >> The life-cycle of storage is managed with the life-cycle of the inode.
> >> i.e. the storage is destroyed along with the owning inode.
> >>
> >> The BPF LSM allocates an __rcu pointer to the bpf_local_storage in the
> >> security blob which are now stackable and can co-exist with other LSMs.
> >>
> >> Signed-off-by: KP Singh <kpsingh@google.com>
> >> ---
> >>  include/linux/bpf_local_storage.h             |  10 +
> >>  include/linux/bpf_lsm.h                       |  21 ++
> >>  include/linux/bpf_types.h                     |   3 +
> >>  include/uapi/linux/bpf.h                      |  38 +++
> >>  kernel/bpf/Makefile                           |   1 +
> 
> [...]
> 
> ata *inode_storage_lookup(struct inode *inode,
> >> +							   struct bpf_map *map,
> >> +							   bool cacheit_lockit)
> >> +{
> >> +	struct bpf_local_storage *inode_storage;
> >> +	struct bpf_local_storage_map *smap;
> >> +	struct bpf_storage_blob *bsb;
> >> +
> >> +	bsb = bpf_inode(inode);
> >> +	if (!bsb)
> >> +		return ERR_PTR(-ENOENT);
> > ERR_PTR is returned here...
> > 
> >> +
> >> +	inode_storage = rcu_dereference(bsb->storage);
> >> +	if (!inode_storage)
> >> +		return NULL;
> >> +
> 
> [...]
> 
> >> +		kfree_rcu(local_storage, rcu);
> >> +}
> >> +
> >> +
> >> +static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key)
> >> +{
> >> +	struct bpf_local_storage_data *sdata;
> >> +	struct file *f;
> >> +	int fd;
> >> +
> >> +	fd = *(int *)key;
> >> +	f = fcheck(fd);
> >> +	if (!f)
> >> +		return ERR_PTR(-EINVAL);
> >> +
> >> +	get_file(f);
> >> +	sdata = inode_storage_lookup(f->f_inode, map, true);
> >> +	fput(f);
> >> +	return sdata ? sdata->data : NULL;
> > sdata can be ERR_PTR here and a few other cases below.
> > 
> > May be inode_storage_lookup() should just return NULL.
> 
> I think returning NULL is a better option. Thanks!
> 
> > 
> >> +}
> >> +
> >> +static int bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key,
> >> +					 void *value, u64 map_flags)
> >> +{
> >> +	struct bpf_local_storage_data *sdata;
> >> +	struct file *f;
> >> +	int fd;
> >> +
> >> +	fd = *(int *)key;
> >> +	f = fcheck(fd);
> >> +	if (!f)
> >> +		return -EINVAL;
> >> +
> >> +	get_file(f);> get_file() does atomic_long_inc() instead of atomic_long_inc_not_zero().
> > I don't see how that helps here.  Am I missing something?
> 
> You are right, this should not not be an fcheck followed by a get_file
> rather fcheck followed by get_file_rcu:
> 
> #define get_file_rcu_many(x, cnt)	\
> 	atomic_long_add_unless(&(x)->f_count, (cnt), 0)
> #define get_file_rcu(x) get_file_rcu_many((x), 1)
> #define file_count(x)	atomic_long_read(&(x)->f_count)
> 
> But there is an easier way than all of this and this is to use 
> fget_raw which also calls get_file_rcu_many 
> and ensures a non-zero count before getting a reference.
ic. Make sense.

There are fdget() and fdput() also which are used in bpf/syscall.c.

^ permalink raw reply

* Re: [PATCH bpf-next v8 5/7] bpf: Implement bpf_local_storage for inodes
From: KP Singh @ 2020-08-18 15:10 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: linux-kernel, bpf, linux-security-module, Alexei Starovoitov,
	Daniel Borkmann, Paul Turner, Jann Horn, Florent Revest
In-Reply-To: <20200818012758.4666zlknkr4x6cbl@kafai-mbp.dhcp.thefacebook.com>



On 8/18/20 3:27 AM, Martin KaFai Lau wrote:
> On Mon, Aug 03, 2020 at 06:46:53PM +0200, KP Singh wrote:
>> From: KP Singh <kpsingh@google.com>
>>
>> Similar to bpf_local_storage for sockets, add local storage for inodes.
>> The life-cycle of storage is managed with the life-cycle of the inode.
>> i.e. the storage is destroyed along with the owning inode.
>>
>> The BPF LSM allocates an __rcu pointer to the bpf_local_storage in the
>> security blob which are now stackable and can co-exist with other LSMs.
>>
>> Signed-off-by: KP Singh <kpsingh@google.com>
>> ---
>>  include/linux/bpf_local_storage.h             |  10 +
>>  include/linux/bpf_lsm.h                       |  21 ++
>>  include/linux/bpf_types.h                     |   3 +
>>  include/uapi/linux/bpf.h                      |  38 +++
>>  kernel/bpf/Makefile                           |   1 +

[...]

ata *inode_storage_lookup(struct inode *inode,
>> +							   struct bpf_map *map,
>> +							   bool cacheit_lockit)
>> +{
>> +	struct bpf_local_storage *inode_storage;
>> +	struct bpf_local_storage_map *smap;
>> +	struct bpf_storage_blob *bsb;
>> +
>> +	bsb = bpf_inode(inode);
>> +	if (!bsb)
>> +		return ERR_PTR(-ENOENT);
> ERR_PTR is returned here...
> 
>> +
>> +	inode_storage = rcu_dereference(bsb->storage);
>> +	if (!inode_storage)
>> +		return NULL;
>> +

[...]

>> +		kfree_rcu(local_storage, rcu);
>> +}
>> +
>> +
>> +static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key)
>> +{
>> +	struct bpf_local_storage_data *sdata;
>> +	struct file *f;
>> +	int fd;
>> +
>> +	fd = *(int *)key;
>> +	f = fcheck(fd);
>> +	if (!f)
>> +		return ERR_PTR(-EINVAL);
>> +
>> +	get_file(f);
>> +	sdata = inode_storage_lookup(f->f_inode, map, true);
>> +	fput(f);
>> +	return sdata ? sdata->data : NULL;
> sdata can be ERR_PTR here and a few other cases below.
> 
> May be inode_storage_lookup() should just return NULL.

I think returning NULL is a better option. Thanks!

> 
>> +}
>> +
>> +static int bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key,
>> +					 void *value, u64 map_flags)
>> +{
>> +	struct bpf_local_storage_data *sdata;
>> +	struct file *f;
>> +	int fd;
>> +
>> +	fd = *(int *)key;
>> +	f = fcheck(fd);
>> +	if (!f)
>> +		return -EINVAL;
>> +
>> +	get_file(f);> get_file() does atomic_long_inc() instead of atomic_long_inc_not_zero().
> I don't see how that helps here.  Am I missing something?

You are right, this should not not be an fcheck followed by a get_file
rather fcheck followed by get_file_rcu:

#define get_file_rcu_many(x, cnt)	\
	atomic_long_add_unless(&(x)->f_count, (cnt), 0)
#define get_file_rcu(x) get_file_rcu_many((x), 1)
#define file_count(x)	atomic_long_read(&(x)->f_count)

But there is an easier way than all of this and this is to use 
fget_raw which also calls get_file_rcu_many 
and ensures a non-zero count before getting a reference.

- KP

> 
>> +	sdata = bpf_local_storage_update(f->f_inode, map, value, map_flags);
>> +	fput(f);
>> +	return PTR_ERR_OR_ZERO(sdata);
>> +}
>> +

^ permalink raw reply

* Re: file metadata via fs API (was: [GIT PULL] Filesystem Information)
From: Jeffrey E Altman @ 2020-08-18 15:01 UTC (permalink / raw)
  To: Linus Torvalds (torvalds@linux-foundation.org)
  Cc: David Howells, Miklos Szeredi, linux-fsdevel, Al Viro, Karel Zak,
	Jeff Layton, Miklos Szeredi, Nicolas Dichtel, Christian Brauner,
	Lennart Poettering, Linux API, Ian Kent, LSM,
	Linux Kernel Mailing List
In-Reply-To: <CAHk-=whLhwum2E+qperD=TypGHXxoBtXOu-HHDd9L9_XFFyiaA@mail.gmail.com>

[-- Attachment #1.1: Type: text/plain, Size: 925 bytes --]

On 8/14/2020 1:05 PM, Linus Torvalds (torvalds@linux-foundation.org) wrote:
> Honestly, I really think you may want an extended [f]statfs(), not
> some mount tracking.
> 
>                  Linus

Linus,

Thank you for the reply.  Perhaps some of the communication disconnect
is due to which thread this discussion is taking place on.  My
understanding is that there were two separate pull requests.  One for
mount notifications and the other for filesystem information.  This
thread is derived from the pull request entitled "Filesystem
Information" and my response was a request for use cases.  The
assumption being that the request was related to the subject.

I apologize for creating unnecessary noise due to my misinterpretation
of your intended question.  The use cases I described and the types of
filesystem information required to satisfy them do not require mount
tracking.

Jeffrey Altman

[-- Attachment #1.2: jaltman.vcf --]
[-- Type: text/x-vcard, Size: 283 bytes --]

begin:vcard
fn:Jeffrey Altman
n:Altman;Jeffrey
org:AuriStor, Inc.
adr:;;255 W 94TH ST STE 6B;New York;NY;10025-6985;United States
email;internet:jaltman@auristor.com
title:CEO
tel;work:+1-212-769-9018
url:https://www.linkedin.com/in/jeffreyaltman/
version:2.1
end:vcard

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 4033 bytes --]

^ permalink raw reply

* Re: [PATCH bpf-next v8 1/7] A purely mechanical change to split the renaming from the actual generalization.
From: KP Singh @ 2020-08-18 14:30 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: linux-kernel, bpf, linux-security-module, Alexei Starovoitov,
	Daniel Borkmann, Paul Turner, Jann Horn, Florent Revest
In-Reply-To: <20200817235621.ulkqw6mqd2uu647t@kafai-mbp.dhcp.thefacebook.com>



On 8/18/20 1:56 AM, Martin KaFai Lau wrote:
> On Mon, Aug 03, 2020 at 06:46:49PM +0200, KP Singh wrote:
>> From: KP Singh <kpsingh@google.com>
>>
>> Flags/consts:
>>
>>   SK_STORAGE_CREATE_FLAG_MASK	BPF_LOCAL_STORAGE_CREATE_FLAG_MASK
>>   BPF_SK_STORAGE_CACHE_SIZE	BPF_LOCAL_STORAGE_CACHE_SIZE
>>   MAX_VALUE_SIZE		BPF_LOCAL_STORAGE_MAX_VALUE_SIZE
>>
>> Structs:
>>
>>   bucket			bpf_local_storage_map_bucket
>>   bpf_sk_storage_map		bpf_local_storage_map
>>   bpf_sk_storage_data		bpf_local_storage_data
>>   bpf_sk_storage_elem		bpf_local_storage_elem
>>   bpf_sk_storage		bpf_local_storage
>>
>> The "sk" member in bpf_local_storage is also updated to "owner"
>> in preparation for changing the type to void * in a subsequent patch.
>>
>> Functions:
>>
>>   selem_linked_to_sk			selem_linked_to_storage
>>   selem_alloc				bpf_selem_alloc
>>   __selem_unlink_sk			bpf_selem_unlink_storage
>>   __selem_link_sk			bpf_selem_link_storage
>>   selem_unlink_sk			__bpf_selem_unlink_storage
>>   sk_storage_update			bpf_local_storage_update
>>   __sk_storage_lookup			bpf_local_storage_lookup
>>   bpf_sk_storage_map_free		bpf_local_storage_map_free
>>   bpf_sk_storage_map_alloc		bpf_local_storage_map_alloc
>>   bpf_sk_storage_map_alloc_check	bpf_local_storage_map_alloc_check
>>   bpf_sk_storage_map_check_btf		bpf_local_storage_map_check_btf
>>
> 
> [ ... ]
> 
>> @@ -147,85 +148,86 @@ static struct bpf_sk_storage_elem *selem_alloc(struct bpf_sk_storage_map *smap,
>>   * The caller must ensure selem->smap is still valid to be
>>   * dereferenced for its smap->elem_size and smap->cache_idx.
>>   */
>> -static bool __selem_unlink_sk(struct bpf_sk_storage *sk_storage,
>> -			      struct bpf_sk_storage_elem *selem,
>> -			      bool uncharge_omem)
>> +static bool bpf_selem_unlink_storage(struct bpf_local_storage *local_storage,
>> +				     struct bpf_local_storage_elem *selem,
>> +				     bool uncharge_omem)
> Please add a "_nolock()" suffix, just to be clear that the unlink_map()
> counter part is locked.  It could be a follow up later.

Done.

> 
>>  {
>> -	struct bpf_sk_storage_map *smap;
>> -	bool free_sk_storage;
>> +	struct bpf_local_storage_map *smap;
>> +	bool free_local_storage;

[...]

>> +	if (unlikely(!selem_linked_to_storage(selem)))
>>  		/* selem has already been unlinked from sk */
>>  		return;
>>  
>> -	sk_storage = rcu_dereference(selem->sk_storage);
>> -	raw_spin_lock_bh(&sk_storage->lock);
>> -	if (likely(selem_linked_to_sk(selem)))
>> -		free_sk_storage = __selem_unlink_sk(sk_storage, selem, true);
>> -	raw_spin_unlock_bh(&sk_storage->lock);
>> +	local_storage = rcu_dereference(selem->local_storage);
>> +	raw_spin_lock_bh(&local_storage->lock);
>> +	if (likely(selem_linked_to_storage(selem)))
>> +		free_local_storage =
>> +			bpf_selem_unlink_storage(local_storage, selem, true);
>> +	raw_spin_unlock_bh(&local_storage->lock);
>>  
>> -	if (free_sk_storage)
>> -		kfree_rcu(sk_storage, rcu);
>> +	if (free_local_storage)
>> +		kfree_rcu(local_storage, rcu);
>>  }
>>  
>> -static void __selem_link_sk(struct bpf_sk_storage *sk_storage,
>> -			    struct bpf_sk_storage_elem *selem)
>> +static void bpf_selem_link_storage(struct bpf_local_storage *local_storage,
>> +				   struct bpf_local_storage_elem *selem)
> Same here. bpf_selem_link_storage"_nolock"().

Done.

> 
> Please tag the Subject line with "bpf:".

Done. Changed it to:

bpf: Renames in preparation for bpf_local_storage
    
A purely mechanical change to split the renaming from the actual
generalization.

[...]

> 
> Acked-by: Martin KaFai Lau <kafai@fb.com>
> 

^ permalink raw reply

* Re: [PATCH v36 15/24] x86/sgx: Allow a limited use of ATTRIBUTE.PROVISIONKEY for attestation
From: Jarkko Sakkinen @ 2020-08-18 13:30 UTC (permalink / raw)
  To: Darren Kenny
  Cc: x86, linux-sgx, linux-kernel, linux-security-module,
	Jethro Beekman, Andy Lutomirski, akpm, andriy.shevchenko, asapek,
	bp, cedric.xing, chenalexchen, conradparker, cyhanish,
	dave.hansen, haitao.huang, josh, kai.huang, kai.svahn, kmoy,
	ludloff, nhorman, npmccallum, puiterwijk, rientjes,
	sean.j.christopherson, tglx, yaozhangx
In-Reply-To: <m27dubr9pp.fsf@oracle.com>

On Thu, Aug 06, 2020 at 06:00:02PM +0100, Darren Kenny wrote:
> On Thursday, 2020-07-16 at 16:52:54 +03, Jarkko Sakkinen wrote:
> > Provisioning Certification Enclave (PCE), the root of trust for other
> > enclaves, generates a signing key from a fused key called Provisioning
> > Certification Key. PCE can then use this key to certify an attestation key
> > of a Quoting Enclave (QE), e.g. we get the chain of trust down to the
> > hardware if the Intel signed PCE is used.
> >
> > To use the needed keys, ATTRIBUTE.PROVISIONKEY is required but should be
> > only allowed for those who actually need it so that only the trusted
> > parties can certify QE's.
> >
> > Obviously the attestation service should know the public key of the used
> > PCE and that way detect illegit attestation, but whitelisting the legit
> > users still adds an additional layer of defence.
> >
> > Add new device file called /dev/sgx/provision. The sole purpose of this
> > file is to provide file descriptors that act as privilege tokens to allow
> > to build enclaves with ATTRIBUTE.PROVISIONKEY set. A new ioctl called
> > SGX_IOC_ENCLAVE_SET_ATTRIBUTE is used to assign this token to an enclave.
> >
> > Cc: linux-security-module@vger.kernel.org
> > Acked-by: Jethro Beekman <jethro@fortanix.com>
> > Suggested-by: Andy Lutomirski <luto@kernel.org>
> > Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> 
> Reviewed-by: Darren Kenny <darren.kenny@oracle.com>

Thanks, I added your tags to the corresponding commits.

/Jarkko

^ permalink raw reply

* Re: file metadata via fs API
From: Miklos Szeredi @ 2020-08-18 12:50 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Steven Whitehouse, David Howells, linux-fsdevel, Al Viro,
	Karel Zak, Jeff Layton, Miklos Szeredi, Nicolas Dichtel,
	Christian Brauner, Lennart Poettering, Linux API, Ian Kent, LSM,
	Linux Kernel Mailing List
In-Reply-To: <CAHk-=wh4qaj6iFTrbHy8TPfmM3fj+msYC5X_KE0rCdStJKH2NA@mail.gmail.com>

On Tue, Aug 18, 2020 at 12:44 AM Linus Torvalds
<torvalds@linux-foundation.org> wrote:

> So please. Can we just make a simple extended statfs() and be done
> with it, instead of this hugely complex thing that does five different
> things with the same interface and makes it really odd as a result?

How do you propose handling variable size attributes, like the list of
fs options?

Otherwise I'm fine with a statx-like interface for querying fs/mount attributes.

Thanks,
Miklos

^ permalink raw reply

* Re: file metadata via fs API (was: [GIT PULL] Filesystem Information)
From: Miklos Szeredi @ 2020-08-18  9:41 UTC (permalink / raw)
  To: Al Viro
  Cc: Linus Torvalds, Jann Horn, Casey Schaufler, Andy Lutomirski,
	linux-fsdevel, David Howells, Karel Zak, Jeff Layton,
	Miklos Szeredi, Nicolas Dichtel, Christian Brauner,
	Lennart Poettering, Linux API, Ian Kent, LSM,
	Linux Kernel Mailing List
In-Reply-To: <20200812213041.GV1236603@ZenIV.linux.org.uk>

On Wed, Aug 12, 2020 at 11:30 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> On Wed, Aug 12, 2020 at 07:33:26PM +0100, Al Viro wrote:
>
> > BTW, what would such opened files look like from /proc/*/fd/* POV?  And
> > what would happen if you walk _through_ that symlink, with e.g. ".."
> > following it?  Or with names of those attributes, for that matter...
> > What about a normal open() of such a sucker?  It won't know where to
> > look for your ->private_data...
> >
> > FWIW, you keep refering to regularity of this stuff from the syscall
> > POV, but it looks like you have no real idea of what subset of the
> > things available for normal descriptors will be available for those.
>
> Another question: what should happen with that sucker on umount of
> the filesystem holding the underlying object?  Should it be counted
> as pinning that fs?

Obviously yes.

> Who controls what's in that tree?

It could be several entities:

 - global (like mount info)
 - per inode (like xattr)
 - per fs (fs specific inode attributes)
 - etc..

>  If we plan to have xattrs there,
> will they be in a flat tree, or should it mirror the hierarchy of
> xattrs?  When is it populated?  open() time?  What happens if we
> add/remove an xattr after that point?

From the interface perspective it would be dynamic (i.e. would get
updated on open or read).  From an implementation POV it could have
caching, but that's not how I'd start out.

> If we open the same file several times, what should we get?  A full
> copy of the tree every time, with all coherency being up to whatever's
> putting attributes there?
>
> What are the permissions needed to do lookups in that thing?

That would depend on what would need to be looked up.  Top level would
be world readable, otherwise it would be up to the attribute/group.

>
> All of that is about semantics and the answers are needed before we
> start looking into implementations.  "Whatever my implementation
> does" is _not_ a good way to go, especially since that'll be cast
> in stone as soon as API becomes exposed to userland...

Fine.

Thanks,
Miklos

^ permalink raw reply

* Re: file metadata via fs API (was: [GIT PULL] Filesystem Information)
From: Miklos Szeredi @ 2020-08-18  9:30 UTC (permalink / raw)
  To: Al Viro
  Cc: Linus Torvalds, Jann Horn, Casey Schaufler, Andy Lutomirski,
	linux-fsdevel, David Howells, Karel Zak, Jeff Layton,
	Miklos Szeredi, Nicolas Dichtel, Christian Brauner,
	Lennart Poettering, Linux API, Ian Kent, LSM,
	Linux Kernel Mailing List
In-Reply-To: <20200812183326.GU1236603@ZenIV.linux.org.uk>

On Wed, Aug 12, 2020 at 8:33 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> On Wed, Aug 12, 2020 at 06:39:11PM +0100, Al Viro wrote:
> > On Wed, Aug 12, 2020 at 07:16:37PM +0200, Miklos Szeredi wrote:
> > > On Wed, Aug 12, 2020 at 6:33 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
> > > >
> > > > On Wed, Aug 12, 2020 at 05:13:14PM +0200, Miklos Szeredi wrote:
> > >
> > > > > Why does it have to have a struct mount?  It does not have to use
> > > > > dentry/mount based path lookup.
> > > >
> > > > What the fuck?  So we suddenly get an additional class of objects
> > > > serving as kinda-sorta analogues of dentries *AND* now struct file
> > > > might refer to that instead of a dentry/mount pair - all on the VFS
> > > > level?  And so do all the syscalls you want to allow for such "pathnames"?
> > >
> > > The only syscall I'd want to allow is open, everything else would be
> > > on the open files themselves.
> > >
> > > file->f_path can refer to an anon mount/inode, the real object is
> > > referred to by file->private_data.
> > >
> > > The change to namei.c would be on the order of ~10 lines.  No other
> > > parts of the VFS would be affected.
> >
> > If some of the things you open are directories (and you *have* said that
> > directories will be among those just upthread, and used references to
> > readdir() as argument in favour of your approach elsewhere in the thread),
> > you will have to do something about fchdir().  And that's the least of
> > the issues.
>
> BTW, what would such opened files look like from /proc/*/fd/* POV?  And
> what would happen if you walk _through_ that symlink, with e.g. ".."
> following it?  Or with names of those attributes, for that matter...
> What about a normal open() of such a sucker?  It won't know where to
> look for your ->private_data...
>
> FWIW, you keep refering to regularity of this stuff from the syscall
> POV, but it looks like you have no real idea of what subset of the
> things available for normal descriptors will be available for those.

I have said that IMO using a non-seekable anon-file would be okay for
those.   All the answers fall out of that:  nothing works on those
fd's except read/write/getdents.  No fchdir(), no /proc/*/fd deref,
etc...

Starting with a very limited functionality and expanding on that if
necessary is I think a good way to not get bogged down with the
details.

Thanks,
Miklos

^ permalink raw reply

* Re: [PATCH] security: keys: delete repeated words in comments
From: Jarkko Sakkinen @ 2020-08-18  5:57 UTC (permalink / raw)
  To: Randy Dunlap
  Cc: linux-kernel, David Howells, keyrings, James Morris,
	Serge E. Hallyn, linux-security-module
In-Reply-To: <20200807165123.3863-1-rdunlap@infradead.org>

On Fri, Aug 07, 2020 at 09:51:23AM -0700, Randy Dunlap wrote:
> Drop repeated words in comments.
> {to, will, the}
> 
> Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
> Cc: David Howells <dhowells@redhat.com>
> Cc: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> Cc: keyrings@vger.kernel.org
> Cc: James Morris <jmorris@namei.org>
> Cc: "Serge E. Hallyn" <serge@hallyn.com>
> Cc: linux-security-module@vger.kernel.org

Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>

/Jarkko

^ permalink raw reply

* Re: [PATCH bpf-next v8 6/7] bpf: Allow local storage to be used from LSM programs
From: Martin KaFai Lau @ 2020-08-18  4:16 UTC (permalink / raw)
  To: KP Singh
  Cc: linux-kernel, bpf, linux-security-module, Alexei Starovoitov,
	Daniel Borkmann, Paul Turner, Jann Horn, Florent Revest
In-Reply-To: <20200803164655.1924498-7-kpsingh@chromium.org>

On Mon, Aug 03, 2020 at 06:46:54PM +0200, KP Singh wrote:
> From: KP Singh <kpsingh@google.com>
> 
> Adds support for both bpf_{sk, inode}_storage_{get, delete} to be used
> in LSM programs. These helpers are not used for tracing programs
> (currently) as their usage is tied to the life-cycle of the object and
> should only be used where the owning object won't be freed (when the
> owning object is passed as an argument to the LSM hook). Thus, they
> are safer to use in LSM hooks than tracing. Usage of local storage in
> tracing programs will probably follow a per function based whitelist
> approach.
> 
> Since the UAPI helper signature for bpf_sk_storage expect a bpf_sock,
> it, leads to a compilation warning for LSM programs, it's also updated
> to accept a void * pointer instead.
> 
> Signed-off-by: KP Singh <kpsingh@google.com>
> ---
>  include/net/bpf_sk_storage.h   |  2 ++
>  include/uapi/linux/bpf.h       |  8 ++++++--
>  kernel/bpf/bpf_lsm.c           | 21 ++++++++++++++++++++-
>  net/core/bpf_sk_storage.c      | 25 +++++++++++++++++++++++++
>  tools/include/uapi/linux/bpf.h |  8 ++++++--
>  5 files changed, 59 insertions(+), 5 deletions(-)
> 
> diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
> index 847926cf2899..c5702d7baeaa 100644
> --- a/include/net/bpf_sk_storage.h
> +++ b/include/net/bpf_sk_storage.h
> @@ -20,6 +20,8 @@ void bpf_sk_storage_free(struct sock *sk);
>  
>  extern const struct bpf_func_proto bpf_sk_storage_get_proto;
>  extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
> +extern const struct bpf_func_proto sk_storage_get_btf_proto;
> +extern const struct bpf_func_proto sk_storage_delete_btf_proto;
>  
>  struct bpf_sk_storage_diag;
>  struct sk_buff;
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index e17c00eea5d8..6ffc61dafc5c 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2807,7 +2807,7 @@ union bpf_attr {
>   *
>   *		**-ERANGE** if resulting value was out of range.
>   *
> - * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags)
> + * void *bpf_sk_storage_get(struct bpf_map *map, void *sk, void *value, u64 flags)
>   *	Description
>   *		Get a bpf-local-storage from a *sk*.
>   *
> @@ -2823,6 +2823,10 @@ union bpf_attr {
>   *		"type". The bpf-local-storage "type" (i.e. the *map*) is
>   *		searched against all bpf-local-storages residing at *sk*.
>   *
> + *		For socket programs, *sk* should be a **struct bpf_sock** pointer
> + *		and an **ARG_PTR_TO_BTF_ID** of type **struct sock** for LSM
> + *		programs.
I found it a little vague on what "socket programs" is.  May be:

*sk* is a kernel **struct sock** pointer for LSM program.
*sk* is a **struct bpf_sock** pointer for other program types.

Others LGTM

Acked-by: Martin KaFai Lau <kafai@fb.com>

^ permalink raw reply

* Re: [PATCH bpf-next v8 5/7] bpf: Implement bpf_local_storage for inodes
From: Martin KaFai Lau @ 2020-08-18  1:27 UTC (permalink / raw)
  To: KP Singh
  Cc: linux-kernel, bpf, linux-security-module, Alexei Starovoitov,
	Daniel Borkmann, Paul Turner, Jann Horn, Florent Revest
In-Reply-To: <20200803164655.1924498-6-kpsingh@chromium.org>

On Mon, Aug 03, 2020 at 06:46:53PM +0200, KP Singh wrote:
> From: KP Singh <kpsingh@google.com>
> 
> Similar to bpf_local_storage for sockets, add local storage for inodes.
> The life-cycle of storage is managed with the life-cycle of the inode.
> i.e. the storage is destroyed along with the owning inode.
> 
> The BPF LSM allocates an __rcu pointer to the bpf_local_storage in the
> security blob which are now stackable and can co-exist with other LSMs.
> 
> Signed-off-by: KP Singh <kpsingh@google.com>
> ---
>  include/linux/bpf_local_storage.h             |  10 +
>  include/linux/bpf_lsm.h                       |  21 ++
>  include/linux/bpf_types.h                     |   3 +
>  include/uapi/linux/bpf.h                      |  38 +++
>  kernel/bpf/Makefile                           |   1 +
>  kernel/bpf/bpf_inode_storage.c                | 265 ++++++++++++++++++
>  kernel/bpf/syscall.c                          |   3 +-
>  kernel/bpf/verifier.c                         |  10 +
>  security/bpf/hooks.c                          |   7 +
>  .../bpf/bpftool/Documentation/bpftool-map.rst |   2 +-
>  tools/bpf/bpftool/bash-completion/bpftool     |   3 +-
>  tools/bpf/bpftool/map.c                       |   3 +-
>  tools/include/uapi/linux/bpf.h                |  38 +++
>  tools/lib/bpf/libbpf_probes.c                 |   5 +-
>  14 files changed, 403 insertions(+), 6 deletions(-)
>  create mode 100644 kernel/bpf/bpf_inode_storage.c
> 
> diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
> index 3685f681a7fc..75c76847fad5 100644
> --- a/include/linux/bpf_local_storage.h
> +++ b/include/linux/bpf_local_storage.h
> @@ -160,4 +160,14 @@ struct bpf_local_storage_data *
>  bpf_local_storage_update(void *owner, struct bpf_map *map, void *value,
>  			 u64 map_flags);
>  
> +#ifdef CONFIG_BPF_LSM
> +extern const struct bpf_func_proto bpf_inode_storage_get_proto;
> +extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
> +void bpf_inode_storage_free(struct inode *inode);
> +#else
> +static inline void bpf_inode_storage_free(struct inode *inode)
> +{
> +}
> +#endif /* CONFIG_BPF_LSM */
This is LSM specific.  Can they be moved to bpf_lsm.h?

> +
>  #endif /* _BPF_LOCAL_STORAGE_H */
> diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
> index af74712af585..d0683ada1e49 100644
> --- a/include/linux/bpf_lsm.h
> +++ b/include/linux/bpf_lsm.h
> @@ -17,9 +17,24 @@
>  #include <linux/lsm_hook_defs.h>
>  #undef LSM_HOOK
>  
> +struct bpf_storage_blob {
> +	struct bpf_local_storage __rcu *storage;
> +};
> +
> +extern struct lsm_blob_sizes bpf_lsm_blob_sizes;
> +
>  int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
>  			const struct bpf_prog *prog);
>  
> +static inline struct bpf_storage_blob *bpf_inode(
> +	const struct inode *inode)
> +{
> +	if (unlikely(!inode->i_security))
> +		return NULL;
> +
> +	return inode->i_security + bpf_lsm_blob_sizes.lbs_inode;
> +}
> +
>  #else /* !CONFIG_BPF_LSM */
>  
>  static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
> @@ -28,6 +43,12 @@ static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
>  	return -EOPNOTSUPP;
>  }
>  
> +static inline struct bpf_storage_blob *bpf_inode(
> +	const struct inode *inode)
> +{
> +	return NULL;
> +}
> +
>  #endif /* CONFIG_BPF_LSM */
>  
>  #endif /* _LINUX_BPF_LSM_H */

[ ... ]

> diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
> new file mode 100644
> index 000000000000..a51a6328c02d
> --- /dev/null
> +++ b/kernel/bpf/bpf_inode_storage.c
> @@ -0,0 +1,265 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2019 Facebook
> + * Copyright 2020 Google LLC.
> + */
> +
> +#include <linux/rculist.h>
> +#include <linux/list.h>
> +#include <linux/hash.h>
> +#include <linux/types.h>
> +#include <linux/spinlock.h>
> +#include <linux/bpf.h>
> +#include <linux/bpf_local_storage.h>
> +#include <net/sock.h>
> +#include <uapi/linux/sock_diag.h>
> +#include <uapi/linux/btf.h>
> +#include <linux/bpf_lsm.h>
> +#include <linux/btf_ids.h>
> +#include <linux/fdtable.h>
> +
> +DEFINE_BPF_STORAGE_CACHE(inode_cache);
> +
> +static struct bpf_local_storage __rcu **
> +inode_storage_ptr(void *owner)
> +{
> +	struct inode *inode = owner;
> +	struct bpf_storage_blob *bsb;
> +	bsb = bpf_inode(inode);
> +	if (!bsb)
> +		return NULL;
> +	return &bsb->storage;
> +}
> +
> +static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode,
> +							   struct bpf_map *map,
> +							   bool cacheit_lockit)
> +{
> +	struct bpf_local_storage *inode_storage;
> +	struct bpf_local_storage_map *smap;
> +	struct bpf_storage_blob *bsb;
> +
> +	bsb = bpf_inode(inode);
> +	if (!bsb)
> +		return ERR_PTR(-ENOENT);
ERR_PTR is returned here...

> +
> +	inode_storage = rcu_dereference(bsb->storage);
> +	if (!inode_storage)
> +		return NULL;
> +
> +	smap = (struct bpf_local_storage_map *)map;
> +	return bpf_local_storage_lookup(inode_storage, smap, cacheit_lockit);
> +}
> +
> +void bpf_inode_storage_free(struct inode *inode)
> +{
> +	struct bpf_local_storage_elem *selem;
> +	struct bpf_local_storage *local_storage;
> +	bool free_inode_storage = false;
> +	struct bpf_storage_blob *bsb;
> +	struct hlist_node *n;
> +
> +	bsb = bpf_inode(inode);
> +	if (!bsb)
> +		return;
> +
> +	rcu_read_lock();
> +
> +	local_storage = rcu_dereference(bsb->storage);
> +	if (!local_storage) {
> +		rcu_read_unlock();
> +		return;
> +	}
> +
> +	/* Netiher the bpf_prog nor the bpf-map's syscall
> +	 * could be modifying the local_storage->list now.
> +	 * Thus, no elem can be added-to or deleted-from the
> +	 * local_storage->list by the bpf_prog or by the bpf-map's syscall.
> +	 *
> +	 * It is racing with bpf_local_storage_map_free() alone
> +	 * when unlinking elem from the local_storage->list and
> +	 * the map's bucket->list.
> +	 */
> +	raw_spin_lock_bh(&local_storage->lock);
> +	hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
> +		/* Always unlink from map before unlinking from
> +		 * local_storage.
> +		 */
> +		bpf_selem_unlink_map(selem);
> +		free_inode_storage =
> +			bpf_selem_unlink_storage(local_storage, selem, false);
> +	}
> +	raw_spin_unlock_bh(&local_storage->lock);
> +	rcu_read_unlock();
> +
> +	/* free_inoode_storage should always be true as long as
> +	 * local_storage->list was non-empty.
> +	 */
> +	if (free_inode_storage)
> +		kfree_rcu(local_storage, rcu);
> +}
> +
> +
> +static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key)
> +{
> +	struct bpf_local_storage_data *sdata;
> +	struct file *f;
> +	int fd;
> +
> +	fd = *(int *)key;
> +	f = fcheck(fd);
> +	if (!f)
> +		return ERR_PTR(-EINVAL);
> +
> +	get_file(f);
> +	sdata = inode_storage_lookup(f->f_inode, map, true);
> +	fput(f);
> +	return sdata ? sdata->data : NULL;
sdata can be ERR_PTR here and a few other cases below.

May be inode_storage_lookup() should just return NULL.

> +}
> +
> +static int bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key,
> +					 void *value, u64 map_flags)
> +{
> +	struct bpf_local_storage_data *sdata;
> +	struct file *f;
> +	int fd;
> +
> +	fd = *(int *)key;
> +	f = fcheck(fd);
> +	if (!f)
> +		return -EINVAL;
> +
> +	get_file(f);
get_file() does atomic_long_inc() instead of atomic_long_inc_not_zero().
I don't see how that helps here.  Am I missing something?

> +	sdata = bpf_local_storage_update(f->f_inode, map, value, map_flags);
> +	fput(f);
> +	return PTR_ERR_OR_ZERO(sdata);
> +}
> +

^ permalink raw reply

* Re: [PATCH bpf-next v8 3/7] bpf: Generalize bpf_sk_storage
From: Martin KaFai Lau @ 2020-08-18  1:05 UTC (permalink / raw)
  To: KP Singh
  Cc: linux-kernel, bpf, linux-security-module, Alexei Starovoitov,
	Daniel Borkmann, Paul Turner, Jann Horn, Florent Revest
In-Reply-To: <20200803164655.1924498-4-kpsingh@chromium.org>

On Mon, Aug 03, 2020 at 06:46:51PM +0200, KP Singh wrote:
> From: KP Singh <kpsingh@google.com>
> 
> Refactor the functionality in bpf_sk_storage.c so that concept of
> storage linked to kernel objects can be extended to other objects like
> inode, task_struct etc.
> 
> Each new local storage will still be a separate map and provide its own
> set of helpers. This allows for future object specific extensions and
> still share a lot of the underlying implementation.
> 
> This includes the changes suggested by Martin in:
> 
>   https://lore.kernel.org/bpf/20200725013047.4006241-1-kafai@fb.com/
> 
> which adds map_local_storage_charge, map_local_storage_uncharge,
> and map_owner_storage_ptr.
A description will still be useful in the commit message to talk
about the new map_ops, e.g.
they allow kernel object to optionally have different mem-charge strategy.

> 
> Co-developed-by: Martin KaFai Lau <kafai@fb.com>
> Signed-off-by: KP Singh <kpsingh@google.com>
> ---
>  include/linux/bpf.h            |   9 ++
>  include/net/bpf_sk_storage.h   |  51 +++++++
>  include/uapi/linux/bpf.h       |   8 +-
>  net/core/bpf_sk_storage.c      | 246 +++++++++++++++++++++------------
>  tools/include/uapi/linux/bpf.h |   8 +-
>  5 files changed, 233 insertions(+), 89 deletions(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index cef4ef0d2b4e..8e1e23c60dc7 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -34,6 +34,9 @@ struct btf_type;
>  struct exception_table_entry;
>  struct seq_operations;
>  struct bpf_iter_aux_info;
> +struct bpf_local_storage;
> +struct bpf_local_storage_map;
> +struct bpf_local_storage_elem;
"struct bpf_local_storage_elem" is not needed.

>  
>  extern struct idr btf_idr;
>  extern spinlock_t btf_idr_lock;
> @@ -104,6 +107,12 @@ struct bpf_map_ops {
>  	__poll_t (*map_poll)(struct bpf_map *map, struct file *filp,
>  			     struct poll_table_struct *pts);
>  
> +	/* Functions called by bpf_local_storage maps */
> +	int (*map_local_storage_charge)(struct bpf_local_storage_map *smap,
> +					void *owner, u32 size);
> +	void (*map_local_storage_uncharge)(struct bpf_local_storage_map *smap,
> +					   void *owner, u32 size);
> +	struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner);
>  	/* BTF name and id of struct allocated by map_alloc */
>  	const char * const map_btf_name;
>  	int *map_btf_id;
> diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
> index 950c5aaba15e..05b777950eb3 100644
> --- a/include/net/bpf_sk_storage.h
> +++ b/include/net/bpf_sk_storage.h
> @@ -3,8 +3,15 @@
>  #ifndef _BPF_SK_STORAGE_H
>  #define _BPF_SK_STORAGE_H
>  
> +#include <linux/rculist.h>
> +#include <linux/list.h>
> +#include <linux/hash.h>
>  #include <linux/types.h>
>  #include <linux/spinlock.h>
> +#include <linux/bpf.h>
> +#include <net/sock.h>
> +#include <uapi/linux/sock_diag.h>
> +#include <uapi/linux/btf.h>
>  
>  struct sock;
>  
> @@ -34,6 +41,50 @@ u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache);
>  void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
>  				      u16 idx);
>  
> +/* Helper functions for bpf_local_storage */
> +int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
> +
> +struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr);
> +
> +struct bpf_local_storage_data *
> +bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
> +			 struct bpf_local_storage_map *smap,
> +			 bool cacheit_lockit);
> +
> +void bpf_local_storage_map_free(struct bpf_local_storage_map *smap);
> +
> +int bpf_local_storage_map_check_btf(const struct bpf_map *map,
> +				    const struct btf *btf,
> +				    const struct btf_type *key_type,
> +				    const struct btf_type *value_type);
> +
> +void bpf_selem_link_storage(struct bpf_local_storage *local_storage,
> +			    struct bpf_local_storage_elem *selem);
> +
> +bool bpf_selem_unlink_storage(struct bpf_local_storage *local_storage,
> +			      struct bpf_local_storage_elem *selem,
> +			      bool uncharge_omem);
> +
> +void bpf_selem_unlink(struct bpf_local_storage_elem *selem);
> +
> +void bpf_selem_link_map(struct bpf_local_storage_map *smap,
> +			struct bpf_local_storage_elem *selem);
> +
> +void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem);
> +
> +struct bpf_local_storage_elem *
> +bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
> +		bool charge_mem);
> +
> +int
> +bpf_local_storage_alloc(void *owner,
> +			struct bpf_local_storage_map *smap,
> +			struct bpf_local_storage_elem *first_selem);
> +
> +struct bpf_local_storage_data *
> +bpf_local_storage_update(void *owner, struct bpf_map *map, void *value,
Nit.  It may be more consistent to take "struct bpf_local_storage_map *smap"
instead of "struct bpf_map *map" here.

bpf_local_storage_map_check_btf() will be the only one taking
"struct bpf_map *map".

> +			 u64 map_flags);
> +
>  #ifdef CONFIG_BPF_SYSCALL
>  int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
>  struct bpf_sk_storage_diag *
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index b134e679e9db..35629752cec8 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3647,9 +3647,13 @@ enum {
>  	BPF_F_SYSCTL_BASE_NAME		= (1ULL << 0),
>  };
>  
> -/* BPF_FUNC_sk_storage_get flags */
> +/* BPF_FUNC_<local>_storage_get flags */
BPF_FUNC_<kernel_obj>_storage_get flags?

>  enum {
> -	BPF_SK_STORAGE_GET_F_CREATE	= (1ULL << 0),
> +	BPF_LOCAL_STORAGE_GET_F_CREATE	= (1ULL << 0),
> +	/* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility
> +	 * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead.
> +	 */
> +	BPF_SK_STORAGE_GET_F_CREATE  = BPF_LOCAL_STORAGE_GET_F_CREATE,
>  };
>  
>  /* BPF_FUNC_read_branch_records flags. */
> diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
> index 99dde7b74767..bb2375769ca1 100644
> --- a/net/core/bpf_sk_storage.c
> +++ b/net/core/bpf_sk_storage.c
> @@ -84,7 +84,7 @@ struct bpf_local_storage_elem {
>  struct bpf_local_storage {
>  	struct bpf_local_storage_data __rcu *cache[BPF_LOCAL_STORAGE_CACHE_SIZE];
>  	struct hlist_head list; /* List of bpf_local_storage_elem */
> -	struct sock *owner;	/* The object that owns the the above "list" of
> +	void *owner;		/* The object that owns the the above "list" of
>  				 * bpf_local_storage_elem.
>  				 */
>  	struct rcu_head rcu;
> @@ -110,6 +110,33 @@ static int omem_charge(struct sock *sk, unsigned int size)
>  	return -ENOMEM;
>  }
>  
> +static int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size)
> +{
> +	struct bpf_map *map = &smap->map;
> +
> +	if (!map->ops->map_local_storage_charge)
> +		return 0;
> +
> +	return map->ops->map_local_storage_charge(smap, owner, size);
> +}
> +
> +static void mem_uncharge(struct bpf_local_storage_map *smap, void *owner,
> +			 u32 size)
> +{
> +	struct bpf_map *map = &smap->map;
> +
> +	if (map->ops->map_local_storage_uncharge)
> +		map->ops->map_local_storage_uncharge(smap, owner, size);
> +}
> +
> +static struct bpf_local_storage __rcu **
> +owner_storage(struct bpf_local_storage_map *smap, void *owner)
> +{
> +	struct bpf_map *map = &smap->map;
> +
> +	return map->ops->map_owner_storage_ptr(owner);
> +}
> +
>  static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem)
>  {
>  	return !hlist_unhashed(&selem->snode);
> @@ -120,13 +147,13 @@ static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
>  	return !hlist_unhashed(&selem->map_node);
>  }
>  
> -static struct bpf_local_storage_elem *
> -bpf_selem_alloc(struct bpf_local_storage_map *smap, struct sock *sk,
> -		void *value, bool charge_omem)
> +struct bpf_local_storage_elem *
> +bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
> +		void *value, bool charge_mem)
>  {
>  	struct bpf_local_storage_elem *selem;
>  
> -	if (charge_omem && omem_charge(sk, smap->elem_size))
> +	if (charge_mem && mem_charge(smap, owner, smap->elem_size))
>  		return NULL;
>  
>  	selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN);
> @@ -136,40 +163,42 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, struct sock *sk,
>  		return selem;
>  	}
>  
> -	if (charge_omem)
> -		atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
> +	if (charge_mem)
> +		mem_uncharge(smap, owner, smap->elem_size);
>  
>  	return NULL;
>  }
>  
> -/* sk_storage->lock must be held and selem->sk_storage == sk_storage.
> +/* local_storage->lock must be held and selem->sk_storage == sk_storage.
This name change belongs to patch 1.

Also,
selem->"local_"storage == "local_"storage

>   * The caller must ensure selem->smap is still valid to be
>   * dereferenced for its smap->elem_size and smap->cache_idx.
>   */

[ ... ]

> @@ -367,7 +401,7 @@ static int sk_storage_alloc(struct sock *sk,
>  		/* Note that even first_selem was linked to smap's
>  		 * bucket->list, first_selem can be freed immediately
>  		 * (instead of kfree_rcu) because
> -		 * bpf_sk_storage_map_free() does a
> +		 * bpf_local_storage_map_free() does a
This name change belongs to patch 1 also.

>  		 * synchronize_rcu() before walking the bucket->list.
>  		 * Hence, no one is accessing selem from the
>  		 * bucket->list under rcu_read_lock().
> @@ -377,8 +411,8 @@ static int sk_storage_alloc(struct sock *sk,
>  	return 0;
>  
>  uncharge:
> -	kfree(sk_storage);
> -	atomic_sub(sizeof(*sk_storage), &sk->sk_omem_alloc);
> +	kfree(storage);
> +	mem_uncharge(smap, owner, sizeof(*storage));
>  	return err;
>  }
>  
> @@ -387,9 +421,9 @@ static int sk_storage_alloc(struct sock *sk,
>   * Otherwise, it will become a leak (and other memory issues
>   * during map destruction).
>   */
> -static struct bpf_local_storage_data *
> -bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
> -			 u64 map_flags)
> +struct bpf_local_storage_data *
> +bpf_local_storage_update(void *owner, struct bpf_map *map,
> +			 void *value, u64 map_flags)
>  {
>  	struct bpf_local_storage_data *old_sdata = NULL;
>  	struct bpf_local_storage_elem *selem;
> @@ -404,21 +438,21 @@ bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
>  		return ERR_PTR(-EINVAL);
>  
>  	smap = (struct bpf_local_storage_map *)map;
> -	local_storage = rcu_dereference(sk->sk_bpf_storage);
> +	local_storage = rcu_dereference(*owner_storage(smap, owner));
>  	if (!local_storage || hlist_empty(&local_storage->list)) {
> -		/* Very first elem for this object */
> +		/* Very first elem for the owner */
>  		err = check_flags(NULL, map_flags);
>  		if (err)
>  			return ERR_PTR(err);
>  
> -		selem = bpf_selem_alloc(smap, sk, value, true);
> +		selem = bpf_selem_alloc(smap, owner, value, true);
>  		if (!selem)
>  			return ERR_PTR(-ENOMEM);
>  
> -		err = sk_storage_alloc(sk, smap, selem);
> +		err = bpf_local_storage_alloc(owner, smap, selem);
>  		if (err) {
>  			kfree(selem);
> -			atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
> +			mem_uncharge(smap, owner, smap->elem_size);
>  			return ERR_PTR(err);
>  		}
>  
> @@ -430,8 +464,8 @@ bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
>  		 * such that it can avoid taking the local_storage->lock
>  		 * and changing the lists.
>  		 */
> -		old_sdata =
> -			bpf_local_storage_lookup(local_storage, smap, false);
> +		old_sdata = bpf_local_storage_lookup(local_storage, smap,
> +						     false);
Pure indentation change.  The same line has been changed in patch 1.  Please change
the identation in patch 1 if the above way is preferred.

>  		err = check_flags(old_sdata, map_flags);
>  		if (err)
>  			return ERR_PTR(err);
> @@ -475,7 +509,7 @@ bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
>  	 * old_sdata will not be uncharged later during
>  	 * bpf_selem_unlink_storage().
>  	 */
> -	selem = bpf_selem_alloc(smap, sk, value, !old_sdata);
> +	selem = bpf_selem_alloc(smap, owner, value, !old_sdata);
>  	if (!selem) {
>  		err = -ENOMEM;
>  		goto unlock_err;
> @@ -567,7 +601,7 @@ void bpf_sk_storage_free(struct sock *sk)
>  	 * Thus, no elem can be added-to or deleted-from the
>  	 * sk_storage->list by the bpf_prog or by the bpf-map's syscall.
>  	 *
> -	 * It is racing with bpf_sk_storage_map_free() alone
> +	 * It is racing with bpf_local_storage_map_free() alone
This name change belongs to patch 1 also.

>  	 * when unlinking elem from the sk_storage->list and
>  	 * the map's bucket->list.
>  	 */
> @@ -587,17 +621,12 @@ void bpf_sk_storage_free(struct sock *sk)
>  		kfree_rcu(sk_storage, rcu);
>  }
>  
> -static void bpf_local_storage_map_free(struct bpf_map *map)
> +void bpf_local_storage_map_free(struct bpf_local_storage_map *smap)
>  {
>  	struct bpf_local_storage_elem *selem;
> -	struct bpf_local_storage_map *smap;
>  	struct bpf_local_storage_map_bucket *b;
>  	unsigned int i;
>  
> -	smap = (struct bpf_local_storage_map *)map;
> -
> -	bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
> -
>  	/* Note that this map might be concurrently cloned from
>  	 * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
>  	 * RCU read section to finish before proceeding. New RCU
> @@ -607,11 +636,12 @@ static void bpf_local_storage_map_free(struct bpf_map *map)
>  
>  	/* bpf prog and the userspace can no longer access this map
>  	 * now.  No new selem (of this map) can be added
> -	 * to the sk->sk_bpf_storage or to the map bucket's list.
> +	 * to the bpf_local_storage or to the map bucket's list.
s/bpf_local_storage/owner->storage/

>  	 *
>  	 * The elem of this map can be cleaned up here
>  	 * or
> -	 * by bpf_sk_storage_free() during __sk_destruct().
> +	 * by bpf_local_storage_free() during the destruction of the
> +	 * owner object. eg. __sk_destruct.
This belongs to patch 1 also.

>  	 */
>  	for (i = 0; i < (1U << smap->bucket_log); i++) {
>  		b = &smap->buckets[i];
> @@ -627,22 +657,31 @@ static void bpf_local_storage_map_free(struct bpf_map *map)
>  		rcu_read_unlock();
>  	}
>  
> -	/* bpf_sk_storage_free() may still need to access the map.
> -	 * e.g. bpf_sk_storage_free() has unlinked selem from the map
> +	/* bpf_local_storage_free() may still need to access the map.
It is confusing.  There is no bpf_local_storage_free().

> +	 * e.g. bpf_local_storage_free() has unlinked selem from the map
>  	 * which then made the above while((selem = ...)) loop
>  	 * exited immediately.
>  	 *
> -	 * However, the bpf_sk_storage_free() still needs to access
> +	 * However, the bpf_local_storage_free() still needs to access
Same here.

>  	 * the smap->elem_size to do the uncharging in
>  	 * bpf_selem_unlink_storage().
>  	 *
>  	 * Hence, wait another rcu grace period for the
> -	 * bpf_sk_storage_free() to finish.
> +	 * bpf_local_storage_free() to finish.
and here.

>  	 */
>  	synchronize_rcu();
>  
>  	kvfree(smap->buckets);
> -	kfree(map);
> +	kfree(smap);
> +}
> +
> +static void sk_storage_map_free(struct bpf_map *map)
> +{
> +	struct bpf_local_storage_map *smap;
> +
> +	smap = (struct bpf_local_storage_map *)map;
> +	bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
> +	bpf_local_storage_map_free(smap);
>  }
>  
>  /* U16_MAX is much more than enough for sk local storage
> @@ -654,7 +693,7 @@ static void bpf_local_storage_map_free(struct bpf_map *map)
>  	       sizeof(struct bpf_local_storage_elem)),			\
>  	      (U16_MAX - sizeof(struct bpf_local_storage_elem)))
>  
> -static int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
> +int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
>  {
>  	if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK ||
>  	    !(attr->map_flags & BPF_F_NO_PREALLOC) ||
> @@ -673,7 +712,7 @@ static int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
>  	return 0;
>  }
>  
> -static struct bpf_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
> +struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
>  {
>  	struct bpf_local_storage_map *smap;
>  	unsigned int i;
> @@ -711,9 +750,21 @@ static struct bpf_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
>  		raw_spin_lock_init(&smap->buckets[i].lock);
>  	}
>  
> -	smap->elem_size = sizeof(struct bpf_local_storage_elem) + attr->value_size;
> -	smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache);
> +	smap->elem_size =
> +		sizeof(struct bpf_local_storage_elem) + attr->value_size;
Same line has changed in patch 1.   Change the indentation in patch 1 also
if the above way is desired.

Others LGTM.

^ permalink raw reply

* Re: [PATCH bpf-next v8 2/7] bpf: Generalize caching for sk_storage.
From: Martin KaFai Lau @ 2020-08-17 23:57 UTC (permalink / raw)
  To: KP Singh
  Cc: linux-kernel, bpf, linux-security-module, Alexei Starovoitov,
	Daniel Borkmann, Paul Turner, Jann Horn, Florent Revest
In-Reply-To: <20200803164655.1924498-3-kpsingh@chromium.org>

On Mon, Aug 03, 2020 at 06:46:50PM +0200, KP Singh wrote:
> From: KP Singh <kpsingh@google.com>
> 
> Provide the a ability to define local storage caches on a per-object
> type basis. The caches and caching indices for different objects should
> not be inter-mixed as suggested in:
Acked-by: Martin KaFai Lau <kafai@fb.com>

^ permalink raw reply

* Re: [PATCH bpf-next v8 1/7] A purely mechanical change to split the renaming from the actual generalization.
From: Martin KaFai Lau @ 2020-08-17 23:56 UTC (permalink / raw)
  To: KP Singh
  Cc: linux-kernel, bpf, linux-security-module, Alexei Starovoitov,
	Daniel Borkmann, Paul Turner, Jann Horn, Florent Revest
In-Reply-To: <20200803164655.1924498-2-kpsingh@chromium.org>

On Mon, Aug 03, 2020 at 06:46:49PM +0200, KP Singh wrote:
> From: KP Singh <kpsingh@google.com>
> 
> Flags/consts:
> 
>   SK_STORAGE_CREATE_FLAG_MASK	BPF_LOCAL_STORAGE_CREATE_FLAG_MASK
>   BPF_SK_STORAGE_CACHE_SIZE	BPF_LOCAL_STORAGE_CACHE_SIZE
>   MAX_VALUE_SIZE		BPF_LOCAL_STORAGE_MAX_VALUE_SIZE
> 
> Structs:
> 
>   bucket			bpf_local_storage_map_bucket
>   bpf_sk_storage_map		bpf_local_storage_map
>   bpf_sk_storage_data		bpf_local_storage_data
>   bpf_sk_storage_elem		bpf_local_storage_elem
>   bpf_sk_storage		bpf_local_storage
> 
> The "sk" member in bpf_local_storage is also updated to "owner"
> in preparation for changing the type to void * in a subsequent patch.
> 
> Functions:
> 
>   selem_linked_to_sk			selem_linked_to_storage
>   selem_alloc				bpf_selem_alloc
>   __selem_unlink_sk			bpf_selem_unlink_storage
>   __selem_link_sk			bpf_selem_link_storage
>   selem_unlink_sk			__bpf_selem_unlink_storage
>   sk_storage_update			bpf_local_storage_update
>   __sk_storage_lookup			bpf_local_storage_lookup
>   bpf_sk_storage_map_free		bpf_local_storage_map_free
>   bpf_sk_storage_map_alloc		bpf_local_storage_map_alloc
>   bpf_sk_storage_map_alloc_check	bpf_local_storage_map_alloc_check
>   bpf_sk_storage_map_check_btf		bpf_local_storage_map_check_btf
> 

[ ... ]

> @@ -147,85 +148,86 @@ static struct bpf_sk_storage_elem *selem_alloc(struct bpf_sk_storage_map *smap,
>   * The caller must ensure selem->smap is still valid to be
>   * dereferenced for its smap->elem_size and smap->cache_idx.
>   */
> -static bool __selem_unlink_sk(struct bpf_sk_storage *sk_storage,
> -			      struct bpf_sk_storage_elem *selem,
> -			      bool uncharge_omem)
> +static bool bpf_selem_unlink_storage(struct bpf_local_storage *local_storage,
> +				     struct bpf_local_storage_elem *selem,
> +				     bool uncharge_omem)
Please add a "_nolock()" suffix, just to be clear that the unlink_map()
counter part is locked.  It could be a follow up later.

>  {
> -	struct bpf_sk_storage_map *smap;
> -	bool free_sk_storage;
> +	struct bpf_local_storage_map *smap;
> +	bool free_local_storage;
>  	struct sock *sk;
>  
>  	smap = rcu_dereference(SDATA(selem)->smap);
> -	sk = sk_storage->sk;
> +	sk = local_storage->owner;
>  
>  	/* All uncharging on sk->sk_omem_alloc must be done first.
> -	 * sk may be freed once the last selem is unlinked from sk_storage.
> +	 * sk may be freed once the last selem is unlinked from local_storage.
>  	 */
>  	if (uncharge_omem)
>  		atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
>  
> -	free_sk_storage = hlist_is_singular_node(&selem->snode,
> -						 &sk_storage->list);
> -	if (free_sk_storage) {
> -		atomic_sub(sizeof(struct bpf_sk_storage), &sk->sk_omem_alloc);
> -		sk_storage->sk = NULL;
> +	free_local_storage = hlist_is_singular_node(&selem->snode,
> +						    &local_storage->list);
> +	if (free_local_storage) {
> +		atomic_sub(sizeof(struct bpf_local_storage), &sk->sk_omem_alloc);
> +		local_storage->owner = NULL;
>  		/* After this RCU_INIT, sk may be freed and cannot be used */
>  		RCU_INIT_POINTER(sk->sk_bpf_storage, NULL);
>  
> -		/* sk_storage is not freed now.  sk_storage->lock is
> -		 * still held and raw_spin_unlock_bh(&sk_storage->lock)
> +		/* local_storage is not freed now.  local_storage->lock is
> +		 * still held and raw_spin_unlock_bh(&local_storage->lock)
>  		 * will be done by the caller.
>  		 *
>  		 * Although the unlock will be done under
>  		 * rcu_read_lock(),  it is more intutivie to
> -		 * read if kfree_rcu(sk_storage, rcu) is done
> -		 * after the raw_spin_unlock_bh(&sk_storage->lock).
> +		 * read if kfree_rcu(local_storage, rcu) is done
> +		 * after the raw_spin_unlock_bh(&local_storage->lock).
>  		 *
> -		 * Hence, a "bool free_sk_storage" is returned
> +		 * Hence, a "bool free_local_storage" is returned
>  		 * to the caller which then calls the kfree_rcu()
>  		 * after unlock.
>  		 */
>  	}
>  	hlist_del_init_rcu(&selem->snode);
> -	if (rcu_access_pointer(sk_storage->cache[smap->cache_idx]) ==
> +	if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) ==
>  	    SDATA(selem))
> -		RCU_INIT_POINTER(sk_storage->cache[smap->cache_idx], NULL);
> +		RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
>  
>  	kfree_rcu(selem, rcu);
>  
> -	return free_sk_storage;
> +	return free_local_storage;
>  }
>  
> -static void selem_unlink_sk(struct bpf_sk_storage_elem *selem)
> +static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
>  {
> -	struct bpf_sk_storage *sk_storage;
> -	bool free_sk_storage = false;
> +	struct bpf_local_storage *local_storage;
> +	bool free_local_storage = false;
>  
> -	if (unlikely(!selem_linked_to_sk(selem)))
> +	if (unlikely(!selem_linked_to_storage(selem)))
>  		/* selem has already been unlinked from sk */
>  		return;
>  
> -	sk_storage = rcu_dereference(selem->sk_storage);
> -	raw_spin_lock_bh(&sk_storage->lock);
> -	if (likely(selem_linked_to_sk(selem)))
> -		free_sk_storage = __selem_unlink_sk(sk_storage, selem, true);
> -	raw_spin_unlock_bh(&sk_storage->lock);
> +	local_storage = rcu_dereference(selem->local_storage);
> +	raw_spin_lock_bh(&local_storage->lock);
> +	if (likely(selem_linked_to_storage(selem)))
> +		free_local_storage =
> +			bpf_selem_unlink_storage(local_storage, selem, true);
> +	raw_spin_unlock_bh(&local_storage->lock);
>  
> -	if (free_sk_storage)
> -		kfree_rcu(sk_storage, rcu);
> +	if (free_local_storage)
> +		kfree_rcu(local_storage, rcu);
>  }
>  
> -static void __selem_link_sk(struct bpf_sk_storage *sk_storage,
> -			    struct bpf_sk_storage_elem *selem)
> +static void bpf_selem_link_storage(struct bpf_local_storage *local_storage,
> +				   struct bpf_local_storage_elem *selem)
Same here. bpf_selem_link_storage"_nolock"().

Please tag the Subject line with "bpf:".

Acked-by: Martin KaFai Lau <kafai@fb.com>

^ permalink raw reply

* Re: [PATCH 2/3] IMA: add policy to support measuring critical data from kernel components
From: Tushar Sugandhi @ 2020-08-17 23:45 UTC (permalink / raw)
  To: Mimi Zohar, stephen.smalley.work, casey, gmazyland
  Cc: tyhicks, sashal, jmorris, linux-integrity, selinux,
	linux-security-module, linux-kernel, dm-devel, nramas
In-Reply-To: <97d25609b6a87f104cc88a2ff8ae52d3f2e4e387.camel@linux.ibm.com>



On 2020-08-17 4:43 p.m., Mimi Zohar wrote:
> On Mon, 2020-08-17 at 15:27 -0700, Tushar Sugandhi wrote:
> 
>>> scripts/Lindent isn't as prevalent as it used to be, but it's still
>>> included in Documentation/process/coding-style.rst.  Use it as a guide.
>> Thanks for the pointer. We'll use scripts/Lindent going forward
> 
> Please don't change existing code to conform to it.  Use it as a
> guide/suggestion for new code.
> 
> Mimi
> 
> 
Will do.
Again, appreciate your feedback.

^ permalink raw reply

* Re: [PATCH 2/3] IMA: add policy to support measuring critical data from kernel components
From: Mimi Zohar @ 2020-08-17 23:43 UTC (permalink / raw)
  To: Tushar Sugandhi, stephen.smalley.work, casey, gmazyland
  Cc: tyhicks, sashal, jmorris, linux-integrity, selinux,
	linux-security-module, linux-kernel, dm-devel, nramas
In-Reply-To: <5275268e-2ce8-0129-b11d-8419ac384262@linux.microsoft.com>

On Mon, 2020-08-17 at 15:27 -0700, Tushar Sugandhi wrote:

> > scripts/Lindent isn't as prevalent as it used to be, but it's still
> > included in Documentation/process/coding-style.rst.  Use it as a guide.
> Thanks for the pointer. We'll use scripts/Lindent going forward

Please don't change existing code to conform to it.  Use it as a
guide/suggestion for new code.

Mimi




^ permalink raw reply

* Re: [PATCH 2/2] SELinux: Measure state and hash of policy using IMA
From: Lakshmi Ramasubramanian @ 2020-08-17 23:20 UTC (permalink / raw)
  To: Mimi Zohar, Casey Schaufler, Stephen Smalley
  Cc: Tyler Hicks, tusharsu, sashal, James Morris, linux-integrity,
	SElinux list, LSM List, linux-kernel, paul Moore
In-Reply-To: <082a4311cd9211475df4c694f310f652d51e5d64.camel@linux.ibm.com>

On 8/17/20 4:11 PM, Mimi Zohar wrote:
> On Mon, 2020-08-17 at 15:33 -0700, Lakshmi Ramasubramanian wrote:
>> On 8/17/20 3:00 PM, Casey Schaufler wrote:
>>> On 8/17/2020 2:31 PM, Mimi Zohar wrote:
>>>> On Thu, 2020-08-13 at 14:13 -0400, Stephen Smalley wrote:
>>>>> On Thu, Aug 13, 2020 at 2:03 PM Lakshmi Ramasubramanian
>>>>> <nramas@linux.microsoft.com> wrote:
>>>>>> On 8/13/20 10:58 AM, Stephen Smalley wrote:
>>>>>>> On Thu, Aug 13, 2020 at 1:52 PM Lakshmi Ramasubramanian
>>>>>>> <nramas@linux.microsoft.com> wrote:
>>>>>>>> On 8/13/20 10:42 AM, Stephen Smalley wrote:
>>>>>>>>
>>>>>>>>>> diff --git a/security/selinux/measure.c b/security/selinux/measure.c
>>>>>>>>>> new file mode 100644
>>>>>>>>>> index 000000000000..f21b7de4e2ae
>>>>>>>>>> --- /dev/null
>>>>>>>>>> +++ b/security/selinux/measure.c
>>>>>>>>>> @@ -0,0 +1,204 @@
>>>>>>>>>> +static int selinux_hash_buffer(void *buf, size_t buf_len,
>>>>>>>>>> +                   void **buf_hash, int *buf_hash_len)
>>>>>>>>>> +{
>>>>>>>>>> +    struct crypto_shash *tfm;
>>>>>>>>>> +    struct shash_desc *desc = NULL;
>>>>>>>>>> +    void *digest = NULL;
>>>>>>>>>> +    int desc_size;
>>>>>>>>>> +    int digest_size;
>>>>>>>>>> +    int ret = 0;
>>>>>>>>>> +
>>>>>>>>>> +    tfm = crypto_alloc_shash("sha256", 0, 0);
>>>>>>>>>> +    if (IS_ERR(tfm))
>>>>>>>>>> +        return PTR_ERR(tfm);
>>>>>>>>> Can we make the algorithm selectable via kernel parameter and/or writing
>>>>>>>>> to a new selinuxfs node?
>>>>>>>> I can add a kernel parameter to select this hash algorithm.
>>>>>>> Also can we provide a Kconfig option for the default value like IMA does?
>>>>>>>
>>>>>> Would we need both - Kconfig and kernel param?
>>>>>>
>>>>>> The other option is to provide an IMA function to return the current
>>>>>> hash algorithm used for measurement. That way a consistent hash
>>>>>> algorithm can be employed by both IMA and the callers. Would that be better?
>>>>> This is why I preferred just passing the serialized policy buffer to
>>>>> IMA and letting it handle the hashing.  But apparently that approach
>>>>> wouldn't fly.  IMA appears to support both a Kconfig option for
>>>>> selecting a default algorithm and a kernel parameter for overriding
>>>>> it.  I assume the idea is that the distros can pick a reasonable
>>>>> default and then the end users can override that if they have specific
>>>>> requirements.  I'd want the same for SELinux.  If IMA is willing to
>>>>> export its hash algorithm to external components, then I'm willing to
>>>>> reuse that but not sure if that's a layering violation.
>>>> With the new ima_measure_critical_data() hook, I agree with you and
>>>> Casey it doesn't make sense for each caller to have to write their own
>>>> function.  Casey suggested exporting IMA's hash function or defining a
>>>> new common hash function.   There's nothing specific to IMA.
>>>
>>> Except that no one is going to use the function unless they're
>>> doing an IMA operation.
>>
>> Can we do the following instead:
>>
>> In ima_measure_critical_data() IMA hook, we can add another param for
>> the caller to indicate whether
>>
>>    => The contents of "buf" needs to be measured
>>       OR
>>    => Hash of the contents of "buf" needs to be measured.
>>
>> This way IMA doesn't need to export any new function to meet the hashing
>> requirement.
> 
> I'm not sure overloading the parameters is a good idea, but extending
> ima_measure_critical_data() to calculate a simple buffer hash should be
> fine.
> 

Sorry I wasn't clear - I didn't mean to say overload existing 
parameters, but extending the IMA hook to calculate the hash of the 
buffer - like the following:

int ima_measure_critical_data(const char *event_name,
                               const char *event_data_source,
                               const void *buf, int buf_len,
                               bool measure_buf_hash);

If measure_buf_hash is true, IMA will calculate the hash of contents of 
"buf" and measure the hash.
Else, IMA will measure the contents of "buf".

  -lakshmi



^ permalink raw reply

* Re: [PATCH 2/2] SELinux: Measure state and hash of policy using IMA
From: Mimi Zohar @ 2020-08-17 23:11 UTC (permalink / raw)
  To: Lakshmi Ramasubramanian, Casey Schaufler, Stephen Smalley
  Cc: Tyler Hicks, tusharsu, sashal, James Morris, linux-integrity,
	SElinux list, LSM List, linux-kernel, paul Moore
In-Reply-To: <089ca24d-863b-ca84-4859-d2d6e4f09b4c@linux.microsoft.com>

On Mon, 2020-08-17 at 15:33 -0700, Lakshmi Ramasubramanian wrote:
> On 8/17/20 3:00 PM, Casey Schaufler wrote:
> > On 8/17/2020 2:31 PM, Mimi Zohar wrote:
> > > On Thu, 2020-08-13 at 14:13 -0400, Stephen Smalley wrote:
> > > > On Thu, Aug 13, 2020 at 2:03 PM Lakshmi Ramasubramanian
> > > > <nramas@linux.microsoft.com> wrote:
> > > > > On 8/13/20 10:58 AM, Stephen Smalley wrote:
> > > > > > On Thu, Aug 13, 2020 at 1:52 PM Lakshmi Ramasubramanian
> > > > > > <nramas@linux.microsoft.com> wrote:
> > > > > > > On 8/13/20 10:42 AM, Stephen Smalley wrote:
> > > > > > > 
> > > > > > > > > diff --git a/security/selinux/measure.c b/security/selinux/measure.c
> > > > > > > > > new file mode 100644
> > > > > > > > > index 000000000000..f21b7de4e2ae
> > > > > > > > > --- /dev/null
> > > > > > > > > +++ b/security/selinux/measure.c
> > > > > > > > > @@ -0,0 +1,204 @@
> > > > > > > > > +static int selinux_hash_buffer(void *buf, size_t buf_len,
> > > > > > > > > +                   void **buf_hash, int *buf_hash_len)
> > > > > > > > > +{
> > > > > > > > > +    struct crypto_shash *tfm;
> > > > > > > > > +    struct shash_desc *desc = NULL;
> > > > > > > > > +    void *digest = NULL;
> > > > > > > > > +    int desc_size;
> > > > > > > > > +    int digest_size;
> > > > > > > > > +    int ret = 0;
> > > > > > > > > +
> > > > > > > > > +    tfm = crypto_alloc_shash("sha256", 0, 0);
> > > > > > > > > +    if (IS_ERR(tfm))
> > > > > > > > > +        return PTR_ERR(tfm);
> > > > > > > > Can we make the algorithm selectable via kernel parameter and/or writing
> > > > > > > > to a new selinuxfs node?
> > > > > > > I can add a kernel parameter to select this hash algorithm.
> > > > > > Also can we provide a Kconfig option for the default value like IMA does?
> > > > > > 
> > > > > Would we need both - Kconfig and kernel param?
> > > > > 
> > > > > The other option is to provide an IMA function to return the current
> > > > > hash algorithm used for measurement. That way a consistent hash
> > > > > algorithm can be employed by both IMA and the callers. Would that be better?
> > > > This is why I preferred just passing the serialized policy buffer to
> > > > IMA and letting it handle the hashing.  But apparently that approach
> > > > wouldn't fly.  IMA appears to support both a Kconfig option for
> > > > selecting a default algorithm and a kernel parameter for overriding
> > > > it.  I assume the idea is that the distros can pick a reasonable
> > > > default and then the end users can override that if they have specific
> > > > requirements.  I'd want the same for SELinux.  If IMA is willing to
> > > > export its hash algorithm to external components, then I'm willing to
> > > > reuse that but not sure if that's a layering violation.
> > > With the new ima_measure_critical_data() hook, I agree with you and
> > > Casey it doesn't make sense for each caller to have to write their own
> > > function.  Casey suggested exporting IMA's hash function or defining a
> > > new common hash function.   There's nothing specific to IMA.
> > 
> > Except that no one is going to use the function unless they're
> > doing an IMA operation.
> 
> Can we do the following instead:
> 
> In ima_measure_critical_data() IMA hook, we can add another param for 
> the caller to indicate whether
> 
>   => The contents of "buf" needs to be measured
>      OR
>   => Hash of the contents of "buf" needs to be measured.
> 
> This way IMA doesn't need to export any new function to meet the hashing 
> requirement.

I'm not sure overloading the parameters is a good idea, but extending
ima_measure_critical_data() to calculate a simple buffer hash should be
fine.

Mimi


^ permalink raw reply

* Re: file metadata via fs API
From: Linus Torvalds @ 2020-08-17 22:44 UTC (permalink / raw)
  To: Steven Whitehouse
  Cc: David Howells, Miklos Szeredi, linux-fsdevel, Al Viro, Karel Zak,
	Jeff Layton, Miklos Szeredi, Nicolas Dichtel, Christian Brauner,
	Lennart Poettering, Linux API, Ian Kent, LSM,
	Linux Kernel Mailing List
In-Reply-To: <CAHk-=wig0ZqWxgWtD9F1xZzE7jEmgLmXRWABhss0+er3ZRtb9g@mail.gmail.com>

On Mon, Aug 17, 2020 at 10:15 AM Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> So it has this very complex "random structures of random things"
> implementation. It's a huge sign of over-design and "I don't know what
> the hell I want to expose, so I'll make this generic thing that can
> expose anything, and then I start adding random fields".

You can see the overdesign in other places too: that "time
granularity" is some very odd stuff. It doesn't actually even match
the kernel granularity rules, so that fsinfo interface is basically
exporting random crap that doesn't match reality.

In the kernel, we give the granularity in nsec, but for some reason
that fsinfo stuff gives it in some hand-written pseudo-floating-point
format. Why? Don't ask me.

And do we really want to have that whole odd Nth/Mth thing?
Considering that it cannot be consistent or atomic, and the complaint
against the /proc interfaces have been about that part, it really
smells completely bogus.

So please. Can we just make a simple extended statfs() and be done
with it, instead of this hugely complex thing that does five different
things with the same interface and makes it really odd as a result?

                  Linus
So honestly,  there's a

^ permalink raw reply

* Re: [PATCH 2/2] SELinux: Measure state and hash of policy using IMA
From: Lakshmi Ramasubramanian @ 2020-08-17 22:33 UTC (permalink / raw)
  To: Casey Schaufler, Mimi Zohar, Stephen Smalley
  Cc: Tyler Hicks, tusharsu, sashal, James Morris, linux-integrity,
	SElinux list, LSM List, linux-kernel, paul Moore
In-Reply-To: <57f972a7-26f1-3ac7-4001-54c0bc7e12a8@schaufler-ca.com>

On 8/17/20 3:00 PM, Casey Schaufler wrote:
> On 8/17/2020 2:31 PM, Mimi Zohar wrote:
>> On Thu, 2020-08-13 at 14:13 -0400, Stephen Smalley wrote:
>>> On Thu, Aug 13, 2020 at 2:03 PM Lakshmi Ramasubramanian
>>> <nramas@linux.microsoft.com> wrote:
>>>> On 8/13/20 10:58 AM, Stephen Smalley wrote:
>>>>> On Thu, Aug 13, 2020 at 1:52 PM Lakshmi Ramasubramanian
>>>>> <nramas@linux.microsoft.com> wrote:
>>>>>> On 8/13/20 10:42 AM, Stephen Smalley wrote:
>>>>>>
>>>>>>>> diff --git a/security/selinux/measure.c b/security/selinux/measure.c
>>>>>>>> new file mode 100644
>>>>>>>> index 000000000000..f21b7de4e2ae
>>>>>>>> --- /dev/null
>>>>>>>> +++ b/security/selinux/measure.c
>>>>>>>> @@ -0,0 +1,204 @@
>>>>>>>> +static int selinux_hash_buffer(void *buf, size_t buf_len,
>>>>>>>> +                   void **buf_hash, int *buf_hash_len)
>>>>>>>> +{
>>>>>>>> +    struct crypto_shash *tfm;
>>>>>>>> +    struct shash_desc *desc = NULL;
>>>>>>>> +    void *digest = NULL;
>>>>>>>> +    int desc_size;
>>>>>>>> +    int digest_size;
>>>>>>>> +    int ret = 0;
>>>>>>>> +
>>>>>>>> +    tfm = crypto_alloc_shash("sha256", 0, 0);
>>>>>>>> +    if (IS_ERR(tfm))
>>>>>>>> +        return PTR_ERR(tfm);
>>>>>>> Can we make the algorithm selectable via kernel parameter and/or writing
>>>>>>> to a new selinuxfs node?
>>>>>> I can add a kernel parameter to select this hash algorithm.
>>>>> Also can we provide a Kconfig option for the default value like IMA does?
>>>>>
>>>> Would we need both - Kconfig and kernel param?
>>>>
>>>> The other option is to provide an IMA function to return the current
>>>> hash algorithm used for measurement. That way a consistent hash
>>>> algorithm can be employed by both IMA and the callers. Would that be better?
>>> This is why I preferred just passing the serialized policy buffer to
>>> IMA and letting it handle the hashing.  But apparently that approach
>>> wouldn't fly.  IMA appears to support both a Kconfig option for
>>> selecting a default algorithm and a kernel parameter for overriding
>>> it.  I assume the idea is that the distros can pick a reasonable
>>> default and then the end users can override that if they have specific
>>> requirements.  I'd want the same for SELinux.  If IMA is willing to
>>> export its hash algorithm to external components, then I'm willing to
>>> reuse that but not sure if that's a layering violation.
>> With the new ima_measure_critical_data() hook, I agree with you and
>> Casey it doesn't make sense for each caller to have to write their own
>> function.  Casey suggested exporting IMA's hash function or defining a
>> new common hash function.   There's nothing specific to IMA.
> 
> Except that no one is going to use the function unless they're
> doing an IMA operation.

Can we do the following instead:

In ima_measure_critical_data() IMA hook, we can add another param for 
the caller to indicate whether

  => The contents of "buf" needs to be measured
     OR
  => Hash of the contents of "buf" needs to be measured.

This way IMA doesn't need to export any new function to meet the hashing 
requirement.

  -lakshmi

> 
>>    Should
>> the common hash function be prefixed with "security_"?
> 
> Yuck. That prefix is for interfaces that are exported outside the
> security sub-system. We're talking about a function that is provided
> for use within the security sub-system, but not for any one particular
> security module or non-module feature. We're currently using the lsm_
> prefix for interfaces used within the security subsystem, so I suggest
> lsm_hash_brown_potatoes() might be the way to go.


> 
>>
>> Like when we add a new security hook call, the new LSM call is separate
>> from any other change.   Please break up this patch with the SELinux
>> specific pieces separated from the ima_measure_critical_data() call as
>> much as possible.
>>


^ permalink raw reply

* Re: [PATCH 2/3] IMA: add policy to support measuring critical data from kernel components
From: Tushar Sugandhi @ 2020-08-17 22:27 UTC (permalink / raw)
  To: Mimi Zohar, stephen.smalley.work, casey, gmazyland
  Cc: tyhicks, sashal, jmorris, linux-integrity, selinux,
	linux-security-module, linux-kernel, dm-devel, nramas
In-Reply-To: <591b5f09c7df8ef0378866eaf3afde7a7cb4e82f.camel@linux.ibm.com>



On 2020-08-17 1:43 p.m., Mimi Zohar wrote:
> On Wed, 2020-08-12 at 12:31 -0700, Tushar Sugandhi wrote:
>> There would be several candidate kernel components suitable for IMA
>> measurement. Not all of them would be enlightened for IMA measurement.
>> Also, system administrators may not want to measure data for all of
>> them, even when they are enlightened for IMA measurements. An IMA policy
>> specific to various kernel components is needed to measure their
>> respective critical data.
>>
>> Add a new IMA policy CRITICAL_DATA+data_sources to support measuring
>> various critical kernel components. This policy would enable the
>> system administrators to limit the measurement to the components,
>> if the components are enlightened for IMA measurement.
> 
> "enlightened", really?  Please find a different term, maybe something
> like "supported".
Thanks for the feedback Mimi. Will do.
> 
> Before posting a patch set, please look at the patches line by line,
> like anyone reviewing the code needs to do.  Please minimize code
> change.   Unnecessary formatting changes are unacceptible.   For
> example, like the "#define", below, or in 3/3 the
Thanks for the feedback Mimi.
We extensively reviewed the patches internally before submitting for
upstream review.
We believed adding an extra tab was necessary so that all the previous
values were aligned with the new one - #define IMA_DATA_SOURCES.
We can certainly revert back to only IMA_DATA_SOURCES to have an extra
tab.
> "process_buffer_measurement()" change from void to int.
> 
This was also intentional, and was reviewed internally.
The feedback was we should let the consumers of
process_buffer_measurement() decide whether to use the return
value or not. With void, we are not giving them any choice.

> scripts/Lindent isn't as prevalent as it used to be, but it's still
> included in Documentation/process/coding-style.rst.  Use it as a guide.
Thanks for the pointer. We'll use scripts/Lindent going forward.
> 
> Mimi
> 

^ permalink raw reply

* Re: [PATCH 2/2] SELinux: Measure state and hash of policy using IMA
From: Casey Schaufler @ 2020-08-17 22:00 UTC (permalink / raw)
  To: Mimi Zohar, Stephen Smalley, Lakshmi Ramasubramanian
  Cc: Tyler Hicks, tusharsu, sashal, James Morris, linux-integrity,
	SElinux list, LSM List, linux-kernel, paul Moore, Casey Schaufler
In-Reply-To: <3679df359c35561f5bf6608911f96cc0292c7854.camel@linux.ibm.com>

On 8/17/2020 2:31 PM, Mimi Zohar wrote:
> On Thu, 2020-08-13 at 14:13 -0400, Stephen Smalley wrote:
>> On Thu, Aug 13, 2020 at 2:03 PM Lakshmi Ramasubramanian
>> <nramas@linux.microsoft.com> wrote:
>>> On 8/13/20 10:58 AM, Stephen Smalley wrote:
>>>> On Thu, Aug 13, 2020 at 1:52 PM Lakshmi Ramasubramanian
>>>> <nramas@linux.microsoft.com> wrote:
>>>>> On 8/13/20 10:42 AM, Stephen Smalley wrote:
>>>>>
>>>>>>> diff --git a/security/selinux/measure.c b/security/selinux/measure.c
>>>>>>> new file mode 100644
>>>>>>> index 000000000000..f21b7de4e2ae
>>>>>>> --- /dev/null
>>>>>>> +++ b/security/selinux/measure.c
>>>>>>> @@ -0,0 +1,204 @@
>>>>>>> +static int selinux_hash_buffer(void *buf, size_t buf_len,
>>>>>>> +                   void **buf_hash, int *buf_hash_len)
>>>>>>> +{
>>>>>>> +    struct crypto_shash *tfm;
>>>>>>> +    struct shash_desc *desc = NULL;
>>>>>>> +    void *digest = NULL;
>>>>>>> +    int desc_size;
>>>>>>> +    int digest_size;
>>>>>>> +    int ret = 0;
>>>>>>> +
>>>>>>> +    tfm = crypto_alloc_shash("sha256", 0, 0);
>>>>>>> +    if (IS_ERR(tfm))
>>>>>>> +        return PTR_ERR(tfm);
>>>>>> Can we make the algorithm selectable via kernel parameter and/or writing
>>>>>> to a new selinuxfs node?
>>>>> I can add a kernel parameter to select this hash algorithm.
>>>> Also can we provide a Kconfig option for the default value like IMA does?
>>>>
>>> Would we need both - Kconfig and kernel param?
>>>
>>> The other option is to provide an IMA function to return the current
>>> hash algorithm used for measurement. That way a consistent hash
>>> algorithm can be employed by both IMA and the callers. Would that be better?
>> This is why I preferred just passing the serialized policy buffer to
>> IMA and letting it handle the hashing.  But apparently that approach
>> wouldn't fly.  IMA appears to support both a Kconfig option for
>> selecting a default algorithm and a kernel parameter for overriding
>> it.  I assume the idea is that the distros can pick a reasonable
>> default and then the end users can override that if they have specific
>> requirements.  I'd want the same for SELinux.  If IMA is willing to
>> export its hash algorithm to external components, then I'm willing to
>> reuse that but not sure if that's a layering violation.
> With the new ima_measure_critical_data() hook, I agree with you and
> Casey it doesn't make sense for each caller to have to write their own
> function.  Casey suggested exporting IMA's hash function or defining a
> new common hash function.   There's nothing specific to IMA.

Except that no one is going to use the function unless they're
doing an IMA operation. 

>   Should
> the common hash function be prefixed with "security_"?

Yuck. That prefix is for interfaces that are exported outside the
security sub-system. We're talking about a function that is provided
for use within the security sub-system, but not for any one particular
security module or non-module feature. We're currently using the lsm_
prefix for interfaces used within the security subsystem, so I suggest
lsm_hash_brown_potatoes() might be the way to go.

>
> Like when we add a new security hook call, the new LSM call is separate
> from any other change.   Please break up this patch with the SELinux
> specific pieces separated from the ima_measure_critical_data() call as
> much as possible.
>
> thanks,
>
> Mimi
>

^ permalink raw reply

* Re: [PATCH 2/2] SELinux: Measure state and hash of policy using IMA
From: Mimi Zohar @ 2020-08-17 21:31 UTC (permalink / raw)
  To: Stephen Smalley, Lakshmi Ramasubramanian
  Cc: Casey Schaufler, Tyler Hicks, tusharsu, sashal, James Morris,
	linux-integrity, SElinux list, LSM List, linux-kernel, paul Moore
In-Reply-To: <CAEjxPJ7uWee5jjALtQ3azMvKRMk8pxFiYByWmYVhjgJiMNZ8ww@mail.gmail.com>

On Thu, 2020-08-13 at 14:13 -0400, Stephen Smalley wrote:
> On Thu, Aug 13, 2020 at 2:03 PM Lakshmi Ramasubramanian
> <nramas@linux.microsoft.com> wrote:
> > On 8/13/20 10:58 AM, Stephen Smalley wrote:
> > > On Thu, Aug 13, 2020 at 1:52 PM Lakshmi Ramasubramanian
> > > <nramas@linux.microsoft.com> wrote:
> > > > On 8/13/20 10:42 AM, Stephen Smalley wrote:
> > > > 
> > > > > > diff --git a/security/selinux/measure.c b/security/selinux/measure.c
> > > > > > new file mode 100644
> > > > > > index 000000000000..f21b7de4e2ae
> > > > > > --- /dev/null
> > > > > > +++ b/security/selinux/measure.c
> > > > > > @@ -0,0 +1,204 @@
> > > > > > +static int selinux_hash_buffer(void *buf, size_t buf_len,
> > > > > > +                   void **buf_hash, int *buf_hash_len)
> > > > > > +{
> > > > > > +    struct crypto_shash *tfm;
> > > > > > +    struct shash_desc *desc = NULL;
> > > > > > +    void *digest = NULL;
> > > > > > +    int desc_size;
> > > > > > +    int digest_size;
> > > > > > +    int ret = 0;
> > > > > > +
> > > > > > +    tfm = crypto_alloc_shash("sha256", 0, 0);
> > > > > > +    if (IS_ERR(tfm))
> > > > > > +        return PTR_ERR(tfm);
> > > > > Can we make the algorithm selectable via kernel parameter and/or writing
> > > > > to a new selinuxfs node?
> > > > 
> > > > I can add a kernel parameter to select this hash algorithm.
> > > 
> > > Also can we provide a Kconfig option for the default value like IMA does?
> > > 
> > 
> > Would we need both - Kconfig and kernel param?
> > 
> > The other option is to provide an IMA function to return the current
> > hash algorithm used for measurement. That way a consistent hash
> > algorithm can be employed by both IMA and the callers. Would that be better?
> 
> This is why I preferred just passing the serialized policy buffer to
> IMA and letting it handle the hashing.  But apparently that approach
> wouldn't fly.  IMA appears to support both a Kconfig option for
> selecting a default algorithm and a kernel parameter for overriding
> it.  I assume the idea is that the distros can pick a reasonable
> default and then the end users can override that if they have specific
> requirements.  I'd want the same for SELinux.  If IMA is willing to
> export its hash algorithm to external components, then I'm willing to
> reuse that but not sure if that's a layering violation.

With the new ima_measure_critical_data() hook, I agree with you and
Casey it doesn't make sense for each caller to have to write their own
function.  Casey suggested exporting IMA's hash function or defining a
new common hash function.   There's nothing specific to IMA.  Should
the common hash function be prefixed with "security_"?

Like when we add a new security hook call, the new LSM call is separate
from any other change.   Please break up this patch with the SELinux
specific pieces separated from the ima_measure_critical_data() call as
much as possible.

thanks,

Mimi


^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox