Linux Security Modules development

Linux Security Modules development
 help / color / mirror / Atom feed

* [PATCH v2 2/5] IMA: Define an IMA hook to measure LSM data
From: Lakshmi Ramasubramanian @ 2020-07-16 17:43 UTC (permalink / raw)
  To: zohar, stephen.smalley.work, casey
  Cc: jmorris, linux-integrity, selinux, linux-security-module,
	linux-kernel
In-Reply-To: <20200716174351.20128-1-nramas@linux.microsoft.com>

IMA subsystem needs to define an IMA hook that the security modules can
call to measure critical data of the security modules.

Define a new IMA hook, namely ima_lsm_state(), that the security modules
can call to measure data.

Signed-off-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
---
 include/linux/ima.h               |  4 ++++
 security/integrity/ima/ima_main.c | 17 +++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/include/linux/ima.h b/include/linux/ima.h
index 9164e1534ec9..7e2686f4953a 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -26,6 +26,7 @@ extern int ima_post_read_file(struct file *file, void *buf, loff_t size,
 extern void ima_post_path_mknod(struct dentry *dentry);
 extern int ima_file_hash(struct file *file, char *buf, size_t buf_size);
 extern void ima_kexec_cmdline(const void *buf, int size);
+extern void ima_lsm_state(const char *lsm_event_name, const void *buf, int size);
 
 #ifdef CONFIG_IMA_KEXEC
 extern void ima_add_kexec_buffer(struct kimage *image);
@@ -104,6 +105,9 @@ static inline int ima_file_hash(struct file *file, char *buf, size_t buf_size)
 }
 
 static inline void ima_kexec_cmdline(const void *buf, int size) {}
+
+static inline void ima_lsm_state(const char *lsm_event_name,
+				 const void *buf, int size) {}
 #endif /* CONFIG_IMA */
 
 #ifndef CONFIG_IMA_KEXEC
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 8351b2fd48e0..04d9a1d35300 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -835,6 +835,23 @@ void ima_kexec_cmdline(const void *buf, int size)
 					   KEXEC_CMDLINE, 0, NULL);
 }
 
+/**
+ * ima_lsm_state - measure LSM specific state
+ * @lsm_event_name: LSM event
+ * @buf: pointer to buffer containing LSM specific state
+ * @size: Number of bytes in buf
+ *
+ * Buffers can only be measured, not appraised.
+ */
+void ima_lsm_state(const char *lsm_event_name, const void *buf, int size)
+{
+	if (!lsm_event_name || !buf || !size)
+		return;
+
+	process_buffer_measurement(buf, size, lsm_event_name,
+				   LSM_STATE, 0, NULL);
+}
+
 static int __init init_ima(void)
 {
 	int error;
-- 
2.27.0


^ permalink raw reply related

* [PATCH v2 5/5] LSM: Define workqueue for measuring security module state
From: Lakshmi Ramasubramanian @ 2020-07-16 17:43 UTC (permalink / raw)
  To: zohar, stephen.smalley.work, casey
  Cc: jmorris, linux-integrity, selinux, linux-security-module,
	linux-kernel
In-Reply-To: <20200716174351.20128-1-nramas@linux.microsoft.com>

Data structures critical to the functioning of a security module could
be tampered with by malware or changed inadvertently at runtime
thereby disabling or reducing the security guarantees provided by
the security module. Such critical data need to be periodically checked
and measured again if they have changed. This would enable an attestation
service, for instance, to verify that the security modules are operating
with the configuration and policy set up by the system administrator.

Define a workqueue in the LSM and invoke the security modules in
the workqueue handler to check their data and measure.

Note that the data given by the security module would be measured by
the IMA subsystem only if it has changed since the last time it was
measured.

Signed-off-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
---
 security/security.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/security/security.c b/security/security.c
index 1afa2aebc3ac..63b30da337d8 100644
--- a/security/security.c
+++ b/security/security.c
@@ -89,6 +89,11 @@ static __initdata struct lsm_info *exclusive;
 static struct lsm_info *security_state_lsms;
 static int security_state_lsms_count;
 
+static long security_state_timeout = 300000; /* 5 Minutes */
+static void security_state_handler(struct work_struct *work);
+static DECLARE_DELAYED_WORK(security_state_delayed_work,
+			    security_state_handler);
+
 static __initdata bool debug;
 #define init_debug(...)						\
 	do {							\
@@ -277,6 +282,26 @@ static void __init initialize_security_state_lsms(void)
 	security_state_lsms_count = count;
 }
 
+static void initialize_security_state_monitor(void)
+{
+	if (security_state_lsms_count == 0)
+		return;
+
+	schedule_delayed_work(&security_state_delayed_work,
+			      msecs_to_jiffies(security_state_timeout));
+}
+
+static void security_state_handler(struct work_struct *work)
+{
+	int inx;
+
+	for (inx = 0; inx < security_state_lsms_count; inx++)
+		measure_security_state(&(security_state_lsms[inx]));
+
+	schedule_delayed_work(&security_state_delayed_work,
+			      msecs_to_jiffies(security_state_timeout));
+}
+
 /* Populate ordered LSMs list from comma-separated LSM name list. */
 static void __init ordered_lsm_parse(const char *order, const char *origin)
 {
@@ -400,6 +425,7 @@ static void __init ordered_lsm_init(void)
 	}
 
 	initialize_security_state_lsms();
+	initialize_security_state_monitor();
 
 	kfree(ordered_lsms);
 }
-- 
2.27.0


^ permalink raw reply related

* [PATCH v2 3/5] LSM: Add security_measure_data in lsm_info struct
From: Lakshmi Ramasubramanian @ 2020-07-16 17:43 UTC (permalink / raw)
  To: zohar, stephen.smalley.work, casey
  Cc: jmorris, linux-integrity, selinux, linux-security-module,
	linux-kernel
In-Reply-To: <20200716174351.20128-1-nramas@linux.microsoft.com>

The security modules that require their data to be measured using
the IMA subsystem need to define a function that the LSM can call
to trigger the measurement.

Add a function pointer field namely security_measure_data in lsm_info
structure. Update LSM to call this security module function, if defined,
to measure the security module's data using the IMA subsystem.

Signed-off-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
---
 include/linux/lsm_hooks.h |  3 +++
 security/security.c       | 48 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 95b7c1d32062..56f0e524838c 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1591,6 +1591,9 @@ struct lsm_info {
 	int *enabled;		/* Optional: controlled by CONFIG_LSM */
 	int (*init)(void);	/* Required. */
 	struct lsm_blob_sizes *blobs; /* Optional: for blob sharing. */
+	int (*security_measure_data)(void); /* Optional: for measuring
+					     * security module data.
+					     */
 };
 
 extern struct lsm_info __start_lsm_info[], __end_lsm_info[];
diff --git a/security/security.c b/security/security.c
index 70a7ad357bc6..1afa2aebc3ac 100644
--- a/security/security.c
+++ b/security/security.c
@@ -86,6 +86,9 @@ static __initconst const char * const builtin_lsm_order = CONFIG_LSM;
 static __initdata struct lsm_info **ordered_lsms;
 static __initdata struct lsm_info *exclusive;
 
+static struct lsm_info *security_state_lsms;
+static int security_state_lsms_count;
+
 static __initdata bool debug;
 #define init_debug(...)						\
 	do {							\
@@ -235,6 +238,45 @@ static void __init initialize_lsm(struct lsm_info *lsm)
 	}
 }
 
+static int measure_security_state(struct lsm_info *lsm)
+{
+	if (!lsm->security_measure_data)
+		return 0;
+
+	return lsm->security_measure_data();
+}
+
+static void __init initialize_security_state_lsms(void)
+{
+	struct lsm_info **lsm;
+	int count = 0;
+	int inx;
+
+	for (lsm = ordered_lsms; *lsm; lsm++) {
+		if ((*lsm)->security_measure_data)
+			count++;
+	}
+
+	if (count == 0)
+		return;
+
+	security_state_lsms = kcalloc(count, sizeof(struct lsm_info),
+				      GFP_KERNEL);
+	if (!security_state_lsms)
+		return;
+
+	inx = 0;
+	for (lsm = ordered_lsms; *lsm; lsm++) {
+		if ((*lsm)->security_measure_data) {
+			security_state_lsms[inx].security_measure_data =
+				(*lsm)->security_measure_data;
+			inx++;
+		}
+	}
+
+	security_state_lsms_count = count;
+}
+
 /* Populate ordered LSMs list from comma-separated LSM name list. */
 static void __init ordered_lsm_parse(const char *order, const char *origin)
 {
@@ -352,8 +394,12 @@ static void __init ordered_lsm_init(void)
 
 	lsm_early_cred((struct cred *) current->cred);
 	lsm_early_task(current);
-	for (lsm = ordered_lsms; *lsm; lsm++)
+	for (lsm = ordered_lsms; *lsm; lsm++) {
 		initialize_lsm(*lsm);
+		measure_security_state(*lsm);
+	}
+
+	initialize_security_state_lsms();
 
 	kfree(ordered_lsms);
 }
-- 
2.27.0


^ permalink raw reply related

* [PATCH v2 1/5] IMA: Add LSM_STATE func to measure LSM data
From: Lakshmi Ramasubramanian @ 2020-07-16 17:43 UTC (permalink / raw)
  To: zohar, stephen.smalley.work, casey
  Cc: jmorris, linux-integrity, selinux, linux-security-module,
	linux-kernel
In-Reply-To: <20200716174351.20128-1-nramas@linux.microsoft.com>

Critical data structures of security modules need to be measured to
enable an attestation service to verify that the policies and
configuration have been set up correctly and that they haven't been
tampered with at runtime. A new IMA policy is required for handling
this measurement.

Define a new IMA policy func namely LSM_STATE to measure data provided
by security modules. Update ima_match_rules() to check for LSM_STATE
and ima_parse_rule() to handle LSM_STATE.

Signed-off-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
---
 Documentation/ABI/testing/ima_policy |  6 +++++-
 security/integrity/ima/ima.h         |  1 +
 security/integrity/ima/ima_api.c     |  2 +-
 security/integrity/ima/ima_policy.c  | 29 +++++++++++++++++++++++-----
 4 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy
index cd572912c593..355bc3eade33 100644
--- a/Documentation/ABI/testing/ima_policy
+++ b/Documentation/ABI/testing/ima_policy
@@ -29,7 +29,7 @@ Description:
 		base: 	func:= [BPRM_CHECK][MMAP_CHECK][CREDS_CHECK][FILE_CHECK][MODULE_CHECK]
 				[FIRMWARE_CHECK]
 				[KEXEC_KERNEL_CHECK] [KEXEC_INITRAMFS_CHECK]
-				[KEXEC_CMDLINE] [KEY_CHECK]
+				[KEXEC_CMDLINE] [KEY_CHECK] [LSM_STATE]
 			mask:= [[^]MAY_READ] [[^]MAY_WRITE] [[^]MAY_APPEND]
 			       [[^]MAY_EXEC]
 			fsmagic:= hex value
@@ -125,3 +125,7 @@ Description:
 		keys added to .builtin_trusted_keys or .ima keyring:
 
 			measure func=KEY_CHECK keyrings=.builtin_trusted_keys|.ima
+
+		Example of measure rule using LSM_STATE to measure LSM data:
+
+			measure func=LSM_STATE
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 4515975cc540..880fda11a61b 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -200,6 +200,7 @@ static inline unsigned int ima_hash_key(u8 *digest)
 	hook(POLICY_CHECK, policy)			\
 	hook(KEXEC_CMDLINE, kexec_cmdline)		\
 	hook(KEY_CHECK, key)				\
+	hook(LSM_STATE, lsm_state)			\
 	hook(MAX_CHECK, none)
 
 #define __ima_hook_enumify(ENUM, str)	ENUM,
diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index bf22de8b7ce0..0cebd2404dcf 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -176,7 +176,7 @@ void ima_add_violation(struct file *file, const unsigned char *filename,
  *		subj=, obj=, type=, func=, mask=, fsmagic=
  *	subj,obj, and type: are LSM specific.
  *	func: FILE_CHECK | BPRM_CHECK | CREDS_CHECK | MMAP_CHECK | MODULE_CHECK
- *	| KEXEC_CMDLINE | KEY_CHECK
+ *	| KEXEC_CMDLINE | KEY_CHECK | LSM_STATE
  *	mask: contains the permission mask
  *	fsmagic: hex value
  *
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 66aa3e17a888..fc8457d9242b 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -417,15 +417,31 @@ static bool ima_match_rules(struct ima_rule_entry *rule, struct inode *inode,
 			    const char *keyring)
 {
 	int i;
+	int funcmatch = 0;
 
-	if ((func == KEXEC_CMDLINE) || (func == KEY_CHECK)) {
+	switch (func) {
+	case KEXEC_CMDLINE:
+	case KEY_CHECK:
+	case LSM_STATE:
 		if ((rule->flags & IMA_FUNC) && (rule->func == func)) {
 			if (func == KEY_CHECK)
-				return ima_match_keyring(rule, keyring, cred);
-			return true;
-		}
-		return false;
+				funcmatch = ima_match_keyring(rule, keyring,
+							      cred) ? 1 : -1;
+			else
+				funcmatch = 1;
+		} else
+			funcmatch = -1;
+
+		break;
+
+	default:
+		funcmatch = 0;
+		break;
 	}
+
+	if (funcmatch)
+		return (funcmatch == 1) ? true : false;
+
 	if ((rule->flags & IMA_FUNC) &&
 	    (rule->func != func && func != POST_SETATTR))
 		return false;
@@ -1068,6 +1084,9 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
 				entry->func = KEXEC_CMDLINE;
 			else if (strcmp(args[0].from, "KEY_CHECK") == 0)
 				entry->func = KEY_CHECK;
+			else if (strcmp(args[0].from, "LSM_STATE") == 0)
+				entry->func = LSM_STATE;
+
 			else
 				result = -EINVAL;
 			if (!result)
-- 
2.27.0


^ permalink raw reply related

* [PATCH v2 4/5] LSM: Define SELinux function to measure security state
From: Lakshmi Ramasubramanian @ 2020-07-16 17:43 UTC (permalink / raw)
  To: zohar, stephen.smalley.work, casey
  Cc: jmorris, linux-integrity, selinux, linux-security-module,
	linux-kernel
In-Reply-To: <20200716174351.20128-1-nramas@linux.microsoft.com>

SELinux configuration and policy are some of the critical data for this
security module that need to be measured. To enable this measurement
SELinux needs to implement the interface function,
security_measure_data(), that the LSM can call.

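Define the selinux_measure_state() function in SELinux to measure SELinux
configuration and policy, and selinux_measure_data() as the
security_measure_data() implementation that calls it. Call
selinux_measure_state() to measure SELinux data when there is a change
in the security module's state.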

Sample measurement of SELinux state and hash of the policy:

10 e32e...5ac3 ima-buf sha256:86e8...4594 selinux-state 656e61626c65643d313b656e666f7263696e673d303b636865636b72657170726f743d313b6e6574706565723d313b6f70656e7065726d3d313b657874736f636b636c6173733d313b616c776179736e6574776f726b3d303b6367726f75707365636c6162656c3d313b6e6e706e6f737569647472616e736974696f6e3d313b67656e66737365636c6162656c73796d6c696e6b3d303b
10 f4a7...9408 ima-buf sha256:4941...68fc selinux-policy-hash 8d1d...1834

The data for selinux-state in the above measurement is:
enabled=1;enforcing=0;checkreqprot=1;network_peer_controls=1;open_perms=1;extended_socket_class=1;always_check_network=0;cgroup_seclabel=1;nnp_nosuid_transition=1;genfs_seclabel_symlinks=0;

The data for selinux-policy-hash in the above measurement is
the SHA256 hash of the SELinux policy.

Signed-off-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
Suggested-by: Stephen Smalley <stephen.smalley.work@gmail.com>
---
 security/selinux/Makefile           |   2 +
 security/selinux/hooks.c            |   4 +
 security/selinux/include/security.h |  26 +++++
 security/selinux/measure.c          | 158 ++++++++++++++++++++++++++++
 security/selinux/selinuxfs.c        |   1 +
 security/selinux/ss/services.c      |  66 ++++++++++--
 6 files changed, 248 insertions(+), 9 deletions(-)
 create mode 100644 security/selinux/measure.c

diff --git a/security/selinux/Makefile b/security/selinux/Makefile
index 4d8e0e8adf0b..83d512116341 100644
--- a/security/selinux/Makefile
+++ b/security/selinux/Makefile
@@ -16,6 +16,8 @@ selinux-$(CONFIG_NETLABEL) += netlabel.o
 
 selinux-$(CONFIG_SECURITY_INFINIBAND) += ibpkey.o
 
+selinux-$(CONFIG_IMA) += measure.o
+
 ccflags-y := -I$(srctree)/security/selinux -I$(srctree)/security/selinux/include
 
 $(addprefix $(obj)/,$(selinux-y)): $(obj)/flask.h
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index efa6108b1ce9..cda1d328339f 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -7259,6 +7259,8 @@ static __init int selinux_init(void)
 
 	fs_validate_description("selinux", selinux_fs_parameters);
 
+	selinux_init_measurement();
+
 	return 0;
 }
 
@@ -7284,6 +7286,7 @@ DEFINE_LSM(selinux) = {
 	.enabled = &selinux_enabled_boot,
 	.blobs = &selinux_blob_sizes,
 	.init = selinux_init,
+	.security_measure_data = selinux_measure_data,
 };
 
 #if defined(CONFIG_NETFILTER)
@@ -7394,6 +7397,7 @@ int selinux_disable(struct selinux_state *state)
 	}
 
 	selinux_mark_disabled(state);
+	selinux_measure_state(state);
 
 	pr_info("SELinux:  Disabled at runtime.\n");
 
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index b0e02cfe3ce1..628062ff9bba 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -222,16 +222,42 @@ static inline bool selinux_policycap_genfs_seclabel_symlinks(void)
 	return state->policycap[POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS];
 }
 
+static inline bool selinux_checkreqprot(const struct selinux_state *state)
+{
+	return READ_ONCE(state->checkreqprot);
+}
+
 int security_mls_enabled(struct selinux_state *state);
 int security_load_policy(struct selinux_state *state,
 			 void *data, size_t len);
 int security_read_policy(struct selinux_state *state,
 			 void **data, size_t *len);
+int security_read_policy_kernel(struct selinux_state *state,
+				void **data, size_t *len);
 size_t security_policydb_len(struct selinux_state *state);
 
 int security_policycap_supported(struct selinux_state *state,
 				 unsigned int req_cap);
 
+#ifdef CONFIG_IMA
+extern void __init selinux_init_measurement(void);
+extern int selinux_measure_data(void);
+extern int selinux_measure_state(struct selinux_state *selinux_state);
+#else
+static inline void __init selinux_init_measurement(void) {}
+
+static inline int selinux_measure_data(void)
+{
+	return 0;
+}
+
+static inline int selinux_measure_state(
+	struct selinux_state *selinux_state)
+{
+	return 0;
+}
+#endif
+
 #define SEL_VEC_MAX 32
 struct av_decision {
 	u32 allowed;
diff --git a/security/selinux/measure.c b/security/selinux/measure.c
new file mode 100644
index 000000000000..27cbb309e926
--- /dev/null
+++ b/security/selinux/measure.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Measure SELinux state using IMA subsystem.
+ */
+#include <linux/ima.h>
+#include "security.h"
+
+/* Pre-allocated buffer used for measuring state */
+static char *selinux_state_string;
+static size_t selinux_state_string_len;
+static char *selinux_state_string_fmt =
+	"%s=%d;%s=%d;%s=%d;%s=%d;%s=%d;%s=%d;%s=%d;%s=%d;%s=%d;%s=%d;";
+
+void __init selinux_init_measurement(void)
+{
+	selinux_state_string_len =
+	snprintf(NULL, 0, selinux_state_string_fmt,
+	"enabled", 0,
+	"enforcing", 0,
+	"checkreqprot", 0,
+	selinux_policycap_names[POLICYDB_CAPABILITY_NETPEER], 0,
+	selinux_policycap_names[POLICYDB_CAPABILITY_OPENPERM], 0,
+	selinux_policycap_names[POLICYDB_CAPABILITY_EXTSOCKCLASS], 0,
+	selinux_policycap_names[POLICYDB_CAPABILITY_ALWAYSNETWORK], 0,
+	selinux_policycap_names[POLICYDB_CAPABILITY_CGROUPSECLABEL], 0,
+	selinux_policycap_names[POLICYDB_CAPABILITY_NNP_NOSUID_TRANSITION], 0,
+	selinux_policycap_names[POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS],
+	0);
+
+	if (selinux_state_string_len < 0)
+		return;
+
+	++selinux_state_string_len;
+
+	selinux_state_string = kzalloc(selinux_state_string_len, GFP_KERNEL);
+	if (!selinux_state_string)
+		selinux_state_string_len = 0;
+}
+
+static int selinux_hash_policy(const char *hash_alg_name,
+			       void *policy, size_t policy_len,
+			       void **policy_hash, int *policy_hash_len)
+{
+	struct crypto_shash *tfm;
+	struct shash_desc *desc = NULL;
+	void *digest = NULL;
+	int desc_size;
+	int digest_size;
+	int ret = 0;
+
+	tfm = crypto_alloc_shash(hash_alg_name, 0, 0);
+	if (IS_ERR(tfm))
+		return PTR_ERR(tfm);
+
+	desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
+	digest_size = crypto_shash_digestsize(tfm);
+
+	digest = kmalloc(digest_size, GFP_KERNEL);
+	if (!digest) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	desc = kzalloc(desc_size, GFP_KERNEL);
+	if (!desc) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	desc->tfm = tfm;
+
+	ret = crypto_shash_digest(desc, policy, policy_len, digest);
+	if (ret < 0)
+		goto error;
+
+	*policy_hash_len = digest_size;
+	*policy_hash = digest;
+	digest = NULL;
+
+error:
+	kfree(desc);
+	kfree(digest);
+
+	crypto_free_shash(tfm);
+
+	if (ret)
+		pr_err("%s: error %d\n", __func__, ret);
+
+	return ret;
+}
+
+int selinux_measure_state(struct selinux_state *selinux_state)
+{
+	void *policy = NULL;
+	void *policy_hash = NULL;
+	int count;
+	size_t buflen;
+	int policy_hash_len;
+	int rc = 0;
+
+	if (!selinux_initialized(selinux_state))
+		return -EOPNOTSUPP;
+
+	if (!selinux_state_string)
+		return -ENOMEM;
+
+	count =
+	snprintf(
+	selinux_state_string, selinux_state_string_len,
+	selinux_state_string_fmt,
+	"enabled", !selinux_disabled(selinux_state),
+	"enforcing", enforcing_enabled(selinux_state),
+	"checkreqprot", selinux_checkreqprot(selinux_state),
+	selinux_policycap_names[POLICYDB_CAPABILITY_NETPEER],
+	selinux_state->policycap[POLICYDB_CAPABILITY_NETPEER],
+	selinux_policycap_names[POLICYDB_CAPABILITY_OPENPERM],
+	selinux_state->policycap[POLICYDB_CAPABILITY_OPENPERM],
+	selinux_policycap_names[POLICYDB_CAPABILITY_EXTSOCKCLASS],
+	selinux_state->policycap[POLICYDB_CAPABILITY_EXTSOCKCLASS],
+	selinux_policycap_names[POLICYDB_CAPABILITY_ALWAYSNETWORK],
+	selinux_state->policycap[POLICYDB_CAPABILITY_ALWAYSNETWORK],
+	selinux_policycap_names[POLICYDB_CAPABILITY_CGROUPSECLABEL],
+	selinux_state->policycap[POLICYDB_CAPABILITY_CGROUPSECLABEL],
+	selinux_policycap_names[POLICYDB_CAPABILITY_NNP_NOSUID_TRANSITION],
+	selinux_state->policycap[POLICYDB_CAPABILITY_NNP_NOSUID_TRANSITION],
+	selinux_policycap_names[POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS],
+	selinux_state->policycap[POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS]);
+
+	if (count >= 0 && count < selinux_state_string_len)
+		ima_lsm_state("selinux-state", selinux_state_string, count);
+	else {
+		pr_err("selinux state error: %d\n", count);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	rc = security_read_policy_kernel(selinux_state, &policy, &buflen);
+	if (!rc)
+		rc = selinux_hash_policy("sha256", policy, buflen,
+					 &policy_hash, &policy_hash_len);
+	if (!rc)
+		ima_lsm_state("selinux-policy-hash", policy_hash,
+			      policy_hash_len);
+
+out:
+	vfree(policy);
+	kfree(policy_hash);
+
+	if (rc)
+		pr_err("%s: error %d\n", __func__, rc);
+
+	return rc;
+}
+
+int selinux_measure_data(void)
+{
+	return selinux_measure_state(&selinux_state);
+}
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 4781314c2510..b1f70739d709 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -173,6 +173,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf,
 			from_kuid(&init_user_ns, audit_get_loginuid(current)),
 			audit_get_sessionid(current));
 		enforcing_set(state, new_value);
+		selinux_measure_state(&selinux_state);
 		if (new_value)
 			avc_ss_reset(state->avc, 0);
 		selnl_notify_setenforce(new_value);
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index ef0afd878bfc..79a6b462f1fe 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -3720,14 +3720,22 @@ int security_netlbl_sid_to_secattr(struct selinux_state *state,
 }
 #endif /* CONFIG_NETLABEL */
 
+static int security_read_policy_len(struct selinux_state *state, size_t *len)
+{
+	if (!selinux_initialized(state))
+		return -EINVAL;
+
+	*len = security_policydb_len(state);
+	return 0;
+}
+
 /**
  * security_read_policy - read the policy.
  * @data: binary policy data
  * @len: length of data in bytes
- *
  */
-int security_read_policy(struct selinux_state *state,
-			 void **data, size_t *len)
+int security_read_selinux_policy(struct selinux_state *state,
+				 void **data, size_t *len)
 {
 	struct policydb *policydb = &state->ss->policydb;
 	int rc;
@@ -3736,12 +3744,6 @@ int security_read_policy(struct selinux_state *state,
 	if (!selinux_initialized(state))
 		return -EINVAL;
 
-	*len = security_policydb_len(state);
-
-	*data = vmalloc_user(*len);
-	if (!*data)
-		return -ENOMEM;
-
 	fp.data = *data;
 	fp.len = *len;
 
@@ -3754,5 +3756,51 @@ int security_read_policy(struct selinux_state *state,
 
 	*len = (unsigned long)fp.data - (unsigned long)*data;
 	return 0;
+}
+
+/**
+ * security_read_policy - read the policy.
+ * @data: binary policy data
+ * @len: length of data in bytes
+ *
+ */
+int security_read_policy(struct selinux_state *state,
+			 void **data, size_t *len)
+{
+	int rc;
+
+	rc = security_read_policy_len(state, len);
+	if (rc)
+		return rc;
+
+	*data = vmalloc_user(*len);
+	if (!*data)
+		return -ENOMEM;
+
+	return security_read_selinux_policy(state, data, len);
+}
+
+/**
+ * security_read_policy_kernel - read the policy.
+ * @data: binary policy data
+ * @len: length of data in bytes
+ *
+ * Allocates kernel memory for reading SELinux policy.
+ * This function is for internal use only and should not
+ * be used for returning data to user space
+ */
+int security_read_policy_kernel(struct selinux_state *state,
+				void **data, size_t *len)
+{
+	int rc;
+
+	rc = security_read_policy_len(state, len);
+	if (rc)
+		return rc;
+
+	*data = vmalloc(*len);
+	if (!*data)
+		return -ENOMEM;
 
+	return security_read_selinux_policy(state, data, len);
 }
-- 
2.27.0


^ permalink raw reply related

* [PATCH v2 0/5] LSM: Measure security module state
From: Lakshmi Ramasubramanian @ 2020-07-16 17:43 UTC (permalink / raw)
  To: zohar, stephen.smalley.work, casey
  Cc: jmorris, linux-integrity, selinux, linux-security-module,
	linux-kernel

Critical data structures of security modules are currently not measured.
Therefore an attestation service, for instance, would not be able to
attest whether the security modules are always operating with the policies
and configuration that the system administrator had set up. The policies
and configuration for the security modules could be tampered with by
malware by exploiting kernel vulnerabilities or modified through some
inadvertent actions on the system. Measuring such critical data would
enable an attestation service to better assess the state of the system.

IMA subsystem measures system files, command line arguments passed to
kexec, boot aggregate, keys, etc. It can be used to measure critical
data structures of security modules as well.

This change aims to address measuring critical data structures
of security modules when they are initialized, when they are updated
at runtime, and also periodically to detect any tampering.

This change set is based off of Linux Kernel version 5.8-rc5.

The following patch needs to be applied first before applying
the patches in this patch set:

    https://patchwork.kernel.org/patch/11612989/

Change log:

  v2:
      => Pass selinux_state struct as parameter to the function
         that measures SELinux data.
      => Use strings from selinux_policycap_names array for SELinux
         state measurement.
      => Refactored security_read_policy() to alloc kernel or user
         virtual memory and then read the SELinux policy.

  v1:
      => Per Stephen Smalley's suggestion added selinux_state booleans
         and hash of SELinux policy in the measured data for SELinux.
      => Call IMA hook from the security module directly instead of
         redirecting through the LSM.

Lakshmi Ramasubramanian (5):
  IMA: Add LSM_STATE func to measure LSM data
  IMA: Define an IMA hook to measure LSM data
  LSM: Add security_measure_data in lsm_info struct
  LSM: Define SELinux function to measure security state
  LSM: Define workqueue for measuring security module state

 Documentation/ABI/testing/ima_policy |   6 +-
 include/linux/ima.h                  |   4 +
 include/linux/lsm_hooks.h            |   3 +
 security/integrity/ima/ima.h         |   1 +
 security/integrity/ima/ima_api.c     |   2 +-
 security/integrity/ima/ima_main.c    |  17 +++
 security/integrity/ima/ima_policy.c  |  29 ++++-
 security/security.c                  |  74 ++++++++++++-
 security/selinux/Makefile            |   2 +
 security/selinux/hooks.c             |   4 +
 security/selinux/include/security.h  |  26 +++++
 security/selinux/measure.c           | 158 +++++++++++++++++++++++++++
 security/selinux/selinuxfs.c         |   1 +
 security/selinux/ss/services.c       |  66 +++++++++--
 14 files changed, 376 insertions(+), 17 deletions(-)
 create mode 100644 security/selinux/measure.c

-- 
2.27.0

^ permalink raw reply

* Re: [PATCH v9 2/2] tpm: Add support for event log pointer found in TPM2 ACPI table
From: Jarkko Sakkinen @ 2020-07-16 17:26 UTC (permalink / raw)
  To: Stefan Berger
  Cc: Stefan Berger, linux-integrity, linux-kernel, linux-acpi,
	linux-security-module
In-Reply-To: <69907c30-62c2-b4bd-e84f-11612bba9c95@linux.ibm.com>

On Tue, Jul 14, 2020 at 08:09:03AM -0400, Stefan Berger wrote:
> On 7/14/20 7:20 AM, Jarkko Sakkinen wrote:
> > On Wed, Jul 08, 2020 at 10:17:17AM -0400, Stefan Berger wrote:
> > > > ❯ swtpm-mvo.swtpm socket --tpmstate dir=/tmp/mytpm1 \
> > > >     --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \
> > > >     --log level=20
> > > > swtpm: Could not open UnixIO socket: No such file or directory
> > > 
> > > Did you create the directory '/tmp/mytpm1' ?
> > Yes. It's the socket file that it is complain because it does
> > not exist beforehand.
> 
> 
> The socket file is created by the swtpm program.

I got this tested with real hardware, i.e. tested that TPM 1.2 works.

Tested-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>

/Jarkko

^ permalink raw reply

* Re: [PATCH v6 4/7] fs: Introduce O_MAYEXEC flag for openat2(2)
From: Kees Cook @ 2020-07-16 15:31 UTC (permalink / raw)
  To: Mickaël Salaün
  Cc: Jan Kara, Matthew Bobrowski, linux-nfs, linux-kernel,
	Aleksa Sarai, Alexei Starovoitov, Al Viro, Andrew Morton,
	Andy Lutomirski, Christian Brauner, Christian Heimes,
	Daniel Borkmann, Deven Bowers, Dmitry Vyukov, Eric Biggers,
	Eric Chiang, Florian Weimer, James Morris, Jann Horn,
	Jonathan Corbet, Lakshmi Ramasubramanian, Matthew Garrett,
	Matthew Wilcox, Michael Kerrisk, Mickaël Salaün,
	Mimi Zohar, Philippe Trébuchet, Scott Shell,
	Sean Christopherson, Shuah Khan, Steve Dower, Steve Grubb,
	Tetsuo Handa, Thibaut Sautereau, Vincent Strubel,
	kernel-hardening, linux-api, linux-integrity,
	linux-security-module, linux-fsdevel
In-Reply-To: <b209ea10-5b7f-c40e-5b6a-3da9028403d5@digikod.net>

On Thu, Jul 16, 2020 at 04:18:27PM +0200, Mickaël Salaün wrote:
> On 15/07/2020 22:06, Kees Cook wrote:
> > On Tue, Jul 14, 2020 at 08:16:35PM +0200, Mickaël Salaün wrote:
> >> The implementation of O_MAYEXEC almost duplicates what execve(2) and
> >> uselib(2) are already doing: setting MAY_OPENEXEC in acc_mode (which can
> >> then be checked as MAY_EXEC, if enforced), and propagating FMODE_EXEC to
> >> _fmode via __FMODE_EXEC flag (which can then trigger a
> >> fanotify/FAN_OPEN_EXEC event).
> >> [...]
> > 
> > Adding __FMODE_EXEC here will immediately change the behaviors of NFS
> > and fsnotify. If that's going to happen, I think it needs to be under
> > the control of the later patches doing the behavioral controls.
> > (specifically, NFS looks like it completely changes its access control
> > test when this is set and ignores the read/write checks entirely, which
> > is not what's wanted).
> 
> __FMODE_EXEC was suggested by Jan Kara and Matthew Bobrowski because of
> fsnotify. However, the NFS handling of SUID binaries [1] indeed leads to
> an unintended behavior. This also means that uselib(2) shouldn't work
> properly with NFS. I can remove the __FMODE_EXEC flag for now.

I kind of wonder if we need to more completely fix __FMODE_EXEC?

> [1] https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=f8d9a897d4384b77f13781ea813156568f68b83e

Hmpf, this implies that "fmode" should contain MAY_EXEC? It really looks
like __FMODE_EXEC is a hack for places where only "flags" were passed
around, and this only seems to be an issue for NFS at this point? And it
should be fixable for fsnotify too?

Hmm. (And nothing should use uselib anyway...)

-- 
Kees Cook

^ permalink raw reply

* Re: [PATCH v6 7/7] ima: add policy support for the new file open MAY_OPENEXEC flag
From: Mickaël Salaün @ 2020-07-16 15:22 UTC (permalink / raw)
  To: Randy Dunlap, Kees Cook
  Cc: linux-kernel, Aleksa Sarai, Alexei Starovoitov, Al Viro,
	Andrew Morton, Andy Lutomirski, Christian Brauner,
	Christian Heimes, Daniel Borkmann, Deven Bowers, Dmitry Vyukov,
	Eric Biggers, Eric Chiang, Florian Weimer, James Morris, Jan Kara,
	Jann Horn, Jonathan Corbet, Lakshmi Ramasubramanian,
	Matthew Garrett, Matthew Wilcox, Michael Kerrisk,
	Mickaël Salaün, Mimi Zohar, Philippe Trébuchet,
	Scott Shell, Sean Christopherson, Shuah Khan, Steve Dower,
	Steve Grubb, Tetsuo Handa, Thibaut Sautereau, Vincent Strubel,
	kernel-hardening, linux-api, linux-integrity,
	linux-security-module, linux-fsdevel
In-Reply-To: <61c05cb0-a956-3cc7-5dab-e11ebf0e95bf@infradead.org>


On 16/07/2020 16:59, Randy Dunlap wrote:
> On 7/16/20 7:40 AM, Mickaël Salaün wrote:
>>
>> On 15/07/2020 22:40, Kees Cook wrote:
>>> On Tue, Jul 14, 2020 at 08:16:38PM +0200, Mickaël Salaün wrote:
>>>> From: Mimi Zohar <zohar@linux.ibm.com>
>>>>
>>>> The kernel has no way of differentiating between a file containing data
>>>> or code being opened by an interpreter.  The proposed O_MAYEXEC
>>>> openat2(2) flag bridges this gap by defining and enabling the
>>>> MAY_OPENEXEC flag.
>>>>
>>>> This patch adds IMA policy support for the new MAY_OPENEXEC flag.
>>>>
>>>> Example:
>>>> measure func=FILE_CHECK mask=^MAY_OPENEXEC
>>>> appraise func=FILE_CHECK appraise_type=imasig mask=^MAY_OPENEXEC
>>>>
>>>> Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
>>>> Reviewed-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
>>>> Acked-by: Mickaël Salaün <mic@digikod.net>
>>>
>>> (Process nit: if you're sending this on behalf of another author, then
>>> this should be Signed-off-by rather than Acked-by.)
>>
>> I'm not a co-author of this patch.
>>
> 
> from Documentation/process/submitting-patches.rst:
> 
> The Signed-off-by: tag indicates that the signer was involved in the
> development of the patch, or that he/she was in the patch's delivery path.
>                              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> 

OK, I thought such tag had to go along with the From/Author, the
Committer or a Co-developed-by tag, but there is also this specific
case. I'll fix that in the next series.

^ permalink raw reply

* Re: [PATCH 16/16] capsh.c: Spelling fixes in usage() message
From: Andrew G. Morgan @ 2020-07-16 15:08 UTC (permalink / raw)
  To: Michael Kerrisk (man-pages); +Cc: LSM List
In-Reply-To: <20200716101827.162793-16-mtk.manpages@gmail.com>

Thanks! Applied all of them except 07_16. Instead, I've hopefully
clarified the intent of the text with some quotes.

Cheers

Andrew

https://git.kernel.org/pub/scm/libs/libcap/libcap.git/commit/?id=34e4e00b983a2c0fc5f13b403871a8fb5860bb89

On Thu, Jul 16, 2020 at 3:19 AM Michael Kerrisk (man-pages)
<mtk.manpages@gmail.com> wrote:
>
> Signed-off-by: Michael Kerrisk (man-pages) <mtk.manpages@gmail.com>
> ---
>  progs/capsh.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/progs/capsh.c b/progs/capsh.c
> index 94bf57d..7bed98e 100644
> --- a/progs/capsh.c
> +++ b/progs/capsh.c
> @@ -879,10 +879,10 @@ int main(int argc, char *argv[], char *envp[])
>                    "  --delamb=xxx   remove xxx,... capabilities from ambient\n"
>                    "  --noamb        reset (drop) all ambient capabilities\n"
>                    "  --caps=xxx     set caps as per cap_from_text()\n"
> -                  "  --inh=xxx      set xxx,.. inheritiable set\n"
> +                  "  --inh=xxx      set xxx,.. inheritable set\n"
>                    "  --secbits=<n>  write a new value for securebits\n"
>                    "  --iab=...      use cap_iab_from_text() to set iab\n"
> -                  "  --keep=<n>     set keep-capabability bit to <n>\n"
> +                  "  --keep=<n>     set keep-capability bit to <n>\n"
>                    "  --uid=<n>      set uid to <n> (hint: id <username>)\n"
>                    "  --cap-uid=<n>  libcap cap_setuid() to change uid\n"
>                    "  --is-uid=<n>   exit 1 if uid != <n>\n"
> --
> 2.26.2
>

^ permalink raw reply

* Re: [PATCH v6 7/7] ima: add policy support for the new file open MAY_OPENEXEC flag
From: Randy Dunlap @ 2020-07-16 14:59 UTC (permalink / raw)
  To: Mickaël Salaün, Kees Cook
  Cc: linux-kernel, Aleksa Sarai, Alexei Starovoitov, Al Viro,
	Andrew Morton, Andy Lutomirski, Christian Brauner,
	Christian Heimes, Daniel Borkmann, Deven Bowers, Dmitry Vyukov,
	Eric Biggers, Eric Chiang, Florian Weimer, James Morris, Jan Kara,
	Jann Horn, Jonathan Corbet, Lakshmi Ramasubramanian,
	Matthew Garrett, Matthew Wilcox, Michael Kerrisk,
	Mickaël Salaün, Mimi Zohar, Philippe Trébuchet,
	Scott Shell, Sean Christopherson, Shuah Khan, Steve Dower,
	Steve Grubb, Tetsuo Handa, Thibaut Sautereau, Vincent Strubel,
	kernel-hardening, linux-api, linux-integrity,
	linux-security-module, linux-fsdevel
In-Reply-To: <8df69733-0088-3e3c-9c3d-2610414cea2b@digikod.net>

On 7/16/20 7:40 AM, Mickaël Salaün wrote:
> 
> On 15/07/2020 22:40, Kees Cook wrote:
>> On Tue, Jul 14, 2020 at 08:16:38PM +0200, Mickaël Salaün wrote:
>>> From: Mimi Zohar <zohar@linux.ibm.com>
>>>
>>> The kernel has no way of differentiating between a file containing data
>>> or code being opened by an interpreter.  The proposed O_MAYEXEC
>>> openat2(2) flag bridges this gap by defining and enabling the
>>> MAY_OPENEXEC flag.
>>>
>>> This patch adds IMA policy support for the new MAY_OPENEXEC flag.
>>>
>>> Example:
>>> measure func=FILE_CHECK mask=^MAY_OPENEXEC
>>> appraise func=FILE_CHECK appraise_type=imasig mask=^MAY_OPENEXEC
>>>
>>> Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
>>> Reviewed-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
>>> Acked-by: Mickaël Salaün <mic@digikod.net>
>>
>> (Process nit: if you're sending this on behalf of another author, then
>> this should be Signed-off-by rather than Acked-by.)
> 
> I'm not a co-author of this patch.
> 

from Documentation/process/submitting-patches.rst:

The Signed-off-by: tag indicates that the signer was involved in the
development of the patch, or that he/she was in the patch's delivery path.
                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

-- 
~Randy


^ permalink raw reply

* Re: [PATCH v6 5/7] fs,doc: Enable to enforce noexec mounts or file exec through O_MAYEXEC
From: Mickaël Salaün @ 2020-07-16 14:40 UTC (permalink / raw)
  To: Randy Dunlap, linux-kernel
  Cc: Aleksa Sarai, Alexei Starovoitov, Al Viro, Andrew Morton,
	Andy Lutomirski, Christian Brauner, Christian Heimes,
	Daniel Borkmann, Deven Bowers, Dmitry Vyukov, Eric Biggers,
	Eric Chiang, Florian Weimer, James Morris, Jan Kara, Jann Horn,
	Jonathan Corbet, Kees Cook, Lakshmi Ramasubramanian,
	Matthew Garrett, Matthew Wilcox, Michael Kerrisk,
	Mickaël Salaün, Mimi Zohar, Philippe Trébuchet,
	Scott Shell, Sean Christopherson, Shuah Khan, Steve Dower,
	Steve Grubb, Tetsuo Handa, Thibaut Sautereau, Vincent Strubel,
	kernel-hardening, linux-api, linux-integrity,
	linux-security-module, linux-fsdevel
In-Reply-To: <038639b1-92da-13c1-b3e5-8f13639a815e@infradead.org>


On 14/07/2020 20:40, Randy Dunlap wrote:
> Hi,
> 
> On 7/14/20 11:16 AM, Mickaël Salaün wrote:
> 
>> ---
>>  Documentation/admin-guide/sysctl/fs.rst | 45 +++++++++++++++++++++++++
>>  fs/namei.c                              | 29 +++++++++++++---
>>  include/linux/fs.h                      |  1 +
>>  kernel/sysctl.c                         | 12 +++++--
>>  4 files changed, 80 insertions(+), 7 deletions(-)
>>
>> diff --git a/Documentation/admin-guide/sysctl/fs.rst b/Documentation/admin-guide/sysctl/fs.rst
>> index 2a45119e3331..02ec384b8bbf 100644
>> --- a/Documentation/admin-guide/sysctl/fs.rst
>> +++ b/Documentation/admin-guide/sysctl/fs.rst
> 
> Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
> 
> with one tiny nit:
> 
>> @@ -165,6 +166,50 @@ system needs to prune the inode list instead of allocating
>> +The ability to restrict code execution must be thought as a system-wide policy,
>> +which first starts by restricting mount points with the ``noexec`` option.
>> +This option is also automatically applied to special filesystems such as /proc
>> +.  This prevents files on such mount points to be directly executed by the
> 
> Can you move that period from the beginning of the line to the end of the
> previous line?

OK, done. Thanks!

> 
>> +kernel or mapped as executable memory (e.g. libraries).  With script
>> +interpreters using the ``O_MAYEXEC`` flag, the executable permission can then
>> +be checked before reading commands from files. This makes it possible to
>> +enforce the ``noexec`` at the interpreter level, and thus propagates this
>> +security policy to scripts.  To be fully effective, these interpreters also
>> +need to handle the other ways to execute code: command line parameters (e.g.,
>> +option ``-e`` for Perl), module loading (e.g., option ``-m`` for Python),
>> +stdin, file sourcing, environment variables, configuration files, etc.
>> +According to the threat model, it may be acceptable to allow some script
>> +interpreters (e.g. Bash) to interpret commands from stdin, may it be a TTY or a
>> +pipe, because it may not be enough to (directly) perform syscalls.
> 
> thanks.
> 

^ permalink raw reply

* Re: [PATCH v6 7/7] ima: add policy support for the new file open MAY_OPENEXEC flag
From: Mickaël Salaün @ 2020-07-16 14:40 UTC (permalink / raw)
  To: Kees Cook
  Cc: linux-kernel, Aleksa Sarai, Alexei Starovoitov, Al Viro,
	Andrew Morton, Andy Lutomirski, Christian Brauner,
	Christian Heimes, Daniel Borkmann, Deven Bowers, Dmitry Vyukov,
	Eric Biggers, Eric Chiang, Florian Weimer, James Morris, Jan Kara,
	Jann Horn, Jonathan Corbet, Lakshmi Ramasubramanian,
	Matthew Garrett, Matthew Wilcox, Michael Kerrisk,
	Mickaël Salaün, Mimi Zohar, Philippe Trébuchet,
	Scott Shell, Sean Christopherson, Shuah Khan, Steve Dower,
	Steve Grubb, Tetsuo Handa, Thibaut Sautereau, Vincent Strubel,
	kernel-hardening, linux-api, linux-integrity,
	linux-security-module, linux-fsdevel
In-Reply-To: <202007151339.283D7CD@keescook>


On 15/07/2020 22:40, Kees Cook wrote:
> On Tue, Jul 14, 2020 at 08:16:38PM +0200, Mickaël Salaün wrote:
>> From: Mimi Zohar <zohar@linux.ibm.com>
>>
>> The kernel has no way of differentiating between a file containing data
>> or code being opened by an interpreter.  The proposed O_MAYEXEC
>> openat2(2) flag bridges this gap by defining and enabling the
>> MAY_OPENEXEC flag.
>>
>> This patch adds IMA policy support for the new MAY_OPENEXEC flag.
>>
>> Example:
>> measure func=FILE_CHECK mask=^MAY_OPENEXEC
>> appraise func=FILE_CHECK appraise_type=imasig mask=^MAY_OPENEXEC
>>
>> Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
>> Reviewed-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
>> Acked-by: Mickaël Salaün <mic@digikod.net>
> 
> (Process nit: if you're sending this on behalf of another author, then
> this should be Signed-off-by rather than Acked-by.)

I'm not a co-author of this patch.

^ permalink raw reply

* Re: [PATCH v6 5/7] fs,doc: Enable to enforce noexec mounts or file exec through O_MAYEXEC
From: Mickaël Salaün @ 2020-07-16 14:39 UTC (permalink / raw)
  To: Kees Cook
  Cc: linux-kernel, Aleksa Sarai, Alexei Starovoitov, Al Viro,
	Andrew Morton, Andy Lutomirski, Christian Brauner,
	Christian Heimes, Daniel Borkmann, Deven Bowers, Dmitry Vyukov,
	Eric Biggers, Eric Chiang, Florian Weimer, James Morris, Jan Kara,
	Jann Horn, Jonathan Corbet, Lakshmi Ramasubramanian,
	Matthew Garrett, Matthew Wilcox, Michael Kerrisk,
	Mickaël Salaün, Mimi Zohar, Philippe Trébuchet,
	Scott Shell, Sean Christopherson, Shuah Khan, Steve Dower,
	Steve Grubb, Tetsuo Handa, Thibaut Sautereau, Vincent Strubel,
	kernel-hardening, linux-api, linux-integrity,
	linux-security-module, linux-fsdevel
In-Reply-To: <202007151312.C28D112013@keescook>


On 15/07/2020 22:37, Kees Cook wrote:
> On Tue, Jul 14, 2020 at 08:16:36PM +0200, Mickaël Salaün wrote:
>> @@ -2849,7 +2855,7 @@ static int may_open(const struct path *path, int acc_mode, int flag)
>>  	case S_IFLNK:
>>  		return -ELOOP;
>>  	case S_IFDIR:
>> -		if (acc_mode & (MAY_WRITE | MAY_EXEC))
>> +		if (acc_mode & (MAY_WRITE | MAY_EXEC | MAY_OPENEXEC))
>>  			return -EISDIR;
>>  		break;
> 
> (I need to figure out where "open for reading" rejects S_IFDIR, since
> it's clearly not here...)
> 
>>  	case S_IFBLK:
>> @@ -2859,13 +2865,26 @@ static int may_open(const struct path *path, int acc_mode, int flag)
>>  		fallthrough;
>>  	case S_IFIFO:
>>  	case S_IFSOCK:
>> -		if (acc_mode & MAY_EXEC)
>> +		if (acc_mode & (MAY_EXEC | MAY_OPENEXEC))
>>  			return -EACCES;
>>  		flag &= ~O_TRUNC;
>>  		break;
> 
> This will immediately break a system that runs code with MAY_OPENEXEC
> set but reads from a block, char, fifo, or socket, even in the case of
> a sysadmin leaving the "file" sysctl disabled.

As documented, O_MAYEXEC is for regular files. The only legitimate use
case seems to be with pipes, which should probably be allowed when
enforcement is disabled.

> 
>>  	case S_IFREG:
>> -		if ((acc_mode & MAY_EXEC) && path_noexec(path))
>> -			return -EACCES;
>> +		if (path_noexec(path)) {
>> +			if (acc_mode & MAY_EXEC)
>> +				return -EACCES;
>> +			if ((acc_mode & MAY_OPENEXEC) &&
>> +					(sysctl_open_mayexec_enforce & OPEN_MAYEXEC_ENFORCE_MOUNT))
>> +				return -EACCES;
>> +		}
>> +		if ((acc_mode & MAY_OPENEXEC) &&
>> +				(sysctl_open_mayexec_enforce & OPEN_MAYEXEC_ENFORCE_FILE))
>> +			/*
>> +			 * Because acc_mode may change here, the next and only
>> +			 * use of acc_mode should then be by the following call
>> +			 * to inode_permission().
>> +			 */
>> +			acc_mode |= MAY_EXEC;
>>  		break;
>>  	}
> 
> Likely very minor, but I'd like to avoid the path_noexec() call in the
> fast-path (it dereferences a couple pointers whereas doing bit tests on
> acc_mode is fast).
> 
> Given that and the above observations, I think that may_open() likely
> needs to start with:
> 
> 	if (acc_mode & MAY_OPENEXEC) {
> 		/* Reject all file types when mount enforcement set. */
> 		if ((sysctl_open_mayexec_enforce & OPEN_MAYEXEC_ENFORCE_MOUNT) &&
> 		    path_noexec(path))
> 			return -EACCES;
> 		/* Treat the same as MAY_EXEC. */
> 		if (sysctl_open_mayexec_enforce & OPEN_MAYEXEC_ENFORCE_FILE))
> 			acc_mode |= MAY_EXEC;
> 	}

OK

> 
> (Though I'm not 100% sure that path_noexec() is safe to be called for
> all file types: i.e. path->mnt and path->mnt->mnt_sb *always* non-NULL?)

path->mnt should always be non-NULL:
https://lore.kernel.org/lkml/20200317164709.GA23230@ZenIV.linux.org.uk/

> 
> This change would also imply that OPEN_MAYEXEC_ENFORCE_FILE *includes*
> OPEN_MAYEXEC_ENFORCE_MOUNT (i.e. the sysctl should not be a bitfield),
> since path_noexec() would get checked for S_ISREG. I can't come up with
> a rationale where one would want OPEN_MAYEXEC_ENFORCE_FILE but _not_
> OPEN_MAYEXEC_ENFORCE_MOUNT?

I don't see why it is an inclusion.

> 
> (I can absolutely see wanting only OPEN_MAYEXEC_ENFORCE_MOUNT, or
> suddenly one has to go mark every loaded thing with the exec bit and
> most distros haven't done this to, for example, shared libraries. But
> setting the exec bit and then NOT wanting to enforce the mount check
> seems... not sensible?)
> 
> Outside of this change, yes, I like this now -- it's much cleaner
> because we have all the checks in the same place where they belong. :)
> 
>> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
>> index db1ce7af2563..5008a2566e79 100644
>> --- a/kernel/sysctl.c
>> +++ b/kernel/sysctl.c
>> @@ -113,6 +113,7 @@ static int sixty = 60;
>>  
>>  static int __maybe_unused neg_one = -1;
>>  static int __maybe_unused two = 2;
>> +static int __maybe_unused three = 3;
>>  static int __maybe_unused four = 4;
>>  static unsigned long zero_ul;
>>  static unsigned long one_ul = 1;
> 
> Oh, are these still here? I thought they got removed (or at least made
> const). Where did that series go? Hmpf, see sysctl_vals, but yes, for
> now, this is fine.
> 
>> @@ -888,7 +889,6 @@ static int proc_taint(struct ctl_table *table, int write,
>>  	return err;
>>  }
>>  
>> -#ifdef CONFIG_PRINTK
>>  static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
>>  				void *buffer, size_t *lenp, loff_t *ppos)
>>  {
>> @@ -897,7 +897,6 @@ static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
>>  
>>  	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
>>  }
>> -#endif
>>  
>>  /**
>>   * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
>> @@ -3264,6 +3263,15 @@ static struct ctl_table fs_table[] = {
>>  		.extra1		= SYSCTL_ZERO,
>>  		.extra2		= &two,
>>  	},
>> +	{
>> +		.procname       = "open_mayexec_enforce",
>> +		.data           = &sysctl_open_mayexec_enforce,
>> +		.maxlen         = sizeof(int),
>> +		.mode           = 0600,
>> +		.proc_handler	= proc_dointvec_minmax_sysadmin,
>> +		.extra1		= SYSCTL_ZERO,
>> +		.extra2		= &three,
>> +	},
>>  #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
>>  	{
>>  		.procname	= "binfmt_misc",
>> -- 
>> 2.27.0
>>
> 

^ permalink raw reply

* Re: [PATCH] ima: Rename internal audit rule functions
From: Mimi Zohar @ 2020-07-16 14:23 UTC (permalink / raw)
  To: Tyler Hicks
  Cc: Dmitry Kasatkin, James Morris, Serge E . Hallyn, linux-kernel,
	linux-integrity, linux-security-module, Casey Schaufler,
	linux-audit
In-Reply-To: <20200710194234.GA7254@sequoia>

On Fri, 2020-07-10 at 14:42 -0500, Tyler Hicks wrote:
> On 2020-06-29 17:30:03, Mimi Zohar wrote:
> > [Cc'ing the audit mailing list]
> > 
> > On Mon, 2020-06-29 at 10:30 -0500, Tyler Hicks wrote:
> > > 
> > > diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
> > > index ff2bf57ff0c7..5d62ee8319f4 100644
> > > --- a/security/integrity/ima/ima.h
> > > +++ b/security/integrity/ima/ima.h
> > > @@ -419,24 +419,24 @@ static inline void ima_free_modsig(struct modsig *modsig)
> > >  /* LSM based policy rules require audit */
> > >  #ifdef CONFIG_IMA_LSM_RULES
> > >  
> > > -#define security_filter_rule_init security_audit_rule_init
> > > -#define security_filter_rule_free security_audit_rule_free
> > > -#define security_filter_rule_match security_audit_rule_match
> > > +#define ima_audit_rule_init security_audit_rule_init
> > > +#define ima_audit_rule_free security_audit_rule_free
> > > +#define ima_audit_rule_match security_audit_rule_match
> > 
> > Instead of defining an entirely new method of identifying files, IMA
> > piggybacks on top of the existing audit rule syntax.  IMA policy rules
> > "filter" based on this information.
> > 
> > IMA already audits security/integrity related events.  Using the word
> > "audit" here will make things even more confusing than they currently
> > are.  Renaming these functions as ima_audit_rule_XXX provides no
> > benefit.  At that point, IMA might as well call the
> > security_audit_rule prefixed function names directly.  As a quick fix,
> > rename them as "ima_filter_rule".
> > 
> > The correct solution would probably be to rename these prefixed
> > "security_audit_rule" functions as "security_filter_rule", so that
> > both the audit subsystem and IMA could use them.
> 
> There doesn't seem to be any interest, from the audit side, in re-using
> these. I don't quite understand why they would want to use them since
> they're just simple wrappers around the security_audit_rule_*()
> functions.

The security_filter_rule_* wasn't meant to be in addition, but as a
replacement for security_audit_rule_*.
> 
> I'll go the "quick fix" route of renaming them as ima_filter_rule_*().

That's fine.

Mimi

^ permalink raw reply

* Re: [PATCH v6 4/7] fs: Introduce O_MAYEXEC flag for openat2(2)
From: Mickaël Salaün @ 2020-07-16 14:18 UTC (permalink / raw)
  To: Kees Cook, Jan Kara, Matthew Bobrowski, linux-nfs
  Cc: linux-kernel, Aleksa Sarai, Alexei Starovoitov, Al Viro,
	Andrew Morton, Andy Lutomirski, Christian Brauner,
	Christian Heimes, Daniel Borkmann, Deven Bowers, Dmitry Vyukov,
	Eric Biggers, Eric Chiang, Florian Weimer, James Morris,
	Jann Horn, Jonathan Corbet, Lakshmi Ramasubramanian,
	Matthew Garrett, Matthew Wilcox, Michael Kerrisk,
	Mickaël Salaün, Mimi Zohar, Philippe Trébuchet,
	Scott Shell, Sean Christopherson, Shuah Khan, Steve Dower,
	Steve Grubb, Tetsuo Handa, Thibaut Sautereau, Vincent Strubel,
	kernel-hardening, linux-api, linux-integrity,
	linux-security-module, linux-fsdevel
In-Reply-To: <202007151304.9F48071@keescook>


On 15/07/2020 22:06, Kees Cook wrote:
> On Tue, Jul 14, 2020 at 08:16:35PM +0200, Mickaël Salaün wrote:
>> When the O_MAYEXEC flag is passed, openat2(2) may be subject to
>> additional restrictions depending on a security policy managed by the
>> kernel through a sysctl or implemented by an LSM thanks to the
>> inode_permission hook.  This new flag is ignored by open(2) and
>> openat(2) because of their unspecified flags handling.
>>
>> The underlying idea is to be able to restrict scripts interpretation
>> according to a policy defined by the system administrator.  For this to
>> be possible, script interpreters must use the O_MAYEXEC flag
>> appropriately.  To be fully effective, these interpreters also need to
>> handle the other ways to execute code: command line parameters (e.g.,
>> option -e for Perl), module loading (e.g., option -m for Python), stdin,
>> file sourcing, environment variables, configuration files, etc.
>> According to the threat model, it may be acceptable to allow some script
>> interpreters (e.g. Bash) to interpret commands from stdin, may it be a
>> TTY or a pipe, because it may not be enough to (directly) perform
>> syscalls.  Further documentation can be found in a following patch.
>>
>> Even without enforced security policy, userland interpreters can set it
>> to enforce the system policy at their level, knowing that it will not
>> break anything on running systems which do not care about this feature.
>> However, on systems which want this feature enforced, there will be
>> knowledgeable people (i.e. sysadmins who enforced O_MAYEXEC
>> deliberately) to manage it.  A simple security policy implementation,
>> configured through a dedicated sysctl, is available in a following
>> patch.
>>
>> O_MAYEXEC should not be confused with the O_EXEC flag which is intended
>> for execute-only, which obviously doesn't work for scripts.  However, a
>> similar behavior could be implemented in userland with O_PATH:
>> https://lore.kernel.org/lkml/1e2f6913-42f2-3578-28ed-567f6a4bdda1@digikod.net/
>>
>> The implementation of O_MAYEXEC almost duplicates what execve(2) and
>> uselib(2) are already doing: setting MAY_OPENEXEC in acc_mode (which can
>> then be checked as MAY_EXEC, if enforced), and propagating FMODE_EXEC to
>> _fmode via __FMODE_EXEC flag (which can then trigger a
>> fanotify/FAN_OPEN_EXEC event).
>>
>> This is an updated subset of the patch initially written by Vincent
>> Strubel for CLIP OS 4:
>> https://github.com/clipos-archive/src_platform_clip-patches/blob/f5cb330d6b684752e403b4e41b39f7004d88e561/1901_open_mayexec.patch
>> This patch has been used for more than 12 years with customized script
>> interpreters.  Some examples (with the original name O_MAYEXEC) can be
>> found here:
>> https://github.com/clipos-archive/clipos4_portage-overlay/search?q=O_MAYEXEC
>>
>> Co-developed-by: Vincent Strubel <vincent.strubel@ssi.gouv.fr>
>> Signed-off-by: Vincent Strubel <vincent.strubel@ssi.gouv.fr>
>> Co-developed-by: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
>> Signed-off-by: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
>> Signed-off-by: Mickaël Salaün <mic@digikod.net>
>> Reviewed-by: Deven Bowers <deven.desai@linux.microsoft.com>
>> Reviewed-by: Kees Cook <keescook@chromium.org>
>> Cc: Aleksa Sarai <cyphar@cyphar.com>
>> Cc: Al Viro <viro@zeniv.linux.org.uk>
>> ---
>>
>> Changes since v5:
>> * Update commit message.
>>
>> Changes since v3:
>> * Switch back to O_MAYEXEC, but only handle it with openat2(2) which
>>   checks unknown flags (suggested by Aleksa Sarai). Cf.
>>   https://lore.kernel.org/lkml/20200430015429.wuob7m5ofdewubui@yavin.dot.cyphar.com/
>>
>> Changes since v2:
>> * Replace O_MAYEXEC with RESOLVE_MAYEXEC from openat2(2).  This change
>>   enables to not break existing application using bogus O_* flags that
>>   may be ignored by current kernels by using a new dedicated flag, only
>>   usable through openat2(2) (suggested by Jeff Layton).  Using this flag
>>   will result in an error if the running kernel does not support it.
>>   User space needs to manage this case, as with other RESOLVE_* flags.
>>   The best effort approach to security (for most common distros) will
>>   simply consist of ignoring such an error and retrying without
>>   RESOLVE_MAYEXEC.  However, a fully controlled system may wish to
>>   error out if such an inconsistency is detected.
>>
>> Changes since v1:
>> * Set __FMODE_EXEC when using O_MAYEXEC to make this information
>>   available through the new fanotify/FAN_OPEN_EXEC event (suggested by
>>   Jan Kara and Matthew Bobrowski):
>>   https://lore.kernel.org/lkml/20181213094658.GA996@lithium.mbobrowski.org/
>> ---
>>  fs/fcntl.c                       | 2 +-
>>  fs/open.c                        | 8 ++++++++
>>  include/linux/fcntl.h            | 2 +-
>>  include/linux/fs.h               | 2 ++
>>  include/uapi/asm-generic/fcntl.h | 7 +++++++
>>  5 files changed, 19 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/fcntl.c b/fs/fcntl.c
>> index 2e4c0fa2074b..0357ad667563 100644
>> --- a/fs/fcntl.c
>> +++ b/fs/fcntl.c
>> @@ -1033,7 +1033,7 @@ static int __init fcntl_init(void)
>>  	 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
>>  	 * is defined as O_NONBLOCK on some platforms and not on others.
>>  	 */
>> -	BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
>> +	BUILD_BUG_ON(22 - 1 /* for O_RDONLY being 0 */ !=
>>  		HWEIGHT32(
>>  			(VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
>>  			__FMODE_EXEC | __FMODE_NONOTIFY));
>> diff --git a/fs/open.c b/fs/open.c
>> index 623b7506a6db..38e434bdbbb6 100644
>> --- a/fs/open.c
>> +++ b/fs/open.c
>> @@ -987,6 +987,8 @@ inline struct open_how build_open_how(int flags, umode_t mode)
>>  		.mode = mode & S_IALLUGO,
>>  	};
>>  
>> +	/* O_MAYEXEC is ignored by syscalls relying on build_open_how(). */
>> +	how.flags &= ~O_MAYEXEC;
>>  	/* O_PATH beats everything else. */
>>  	if (how.flags & O_PATH)
>>  		how.flags &= O_PATH_FLAGS;
>> @@ -1054,6 +1056,12 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op)
>>  	if (flags & __O_SYNC)
>>  		flags |= O_DSYNC;
>>  
>> +	/* Checks execution permissions on open. */
>> +	if (flags & O_MAYEXEC) {
>> +		acc_mode |= MAY_OPENEXEC;
>> +		flags |= __FMODE_EXEC;
>> +	}
> 
> Adding __FMODE_EXEC here will immediately change the behaviors of NFS
> and fsnotify. If that's going to happen, I think it needs to be under
> the control of the later patches doing the behavioral controls.
> (specifically, NFS looks like it completely changes its access control
> test when this is set and ignores the read/write checks entirely, which
> is not what's wanted).

__FMODE_EXEC was suggested by Jan Kara and Matthew Bobrowski because of
fsnotify. However, the NFS handling of SUID binaries [1] indeed leads to
an unintended behavior. This also means that uselib(2) shouldn't work
properly with NFS. I can remove the __FMODE_EXEC flag for now.

[1]
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=f8d9a897d4384b77f13781ea813156568f68b83e

^ permalink raw reply

* [PATCH v36 11/24] x86/sgx: Add SGX enclave driver
From: Jarkko Sakkinen @ 2020-07-16 13:52 UTC (permalink / raw)
  To: x86, linux-sgx
  Cc: linux-kernel, Jarkko Sakkinen, linux-security-module, linux-mm,
	Andrew Morton, Matthew Wilcox, Jethro Beekman, Haitao Huang,
	Chunyang Hui, Jordan Hand, Nathaniel McCallum, Seth Moore,
	Sean Christopherson, Suresh Siddha, andriy.shevchenko, asapek, bp,
	cedric.xing, chenalexchen, conradparker, cyhanish, dave.hansen,
	haitao.huang, josh, kai.huang, kai.svahn, kmoy, ludloff, luto,
	nhorman, puiterwijk, rientjes, tglx, yaozhangx
In-Reply-To: <20200716135303.276442-1-jarkko.sakkinen@linux.intel.com>

Intel Software Guard eXtensions (SGX) is a set of CPU instructions that can
be used by applications to set aside private regions of code and data. The
code outside the SGX hosted software entity is prevented from accessing the
memory inside the enclave by the CPU. We call these entities enclaves.

Add a driver that provides an ioctl API to construct and run enclaves.
Enclaves are constructed from pages residing in reserved physical memory
areas. The contents of these pages can only be accessed when they are
mapped as part of an enclave, by a hardware thread running inside the
enclave.

The starting state of an enclave consists of a fixed measured set of
pages that are copied to the EPC during the construction process by
using ENCLS leaf functions and Software Enclave Control Structure (SECS)
that defines the enclave properties.

Enclaves are constructed by using ENCLS leaf functions ECREATE, EADD and
EINIT. ECREATE initializes SECS, EADD copies pages from system memory to
the EPC and EINIT checks a given signed measurement and moves the enclave
into a state ready for execution.

An initialized enclave can only be accessed through special Thread Control
Structure (TCS) pages by using ENCLU (ring-3 only) leaf EENTER.  This leaf
function converts a thread into enclave mode and continues the execution in
the offset defined by the TCS provided to EENTER. An enclave is exited
through syscall, exception, interrupts or by explicitly calling another
ENCLU leaf EEXIT.

The mmap() permissions are capped by the contained enclave page
permissions. The mapped areas must also be opaque, i.e. each page address
must contain a page. This logic is implemented in sgx_encl_may_map().

Cc: linux-security-module@vger.kernel.org
Cc: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Matthew Wilcox <willy@infradead.org>
Acked-by: Jethro Beekman <jethro@fortanix.com>
Tested-by: Jethro Beekman <jethro@fortanix.com>
Tested-by: Haitao Huang <haitao.huang@linux.intel.com>
Tested-by: Chunyang Hui <sanqian.hcy@antfin.com>
Tested-by: Jordan Hand <jorhand@linux.microsoft.com>
Tested-by: Nathaniel McCallum <npmccallum@redhat.com>
Tested-by: Seth Moore <sethmo@google.com>
Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Co-developed-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 arch/x86/kernel/cpu/sgx/Makefile |   2 +
 arch/x86/kernel/cpu/sgx/driver.c | 177 ++++++++++++++++
 arch/x86/kernel/cpu/sgx/driver.h |  29 +++
 arch/x86/kernel/cpu/sgx/encl.c   | 333 +++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/sgx/encl.h   |  87 ++++++++
 arch/x86/kernel/cpu/sgx/main.c   |  11 +
 6 files changed, 639 insertions(+)
 create mode 100644 arch/x86/kernel/cpu/sgx/driver.c
 create mode 100644 arch/x86/kernel/cpu/sgx/driver.h
 create mode 100644 arch/x86/kernel/cpu/sgx/encl.c
 create mode 100644 arch/x86/kernel/cpu/sgx/encl.h

diff --git a/arch/x86/kernel/cpu/sgx/Makefile b/arch/x86/kernel/cpu/sgx/Makefile
index 79510ce01b3b..3fc451120735 100644
--- a/arch/x86/kernel/cpu/sgx/Makefile
+++ b/arch/x86/kernel/cpu/sgx/Makefile
@@ -1,2 +1,4 @@
 obj-y += \
+	driver.o \
+	encl.o \
 	main.o
diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
new file mode 100644
index 000000000000..b52520407f5b
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/driver.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+// Copyright(c) 2016-18 Intel Corporation.
+
+#include <linux/acpi.h>
+#include <linux/miscdevice.h>
+#include <linux/mman.h>
+#include <linux/security.h>
+#include <linux/suspend.h>
+#include <asm/traps.h>
+#include "driver.h"
+#include "encl.h"
+
+MODULE_DESCRIPTION("Intel SGX Enclave Driver");
+MODULE_AUTHOR("Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>");
+MODULE_LICENSE("Dual BSD/GPL");
+
+u64 sgx_encl_size_max_32;
+u64 sgx_encl_size_max_64;
+u32 sgx_misc_reserved_mask;
+u64 sgx_attributes_reserved_mask;
+u64 sgx_xfrm_reserved_mask = ~0x3;
+u32 sgx_xsave_size_tbl[64];
+
+static int sgx_open(struct inode *inode, struct file *file)
+{
+	struct sgx_encl *encl;
+	int ret;
+
+	encl = kzalloc(sizeof(*encl), GFP_KERNEL);
+	if (!encl)
+		return -ENOMEM;
+
+	atomic_set(&encl->flags, 0);
+	kref_init(&encl->refcount);
+	xa_init(&encl->page_array);
+	mutex_init(&encl->lock);
+	INIT_LIST_HEAD(&encl->mm_list);
+	spin_lock_init(&encl->mm_lock);
+
+	ret = init_srcu_struct(&encl->srcu);
+	if (ret) {
+		kfree(encl);
+		return ret;
+	}
+
+	file->private_data = encl;
+
+	return 0;
+}
+
+static int sgx_release(struct inode *inode, struct file *file)
+{
+	struct sgx_encl *encl = file->private_data;
+	struct sgx_encl_mm *encl_mm;
+
+	for ( ; ; )  {
+		spin_lock(&encl->mm_lock);
+
+		if (list_empty(&encl->mm_list)) {
+			encl_mm = NULL;
+		} else {
+			encl_mm = list_first_entry(&encl->mm_list,
+						   struct sgx_encl_mm, list);
+			list_del_rcu(&encl_mm->list);
+		}
+
+		spin_unlock(&encl->mm_lock);
+
+		/* The list is empty, ready to go. */
+		if (!encl_mm)
+			break;
+
+		synchronize_srcu(&encl->srcu);
+		mmu_notifier_unregister(&encl_mm->mmu_notifier, encl_mm->mm);
+		kfree(encl_mm);
+	}
+
+	mutex_lock(&encl->lock);
+	atomic_or(SGX_ENCL_DEAD, &encl->flags);
+	mutex_unlock(&encl->lock);
+
+	kref_put(&encl->refcount, sgx_encl_release);
+	return 0;
+}
+
+static int sgx_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct sgx_encl *encl = file->private_data;
+	int ret;
+
+	ret = sgx_encl_may_map(encl, vma->vm_start, vma->vm_end, vma->vm_flags);
+	if (ret)
+		return ret;
+
+	ret = sgx_encl_mm_add(encl, vma->vm_mm);
+	if (ret)
+		return ret;
+
+	vma->vm_ops = &sgx_vm_ops;
+	vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
+	vma->vm_private_data = encl;
+
+	return 0;
+}
+
+static unsigned long sgx_get_unmapped_area(struct file *file,
+					   unsigned long addr,
+					   unsigned long len,
+					   unsigned long pgoff,
+					   unsigned long flags)
+{
+	if ((flags & MAP_TYPE) == MAP_PRIVATE) /* type field, not a flag */
+		return -EINVAL;
+
+	if (flags & MAP_FIXED)
+		return addr;
+
+	return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+}
+
+static const struct file_operations sgx_encl_fops = {
+	.owner			= THIS_MODULE,
+	.open			= sgx_open,
+	.release		= sgx_release,
+	.mmap			= sgx_mmap,
+	.get_unmapped_area	= sgx_get_unmapped_area,
+};
+
+static struct miscdevice sgx_dev_enclave = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "enclave",
+	.nodename = "sgx/enclave",
+	.fops = &sgx_encl_fops,
+};
+
+int __init sgx_drv_init(void)
+{
+	unsigned int eax, ebx, ecx, edx;
+	u64 attr_mask, xfrm_mask;
+	int ret;
+	int i;
+
+	if (!boot_cpu_has(X86_FEATURE_SGX_LC)) {
+		pr_info("The public key MSRs are not writable.\n");
+		return -ENODEV;
+	}
+
+	cpuid_count(SGX_CPUID, 0, &eax, &ebx, &ecx, &edx);
+	sgx_misc_reserved_mask = ~ebx | SGX_MISC_RESERVED_MASK;
+	sgx_encl_size_max_64 = 1ULL << ((edx >> 8) & 0xFF);
+	sgx_encl_size_max_32 = 1ULL << (edx & 0xFF);
+
+	cpuid_count(SGX_CPUID, 1, &eax, &ebx, &ecx, &edx);
+
+	attr_mask = (((u64)ebx) << 32) + (u64)eax;
+	sgx_attributes_reserved_mask = ~attr_mask | SGX_ATTR_RESERVED_MASK;
+
+	if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
+		xfrm_mask = (((u64)edx) << 32) + (u64)ecx;
+
+		for (i = 2; i < 64; i++) {
+			cpuid_count(0x0D, i, &eax, &ebx, &ecx, &edx);
+			if ((1ULL << i) & xfrm_mask) /* 64-bit: i reaches 63 */
+				sgx_xsave_size_tbl[i] = eax + ebx;
+		}
+
+		sgx_xfrm_reserved_mask = ~xfrm_mask;
+	}
+
+	ret = misc_register(&sgx_dev_enclave);
+	if (ret) {
+		pr_err("Creating /dev/sgx/enclave failed with %d.\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
diff --git a/arch/x86/kernel/cpu/sgx/driver.h b/arch/x86/kernel/cpu/sgx/driver.h
new file mode 100644
index 000000000000..f7ce40dedc91
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/driver.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+#ifndef __ARCH_SGX_DRIVER_H__
+#define __ARCH_SGX_DRIVER_H__
+
+#include <crypto/hash.h>
+#include <linux/kref.h>
+#include <linux/mmu_notifier.h>
+#include <linux/radix-tree.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+#include "sgx.h"
+
+#define SGX_EINIT_SPIN_COUNT	20
+#define SGX_EINIT_SLEEP_COUNT	50
+#define SGX_EINIT_SLEEP_TIME	20
+
+extern u64 sgx_encl_size_max_32;
+extern u64 sgx_encl_size_max_64;
+extern u32 sgx_misc_reserved_mask;
+extern u64 sgx_attributes_reserved_mask;
+extern u64 sgx_xfrm_reserved_mask;
+extern u32 sgx_xsave_size_tbl[64];
+
+long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
+
+int sgx_drv_init(void);
+
+#endif /* __ARCH_SGX_DRIVER_H__ */
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
new file mode 100644
index 000000000000..af5df6bc58f3
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+// Copyright(c) 2016-18 Intel Corporation.
+
+#include <linux/lockdep.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/shmem_fs.h>
+#include <linux/suspend.h>
+#include <linux/sched/mm.h>
+#include "arch.h"
+#include "encl.h"
+#include "encls.h"
+#include "sgx.h"
+
+static struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
+						unsigned long addr)
+{
+	struct sgx_encl_page *entry;
+	unsigned int flags;
+
+	/* If process was forked, VMA is still there but vm_private_data is set
+	 * to NULL.
+	 */
+	if (!encl)
+		return ERR_PTR(-EFAULT);
+
+	flags = atomic_read(&encl->flags);
+
+	if ((flags & SGX_ENCL_DEAD) || !(flags & SGX_ENCL_INITIALIZED))
+		return ERR_PTR(-EFAULT);
+
+	entry = xa_load(&encl->page_array, PFN_DOWN(addr));
+	if (!entry)
+		return ERR_PTR(-EFAULT);
+
+	/* Page is already resident in the EPC. */
+	if (entry->epc_page)
+		return entry;
+
+	return ERR_PTR(-EFAULT);
+}
+
+static void sgx_mmu_notifier_release(struct mmu_notifier *mn,
+				     struct mm_struct *mm)
+{
+	struct sgx_encl_mm *encl_mm =
+		container_of(mn, struct sgx_encl_mm, mmu_notifier);
+	struct sgx_encl_mm *tmp = NULL;
+
+	/*
+	 * The enclave itself can remove encl_mm.  Note, objects can't be moved
+	 * off an RCU protected list, but deletion is ok.
+	 */
+	spin_lock(&encl_mm->encl->mm_lock);
+	list_for_each_entry(tmp, &encl_mm->encl->mm_list, list) {
+		if (tmp == encl_mm) {
+			list_del_rcu(&encl_mm->list);
+			break;
+		}
+	}
+	spin_unlock(&encl_mm->encl->mm_lock);
+
+	if (tmp == encl_mm) {
+		synchronize_srcu(&encl_mm->encl->srcu);
+		mmu_notifier_put(mn);
+	}
+}
+
+static void sgx_mmu_notifier_free(struct mmu_notifier *mn)
+{
+	struct sgx_encl_mm *encl_mm =
+		container_of(mn, struct sgx_encl_mm, mmu_notifier);
+
+	kfree(encl_mm);
+}
+
+static const struct mmu_notifier_ops sgx_mmu_notifier_ops = {
+	.release		= sgx_mmu_notifier_release,
+	.free_notifier		= sgx_mmu_notifier_free,
+};
+
+static struct sgx_encl_mm *sgx_encl_find_mm(struct sgx_encl *encl,
+					    struct mm_struct *mm)
+{
+	struct sgx_encl_mm *encl_mm = NULL;
+	struct sgx_encl_mm *tmp;
+	int idx;
+
+	idx = srcu_read_lock(&encl->srcu);
+
+	list_for_each_entry_rcu(tmp, &encl->mm_list, list) {
+		if (tmp->mm == mm) {
+			encl_mm = tmp;
+			break;
+		}
+	}
+
+	srcu_read_unlock(&encl->srcu, idx);
+
+	return encl_mm;
+}
+
+int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm)
+{
+	struct sgx_encl_mm *encl_mm;
+	int ret;
+
+	/* mm_list can be accessed only by a single thread at a time. */
+	mmap_assert_write_locked(mm);
+
+	if (atomic_read(&encl->flags) & SGX_ENCL_DEAD)
+		return -EINVAL;
+
+	/*
+	 * mm_structs are kept on mm_list until the mm or the enclave dies,
+	 * i.e. once an mm is off the list, it's gone for good, therefore it's
+	 * impossible to get a false positive on @mm due to a stale mm_list.
+	 */
+	if (sgx_encl_find_mm(encl, mm))
+		return 0;
+
+	encl_mm = kzalloc(sizeof(*encl_mm), GFP_KERNEL);
+	if (!encl_mm)
+		return -ENOMEM;
+
+	encl_mm->encl = encl;
+	encl_mm->mm = mm;
+	encl_mm->mmu_notifier.ops = &sgx_mmu_notifier_ops;
+
+	ret = __mmu_notifier_register(&encl_mm->mmu_notifier, mm);
+	if (ret) {
+		kfree(encl_mm);
+		return ret;
+	}
+
+	spin_lock(&encl->mm_lock);
+	list_add_rcu(&encl_mm->list, &encl->mm_list);
+	spin_unlock(&encl->mm_lock);
+
+	return 0;
+}
+
+static void sgx_vma_open(struct vm_area_struct *vma)
+{
+	struct sgx_encl *encl = vma->vm_private_data;
+
+	if (!encl)
+		return;
+
+	if (sgx_encl_mm_add(encl, vma->vm_mm))
+		vma->vm_private_data = NULL;
+}
+
+static vm_fault_t sgx_vma_fault(struct vm_fault *vmf)
+{
+	unsigned long addr = (unsigned long)vmf->address;
+	struct vm_area_struct *vma = vmf->vma;
+	struct sgx_encl *encl = vma->vm_private_data;
+	struct sgx_encl_page *entry;
+	vm_fault_t ret = VM_FAULT_NOPAGE;
+	unsigned long pfn;
+
+	if (!encl)
+		return VM_FAULT_SIGBUS;
+
+	mutex_lock(&encl->lock);
+
+	entry = sgx_encl_load_page(encl, addr);
+	if (IS_ERR(entry)) {
+		if (unlikely(PTR_ERR(entry) != -EBUSY))
+			ret = VM_FAULT_SIGBUS;
+
+		goto out;
+	}
+
+	if (!follow_pfn(vma, addr, &pfn))
+		goto out;
+
+	ret = vmf_insert_pfn(vma, addr, PFN_DOWN(entry->epc_page->desc));
+	if (ret != VM_FAULT_NOPAGE) {
+		ret = VM_FAULT_SIGBUS;
+		goto out;
+	}
+
+out:
+	mutex_unlock(&encl->lock);
+	return ret;
+}
+
+/**
+ * sgx_encl_may_map() - Check if a requested VMA mapping is allowed
+ * @encl:		an enclave
+ * @start:		lower bound of the address range, inclusive
+ * @end:		upper bound of the address range, exclusive
+ * @vm_flags:		requested VM flags (only R/W/X bits are used)
+ *
+ * Iterate through the enclave pages contained within [@start, @end) to verify
+ * the permissions requested by @vm_flags do not exceed that of any enclave
+ * page to be mapped.
+ *
+ * Return:
+ *   0 on success,
+ *   -EACCES if VMA permissions exceed enclave page permissions
+ */
+int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
+		     unsigned long end, unsigned long vm_flags)
+{
+	unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
+	unsigned long idx_start = PFN_DOWN(start);
+	unsigned long idx_end = PFN_DOWN(end - 1);
+	struct sgx_encl_page *page;
+	XA_STATE(xas, &encl->page_array, idx_start);
+
+	/*
+	 * Disallow RIE tasks as their VMA permissions might conflict with the
+	 * enclave page permissions.
+	 */
+	if (!!(current->personality & READ_IMPLIES_EXEC))
+		return -EACCES;
+
+	xas_for_each(&xas, page, idx_end)
+		if (!page || (~page->vm_max_prot_bits & vm_prot_bits))
+			return -EACCES;
+
+	return 0;
+}
+
+static int sgx_vma_mprotect(struct vm_area_struct *vma,
+			    struct vm_area_struct **pprev, unsigned long start,
+			    unsigned long end, unsigned long newflags)
+{
+	int ret;
+
+	ret = sgx_encl_may_map(vma->vm_private_data, start, end, newflags);
+	if (ret)
+		return ret;
+
+	return mprotect_fixup(vma, pprev, start, end, newflags);
+}
+
+const struct vm_operations_struct sgx_vm_ops = {
+	.open = sgx_vma_open,
+	.fault = sgx_vma_fault,
+	.mprotect = sgx_vma_mprotect,
+};
+
+/**
+ * sgx_encl_find - find an enclave
+ * @mm:		mm struct of the current process
+ * @addr:	address in the ELRANGE
+ * @vma:	the resulting VMA
+ *
+ * Find an enclave identified by the given address. Give back a VMA that is
+ * part of the enclave and located in that address. The VMA is given back if it
+ * is a proper enclave VMA even if an &sgx_encl instance does not exist yet
+ * (enclave creation has not been performed).
+ *
+ * Return:
+ *   0 on success,
+ *   -EINVAL if an enclave was not found,
+ *   -ENOENT if the enclave has not been created yet
+ */
+int sgx_encl_find(struct mm_struct *mm, unsigned long addr,
+		  struct vm_area_struct **vma)
+{
+	struct vm_area_struct *result;
+	struct sgx_encl *encl;
+
+	result = find_vma(mm, addr);
+	if (!result || result->vm_ops != &sgx_vm_ops || addr < result->vm_start)
+		return -EINVAL;
+
+	encl = result->vm_private_data;
+	*vma = result;
+
+	return encl ? 0 : -ENOENT;
+}
+
+/**
+ * sgx_encl_destroy() - destroy enclave resources
+ * @encl:	an &sgx_encl instance
+ */
+void sgx_encl_destroy(struct sgx_encl *encl)
+{
+	struct sgx_encl_page *entry;
+	unsigned long index;
+
+	atomic_or(SGX_ENCL_DEAD, &encl->flags);
+
+	xa_for_each(&encl->page_array, index, entry) {
+		if (entry->epc_page) {
+			sgx_free_epc_page(entry->epc_page);
+			encl->secs_child_cnt--;
+			entry->epc_page = NULL;
+		}
+
+		kfree(entry);
+	}
+
+	xa_destroy(&encl->page_array);
+
+	if (!encl->secs_child_cnt && encl->secs.epc_page) {
+		sgx_free_epc_page(encl->secs.epc_page);
+		encl->secs.epc_page = NULL;
+	}
+}
+
+/**
+ * sgx_encl_release - Destroy an enclave instance
+ * @ref:	address of a kref inside &sgx_encl
+ *
+ * Used together with kref_put(). Frees all the resources associated with the
+ * enclave and the instance itself.
+ */
+void sgx_encl_release(struct kref *ref)
+{
+	struct sgx_encl *encl = container_of(ref, struct sgx_encl, refcount);
+
+	sgx_encl_destroy(encl);
+
+	if (encl->backing)
+		fput(encl->backing);
+
+	cleanup_srcu_struct(&encl->srcu);
+
+	WARN_ON_ONCE(!list_empty(&encl->mm_list));
+
+	/* Detect EPC page leaks. */
+	WARN_ON_ONCE(encl->secs_child_cnt);
+	WARN_ON_ONCE(encl->secs.epc_page);
+
+	kfree(encl);
+}
diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
new file mode 100644
index 000000000000..74ad6c4da783
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/encl.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2016-19 Intel Corporation.
+ */
+#ifndef _X86_ENCL_H
+#define _X86_ENCL_H
+
+#include <linux/cpumask.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/mmu_notifier.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/srcu.h>
+#include <linux/workqueue.h>
+#include <linux/xarray.h>
+#include "sgx.h"
+
+/**
+ * enum sgx_encl_page_desc - defines bits for an enclave page's descriptor
+ * %SGX_ENCL_PAGE_ADDR_MASK:		Holds the virtual address of the page.
+ *
+ * The page address for SECS is zero and is used by the subsystem to recognize
+ * the SECS page.
+ */
+enum sgx_encl_page_desc {
+	/* Bits 11:3 are available when the page is not swapped. */
+	SGX_ENCL_PAGE_ADDR_MASK		= PAGE_MASK,
+};
+
+#define SGX_ENCL_PAGE_ADDR(page) \
+	((page)->desc & SGX_ENCL_PAGE_ADDR_MASK)
+
+struct sgx_encl_page {
+	unsigned long desc;
+	unsigned long vm_max_prot_bits;
+	struct sgx_epc_page *epc_page;
+	struct sgx_encl *encl;
+};
+
+enum sgx_encl_flags {
+	SGX_ENCL_CREATED	= BIT(0),
+	SGX_ENCL_INITIALIZED	= BIT(1),
+	SGX_ENCL_DEBUG		= BIT(2),
+	SGX_ENCL_DEAD		= BIT(3),
+	SGX_ENCL_IOCTL		= BIT(4),
+};
+
+struct sgx_encl_mm {
+	struct sgx_encl *encl;
+	struct mm_struct *mm;
+	struct list_head list;
+	struct mmu_notifier mmu_notifier;
+};
+
+struct sgx_encl {
+	atomic_t flags;
+	u64 secs_attributes;
+	u64 allowed_attributes;
+	unsigned int page_cnt;
+	unsigned int secs_child_cnt;
+	struct mutex lock;
+	struct list_head mm_list;
+	spinlock_t mm_lock;
+	struct file *backing;
+	struct kref refcount;
+	struct srcu_struct srcu;
+	unsigned long base;
+	unsigned long size;
+	unsigned long ssaframesize;
+	struct xarray page_array;
+	struct sgx_encl_page secs;
+	cpumask_t cpumask;
+};
+
+extern const struct vm_operations_struct sgx_vm_ops;
+
+int sgx_encl_find(struct mm_struct *mm, unsigned long addr,
+		  struct vm_area_struct **vma);
+void sgx_encl_destroy(struct sgx_encl *encl);
+void sgx_encl_release(struct kref *ref);
+int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm);
+int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
+		     unsigned long end, unsigned long vm_flags);
+
+#endif /* _X86_ENCL_H */
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 97c6895fb6c9..4137254fb29e 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -9,6 +9,8 @@
 #include <linux/sched/mm.h>
 #include <linux/sched/signal.h>
 #include <linux/slab.h>
+#include "driver.h"
+#include "encl.h"
 #include "encls.h"
 
 struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
@@ -260,6 +262,8 @@ static bool __init sgx_page_cache_init(void)
 
 static void __init sgx_init(void)
 {
+	int ret;
+
 	if (!boot_cpu_has(X86_FEATURE_SGX))
 		return;
 
@@ -269,8 +273,15 @@ static void __init sgx_init(void)
 	if (!sgx_page_reclaimer_init())
 		goto err_page_cache;
 
+	ret = sgx_drv_init();
+	if (ret)
+		goto err_kthread;
+
 	return;
 
+err_kthread:
+	kthread_stop(ksgxswapd_tsk);
+
 err_page_cache:
 	sgx_page_cache_teardown();
 }
-- 
2.25.1


^ permalink raw reply related

* [PATCH v36 15/24] x86/sgx: Allow a limited use of ATTRIBUTE.PROVISIONKEY for attestation
From: Jarkko Sakkinen @ 2020-07-16 13:52 UTC (permalink / raw)
  To: x86, linux-sgx
  Cc: linux-kernel, Jarkko Sakkinen, linux-security-module,
	Jethro Beekman, Andy Lutomirski, akpm, andriy.shevchenko, asapek,
	bp, cedric.xing, chenalexchen, conradparker, cyhanish,
	dave.hansen, haitao.huang, josh, kai.huang, kai.svahn, kmoy,
	ludloff, nhorman, npmccallum, puiterwijk, rientjes,
	sean.j.christopherson, tglx, yaozhangx
In-Reply-To: <20200716135303.276442-1-jarkko.sakkinen@linux.intel.com>

Provisioning Certification Enclave (PCE), the root of trust for other
enclaves, generates a signing key from a fused key called Provisioning
Certification Key. PCE can then use this key to certify an attestation key
of a Quoting Enclave (QE), e.g. we get the chain of trust down to the
hardware if the Intel signed PCE is used.

To use the needed keys, ATTRIBUTE.PROVISIONKEY is required but should be
only allowed for those who actually need it so that only the trusted
parties can certify QE's.

Obviously the attestation service should know the public key of the used
PCE and thereby detect illegitimate attestation, but whitelisting the
legitimate users still adds an additional layer of defence.

Add new device file called /dev/sgx/provision. The sole purpose of this
file is to provide file descriptors that act as privilege tokens to allow
to build enclaves with ATTRIBUTE.PROVISIONKEY set. A new ioctl called
SGX_IOC_ENCLAVE_SET_ATTRIBUTE is used to assign this token to an enclave.

Cc: linux-security-module@vger.kernel.org
Acked-by: Jethro Beekman <jethro@fortanix.com>
Suggested-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 arch/x86/include/uapi/asm/sgx.h  | 11 ++++++++
 arch/x86/kernel/cpu/sgx/driver.c | 18 ++++++++++++
 arch/x86/kernel/cpu/sgx/driver.h |  2 ++
 arch/x86/kernel/cpu/sgx/ioctl.c  | 47 ++++++++++++++++++++++++++++++++
 4 files changed, 78 insertions(+)

diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h
index 5edb08ab8fd0..57d0d30c79b3 100644
--- a/arch/x86/include/uapi/asm/sgx.h
+++ b/arch/x86/include/uapi/asm/sgx.h
@@ -25,6 +25,8 @@ enum sgx_page_flags {
 	_IOWR(SGX_MAGIC, 0x01, struct sgx_enclave_add_pages)
 #define SGX_IOC_ENCLAVE_INIT \
 	_IOW(SGX_MAGIC, 0x02, struct sgx_enclave_init)
+#define SGX_IOC_ENCLAVE_SET_ATTRIBUTE \
+	_IOW(SGX_MAGIC, 0x03, struct sgx_enclave_set_attribute)
 
 /**
  * struct sgx_enclave_create - parameter structure for the
@@ -63,4 +65,13 @@ struct sgx_enclave_init {
 	__u64 sigstruct;
 };
 
+/**
+ * struct sgx_enclave_set_attribute - parameter structure for the
+ *				      %SGX_IOC_ENCLAVE_SET_ATTRIBUTE ioctl
+ * @attribute_fd:	file handle of the attribute file (/dev/sgx/provision)
+ */
+struct sgx_enclave_set_attribute {
+	__u64 attribute_fd;
+};
+
 #endif /* _UAPI_ASM_X86_SGX_H */
diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
index 5559bc18de41..b9af330a16fa 100644
--- a/arch/x86/kernel/cpu/sgx/driver.c
+++ b/arch/x86/kernel/cpu/sgx/driver.c
@@ -138,6 +138,10 @@ static const struct file_operations sgx_encl_fops = {
 	.get_unmapped_area	= sgx_get_unmapped_area,
 };
 
+const struct file_operations sgx_provision_fops = {
+	.owner			= THIS_MODULE,
+};
+
 static struct miscdevice sgx_dev_enclave = {
 	.minor = MISC_DYNAMIC_MINOR,
 	.name = "enclave",
@@ -145,6 +149,13 @@ static struct miscdevice sgx_dev_enclave = {
 	.fops = &sgx_encl_fops,
 };
 
+static struct miscdevice sgx_dev_provision = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "provision",
+	.nodename = "sgx/provision",
+	.fops = &sgx_provision_fops,
+};
+
 int __init sgx_drv_init(void)
 {
 	unsigned int eax, ebx, ecx, edx;
@@ -185,5 +196,12 @@ int __init sgx_drv_init(void)
 		return ret;
 	}
 
+	ret = misc_register(&sgx_dev_provision);
+	if (ret) {
+		pr_err("Creating /dev/sgx/provision failed with %d.\n", ret);
+		misc_deregister(&sgx_dev_enclave);
+		return ret;
+	}
+
 	return 0;
 }
diff --git a/arch/x86/kernel/cpu/sgx/driver.h b/arch/x86/kernel/cpu/sgx/driver.h
index e4063923115b..72747d01c046 100644
--- a/arch/x86/kernel/cpu/sgx/driver.h
+++ b/arch/x86/kernel/cpu/sgx/driver.h
@@ -23,6 +23,8 @@ extern u64 sgx_attributes_reserved_mask;
 extern u64 sgx_xfrm_reserved_mask;
 extern u32 sgx_xsave_size_tbl[64];
 
+extern const struct file_operations sgx_provision_fops;
+
 long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 
 int sgx_drv_init(void);
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 3444de955191..95b0a1e62ea7 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -669,6 +669,50 @@ static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg)
 	return ret;
 }
 
+/**
+ * sgx_ioc_enclave_set_attribute - handler for %SGX_IOC_ENCLAVE_SET_ATTRIBUTE
+ * @encl:	enclave to grant the attribute to
+ * @arg:	userspace pointer to a struct sgx_enclave_set_attribute instance
+ *
+ * Mark the enclave as being allowed to access a restricted attribute bit.
+ * The requested attribute is specified via the attribute_fd field in the
+ * provided struct sgx_enclave_set_attribute.  The attribute_fd must be a
+ * handle to an SGX attribute file, e.g. "/dev/sgx/provision".
+ *
+ * Failure to explicitly request access to a restricted attribute will cause
+ * sgx_ioc_enclave_init() to fail.  Currently, the only restricted attribute
+ * is access to the PROVISION_KEY.
+ *
+ * Note, access to the EINITTOKEN_KEY is disallowed entirely.
+ *
+ * Return: 0 on success, -errno otherwise
+ */
+static long sgx_ioc_enclave_set_attribute(struct sgx_encl *encl,
+					  void __user *arg)
+{
+	struct sgx_enclave_set_attribute params;
+	struct file *attribute_file;
+	int ret;
+
+	if (copy_from_user(&params, arg, sizeof(params)))
+		return -EFAULT;
+
+	attribute_file = fget(params.attribute_fd);
+	if (!attribute_file)
+		return -EINVAL;
+
+	if (attribute_file->f_op != &sgx_provision_fops) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	encl->allowed_attributes |= SGX_ATTR_PROVISIONKEY;
+	ret = 0;
+
+out:
+	fput(attribute_file);
+	return ret;
+}
 
 long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
 {
@@ -694,6 +738,9 @@ long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
 	case SGX_IOC_ENCLAVE_INIT:
 		ret = sgx_ioc_enclave_init(encl, (void __user *)arg);
 		break;
+	case SGX_IOC_ENCLAVE_SET_ATTRIBUTE:
+		ret = sgx_ioc_enclave_set_attribute(encl, (void __user *)arg);
+		break;
 	default:
 		ret = -ENOIOCTLCMD;
 		break;
-- 
2.25.1


^ permalink raw reply related

* [PATCH 16/16] capsh.c: Spelling fixes in usage() message
From: Michael Kerrisk (man-pages) @ 2020-07-16 10:18 UTC (permalink / raw)
  To: mtk.manpages, Andrew G . Morgan; +Cc: linux-security-module
In-Reply-To: <20200716101827.162793-1-mtk.manpages@gmail.com>

Signed-off-by: Michael Kerrisk (man-pages) <mtk.manpages@gmail.com>
---
 progs/capsh.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/progs/capsh.c b/progs/capsh.c
index 94bf57d..7bed98e 100644
--- a/progs/capsh.c
+++ b/progs/capsh.c
@@ -879,10 +879,10 @@ int main(int argc, char *argv[], char *envp[])
 		   "  --delamb=xxx   remove xxx,... capabilities from ambient\n"
 		   "  --noamb        reset (drop) all ambient capabilities\n"
 		   "  --caps=xxx     set caps as per cap_from_text()\n"
-		   "  --inh=xxx      set xxx,.. inheritiable set\n"
+		   "  --inh=xxx      set xxx,.. inheritable set\n"
 		   "  --secbits=<n>  write a new value for securebits\n"
 		   "  --iab=...      use cap_iab_from_text() to set iab\n"
-		   "  --keep=<n>     set keep-capabability bit to <n>\n"
+		   "  --keep=<n>     set keep-capability bit to <n>\n"
 		   "  --uid=<n>      set uid to <n> (hint: id <username>)\n"
 		   "  --cap-uid=<n>  libcap cap_setuid() to change uid\n"
 		   "  --is-uid=<n>   exit 1 if uid != <n>\n"
-- 
2.26.2


^ permalink raw reply related

* [PATCH 15/16] Various source files: spelling fixes on comments
From: Michael Kerrisk (man-pages) @ 2020-07-16 10:18 UTC (permalink / raw)
  To: mtk.manpages, Andrew G . Morgan; +Cc: linux-security-module
In-Reply-To: <20200716101827.162793-1-mtk.manpages@gmail.com>

Signed-off-by: Michael Kerrisk (man-pages) <mtk.manpages@gmail.com>
---
 libcap/cap_proc.c                      | 6 +++---
 libcap/include/uapi/linux/capability.h | 2 +-
 libcap/include/uapi/linux/securebits.h | 2 +-
 psx/include/sys/psx_syscall.h          | 2 +-
 psx/psx.c                              | 6 +++---
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/libcap/cap_proc.c b/libcap/cap_proc.c
index 1c97beb..3929f66 100644
--- a/libcap/cap_proc.c
+++ b/libcap/cap_proc.c
@@ -75,7 +75,7 @@ static struct syscaller_s singlethread = {
 static int _libcap_overrode_syscalls = 1;
 
 /*
- * psx_load_syscalls() is weakly defined so we can have it overriden
+ * psx_load_syscalls() is weakly defined so we can have it overridden
  * by libpsx if that library is linked. Specifically, when libcap
  * calls psx_load_sycalls() it is prepared to override the default
  * values for the syscalls that libcap uses to change security state.
@@ -238,7 +238,7 @@ cap_t cap_get_pid(pid_t pid)
 
 /*
  * set the caps on a specific process/pg etc.. The kernel has long
- * since deprecated this asynchronus interface. DON'T EXPECT THIS TO
+ * since deprecated this asynchronous interface. DON'T EXPECT THIS TO
  * EVER WORK AGAIN.
  */
 
@@ -468,7 +468,7 @@ static int _cap_set_mode(struct syscaller_s *sc, cap_mode_t flavor)
  * set, these modes cannot be undone by the affected process tree and
  * can only be done by "cap_setpcap" permitted processes. Note, a side
  * effect of this function, whether it succeeds or fails, is to clear
- * atleast the CAP_EFFECTIVE flags for the current process.
+ * at least the CAP_EFFECTIVE flags for the current process.
  */
 int cap_set_mode(cap_mode_t flavor)
 {
diff --git a/libcap/include/uapi/linux/capability.h b/libcap/include/uapi/linux/capability.h
index 3281e73..b9ae18b 100644
--- a/libcap/include/uapi/linux/capability.h
+++ b/libcap/include/uapi/linux/capability.h
@@ -368,7 +368,7 @@ struct vfs_ns_cap_data {
 
 #define CAP_AUDIT_READ       37
 
-/* Alow system performace and observability privileged opeations using
+/* Allow system performance and observability privileged operations using
  * perf_events, i915_perf and other kernel subsystems. */
 
 #define CAP_PERFMON	     38
diff --git a/libcap/include/uapi/linux/securebits.h b/libcap/include/uapi/linux/securebits.h
index 6dae4e9..e9b1309 100644
--- a/libcap/include/uapi/linux/securebits.h
+++ b/libcap/include/uapi/linux/securebits.h
@@ -22,7 +22,7 @@
 #define SECBIT_NOROOT_LOCKED	(issecure_mask(SECURE_NOROOT_LOCKED))
 
 /* When set, setuid to/from uid 0 does not trigger capability-"fixup".
-   When unset, to provide compatiblility with old programs relying on
+   When unset, to provide compatibility with old programs relying on
    set*uid to gain/lose privilege, transitions to/from uid 0 cause
    capabilities to be gained/lost. */
 #define SECURE_NO_SETUID_FIXUP		2
diff --git a/psx/include/sys/psx_syscall.h b/psx/include/sys/psx_syscall.h
index c089a88..8044fbd 100644
--- a/psx/include/sys/psx_syscall.h
+++ b/psx/include/sys/psx_syscall.h
@@ -35,7 +35,7 @@ int __real_pthread_create(pthread_t *thread, const pthread_attr_t *attr,
 
 /*
  * psx_syscall performs the specified syscall on all psx registered
- * threads. The mecanism by which this occurs is much less efficient
+ * threads. The mechanism by which this occurs is much less efficient
  * than a standard system call on Linux, so it should only be used
  * when POSIX semantics are required to change process relevant
  * security state.
diff --git a/psx/psx.c b/psx/psx.c
index cabd342..b494072 100644
--- a/psx/psx.c
+++ b/psx/psx.c
@@ -29,7 +29,7 @@
 #include <sys/syscall.h>
 
 /*
- * psx_load_syscalls() is weakly defined so we can have it overriden
+ * psx_load_syscalls() is weakly defined so we can have it overridden
  * by libpsx if it is linked. Specifically, when libcap calls
  * psx_load_sycalls it will override their defaut values. As can be
  * seen here this present function is a no-op. However, if libpsx is
@@ -272,7 +272,7 @@ static void _psx_forked_child(void) {
      * The only way we can get here is if state is _PSX_INFORK and was
      * previously _PSX_IDLE. However, none of the registered threads
      * exist in this newly minted child process, so we have to reset
-     * the tracking structure to avoid any confusion. We also skuttle
+     * the tracking structure to avoid any confusion. We also scuttle
      * any chance of the PSX API working on more than one thread in
      * the child by leaving the state as _PSX_INFORK. We do support
      * all psx_syscall()s by reverting to them being direct in the
@@ -343,7 +343,7 @@ static void _psx_exiting(void *node) {
 }
 
 /*
- * _psx_start_fn is a trampolene for the intended start function, it
+ * _psx_start_fn is a trampoline for the intended start function, it
  * is called blocked (_PSX_CREATE), but releases the block before
  * calling starter->fn. Before releasing the block, the TLS specific
  * attributes are initialized for use by the interrupt handler under
-- 
2.26.2


^ permalink raw reply related

* [PATCH 14/16] pam_cap.c: Spelling fix in output string
From: Michael Kerrisk (man-pages) @ 2020-07-16 10:18 UTC (permalink / raw)
  To: mtk.manpages, Andrew G . Morgan; +Cc: linux-security-module
In-Reply-To: <20200716101827.162793-1-mtk.manpages@gmail.com>

Signed-off-by: Michael Kerrisk (man-pages) <mtk.manpages@gmail.com>
---
 pam_cap/pam_cap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pam_cap/pam_cap.c b/pam_cap/pam_cap.c
index 3030692..6927f7b 100644
--- a/pam_cap/pam_cap.c
+++ b/pam_cap/pam_cap.c
@@ -321,7 +321,7 @@ int pam_sm_authenticate(pam_handle_t *pamh, int flags,

     } else {

-	D(("there are no capabilities restrctions on this user"));
+	D(("there are no capabilities restrictions on this user"));
 	return PAM_IGNORE;

     }
-- 
2.26.2

^ permalink raw reply related

* [PATCH 13/16] Manual pages: capsh.1: Describe the argument of --secbits in more detail
From: Michael Kerrisk (man-pages) @ 2020-07-16 10:18 UTC (permalink / raw)
  To: mtk.manpages, Andrew G . Morgan; +Cc: linux-security-module
In-Reply-To: <20200716101827.162793-1-mtk.manpages@gmail.com>

The page currently gives the reader no clue about the format of the
argument.

Signed-off-by: Michael Kerrisk (man-pages) <mtk.manpages@gmail.com>
---
 doc/capsh.1 | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/capsh.1 b/doc/capsh.1
index fd6a5ba..0b987f0 100644
--- a/doc/capsh.1
+++ b/doc/capsh.1
@@ -164,6 +164,9 @@ the
 header file. The program will list these bits via the
 .B \-\-print
 command.
+The argument is expressed as a numeric bitmask,
+in any of the formats permitted by
+.BR strtoul (3).
 .TP
 .BI \-\-chroot= path
 Execute the
-- 
2.26.2

^ permalink raw reply related

* [PATCH 12/16] Manual pages: capsh.1: Update the capsh --decode example
From: Michael Kerrisk (man-pages) @ 2020-07-16 10:18 UTC (permalink / raw)
  To: mtk.manpages, Andrew G . Morgan; +Cc: linux-security-module
In-Reply-To: <20200716101827.162793-1-mtk.manpages@gmail.com>

With the update to the output of /proc/1/status in the previous patch,
the --decode example no longer matches that output. So, add a different example.

Signed-off-by: Michael Kerrisk (man-pages) <mtk.manpages@gmail.com>
---
 doc/capsh.1 | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/doc/capsh.1 b/doc/capsh.1
index 916353a..fd6a5ba 100644
--- a/doc/capsh.1
+++ b/doc/capsh.1
@@ -206,15 +206,13 @@ CapAmb:	0000000000000000

 .fi
 This option provides a quick way to decode a capability vector
-represented in this form. For example, the missing capability from
-this effective set is 0x0100. By running:
+represented in this hexadecimal form.
+Here's an example that decodes the two lowest capability bits:
+.IP
 .nf
-
- capsh \-\-decode=0x0100
-
+$ \fBcapsh \-\-decode=3\fP
+0x0000000000000003=cap_chown,cap_dac_override
 .fi
-we observe that the missing capability is:
-.BR cap_setpcap .
 .TP
 .BI \-\-supports= xxx
 As the kernel evolves, more capabilities are added. This option can be used
-- 
2.26.2

^ permalink raw reply related

* [PATCH 11/16] Manual pages: capsh.1: Correct/update the Cap field example for /proc/PID/status
From: Michael Kerrisk (man-pages) @ 2020-07-16 10:18 UTC (permalink / raw)
  To: mtk.manpages, Andrew G . Morgan; +Cc: linux-security-module
In-Reply-To: <20200716101827.162793-1-mtk.manpages@gmail.com>

The /proc/1/status output shown for --decode=N is bogus
(e.g., ffffffffffffffff == 64 capability bits). Furthermore,
showing CAP_SETPCAP as missing from CapEff is historical,
and ceased to be accurate more than 10 years ago.

Replace with a more current example, and also add the CapAmb field.

This change renders some of the following text obsolete;
that will be fixed in the next patch.

Signed-off-by: Michael Kerrisk (man-pages) <mtk.manpages@gmail.com>
---
 doc/capsh.1 | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/doc/capsh.1 b/doc/capsh.1
index 522e719..916353a 100644
--- a/doc/capsh.1
+++ b/doc/capsh.1
@@ -198,10 +198,11 @@ This is a convenience feature. If you look at
 there are some capability related fields of the following form:
 .nf

- CapInh:	0000000000000000
- CapPrm:	ffffffffffffffff
- CapEff:	fffffffffffffeff
- CapBnd:	ffffffffffffffff
+CapInh:	0000000000000000
+CapPrm:	0000003fffffffff
+CapEff:	0000003fffffffff
+CapBnd:	0000003fffffffff
+CapAmb:	0000000000000000

 .fi
 This option provides a quick way to decode a capability vector
-- 
2.26.2

^ permalink raw reply related

* [PATCH 10/16] Manual pages: capsh.1: Remove stray .TP macro
From: Michael Kerrisk (man-pages) @ 2020-07-16 10:18 UTC (permalink / raw)
  To: mtk.manpages, Andrew G . Morgan; +Cc: linux-security-module
In-Reply-To: <20200716101827.162793-1-mtk.manpages@gmail.com>

Signed-off-by: Michael Kerrisk (man-pages) <mtk.manpages@gmail.com>
---
 doc/capsh.1 | 2 --
 1 file changed, 2 deletions(-)

diff --git a/doc/capsh.1 b/doc/capsh.1
index 24e80b7..522e719 100644
--- a/doc/capsh.1
+++ b/doc/capsh.1
@@ -249,8 +249,6 @@ Removes the specified ambient capability from the running process.
 .TP
 .B \-\-noamb
 Drops all ambient capabilities from the running process.
-.TP
-
 .SH "EXIT STATUS"
 Following successful execution the tool exits with status 0. Following
 an error, the tool immediately exits with status 1.
-- 
2.26.2

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox