Linux Security Modules development

Linux Security Modules development
 help / color / mirror / Atom feed

* Re: [PATCH v19 02/23] LSM: Create and manage the lsmblob data structure.
From: Stephen Smalley @ 2020-07-27 16:12 UTC (permalink / raw)
  To: Casey Schaufler
  Cc: Casey Schaufler, James Morris, LSM List, SElinux list,
	John Johansen, linux-audit, Stephen Smalley
In-Reply-To: <20200724203226.16374-3-casey@schaufler-ca.com>

On Fri, Jul 24, 2020 at 4:35 PM Casey Schaufler <casey@schaufler-ca.com> wrote:
>
> When more than one security module is exporting data to
> audit and networking sub-systems a single 32 bit integer
> is no longer sufficient to represent the data. Add a
> structure to be used instead.
>
> The lsmblob structure is currently an array of
> u32 "secids". There is an entry for each of the
> security modules built into the system that would
> use secids if active. The system assigns the module
> a "slot" when it registers hooks. If modules are
> compiled in but not registered there will be unused
> slots.
>
> A new lsm_id structure, which contains the name
> of the LSM and its slot number, is created. There
> is an instance for each LSM, which assigns the name
> and passes it to the infrastructure to set the slot.
>
> The audit rules data is expanded to use an array of
> security module data rather than a single instance.
> Because IMA uses the audit rule functions it is
> affected as well.
>
> Acked-by: Stephen Smalley <sds@tycho.nsa.gov>
> Acked-by: Paul Moore <paul@paul-moore.com>
> Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>

With CONFIG_BPF_LSM=y:

security/bpf/hooks.c: In function ‘bpf_lsm_init’:
security/bpf/hooks.c:18:63: error: passing argument 3 of
‘security_add_hooks’ from incompatible pointer type
[-Werror=incompatible-pointer-types]
   18 |  security_add_hooks(bpf_lsm_hooks, ARRAY_SIZE(bpf_lsm_hooks), "bpf");
      |                                                               ^~~~~
      |                                                               |
      |                                                               char *
In file included from security/bpf/hooks.c:6:
./include/linux/lsm_hooks.h:1592:26: note: expected ‘struct lsm_id *’
but argument is of type ‘char *’
 1592 |           struct lsm_id *lsmid);
      |           ~~~~~~~~~~~~~~~^~~~~

^ permalink raw reply

* [PATCH v2 2/2] LSM: SafeSetID: Add GID security policy handling
From: Micah Morton @ 2020-07-27 15:20 UTC (permalink / raw)
  To: linux-security-module
  Cc: keescook, casey, paul, stephen.smalley.work, serge, jmorris,
	Thomas Cedeno, Micah Morton
In-Reply-To: <20200721170533.GA6653@mail.hallyn.com>

From: Thomas Cedeno <thomascedeno@google.com>

The SafeSetID LSM has functionality for restricting setuid() calls based
on its configured security policies. This patch adds the analogous
functionality for setgid() calls. This is mostly a copy-and-paste change
with some code deduplication, plus slight modifications/name changes to
the policy-rule-related structs (now contain GID rules in addition to
the UID ones) and some type generalization since SafeSetID now needs to
deal with kgid_t and kuid_t types.

Signed-off-by: Thomas Cedeno <thomascedeno@google.com>
Signed-off-by: Micah Morton <mortonm@chromium.org>
Reviewed-by: Serge Hallyn <serge@hallyn.com>
---
NOTE: this v2 patch features very minor tweaks to combine two if/else
statements into one and eliminate a redundant sanity check.
 security/safesetid/lsm.c        | 178 ++++++++++++++++++++++--------
 security/safesetid/lsm.h        |  38 +++++--
 security/safesetid/securityfs.c | 190 +++++++++++++++++++++++---------
 3 files changed, 300 insertions(+), 106 deletions(-)

diff --git a/security/safesetid/lsm.c b/security/safesetid/lsm.c
index 7760019ad35d..787a98e82f1e 100644
--- a/security/safesetid/lsm.c
+++ b/security/safesetid/lsm.c
@@ -24,20 +24,36 @@
 /* Flag indicating whether initialization completed */
 int safesetid_initialized;
 
-struct setuid_ruleset __rcu *safesetid_setuid_rules;
+struct setid_ruleset __rcu *safesetid_setuid_rules;
+struct setid_ruleset __rcu *safesetid_setgid_rules;
+
 
 /* Compute a decision for a transition from @src to @dst under @policy. */
-enum sid_policy_type _setuid_policy_lookup(struct setuid_ruleset *policy,
-		kuid_t src, kuid_t dst)
+enum sid_policy_type _setid_policy_lookup(struct setid_ruleset *policy,
+		kid_t src, kid_t dst)
 {
-	struct setuid_rule *rule;
+	struct setid_rule *rule;
 	enum sid_policy_type result = SIDPOL_DEFAULT;
 
-	hash_for_each_possible(policy->rules, rule, next, __kuid_val(src)) {
-		if (!uid_eq(rule->src_uid, src))
-			continue;
-		if (uid_eq(rule->dst_uid, dst))
-			return SIDPOL_ALLOWED;
+	if (policy->type == UID) {
+		hash_for_each_possible(policy->rules, rule, next, __kuid_val(src.uid)) {
+			if (!uid_eq(rule->src_id.uid, src.uid))
+				continue;
+			if (uid_eq(rule->dst_id.uid, dst.uid))
+				return SIDPOL_ALLOWED;
+			result = SIDPOL_CONSTRAINED;
+		}
+	} else if (policy->type == GID) {
+		hash_for_each_possible(policy->rules, rule, next, __kgid_val(src.gid)) {
+			if (!gid_eq(rule->src_id.gid, src.gid))
+				continue;
+			if (gid_eq(rule->dst_id.gid, dst.gid)){
+				return SIDPOL_ALLOWED;
+			}
+			result = SIDPOL_CONSTRAINED;
+		}
+	} else {
+		/* Should not reach here, report the ID as contrainsted */
 		result = SIDPOL_CONSTRAINED;
 	}
 	return result;
@@ -47,15 +63,26 @@ enum sid_policy_type _setuid_policy_lookup(struct setuid_ruleset *policy,
  * Compute a decision for a transition from @src to @dst under the active
  * policy.
  */
-static enum sid_policy_type setuid_policy_lookup(kuid_t src, kuid_t dst)
+static enum sid_policy_type setid_policy_lookup(kid_t src, kid_t dst, enum setid_type new_type)
 {
 	enum sid_policy_type result = SIDPOL_DEFAULT;
-	struct setuid_ruleset *pol;
+	struct setid_ruleset *pol;
 
 	rcu_read_lock();
-	pol = rcu_dereference(safesetid_setuid_rules);
-	if (pol)
-		result = _setuid_policy_lookup(pol, src, dst);
+	if (new_type == UID)
+		pol = rcu_dereference(safesetid_setuid_rules);
+	else if (new_type == GID)
+		pol = rcu_dereference(safesetid_setgid_rules);
+	else { /* Should not reach here */
+		result = SIDPOL_CONSTRAINED;
+		rcu_read_unlock();
+		return result;
+	}
+
+	if (pol) {
+		pol->type = new_type;
+		result = _setid_policy_lookup(pol, src, dst);
+	}
 	rcu_read_unlock();
 	return result;
 }
@@ -65,8 +92,8 @@ static int safesetid_security_capable(const struct cred *cred,
 				      int cap,
 				      unsigned int opts)
 {
-	/* We're only interested in CAP_SETUID. */
-	if (cap != CAP_SETUID)
+	/* We're only interested in CAP_SETUID and CAP_SETGID. */
+	if (cap != CAP_SETUID && cap != CAP_SETGID)
 		return 0;
 
 	/*
@@ -77,45 +104,83 @@ static int safesetid_security_capable(const struct cred *cred,
 	if ((opts & CAP_OPT_INSETID) != 0)
 		return 0;
 
-	/*
-	 * If no policy applies to this task, allow the use of CAP_SETUID for
-	 * other purposes.
-	 */
-	if (setuid_policy_lookup(cred->uid, INVALID_UID) == SIDPOL_DEFAULT)
+	switch (cap) {
+	case CAP_SETUID:
+		/*
+		* If no policy applies to this task, allow the use of CAP_SETUID for
+		* other purposes.
+		*/
+		if (setid_policy_lookup((kid_t)cred->uid, INVALID_ID, UID) == SIDPOL_DEFAULT)
+			return 0;
+		/*
+		 * Reject use of CAP_SETUID for functionality other than calling
+		 * set*uid() (e.g. setting up userns uid mappings).
+		 */
+		pr_warn("Operation requires CAP_SETUID, which is not available to UID %u for operations besides approved set*uid transitions\n",
+			__kuid_val(cred->uid));
+		return -EPERM;
+		break;
+	case CAP_SETGID:
+		/*
+		* If no policy applies to this task, allow the use of CAP_SETGID for
+		* other purposes.
+		*/
+		if (setid_policy_lookup((kid_t)cred->gid, INVALID_ID, GID) == SIDPOL_DEFAULT)
+			return 0;
+		/*
+		 * Reject use of CAP_SETUID for functionality other than calling
+		 * set*gid() (e.g. setting up userns gid mappings).
+		 */
+		pr_warn("Operation requires CAP_SETGID, which is not available to GID %u for operations besides approved set*gid transitions\n",
+			__kuid_val(cred->uid));
+		return -EPERM;
+		break;
+	default:
+		/* Error, the only capabilities were checking for is CAP_SETUID/GID */
 		return 0;
-
-	/*
-	 * Reject use of CAP_SETUID for functionality other than calling
-	 * set*uid() (e.g. setting up userns uid mappings).
-	 */
-	pr_warn("Operation requires CAP_SETUID, which is not available to UID %u for operations besides approved set*uid transitions\n",
-		__kuid_val(cred->uid));
-	return -EPERM;
+		break;
+	}
+	return 0;
 }
 
 /*
  * Check whether a caller with old credentials @old is allowed to switch to
- * credentials that contain @new_uid.
+ * credentials that contain @new_id.
  */
-static bool uid_permitted_for_cred(const struct cred *old, kuid_t new_uid)
+static bool id_permitted_for_cred(const struct cred *old, kid_t new_id, enum setid_type new_type)
 {
 	bool permitted;
 
-	/* If our old creds already had this UID in it, it's fine. */
-	if (uid_eq(new_uid, old->uid) || uid_eq(new_uid, old->euid) ||
-	    uid_eq(new_uid, old->suid))
-		return true;
+	/* If our old creds already had this ID in it, it's fine. */
+	if (new_type == UID) {
+		if (uid_eq(new_id.uid, old->uid) || uid_eq(new_id.uid, old->euid) ||
+			uid_eq(new_id.uid, old->suid))
+			return true;
+	} else if (new_type == GID){
+		if (gid_eq(new_id.gid, old->gid) || gid_eq(new_id.gid, old->egid) ||
+			gid_eq(new_id.gid, old->sgid))
+			return true;
+	} else /* Error, new_type is an invalid type */
+		return false;
 
 	/*
 	 * Transitions to new UIDs require a check against the policy of the old
 	 * RUID.
 	 */
 	permitted =
-	    setuid_policy_lookup(old->uid, new_uid) != SIDPOL_CONSTRAINED;
+	    setid_policy_lookup((kid_t)old->uid, new_id, new_type) != SIDPOL_CONSTRAINED;
+
 	if (!permitted) {
-		pr_warn("UID transition ((%d,%d,%d) -> %d) blocked\n",
-			__kuid_val(old->uid), __kuid_val(old->euid),
-			__kuid_val(old->suid), __kuid_val(new_uid));
+		if (new_type == UID) {
+			pr_warn("UID transition ((%d,%d,%d) -> %d) blocked\n",
+				__kuid_val(old->uid), __kuid_val(old->euid),
+				__kuid_val(old->suid), __kuid_val(new_id.uid));
+		} else if (new_type == GID) {
+			pr_warn("GID transition ((%d,%d,%d) -> %d) blocked\n",
+				__kgid_val(old->gid), __kgid_val(old->egid),
+				__kgid_val(old->sgid), __kgid_val(new_id.gid));
+		} else /* Error, new_type is an invalid type */
+			return false;
 	}
 	return permitted;
 }
@@ -131,13 +196,37 @@ static int safesetid_task_fix_setuid(struct cred *new,
 {
 
 	/* Do nothing if there are no setuid restrictions for our old RUID. */
-	if (setuid_policy_lookup(old->uid, INVALID_UID) == SIDPOL_DEFAULT)
+	if (setid_policy_lookup((kid_t)old->uid, INVALID_ID, UID) == SIDPOL_DEFAULT)
+		return 0;
+
+	if (id_permitted_for_cred(old, (kid_t)new->uid, UID) &&
+	    id_permitted_for_cred(old, (kid_t)new->euid, UID) &&
+	    id_permitted_for_cred(old, (kid_t)new->suid, UID) &&
+	    id_permitted_for_cred(old, (kid_t)new->fsuid, UID))
+		return 0;
+
+	/*
+	 * Kill this process to avoid potential security vulnerabilities
+	 * that could arise from a missing whitelist entry preventing a
+	 * privileged process from dropping to a lesser-privileged one.
+	 */
+	force_sig(SIGKILL);
+	return -EACCES;
+}
+
+static int safesetid_task_fix_setgid(struct cred *new,
+				     const struct cred *old,
+				     int flags)
+{
+
+	/* Do nothing if there are no setgid restrictions for our old RGID. */
+	if (setid_policy_lookup((kid_t)old->gid, INVALID_ID, GID) == SIDPOL_DEFAULT)
 		return 0;
 
-	if (uid_permitted_for_cred(old, new->uid) &&
-	    uid_permitted_for_cred(old, new->euid) &&
-	    uid_permitted_for_cred(old, new->suid) &&
-	    uid_permitted_for_cred(old, new->fsuid))
+	if (id_permitted_for_cred(old, (kid_t)new->gid, GID) &&
+	    id_permitted_for_cred(old, (kid_t)new->egid, GID) &&
+	    id_permitted_for_cred(old, (kid_t)new->sgid, GID) &&
+	    id_permitted_for_cred(old, (kid_t)new->fsgid, GID))
 		return 0;
 
 	/*
@@ -151,6 +240,7 @@ static int safesetid_task_fix_setuid(struct cred *new,
 
 static struct security_hook_list safesetid_security_hooks[] = {
 	LSM_HOOK_INIT(task_fix_setuid, safesetid_task_fix_setuid),
+	LSM_HOOK_INIT(task_fix_setgid, safesetid_task_fix_setgid),
 	LSM_HOOK_INIT(capable, safesetid_security_capable)
 };
 
diff --git a/security/safesetid/lsm.h b/security/safesetid/lsm.h
index db6d16e6bbc3..bde8c43a3767 100644
--- a/security/safesetid/lsm.h
+++ b/security/safesetid/lsm.h
@@ -27,27 +27,47 @@ enum sid_policy_type {
 	SIDPOL_ALLOWED /* target ID explicitly allowed */
 };
 
+typedef union {
+	kuid_t uid;
+	kgid_t gid;
+} kid_t;
+
+enum setid_type {
+	UID,
+	GID
+};
+
 /*
- * Hash table entry to store safesetid policy signifying that 'src_uid'
- * can setuid to 'dst_uid'.
+ * Hash table entry to store safesetid policy signifying that 'src_id'
+ * can set*id to 'dst_id'.
  */
-struct setuid_rule {
+struct setid_rule {
 	struct hlist_node next;
-	kuid_t src_uid;
-	kuid_t dst_uid;
+	kid_t src_id;
+	kid_t dst_id;
+
+	/* Flag to signal if rule is for UID's or GID's */
+	enum setid_type type;
 };
 
 #define SETID_HASH_BITS 8 /* 256 buckets in hash table */
 
-struct setuid_ruleset {
+/* Extension of INVALID_UID/INVALID_GID for kid_t type */
+#define INVALID_ID (kid_t){.uid = INVALID_UID}
+
+struct setid_ruleset {
 	DECLARE_HASHTABLE(rules, SETID_HASH_BITS);
 	char *policy_str;
 	struct rcu_head rcu;
+
+	//Flag to signal if ruleset is for UID's or GID's
+	enum setid_type type;
 };
 
-enum sid_policy_type _setuid_policy_lookup(struct setuid_ruleset *policy,
-		kuid_t src, kuid_t dst);
+enum sid_policy_type _setid_policy_lookup(struct setid_ruleset *policy,
+		kid_t src, kid_t dst);
 
-extern struct setuid_ruleset __rcu *safesetid_setuid_rules;
+extern struct setid_ruleset __rcu *safesetid_setuid_rules;
+extern struct setid_ruleset __rcu *safesetid_setgid_rules;
 
 #endif /* _SAFESETID_H */
diff --git a/security/safesetid/securityfs.c b/security/safesetid/securityfs.c
index f8bc574cea9c..d8e6f2cc42c5 100644
--- a/security/safesetid/securityfs.c
+++ b/security/safesetid/securityfs.c
@@ -19,22 +19,23 @@
 
 #include "lsm.h"
 
-static DEFINE_MUTEX(policy_update_lock);
+static DEFINE_MUTEX(uid_policy_update_lock);
+static DEFINE_MUTEX(gid_policy_update_lock);
 
 /*
- * In the case the input buffer contains one or more invalid UIDs, the kuid_t
+ * In the case the input buffer contains one or more invalid IDs, the kid_t
  * variables pointed to by @parent and @child will get updated but this
  * function will return an error.
  * Contents of @buf may be modified.
  */
 static int parse_policy_line(struct file *file, char *buf,
-	struct setuid_rule *rule)
+	struct setid_rule *rule)
 {
 	char *child_str;
 	int ret;
 	u32 parsed_parent, parsed_child;
 
-	/* Format of |buf| string should be <UID>:<UID>. */
+	/* Format of |buf| string should be <UID>:<UID> or <GID>:<GID> */
 	child_str = strchr(buf, ':');
 	if (child_str == NULL)
 		return -EINVAL;
@@ -49,20 +50,29 @@ static int parse_policy_line(struct file *file, char *buf,
 	if (ret)
 		return ret;
 
-	rule->src_uid = make_kuid(file->f_cred->user_ns, parsed_parent);
-	rule->dst_uid = make_kuid(file->f_cred->user_ns, parsed_child);
-	if (!uid_valid(rule->src_uid) || !uid_valid(rule->dst_uid))
+	if (rule->type == UID){
+		rule->src_id.uid = make_kuid(file->f_cred->user_ns, parsed_parent);
+		rule->dst_id.uid = make_kuid(file->f_cred->user_ns, parsed_child);
+		if (!uid_valid(rule->src_id.uid) || !uid_valid(rule->dst_id.uid))
+			return -EINVAL;
+	} else if (rule->type == GID){
+		rule->src_id.gid = make_kgid(file->f_cred->user_ns, parsed_parent);
+		rule->dst_id.gid = make_kgid(file->f_cred->user_ns, parsed_child);
+		if (!gid_valid(rule->src_id.gid) || !gid_valid(rule->dst_id.gid))
+			return -EINVAL;
+	} else {
+		/* Error, rule->type is an invalid type */
 		return -EINVAL;
-
+	}
 	return 0;
 }
 
 static void __release_ruleset(struct rcu_head *rcu)
 {
-	struct setuid_ruleset *pol =
-		container_of(rcu, struct setuid_ruleset, rcu);
+	struct setid_ruleset *pol =
+		container_of(rcu, struct setid_ruleset, rcu);
 	int bucket;
-	struct setuid_rule *rule;
+	struct setid_rule *rule;
 	struct hlist_node *tmp;
 
 	hash_for_each_safe(pol->rules, bucket, tmp, rule, next)
@@ -71,36 +81,55 @@ static void __release_ruleset(struct rcu_head *rcu)
 	kfree(pol);
 }
 
-static void release_ruleset(struct setuid_ruleset *pol)
-{
+static void release_ruleset(struct setid_ruleset *pol){
 	call_rcu(&pol->rcu, __release_ruleset);
 }
 
-static void insert_rule(struct setuid_ruleset *pol, struct setuid_rule *rule)
+static void insert_rule(struct setid_ruleset *pol, struct setid_rule *rule)
 {
-	hash_add(pol->rules, &rule->next, __kuid_val(rule->src_uid));
+	if (pol->type == UID)
+		hash_add(pol->rules, &rule->next, __kuid_val(rule->src_id.uid));
+	else if (pol->type == GID)
+		hash_add(pol->rules, &rule->next, __kgid_val(rule->src_id.gid));
+	else /* Error, pol->type is neither UID or GID */
+		return;
 }
 
-static int verify_ruleset(struct setuid_ruleset *pol)
+static int verify_ruleset(struct setid_ruleset *pol)
 {
 	int bucket;
-	struct setuid_rule *rule, *nrule;
+	struct setid_rule *rule, *nrule;
 	int res = 0;
 
 	hash_for_each(pol->rules, bucket, rule, next) {
-		if (_setuid_policy_lookup(pol, rule->dst_uid, INVALID_UID) ==
-		    SIDPOL_DEFAULT) {
-			pr_warn("insecure policy detected: uid %d is constrained but transitively unconstrained through uid %d\n",
-				__kuid_val(rule->src_uid),
-				__kuid_val(rule->dst_uid));
+		if (_setid_policy_lookup(pol, rule->dst_id, INVALID_ID) == SIDPOL_DEFAULT) {
+			if (pol->type == UID) {
+				pr_warn("insecure policy detected: uid %d is constrained but transitively unconstrained through uid %d\n",
+					__kuid_val(rule->src_id.uid),
+					__kuid_val(rule->dst_id.uid));
+			} else if (pol->type == GID) {
+				pr_warn("insecure policy detected: gid %d is constrained but transitively unconstrained through gid %d\n",
+					__kgid_val(rule->src_id.gid),
+					__kgid_val(rule->dst_id.gid));
+			} else { /* pol->type is an invalid type */
+				res = -EINVAL;
+				return res;
+			}
 			res = -EINVAL;
 
 			/* fix it up */
-			nrule = kmalloc(sizeof(struct setuid_rule), GFP_KERNEL);
+			nrule = kmalloc(sizeof(struct setid_rule), GFP_KERNEL);
 			if (!nrule)
 				return -ENOMEM;
-			nrule->src_uid = rule->dst_uid;
-			nrule->dst_uid = rule->dst_uid;
+			if (pol->type == UID){
+				nrule->src_id.uid = rule->dst_id.uid;
+				nrule->dst_id.uid = rule->dst_id.uid;
+				nrule->type = UID;
+			} else { /* pol->type must be GID if we've made it to here */
+				nrule->src_id.gid = rule->dst_id.gid;
+				nrule->dst_id.gid = rule->dst_id.gid;
+				nrule->type = GID;
+			}
 			insert_rule(pol, nrule);
 		}
 	}
@@ -108,16 +137,17 @@ static int verify_ruleset(struct setuid_ruleset *pol)
 }
 
 static ssize_t handle_policy_update(struct file *file,
-				    const char __user *ubuf, size_t len)
+				    const char __user *ubuf, size_t len, enum setid_type policy_type)
 {
-	struct setuid_ruleset *pol;
+	struct setid_ruleset *pol;
 	char *buf, *p, *end;
 	int err;
 
-	pol = kmalloc(sizeof(struct setuid_ruleset), GFP_KERNEL);
+	pol = kmalloc(sizeof(struct setid_ruleset), GFP_KERNEL);
 	if (!pol)
 		return -ENOMEM;
 	pol->policy_str = NULL;
+	pol->type = policy_type;
 	hash_init(pol->rules);
 
 	p = buf = memdup_user_nul(ubuf, len);
@@ -133,7 +163,7 @@ static ssize_t handle_policy_update(struct file *file,
 
 	/* policy lines, including the last one, end with \n */
 	while (*p != '\0') {
-		struct setuid_rule *rule;
+		struct setid_rule *rule;
 
 		end = strchr(p, '\n');
 		if (end == NULL) {
@@ -142,18 +172,18 @@ static ssize_t handle_policy_update(struct file *file,
 		}
 		*end = '\0';
 
-		rule = kmalloc(sizeof(struct setuid_rule), GFP_KERNEL);
+		rule = kmalloc(sizeof(struct setid_rule), GFP_KERNEL);
 		if (!rule) {
 			err = -ENOMEM;
 			goto out_free_buf;
 		}
 
+		rule->type = policy_type;
 		err = parse_policy_line(file, p, rule);
 		if (err)
 			goto out_free_rule;
 
-		if (_setuid_policy_lookup(pol, rule->src_uid, rule->dst_uid) ==
-		    SIDPOL_ALLOWED) {
+		if (_setid_policy_lookup(pol, rule->src_id, rule->dst_id) == SIDPOL_ALLOWED) {
 			pr_warn("bad policy: duplicate entry\n");
 			err = -EEXIST;
 			goto out_free_rule;
@@ -178,21 +208,31 @@ static ssize_t handle_policy_update(struct file *file,
 	 * What we really want here is an xchg() wrapper for RCU, but since that
 	 * doesn't currently exist, just use a spinlock for now.
 	 */
-	mutex_lock(&policy_update_lock);
-	pol = rcu_replace_pointer(safesetid_setuid_rules, pol,
-				  lockdep_is_held(&policy_update_lock));
-	mutex_unlock(&policy_update_lock);
+	if (policy_type == UID) {
+		mutex_lock(&uid_policy_update_lock);
+		pol = rcu_replace_pointer(safesetid_setuid_rules, pol,
+					  lockdep_is_held(&uid_policy_update_lock));
+		mutex_unlock(&uid_policy_update_lock);
+	} else if (policy_type == GID) {
+		mutex_lock(&gid_policy_update_lock);
+		pol = rcu_replace_pointer(safesetid_setgid_rules, pol,
+					  lockdep_is_held(&gid_policy_update_lock));
+		mutex_unlock(&gid_policy_update_lock);
+	} else {
+		/* Error, policy type is neither UID or GID */
+		pr_warn("error: bad policy type");
+	}
 	err = len;
 
 out_free_buf:
 	kfree(buf);
 out_free_pol:
 	if (pol)
-                release_ruleset(pol);
+		release_ruleset(pol);
 	return err;
 }
 
-static ssize_t safesetid_file_write(struct file *file,
+static ssize_t safesetid_uid_file_write(struct file *file,
 				    const char __user *buf,
 				    size_t len,
 				    loff_t *ppos)
@@ -203,38 +243,74 @@ static ssize_t safesetid_file_write(struct file *file,
 	if (*ppos != 0)
 		return -EINVAL;
 
-	return handle_policy_update(file, buf, len);
+	return handle_policy_update(file, buf, len, UID);
+}
+
+static ssize_t safesetid_gid_file_write(struct file *file,
+				    const char __user *buf,
+				    size_t len,
+				    loff_t *ppos)
+{
+	if (!file_ns_capable(file, &init_user_ns, CAP_MAC_ADMIN))
+		return -EPERM;
+
+	if (*ppos != 0)
+		return -EINVAL;
+
+	return handle_policy_update(file, buf, len, GID);
 }
 
 static ssize_t safesetid_file_read(struct file *file, char __user *buf,
-				   size_t len, loff_t *ppos)
+				   size_t len, loff_t *ppos, struct mutex *policy_update_lock, struct setid_ruleset* ruleset)
 {
 	ssize_t res = 0;
-	struct setuid_ruleset *pol;
+	struct setid_ruleset *pol;
 	const char *kbuf;
 
-	mutex_lock(&policy_update_lock);
-	pol = rcu_dereference_protected(safesetid_setuid_rules,
-					lockdep_is_held(&policy_update_lock));
+	mutex_lock(policy_update_lock);
+	pol = rcu_dereference_protected(ruleset, lockdep_is_held(&policy_update_lock));
 	if (pol) {
 		kbuf = pol->policy_str;
 		res = simple_read_from_buffer(buf, len, ppos,
 					      kbuf, strlen(kbuf));
 	}
-	mutex_unlock(&policy_update_lock);
+	mutex_unlock(policy_update_lock);
+
 	return res;
 }
 
-static const struct file_operations safesetid_file_fops = {
-	.read = safesetid_file_read,
-	.write = safesetid_file_write,
+static ssize_t safesetid_uid_file_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *ppos)
+{
+	return safesetid_file_read(file, buf, len, ppos,
+				   &uid_policy_update_lock, safesetid_setuid_rules);
+}
+
+static ssize_t safesetid_gid_file_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *ppos)
+{
+	return safesetid_file_read(file, buf, len, ppos,
+				   &gid_policy_update_lock, safesetid_setgid_rules);
+}
+
+
+
+static const struct file_operations safesetid_uid_file_fops = {
+	.read = safesetid_uid_file_read,
+	.write = safesetid_uid_file_write,
+};
+
+static const struct file_operations safesetid_gid_file_fops = {
+	.read = safesetid_gid_file_read,
+	.write = safesetid_gid_file_write,
 };
 
 static int __init safesetid_init_securityfs(void)
 {
 	int ret;
 	struct dentry *policy_dir;
-	struct dentry *policy_file;
+	struct dentry *uid_policy_file;
+	struct dentry *gid_policy_file;
 
 	if (!safesetid_initialized)
 		return 0;
@@ -245,13 +321,21 @@ static int __init safesetid_init_securityfs(void)
 		goto error;
 	}
 
-	policy_file = securityfs_create_file("whitelist_policy", 0600,
-			policy_dir, NULL, &safesetid_file_fops);
-	if (IS_ERR(policy_file)) {
-		ret = PTR_ERR(policy_file);
+	uid_policy_file = securityfs_create_file("uid_whitelist_policy", 0600,
+			policy_dir, NULL, &safesetid_uid_file_fops);
+	if (IS_ERR(uid_policy_file)) {
+		ret = PTR_ERR(uid_policy_file);
 		goto error;
 	}
 
+	gid_policy_file = securityfs_create_file("gid_whitelist_policy", 0600,
+			policy_dir, NULL, &safesetid_gid_file_fops);
+	if (IS_ERR(gid_policy_file)) {
+		ret = PTR_ERR(gid_policy_file);
+		goto error;
+	}
+
+
 	return 0;
 
 error:
-- 
2.28.0.rc0.142.g3c755180ce-goog


^ permalink raw reply related

* Re: [PATCH v3 07/19] fs/kernel_read_file: Split into separate source file
From: Mimi Zohar @ 2020-07-27 14:53 UTC (permalink / raw)
  To: Kees Cook, Greg Kroah-Hartman
  Cc: Scott Branden, Luis Chamberlain, Jessica Yu, SeongJae Park,
	KP Singh, linux-efi, linux-security-module, linux-integrity,
	selinux, linux-kselftest, linux-kernel
In-Reply-To: <20200724213640.389191-8-keescook@chromium.org>

On Fri, 2020-07-24 at 14:36 -0700, Kees Cook wrote:
> These routines are used in places outside of exec(2), so in preparation
> for refactoring them, move them into a separate source file,
> fs/kernel_read_file.c.
> 
> Acked-by: Scott Branden <scott.branden@broadcom.com>
> Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Mimi Zohar <zohar@linux.ibm.com>

^ permalink raw reply

* Re: [PATCH v3 06/19] fs/kernel_read_file: Split into separate include file
From: Mimi Zohar @ 2020-07-27 14:41 UTC (permalink / raw)
  To: Kees Cook, Greg Kroah-Hartman
  Cc: Scott Branden, Christoph Hellwig, Luis Chamberlain, Jessica Yu,
	SeongJae Park, KP Singh, linux-efi, linux-security-module,
	linux-integrity, selinux, linux-kselftest, linux-kernel
In-Reply-To: <20200724213640.389191-7-keescook@chromium.org>

On Fri, 2020-07-24 at 14:36 -0700, Kees Cook wrote:
> From: Scott Branden <scott.branden@broadcom.com>
> 
> Move kernel_read_file* out of linux/fs.h to its own linux/kernel_read_file.h
> include file. That header gets pulled in just about everywhere
> and doesn't really need functions not related to the general fs interface.
> 
> Suggested-by: Christoph Hellwig <hch@lst.de>
> Signed-off-by: Scott Branden <scott.branden@broadcom.com>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> Link: https://lore.kernel.org/r/20200706232309.12010-2-scott.branden@broadcom.com
> Signed-off-by: Kees Cook <keescook@chromium.org>

Reviewed-by: Mimi Zohar <zohar@linux.ibm.com>

^ permalink raw reply

* [PATCH 2/2] ima: Fail rule parsing when asymmetric key measurement isn't supportable
From: Tyler Hicks @ 2020-07-27 14:08 UTC (permalink / raw)
  To: Mimi Zohar, Dmitry Kasatkin
  Cc: James Morris, Serge E . Hallyn, Lakshmi Ramasubramanian,
	Tushar Sugandhi, Nayna Jain, linux-kernel, linux-integrity,
	linux-security-module
In-Reply-To: <20200727140831.64251-1-tyhicks@linux.microsoft.com>

Measuring keys is currently only supported for asymmetric keys. In the
future, this might change.

For now, the "func=KEY_CHECK" and "keyrings=" options are only
appropriate when CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS is enabled. Make
this clear at policy load so that IMA policy authors don't assume that
these policy language constructs are supported.

Fixes: 2b60c0ecedf8 ("IMA: Read keyrings= option from the IMA policy")
Fixes: 5808611cccb2 ("IMA: Add KEY_CHECK func to measure keys")
Suggested-by: Nayna Jain <nayna@linux.ibm.com>
Signed-off-by: Tyler Hicks <tyhicks@linux.microsoft.com>
---
 security/integrity/ima/ima_policy.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index c328cfa0fc49..05f012fd3dca 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -1233,7 +1233,8 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
 				entry->func = POLICY_CHECK;
 			else if (strcmp(args[0].from, "KEXEC_CMDLINE") == 0)
 				entry->func = KEXEC_CMDLINE;
-			else if (strcmp(args[0].from, "KEY_CHECK") == 0)
+			else if (IS_ENABLED(CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS) &&
+				 strcmp(args[0].from, "KEY_CHECK") == 0)
 				entry->func = KEY_CHECK;
 			else
 				result = -EINVAL;
@@ -1290,7 +1291,8 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
 		case Opt_keyrings:
 			ima_log_string(ab, "keyrings", args[0].from);
 
-			if (entry->keyrings) {
+			if (!IS_ENABLED(CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS) ||
+			    entry->keyrings) {
 				result = -EINVAL;
 				break;
 			}
-- 
2.25.1


^ permalink raw reply related

* [PATCH 1/2] ima: Pre-parse the list of keyrings in a KEY_CHECK rule
From: Tyler Hicks @ 2020-07-27 14:08 UTC (permalink / raw)
  To: Mimi Zohar, Dmitry Kasatkin
  Cc: James Morris, Serge E . Hallyn, Lakshmi Ramasubramanian,
	Tushar Sugandhi, Nayna Jain, linux-kernel, linux-integrity,
	linux-security-module
In-Reply-To: <20200727140831.64251-1-tyhicks@linux.microsoft.com>

The ima_keyrings buffer was used as a work buffer for strsep()-based
parsing of the "keyrings=" option of an IMA policy rule. This parsing
was re-performed each time an asymmetric key was added to a kernel
keyring for each loaded policy rule that contained a "keyrings=" option.

An example rule specifying this option is:

 measure func=KEY_CHECK keyrings=a|b|c

The rule says to measure asymmetric keys added to any of the kernel
keyrings named "a", "b", or "c". The size of the buffer size was
equal to the size of the largest "keyrings=" value seen in a previously
loaded rule (5 + 1 for the NUL-terminator in the previous example) and
the buffer was pre-allocated at the time of policy load.

The pre-allocated buffer approach suffered from a couple bugs:

1) There was no locking around the use of the buffer so concurrent key
   add operations, to two different keyrings, would result in the
   strsep() loop of ima_match_keyring() to modify the buffer at the same
   time. This resulted in unexpected results from ima_match_keyring()
   and, therefore, could cause unintended keys to be measured or keys to
   not be measured when IMA policy intended for them to be measured.

2) If the kstrdup() that initialized entry->keyrings in ima_parse_rule()
   failed, the ima_keyrings buffer was freed and set to NULL even when a
   valid KEY_CHECK rule was previously loaded. The next KEY_CHECK event
   would trigger a call to strcpy() with a NULL destination pointer and
   crash the kernel.

Remove the need for a pre-allocated global buffer by parsing the list of
keyrings in a KEY_CHECK rule at the time of policy load. The
ima_rule_entry will contain an array of string pointers which point to
the name of each keyring specified in the rule. No string processing
needs to happen at the time of asymmetric key add so iterating through
the list and doing a string comparison is all that's required at the
time of policy check.

In the process of changing how the "keyrings=" policy option is handled,
a couple additional bugs were fixed:

1) The rule parser accepted rules containing invalid "keyrings=" values
   such as "a|b||c", "a|b|", or simply "|".

2) The /sys/kernel/security/ima/policy file did not display the entire
   "keyrings=" value if the list of keyrings was longer than what could
   fit in the fixed size tbuf buffer in ima_policy_show().

Fixes: 5c7bac9fb2c5 ("IMA: pre-allocate buffer to hold keyrings string")
Fixes: 2b60c0ecedf8 ("IMA: Read keyrings= option from the IMA policy")
Signed-off-by: Tyler Hicks <tyhicks@linux.microsoft.com>
---
 security/integrity/ima/ima_policy.c | 138 +++++++++++++++++++---------
 1 file changed, 93 insertions(+), 45 deletions(-)

diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 07f033634b27..c328cfa0fc49 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -59,6 +59,11 @@ enum policy_types { ORIGINAL_TCB = 1, DEFAULT_TCB };
 
 enum policy_rule_list { IMA_DEFAULT_POLICY = 1, IMA_CUSTOM_POLICY };
 
+struct ima_rule_opt_list {
+	size_t count;
+	char *items[];
+};
+
 struct ima_rule_entry {
 	struct list_head list;
 	int action;
@@ -78,7 +83,7 @@ struct ima_rule_entry {
 		int type;	/* audit type */
 	} lsm[MAX_LSM_RULES];
 	char *fsname;
-	char *keyrings; /* Measure keys added to these keyrings */
+	struct ima_rule_opt_list *keyrings; /* Measure keys added to these keyrings */
 	struct ima_template_desc *template;
 };
 
@@ -206,10 +211,6 @@ static LIST_HEAD(ima_policy_rules);
 static LIST_HEAD(ima_temp_rules);
 static struct list_head *ima_rules = &ima_default_rules;
 
-/* Pre-allocated buffer used for matching keyrings. */
-static char *ima_keyrings;
-static size_t ima_keyrings_len;
-
 static int ima_policy __initdata;
 
 static int __init default_measure_policy_setup(char *str)
@@ -253,6 +254,72 @@ static int __init default_appraise_policy_setup(char *str)
 }
 __setup("ima_appraise_tcb", default_appraise_policy_setup);
 
+static struct ima_rule_opt_list *ima_alloc_rule_opt_list(const substring_t *src)
+{
+	struct ima_rule_opt_list *opt_list;
+	size_t count = 0;
+	char *src_copy;
+	char *cur, *next;
+	size_t i;
+
+	src_copy = match_strdup(src);
+	if (!src_copy)
+		return NULL;
+
+	next = src_copy;
+	while ((cur = strsep(&next, "|"))) {
+		/* Don't accept an empty list item */
+		if (!(*cur)) {
+			kfree(src_copy);
+			return ERR_PTR(-EINVAL);
+		}
+		count++;
+	}
+
+	/* Don't accept an empty list */
+	if (!count) {
+		kfree(src_copy);
+		return ERR_PTR(-EINVAL);
+	}
+
+	opt_list = kzalloc(struct_size(opt_list, items, count), GFP_KERNEL);
+	if (!opt_list) {
+		kfree(src_copy);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/*
+	 * strsep() has already replaced all instances of '|' with '\0',
+	 * leaving a byte sequence of NUL-terminated strings. Reference each
+	 * string with the array of items.
+	 *
+	 * IMPORTANT: Ownership of the allocated buffer is transferred from
+	 * src_copy to the first element in the items array. To free the
+	 * buffer, kfree() must only be called on the first element of the
+	 * array.
+	 */
+	for (i = 0, cur = src_copy; i < count; i++) {
+		opt_list->items[i] = cur;
+		cur = strchr(cur, '\0') + 1;
+	}
+	opt_list->count = count;
+
+	return opt_list;
+}
+
+static void ima_free_rule_opt_list(struct ima_rule_opt_list *opt_list)
+{
+	if (!opt_list)
+		return;
+
+	if (opt_list->count) {
+		kfree(opt_list->items[0]);
+		opt_list->count = 0;
+	}
+
+	kfree(opt_list);
+}
+
 static void ima_lsm_free_rule(struct ima_rule_entry *entry)
 {
 	int i;
@@ -274,7 +341,7 @@ static void ima_free_rule(struct ima_rule_entry *entry)
 	 * the defined_templates list and cannot be freed here
 	 */
 	kfree(entry->fsname);
-	kfree(entry->keyrings);
+	ima_free_rule_opt_list(entry->keyrings);
 	ima_lsm_free_rule(entry);
 	kfree(entry);
 }
@@ -394,8 +461,8 @@ int ima_lsm_policy_change(struct notifier_block *nb, unsigned long event,
 static bool ima_match_keyring(struct ima_rule_entry *rule,
 			      const char *keyring, const struct cred *cred)
 {
-	char *next_keyring, *keyrings_ptr;
 	bool matched = false;
+	size_t i;
 
 	if ((rule->flags & IMA_UID) && !rule->uid_op(cred->uid, rule->uid))
 		return false;
@@ -406,15 +473,8 @@ static bool ima_match_keyring(struct ima_rule_entry *rule,
 	if (!keyring)
 		return false;
 
-	strcpy(ima_keyrings, rule->keyrings);
-
-	/*
-	 * "keyrings=" is specified in the policy in the format below:
-	 * keyrings=.builtin_trusted_keys|.ima|.evm
-	 */
-	keyrings_ptr = ima_keyrings;
-	while ((next_keyring = strsep(&keyrings_ptr, "|")) != NULL) {
-		if (!strcmp(next_keyring, keyring)) {
+	for (i = 0; i < rule->keyrings->count; i++) {
+		if (!strcmp(rule->keyrings->items[i], keyring)) {
 			matched = true;
 			break;
 		}
@@ -1065,7 +1125,6 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
 	bool uid_token;
 	struct ima_template_desc *template_desc;
 	int result = 0;
-	size_t keyrings_len;
 
 	ab = integrity_audit_log_start(audit_context(), GFP_KERNEL,
 				       AUDIT_INTEGRITY_POLICY_RULE);
@@ -1231,37 +1290,18 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
 		case Opt_keyrings:
 			ima_log_string(ab, "keyrings", args[0].from);
 
-			keyrings_len = strlen(args[0].from) + 1;
-
-			if ((entry->keyrings) ||
-			    (keyrings_len < 2)) {
+			if (entry->keyrings) {
 				result = -EINVAL;
 				break;
 			}
 
-			if (keyrings_len > ima_keyrings_len) {
-				char *tmpbuf;
-
-				tmpbuf = krealloc(ima_keyrings, keyrings_len,
-						  GFP_KERNEL);
-				if (!tmpbuf) {
-					result = -ENOMEM;
-					break;
-				}
-
-				ima_keyrings = tmpbuf;
-				ima_keyrings_len = keyrings_len;
-			}
-
-			entry->keyrings = kstrdup(args[0].from, GFP_KERNEL);
-			if (!entry->keyrings) {
-				kfree(ima_keyrings);
-				ima_keyrings = NULL;
-				ima_keyrings_len = 0;
-				result = -ENOMEM;
+			entry->keyrings = ima_alloc_rule_opt_list(args);
+			if (IS_ERR(entry->keyrings)) {
+				result = PTR_ERR(entry->keyrings);
+				entry->keyrings = NULL;
 				break;
 			}
-			result = 0;
+
 			entry->flags |= IMA_KEYRINGS;
 			break;
 		case Opt_fsuuid:
@@ -1574,6 +1614,15 @@ static void policy_func_show(struct seq_file *m, enum ima_hooks func)
 		seq_printf(m, "func=%d ", func);
 }
 
+static void ima_show_rule_opt_list(struct seq_file *m,
+				   const struct ima_rule_opt_list *opt_list)
+{
+	size_t i;
+
+	for (i = 0; i < opt_list->count; i++)
+		seq_printf(m, "%s%s", i ? "|" : "", opt_list->items[i]);
+}
+
 int ima_policy_show(struct seq_file *m, void *v)
 {
 	struct ima_rule_entry *entry = v;
@@ -1630,9 +1679,8 @@ int ima_policy_show(struct seq_file *m, void *v)
 	}
 
 	if (entry->flags & IMA_KEYRINGS) {
-		if (entry->keyrings != NULL)
-			snprintf(tbuf, sizeof(tbuf), "%s", entry->keyrings);
-		seq_printf(m, pt(Opt_keyrings), tbuf);
+		seq_puts(m, "keyrings=");
+		ima_show_rule_opt_list(m, entry->keyrings);
 		seq_puts(m, " ");
 	}
 
-- 
2.25.1


^ permalink raw reply related

* [PATCH 0/2] ima: Fix keyrings race condition and other key related bugs
From: Tyler Hicks @ 2020-07-27 14:08 UTC (permalink / raw)
  To: Mimi Zohar, Dmitry Kasatkin
  Cc: James Morris, Serge E . Hallyn, Lakshmi Ramasubramanian,
	Tushar Sugandhi, Nayna Jain, linux-kernel, linux-integrity,
	linux-security-module

Nayna pointed out that the "keyrings=" option in an IMA policy rule
should only be accepted when CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS is
enabled:

 https://lore.kernel.org/linux-integrity/336cc947-1f70-0286-6506-6df3d1d23a1d@linux.vnet.ibm.com/

While fixing this, the compiler warned me about the potential for the
ima_keyrings pointer to be NULL despite it being used, without a check
for NULL, as the destination address for the strcpy() in
ima_match_keyring().

It also became apparent that there was not adequate locking around the
use of the pre-allocated buffer that ima_keyrings points to. The kernel
keyring has a lock (.sem member of struct key) that ensures only one key
can be added to a given keyring at a time but there's no protection
against adding multiple keys to different keyrings at the same time.

The first patch in this series fixes both ima_keyrings related issues by
parsing the list of keyrings in a KEY_CHECK rule at policy load time
rather than deferring the parsing to policy check time. Once that fix is
in place, the second patch can enforce that
CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS must be enabled in order to use
"func=KEY_CHECK" or "keyrings=" options in IMA policy.

The new "keyrings=" value handling is done in a generic manner that can
be reused by other options in the future. This seems to make sense as
"appraise_type=" has similar style (though it doesn't need to be fully
parsed at this time) and using "|" as an alternation delimiter is
becoming the norm in IMA policy.

This series is based on commit 311aa6aafea4 ("ima: move
APPRAISE_BOOTPARAM dependency on ARCH_POLICY to runtime") in
next-integrity.

Tyler

Tyler Hicks (2):
  ima: Pre-parse the list of keyrings in a KEY_CHECK rule
  ima: Fail rule parsing when asymmetric key measurement isn't
    supportable

 security/integrity/ima/ima_policy.c | 142 +++++++++++++++++++---------
 1 file changed, 96 insertions(+), 46 deletions(-)

-- 
2.25.1

^ permalink raw reply

* Re: [PATCH v3 04/19] fs/kernel_read_file: Remove FIRMWARE_PREALLOC_BUFFER enum
From: Mimi Zohar @ 2020-07-27 13:35 UTC (permalink / raw)
  To: Kees Cook, Greg Kroah-Hartman
  Cc: stable, Scott Branden, Luis Chamberlain, Jessica Yu,
	SeongJae Park, KP Singh, linux-efi, linux-security-module,
	linux-integrity, selinux, linux-kselftest, linux-kernel
In-Reply-To: <20200724213640.389191-5-keescook@chromium.org>

On Fri, 2020-07-24 at 14:36 -0700, Kees Cook wrote:
> FIRMWARE_PREALLOC_BUFFER is a "how", not a "what", and confuses the LSMs
> that are interested in filtering between types of things. The "how"
> should be an internal detail made uninteresting to the LSMs.
> 
> Fixes: a098ecd2fa7d ("firmware: support loading into a pre-allocated buffer")
> Fixes: fd90bc559bfb ("ima: based on policy verify firmware signatures (pre-allocated buffer)")
> Fixes: 4f0496d8ffa3 ("ima: based on policy warn about loading firmware (pre-allocated buffer)")
> Cc: stable@vger.kernel.org
> Acked-by: Scott Branden <scott.branden@broadcom.com>
> Signed-off-by: Kees Cook <keescook@chromium.org>

Thank you for updating the pre-allocated buffer comment.

Reviewed-by: Mimi Zohar <zohar@linux.ibm.com>

^ permalink raw reply

* Re: [PATCH v3 15/19] IMA: Add support for file reads without contents
From: Mimi Zohar @ 2020-07-27 13:23 UTC (permalink / raw)
  To: Kees Cook, Greg Kroah-Hartman
  Cc: Scott Branden, Luis Chamberlain, Jessica Yu, SeongJae Park,
	KP Singh, linux-efi, linux-security-module, linux-integrity,
	selinux, linux-kselftest, linux-kernel
In-Reply-To: <20200724213640.389191-16-keescook@chromium.org>

On Fri, 2020-07-24 at 14:36 -0700, Kees Cook wrote:
> From: Scott Branden <scott.branden@broadcom.com>
> 
> When the kernel_read_file LSM hook is called with contents=false, IMA
> can appraise the file directly, without requiring a filled buffer. When
> such a buffer is available, though, IMA can continue to use it instead
> of forcing a double read here.
> 
> Signed-off-by: Scott Branden <scott.branden@broadcom.com>
> Link: https://lore.kernel.org/lkml/20200706232309.12010-10-scott.branden@broadcom.com/
> Signed-off-by: Kees Cook <keescook@chromium.org>

After adjusting the comment below.

Reviewed-by: Mimi Zohar <zohar@linux.ibm.com>

> ---
>  security/integrity/ima/ima_main.c | 22 ++++++++++++++++------
>  1 file changed, 16 insertions(+), 6 deletions(-)
> 
> diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
> index dc4f90660aa6..459e50526a12 100644
> --- a/security/integrity/ima/ima_main.c
> +++ b/security/integrity/ima/ima_main.c
> @@ -613,11 +613,8 @@ void ima_post_path_mknod(struct dentry *dentry)
>  int ima_read_file(struct file *file, enum kernel_read_file_id read_id,
>  		  bool contents)
>  {
> -	/* Reject all partial reads during appraisal. */
> -	if (!contents) {
> -		if (ima_appraise & IMA_APPRAISE_ENFORCE)
> -			return -EACCES;
> -	}
> +	enum ima_hooks func;
> +	u32 secid;
>  
>  	/*
>  	 * Do devices using pre-allocated memory run the risk of the
> @@ -626,7 +623,20 @@ int ima_read_file(struct file *file, enum kernel_read_file_id read_id,
>  	 * buffers? It may be desirable to include the buffer address
>  	 * in this API and walk all the dma_map_single() mappings to check.
>  	 */
> -	return 0;
> +
> +	/*
> +	 * There will be a call made to ima_post_read_file() with
> +	 * a filled buffer, so we don't need to perform an extra
> +	 * read early here.
> +	 */
> +	if (contents)
> +		return 0;
> +
> +	/* Read entire file for all partial reads during appraisal. */

In addition to verifying the file signature, the file might be
included in the IMA measurement list or the file hash may be used to
augment the audit record.  Please remove "during appraisal" from the
comment.

> +	func = read_idmap[read_id] ?: FILE_CHECK;
> +	security_task_getsecid(current, &secid);
> +	return process_measurement(file, current_cred(), secid, NULL,
> +				   0, MAY_READ, func);
>  }
>  
>  const int read_idmap[READING_MAX_ID] = {


^ permalink raw reply

* Re: [PATCH v3 00/19] Introduce partial kernel_read_file() support
From: Mimi Zohar @ 2020-07-27 11:16 UTC (permalink / raw)
  To: Kees Cook, Greg Kroah-Hartman
  Cc: Scott Branden, Luis Chamberlain, Jessica Yu, SeongJae Park,
	KP Singh, linux-efi, linux-security-module, linux-integrity,
	selinux, linux-kselftest, linux-kernel
In-Reply-To: <20200724213640.389191-1-keescook@chromium.org>

On Fri, 2020-07-24 at 14:36 -0700, Kees Cook wrote:
> v3:
> - add reviews/acks
> - add "IMA: Add support for file reads without contents" patch
> - trim CC list, in case that's why vger ignored v2
> v2: [missing from lkml archives! (CC list too long?) repeating changes here]
> - fix issues in firmware test suite
> - add firmware partial read patches
> - various bug fixes/cleanups
> v1: https://lore.kernel.org/lkml/20200717174309.1164575-1-keescook@chromium.org/
> 
> Hi,
> 
> Here's my tree for adding partial read support in kernel_read_file(),
> which fixes a number of issues along the way. It's got Scott's firmware
> and IMA patches ported and everything tests cleanly for me (even with
> CONFIG_IMA_APPRAISE=y).

Thanks, Kees.  Other than my comments on the new
security_kernel_post_load_data() hook, the patch set is really nice.

In addition to compiling with CONFIG_IMA_APPRAISE enabled, have you
booted the kernel with the ima_policy=tcb?  The tcb policy will add
measurements to the IMA measurement list and extend the TPM with the
file or buffer data digest.  Are you seeing the firmware measurements,
in particular the partial read measurement?

thanks,

Mimi

^ permalink raw reply

* Re: [PATCH v3 12/19] firmware_loader: Use security_post_load_data()
From: Mimi Zohar @ 2020-07-27 10:57 UTC (permalink / raw)
  To: Kees Cook, Greg Kroah-Hartman
  Cc: Scott Branden, Luis Chamberlain, Jessica Yu, SeongJae Park,
	KP Singh, linux-efi, linux-security-module, linux-integrity,
	selinux, linux-kselftest, linux-kernel
In-Reply-To: <20200724213640.389191-13-keescook@chromium.org>

On Fri, 2020-07-24 at 14:36 -0700, Kees Cook wrote:
> Now that security_post_load_data() is wired up, use it instead
> of the NULL file argument style of security_post_read_file(),
> and update the security_kernel_load_data() call to indicate that a
> security_kernel_post_load_data() call is expected.
> 
> Wire up the IMA check to match earlier logic. Perhaps a generalized
> change to ima_post_load_data() might look something like this:
> 
>     return process_buffer_measurement(buf, size,
>                                       kernel_load_data_id_str(load_id),
>                                       read_idmap[load_id] ?: FILE_CHECK,
>                                       0, NULL);
> 
> Signed-off-by: Kees Cook <keescook@chromium.org>

process_measurement() measures, verifies a file signature -  both
signatures stored as an xattr and as an appended buffer signature -
and augments audit records with the file hash. (Support for measuring,
augmenting audit records, and/or verifying fs-verity signatures has
yet to be added.)

As explained in my response to 11/19, the file descriptor provides the
file pathname associated with the buffer data.  In addition, IMA
policy rules may be defined in terms of other file descriptor info -
uid, euid, uuid, etc.

Recently support was added for measuring the kexec boot command line,
certificates being loaded onto a keyring, and blacklisted file hashes
(limited to appended signatures).  None of these buffers are signed.
 process_buffer_measurement() was added for this reason and as a
result is limited to just measuring the buffer data.

Whether process_measurement() or process_buffer_measurement() should
be modified, needs to be determined.  In either case to support the
init_module syscall, would at minimum require the associated file
pathname.

Mimi

^ permalink raw reply

* Re: [PATCH v3 11/19] LSM: Introduce kernel_post_load_data() hook
From: Mimi Zohar @ 2020-07-27 10:49 UTC (permalink / raw)
  To: Kees Cook, Greg Kroah-Hartman
  Cc: Scott Branden, Luis Chamberlain, Jessica Yu, SeongJae Park,
	KP Singh, linux-efi, linux-security-module, linux-integrity,
	selinux, linux-kselftest, linux-kernel
In-Reply-To: <20200724213640.389191-12-keescook@chromium.org>

On Fri, 2020-07-24 at 14:36 -0700, Kees Cook wrote:
> There are a few places in the kernel where LSMs would like to have
> visibility into the contents of a kernel buffer that has been loaded or
> read. While security_kernel_post_read_file() (which includes the
> buffer) exists as a pairing for security_kernel_read_file(), no such
> hook exists to pair with security_kernel_load_data().
> 
> Earlier proposals for just using security_kernel_post_read_file() with a
> NULL file argument were rejected (i.e. "file" should always be valid for
> the security_..._file hooks, but it appears at least one case was
> left in the kernel during earlier refactoring. (This will be fixed in
> a subsequent patch.)
> 
> Since not all cases of security_kernel_load_data() can have a single
> contiguous buffer made available to the LSM hook (e.g. kexec image
> segments are separately loaded), there needs to be a way for the LSM to
> reason about its expectations of the hook coverage. In order to handle
> this, add a "contents" argument to the "kernel_load_data" hook that
> indicates if the newly added "kernel_post_load_data" hook will be called
> with the full contents once loaded. That way, LSMs requiring full contents
> can choose to unilaterally reject "kernel_load_data" with contents=false
> (which is effectively the existing hook coverage), but when contents=true
> they can allow it and later evaluate the "kernel_post_load_data" hook
> once the buffer is loaded.
> 
> With this change, LSMs can gain coverage over non-file-backed data loads
> (e.g. init_module(2) and firmware userspace helper), which will happen
> in subsequent patches.
> 
> Additionally prepare IMA to start processing these cases.
> 
> Signed-off-by: Kees Cook <keescook@chromium.org>

At least from an IMA perspective, the original
security_kernel_load_data() hook was defined in order to prevent
certain syscalls - init_module, kexec_load - and loading firmware via
sysfs.  The resulting error messages were generic.
  
Unlike security_kernel_load_data(), security_kernel_post_load_data()
is meant to be used, but without a file desciptor specific
information, like the filename associated with the buffer, is missing.
 Having the filename isn't actually necessary for verifying the
appended signature, but it is needed for auditing signature
verification failures and including in the IMA measurement list.

Mimi

^ permalink raw reply

* Re: [PATCH v7 4/7] fs: Introduce O_MAYEXEC flag for openat2(2)
From: Florian Weimer @ 2020-07-27  5:27 UTC (permalink / raw)
  To: Al Viro
  Cc: Mickaël Salaün, linux-kernel, Aleksa Sarai,
	Alexei Starovoitov, Andrew Morton, Andy Lutomirski,
	Christian Brauner, Christian Heimes, Daniel Borkmann,
	Deven Bowers, Dmitry Vyukov, Eric Biggers, Eric Chiang,
	James Morris, Jan Kara, Jann Horn, Jonathan Corbet, Kees Cook,
	Lakshmi Ramasubramanian, Matthew Garrett, Matthew Wilcox,
	Michael Kerrisk, Mimi Zohar, Philippe Trébuchet, Scott Shell,
	Sean Christopherson, Shuah Khan, Steve Dower, Steve Grubb,
	Tetsuo Handa, Thibaut Sautereau, Vincent Strubel,
	kernel-hardening, linux-api, linux-integrity,
	linux-security-module, linux-fsdevel, Thibaut Sautereau
In-Reply-To: <20200727042106.GB794331@ZenIV.linux.org.uk>

* Al Viro:

> On Thu, Jul 23, 2020 at 07:12:24PM +0200, MickaÃ«l SalaÃ¼n wrote:
>> When the O_MAYEXEC flag is passed, openat2(2) may be subject to
>> additional restrictions depending on a security policy managed by the
>> kernel through a sysctl or implemented by an LSM thanks to the
>> inode_permission hook.  This new flag is ignored by open(2) and
>> openat(2) because of their unspecified flags handling.  When used with
>> openat2(2), the default behavior is only to forbid to open a directory.
>
> Correct me if I'm wrong, but it looks like you are introducing a magical
> flag that would mean "let the Linux S&M take an extra special whip
> for this open()".
>
> Why is it done during open?  If the caller is passing it deliberately,
> why not have an explicit request to apply given torture device to an
> already opened file?  Why not sys_masochism(int fd, char *hurt_flavour),
> for that matter?

While I do not think this is appropriate language for a workplace, Al
has a point: If the auditing event can be generated on an already-open
descriptor, it would also cover scenarios like this one:

  perl < /path/to/script

Where the process that opens the file does not (and cannot) know that it
will be used for execution purposes.

Thanks,
Florian


^ permalink raw reply

* Re: [PATCH v7 4/7] fs: Introduce O_MAYEXEC flag for openat2(2)
From: Al Viro @ 2020-07-27  4:21 UTC (permalink / raw)
  To: Mickaël Salaün
  Cc: linux-kernel, Aleksa Sarai, Alexei Starovoitov, Andrew Morton,
	Andy Lutomirski, Christian Brauner, Christian Heimes,
	Daniel Borkmann, Deven Bowers, Dmitry Vyukov, Eric Biggers,
	Eric Chiang, Florian Weimer, James Morris, Jan Kara, Jann Horn,
	Jonathan Corbet, Kees Cook, Lakshmi Ramasubramanian,
	Matthew Garrett, Matthew Wilcox, Michael Kerrisk, Mimi Zohar,
	Philippe Trébuchet, Scott Shell, Sean Christopherson,
	Shuah Khan, Steve Dower, Steve Grubb, Tetsuo Handa,
	Thibaut Sautereau, Vincent Strubel, kernel-hardening, linux-api,
	linux-integrity, linux-security-module, linux-fsdevel,
	Thibaut Sautereau
In-Reply-To: <20200723171227.446711-5-mic@digikod.net>

On Thu, Jul 23, 2020 at 07:12:24PM +0200, Mickaël Salaün wrote:
> When the O_MAYEXEC flag is passed, openat2(2) may be subject to
> additional restrictions depending on a security policy managed by the
> kernel through a sysctl or implemented by an LSM thanks to the
> inode_permission hook.  This new flag is ignored by open(2) and
> openat(2) because of their unspecified flags handling.  When used with
> openat2(2), the default behavior is only to forbid to open a directory.

Correct me if I'm wrong, but it looks like you are introducing a magical
flag that would mean "let the Linux S&M take an extra special whip
for this open()".

Why is it done during open?  If the caller is passing it deliberately,
why not have an explicit request to apply given torture device to an
already opened file?  Why not sys_masochism(int fd, char *hurt_flavour),
for that matter?

^ permalink raw reply

* Re: [PATCH v3 03/19] firmware_loader: EFI firmware loader must handle pre-allocated buffer
From: Greg Kroah-Hartman @ 2020-07-25 17:20 UTC (permalink / raw)
  To: Kees Cook
  Cc: stable, Scott Branden, Mimi Zohar, Luis Chamberlain, Jessica Yu,
	SeongJae Park, KP Singh, linux-efi, linux-security-module,
	linux-integrity, selinux, linux-kselftest, linux-kernel
In-Reply-To: <202007250849.2B58CD3B@keescook>

On Sat, Jul 25, 2020 at 08:50:33AM -0700, Kees Cook wrote:
> On Sat, Jul 25, 2020 at 12:07:00PM +0200, Greg Kroah-Hartman wrote:
> > On Fri, Jul 24, 2020 at 02:36:24PM -0700, Kees Cook wrote:
> > > The EFI platform firmware fallback would clobber any pre-allocated
> > > buffers. Instead, correctly refuse to reallocate when too small (as
> > > already done in the sysfs fallback), or perform allocation normally
> > > when needed.
> > > 
> > > Fixes: e4c2c0ff00ec ("firmware: Add new platform fallback mechanism and firm ware_request_platform()")
> > 
> > "firmware_request_platform()" :)
> 
> Weird... I'm not sure where that mangling happened. Perhaps a bad
> cut/paste at 80 columns? Hmpf; thanks for catching. I've updated it on
> my end (I assume you fixed this manually, though?)

Yes, I fixed it up already, no worries.

greg k-h

^ permalink raw reply

* [PATCH v2] netfilter: Replace HTTP links with HTTPS ones
From: Alexander A. Klimov @ 2020-07-25 17:02 UTC (permalink / raw)
  To: pablo, kadlec, fw, davem, kuba, paul, netfilter-devel, coreteam,
	linux-kernel, linux-decnet-user, netdev, linux-security-module
  Cc: Alexander A. Klimov
In-Reply-To: <20200724112856.GA26061@salvia>

Rationale:
Reduces attack surface on kernel devs opening the links for MITM
as HTTPS traffic is much harder to manipulate.

Deterministic algorithm:
For each file:
  If not .svg:
    For each line:
      If doesn't contain `\bxmlns\b`:
        For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`:
	  If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`:
            If both the HTTP and HTTPS versions
            return 200 OK and serve the same content:
              Replace HTTP with HTTPS.

Signed-off-by: Alexander A. Klimov <grandmaster@al2klimov.de>
---
 v2: Included other netfilter patch.

 include/uapi/linux/netfilter/xt_connmark.h | 2 +-
 net/decnet/netfilter/dn_rtmsg.c            | 2 +-
 net/netfilter/Kconfig                      | 2 +-
 net/netfilter/nfnetlink_acct.c             | 2 +-
 net/netfilter/nft_set_pipapo.c             | 4 ++--
 net/netfilter/xt_CONNSECMARK.c             | 2 +-
 net/netfilter/xt_connmark.c                | 2 +-
 net/netfilter/xt_nfacct.c                  | 2 +-
 net/netfilter/xt_time.c                    | 2 +-
 9 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/uapi/linux/netfilter/xt_connmark.h b/include/uapi/linux/netfilter/xt_connmark.h
index 1aa5c955ee1e..f01c19b83a2b 100644
--- a/include/uapi/linux/netfilter/xt_connmark.h
+++ b/include/uapi/linux/netfilter/xt_connmark.h
@@ -4,7 +4,7 @@
 
 #include <linux/types.h>
 
-/* Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
+/* Copyright (C) 2002,2004 MARA Systems AB <https://www.marasystems.com>
  * by Henrik Nordstrom <hno@marasystems.com>
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index dc705769acc9..26a9193df783 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -6,7 +6,7 @@
  *
  *              DECnet Routing Message Grabulator
  *
- *              (C) 2000 ChyGwyn Limited  -  http://www.chygwyn.com/
+ *              (C) 2000 ChyGwyn Limited  -  https://www.chygwyn.com/
  *
  * Author:      Steven Whitehouse <steve@chygwyn.com>
  */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 0ffe2b8723c4..25313c29d799 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -447,7 +447,7 @@ config NF_TABLES
 	  replace the existing {ip,ip6,arp,eb}_tables infrastructure. It
 	  provides a pseudo-state machine with an extensible instruction-set
 	  (also known as expressions) that the userspace 'nft' utility
-	  (http://www.netfilter.org/projects/nftables) uses to build the
+	  (https://www.netfilter.org/projects/nftables) uses to build the
 	  rule-set. It also comes with the generic set infrastructure that
 	  allows you to construct mappings between matchings and actions
 	  for performance lookups.
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 5827117f2635..5bfec829c12f 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
- * (C) 2011 Intra2net AG <http://www.intra2net.com>
+ * (C) 2011 Intra2net AG <https://www.intra2net.com>
  */
 #include <linux/init.h>
 #include <linux/module.h>
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 8c04388296b0..78070aa65f62 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -312,7 +312,7 @@
  *      Jay Ligatti, Josh Kuhn, and Chris Gage.
  *      Proceedings of the IEEE International Conference on Computer
  *      Communication Networks (ICCCN), August 2010.
- *      http://www.cse.usf.edu/~ligatti/papers/grouper-conf.pdf
+ *      https://www.cse.usf.edu/~ligatti/papers/grouper-conf.pdf
  *
  * [Rottenstreich 2010]
  *      Worst-Case TCAM Rule Expansion
@@ -325,7 +325,7 @@
  *      Kirill Kogan, Sergey Nikolenko, Ori Rottenstreich, William Culhane,
  *      and Patrick Eugster.
  *      Proceedings of the 2014 ACM conference on SIGCOMM, August 2014.
- *      http://www.sigcomm.org/sites/default/files/ccr/papers/2014/August/2619239-2626294.pdf
+ *      https://www.sigcomm.org/sites/default/files/ccr/papers/2014/August/2619239-2626294.pdf
  */
 
 #include <linux/kernel.h>
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index a5c8b653476a..76acecf3e757 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -6,7 +6,7 @@
  * with the SECMARK target and state match.
  *
  * Based somewhat on CONNMARK:
- *   Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
+ *   Copyright (C) 2002,2004 MARA Systems AB <https://www.marasystems.com>
  *    by Henrik Nordstrom <hno@marasystems.com>
  *
  * (C) 2006,2008 Red Hat, Inc., James Morris <jmorris@redhat.com>
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index eec2f3a88d73..e5ebc0810675 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -2,7 +2,7 @@
 /*
  *	xt_connmark - Netfilter module to operate on connection marks
  *
- *	Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
+ *	Copyright (C) 2002,2004 MARA Systems AB <https://www.marasystems.com>
  *	by Henrik Nordstrom <hno@marasystems.com>
  *	Copyright © CC Computer Consultants GmbH, 2007 - 2008
  *	Jan Engelhardt <jengelh@medozas.de>
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index 5aab6df74e0f..a97c2259bbc8 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
- * (C) 2011 Intra2net AG <http://www.intra2net.com>
+ * (C) 2011 Intra2net AG <https://www.intra2net.com>
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 67cb98489415..6aa12d0f54e2 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -5,7 +5,7 @@
  *	based on ipt_time by Fabrice MARIE <fabrice@netfilter.org>
  *	This is a module which is used for time matching
  *	It is using some modified code from dietlibc (localtime() function)
- *	that you can find at http://www.fefe.de/dietlibc/
+ *	that you can find at https://www.fefe.de/dietlibc/
  *	This file is distributed under the terms of the GNU General Public
  *	License (GPL). Copies of the GPL can be obtained from gnu.org/gpl.
  */
-- 
2.27.0


^ permalink raw reply related

* Re: [PATCH v3 03/19] firmware_loader: EFI firmware loader must handle pre-allocated buffer
From: Kees Cook @ 2020-07-25 15:50 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: stable, Scott Branden, Mimi Zohar, Luis Chamberlain, Jessica Yu,
	SeongJae Park, KP Singh, linux-efi, linux-security-module,
	linux-integrity, selinux, linux-kselftest, linux-kernel
In-Reply-To: <20200725100700.GB1073708@kroah.com>

On Sat, Jul 25, 2020 at 12:07:00PM +0200, Greg Kroah-Hartman wrote:
> On Fri, Jul 24, 2020 at 02:36:24PM -0700, Kees Cook wrote:
> > The EFI platform firmware fallback would clobber any pre-allocated
> > buffers. Instead, correctly refuse to reallocate when too small (as
> > already done in the sysfs fallback), or perform allocation normally
> > when needed.
> > 
> > Fixes: e4c2c0ff00ec ("firmware: Add new platform fallback mechanism and firm ware_request_platform()")
> 
> "firmware_request_platform()" :)

Weird... I'm not sure where that mangling happened. Perhaps a bad
cut/paste at 80 columns? Hmpf; thanks for catching. I've updated it on
my end (I assume you fixed this manually, though?)

Thanks!

-- 
Kees Cook

^ permalink raw reply

* Re: [PATCH v3 00/19] Introduce partial kernel_read_file() support
From: Kees Cook @ 2020-07-25 15:48 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: Scott Branden, Mimi Zohar, Luis Chamberlain, Jessica Yu,
	SeongJae Park, KP Singh, linux-efi, linux-security-module,
	linux-integrity, selinux, linux-kselftest, linux-kernel
In-Reply-To: <20200725100555.GA1073708@kroah.com>

On Sat, Jul 25, 2020 at 12:05:55PM +0200, Greg Kroah-Hartman wrote:
> On Fri, Jul 24, 2020 at 02:36:21PM -0700, Kees Cook wrote:
> > v3:
> > - add reviews/acks
> > - add "IMA: Add support for file reads without contents" patch
> > - trim CC list, in case that's why vger ignored v2
> > v2: [missing from lkml archives! (CC list too long?) repeating changes here]
> > - fix issues in firmware test suite
> > - add firmware partial read patches
> > - various bug fixes/cleanups
> > v1: https://lore.kernel.org/lkml/20200717174309.1164575-1-keescook@chromium.org/
> > 
> > Hi,
> > 
> > Here's my tree for adding partial read support in kernel_read_file(),
> > which fixes a number of issues along the way. It's got Scott's firmware
> > and IMA patches ported and everything tests cleanly for me (even with
> > CONFIG_IMA_APPRAISE=y).
> > 
> > I think the intention is for this to go via Greg's tree since Scott's
> > driver code will depend on it?
> 
> I've applied the first 3 now, as I think I need some acks/reviewed-by
> from the subsystem owners of the other patches before I can take them.

Sounds good; thanks!

(I would argue 4 and 5 are also bug fixes, 6 is already Acked by hch and
you, and 7 is a logical follow-up to 6, but I get your point.)

James, Luis, Mimi, and Jessica, the bulk of these patches are LSM,
firmware, IMA, and modules. How does this all look to you? And KP,
you'd mentioned privately that you were interested in being able to
use the new kernel_post_load_data LSM hook for better visibility into
non-file-backed blob loading.

Thanks!

-Kees

-- 
Kees Cook

^ permalink raw reply

* [PATCH] smack: fix slab-out-of-bounds by checking for overflow
From: B K Karthik @ 2020-07-25 12:58 UTC (permalink / raw)
  To: Casey Schaufler, James Morris, Serge E. Hallyn,
	linux-security-module, linux-kernel, gregkh, skhan,
	syzkaller-bugs

[-- Attachment #1: Type: text/plain, Size: 4857 bytes --]

add a barrier to smk_set_cipso() to check for overflow

==================================================================
BUG: KASAN: slab-out-of-bounds in vsscanf+0x2666/0x2ef0 lib/vsprintf.c:3321
Read of size 1 at addr ffff888097d682b8 by task syz-executor980/6804

CPU: 0 PID: 6804 Comm: syz-executor980 Not tainted 5.8.0-rc5-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1f0/0x31e lib/dump_stack.c:118
 print_address_description+0x66/0x5a0 mm/kasan/report.c:383
 __kasan_report mm/kasan/report.c:513 [inline]
 kasan_report+0x132/0x1d0 mm/kasan/report.c:530
 vsscanf+0x2666/0x2ef0 lib/vsprintf.c:3321
 sscanf+0x6c/0x90 lib/vsprintf.c:3527
 smk_set_cipso+0x374/0x6c0 security/smack/smackfs.c:908
 vfs_write+0x2dd/0xc70 fs/read_write.c:576
 ksys_write+0x11b/0x220 fs/read_write.c:631
 do_syscall_64+0x73/0xe0 arch/x86/entry/common.c:384
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x4402d9
Code: Bad RIP value.
RSP: 002b:00007ffe89010db8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 00000000004402d9
RDX: 0000000000000037 RSI: 0000000020000040 RDI: 0000000000000003
RBP: 00000000006ca018 R08: 0000000000000014 R09: 00000000004002c8
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000401ae0
R13: 0000000000401b70 R14: 0000000000000000 R15: 0000000000000000

Allocated by task 6804:
 save_stack mm/kasan/common.c:48 [inline]
 set_track mm/kasan/common.c:56 [inline]
 __kasan_kmalloc+0x103/0x140 mm/kasan/common.c:494
 __do_kmalloc mm/slab.c:3656 [inline]
 __kmalloc_track_caller+0x249/0x320 mm/slab.c:3671
 memdup_user_nul+0x26/0xf0 mm/util.c:259
 smk_set_cipso+0xff/0x6c0 security/smack/smackfs.c:859
 vfs_write+0x2dd/0xc70 fs/read_write.c:576
 ksys_write+0x11b/0x220 fs/read_write.c:631
 do_syscall_64+0x73/0xe0 arch/x86/entry/common.c:384
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Freed by task 4906:
 save_stack mm/kasan/common.c:48 [inline]
 set_track mm/kasan/common.c:56 [inline]
 kasan_set_free_info mm/kasan/common.c:316 [inline]
 __kasan_slab_free+0x114/0x170 mm/kasan/common.c:455
 __cache_free mm/slab.c:3426 [inline]
 kfree+0x10a/0x220 mm/slab.c:3757
 tomoyo_path_number_perm+0x525/0x690 security/tomoyo/file.c:736
 tomoyo_path_mknod+0x128/0x150 security/tomoyo/tomoyo.c:240
 security_path_mknod+0xdc/0x160 security/security.c:1077
 may_o_create fs/namei.c:2919 [inline]
 lookup_open fs/namei.c:3060 [inline]
 open_last_lookups fs/namei.c:3169 [inline]
 path_openat+0xbe8/0x37f0 fs/namei.c:3357
 do_filp_open+0x191/0x3a0 fs/namei.c:3387
 do_sys_openat2+0x463/0x770 fs/open.c:1179
 do_sys_open fs/open.c:1195 [inline]
 ksys_open include/linux/syscalls.h:1388 [inline]
 __do_sys_open fs/open.c:1201 [inline]
 __se_sys_open fs/open.c:1199 [inline]
 __x64_sys_open+0x1af/0x1e0 fs/open.c:1199
 do_syscall_64+0x73/0xe0 arch/x86/entry/common.c:384
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

The buggy address belongs to the object at ffff888097d68280
 which belongs to the cache kmalloc-64 of size 64
The buggy address is located 56 bytes inside of
 64-byte region [ffff888097d68280, ffff888097d682c0)
The buggy address belongs to the page:
page:ffffea00025f5a00 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff888097d68c80
flags: 0xfffe0000000200(slab)
raw: 00fffe0000000200 ffffea000288fe08 ffffea00026f38c8 ffff8880aa400380
raw: ffff888097d68c80 ffff888097d68000 000000010000001e 0000000000000000
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 ffff888097d68180: 00 00 00 00 01 fc fc fc fc fc fc fc fc fc fc fc
 ffff888097d68200: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
>ffff888097d68280: 00 00 00 00 00 00 00 fc fc fc fc fc fc fc fc fc
                                        ^
 ffff888097d68300: 00 00 00 00 01 fc fc fc fc fc fc fc fc fc fc fc
 ffff888097d68380: 00 00 00 00 01 fc fc fc fc fc fc fc fc fc fc fc
==================================================================

Reported-and-testedby: syzbot+a22c6092d003d6fe1122@syzkaller.appspotmail.com
Signed-off-by: B K Karthik <bkkarthik@pesu.pes.edu>
---
 security/smack/smackfs.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index 58d3f43cc8bb..17809310d046 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -905,6 +905,11 @@ static ssize_t smk_set_cipso(struct file *file, const char __user *buf,
 
 	for (i = 0; i < catlen; i++) {
 		rule += SMK_DIGITLEN;
+		if (rule > data + count) {
+			rc = -EOVERFLOW;
+			goto out;
+		}
+
 		ret = sscanf(rule, "%u", &cat);
 		if (ret != 1 || cat > SMACK_CIPSO_MAXCATNUM)
 			goto out;
-- 
2.20.1


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 659 bytes --]

^ permalink raw reply related

* Re: [PATCH v7 0/7] Add support for O_MAYEXEC
From: Christian Brauner @ 2020-07-25 11:15 UTC (permalink / raw)
  To: Kees Cook, Al Viro, Aleksa Sarai
  Cc: Andrew Morton, linux-kernel, Mickaël Salaün,
	Alexei Starovoitov, Andy Lutomirski, Christian Heimes,
	Daniel Borkmann, Deven Bowers, Dmitry Vyukov, Eric Biggers,
	Eric Chiang, Florian Weimer, James Morris, Jan Kara, Jann Horn,
	Jonathan Corbet, Lakshmi Ramasubramanian, Matthew Garrett,
	Matthew Wilcox, Michael Kerrisk, Mimi Zohar,
	Philippe Trébuchet, Scott Shell, Sean Christopherson,
	Shuah Khan, Steve Dower, Steve Grubb, Tetsuo Handa,
	Thibaut Sautereau, Vincent Strubel, kernel-hardening, linux-api,
	linux-integrity, linux-security-module, linux-fsdevel
In-Reply-To: <202007241205.751EBE7@keescook>

On Fri, Jul 24, 2020 at 12:06:53PM -0700, Kees Cook wrote:
> I think this looks good now.
> 
> Andrew, since you're already carrying my exec clean-ups (repeated here
> in patch 1-3), can you pick the rest of this series too?

Al,

Not sure if you have already re-surfaced from your
csum()/raw_copy_from_user() work completely yet but you had thoughts
about this series iirc.

Aleksa, thoughts?

Thanks!
Christian

^ permalink raw reply

* Re: [PATCH v3 03/19] firmware_loader: EFI firmware loader must handle pre-allocated buffer
From: Greg Kroah-Hartman @ 2020-07-25 10:07 UTC (permalink / raw)
  To: Kees Cook
  Cc: stable, Scott Branden, Mimi Zohar, Luis Chamberlain, Jessica Yu,
	SeongJae Park, KP Singh, linux-efi, linux-security-module,
	linux-integrity, selinux, linux-kselftest, linux-kernel
In-Reply-To: <20200724213640.389191-4-keescook@chromium.org>

On Fri, Jul 24, 2020 at 02:36:24PM -0700, Kees Cook wrote:
> The EFI platform firmware fallback would clobber any pre-allocated
> buffers. Instead, correctly refuse to reallocate when too small (as
> already done in the sysfs fallback), or perform allocation normally
> when needed.
> 
> Fixes: e4c2c0ff00ec ("firmware: Add new platform fallback mechanism and firm ware_request_platform()")

"firmware_request_platform()" :)


^ permalink raw reply

* Re: [PATCH v3 00/19] Introduce partial kernel_read_file() support
From: Greg Kroah-Hartman @ 2020-07-25 10:05 UTC (permalink / raw)
  To: Kees Cook
  Cc: Scott Branden, Mimi Zohar, Luis Chamberlain, Jessica Yu,
	SeongJae Park, KP Singh, linux-efi, linux-security-module,
	linux-integrity, selinux, linux-kselftest, linux-kernel
In-Reply-To: <20200724213640.389191-1-keescook@chromium.org>

On Fri, Jul 24, 2020 at 02:36:21PM -0700, Kees Cook wrote:
> v3:
> - add reviews/acks
> - add "IMA: Add support for file reads without contents" patch
> - trim CC list, in case that's why vger ignored v2
> v2: [missing from lkml archives! (CC list too long?) repeating changes here]
> - fix issues in firmware test suite
> - add firmware partial read patches
> - various bug fixes/cleanups
> v1: https://lore.kernel.org/lkml/20200717174309.1164575-1-keescook@chromium.org/
> 
> Hi,
> 
> Here's my tree for adding partial read support in kernel_read_file(),
> which fixes a number of issues along the way. It's got Scott's firmware
> and IMA patches ported and everything tests cleanly for me (even with
> CONFIG_IMA_APPRAISE=y).
> 
> I think the intention is for this to go via Greg's tree since Scott's
> driver code will depend on it?

I've applied the first 3 now, as I think I need some acks/reviewed-by
from the subsystem owners of the other patches before I can take them.

thanks,

greg k-h

^ permalink raw reply

* Re: [PATCH v3 00/19] Introduce partial kernel_read_file() support
From: Scott Branden @ 2020-07-25  5:14 UTC (permalink / raw)
  To: Kees Cook, Greg Kroah-Hartman
  Cc: Mimi Zohar, Luis Chamberlain, Jessica Yu, SeongJae Park, KP Singh,
	linux-efi, linux-security-module, linux-integrity, selinux,
	linux-kselftest, linux-kernel
In-Reply-To: <20200724213640.389191-1-keescook@chromium.org>

On 2020-07-24 2:36 p.m., Kees Cook wrote:
> v3:
> - add reviews/acks
> - add "IMA: Add support for file reads without contents" patch
> - trim CC list, in case that's why vger ignored v2
> v2: [missing from lkml archives! (CC list too long?) repeating changes here]
> - fix issues in firmware test suite
> - add firmware partial read patches
> - various bug fixes/cleanups
> v1: https://lore.kernel.org/lkml/20200717174309.1164575-1-keescook@chromium.org/
>
> Hi,
>
> Here's my tree for adding partial read support in kernel_read_file(),
> which fixes a number of issues along the way. It's got Scott's firmware
> and IMA patches ported and everything tests cleanly for me (even with
> CONFIG_IMA_APPRAISE=y).
>
> I think the intention is for this to go via Greg's tree since Scott's
> driver code will depend on it?
v3 of this patch series looks good and passes all of my tests.
Remaining patches
Acked-by: Scott Branden <scott.branden@broadcom.com>

I have added latest bcm-vk driver code to Kees' patch series and added 
it here:
https://github.com/sbranden/linux/tree/kernel_read_file_for_kees_v3

If everyone finds Kees' patch series acceptable then the 3 patches 
adding the bcm-vk driver
need to be added to the series.  I can send the 3 patches out separately 
and then
the two patch series can be combined in Greg or someone's tree if that 
works?
Or if an in-kernel user beyond kernel selftest is needed for 
request_partial_firmware_into_buf
in Kees' patch series then another PATCH v4 needs to be sent out 
including the bcm-vk driver.
>
> Thanks,
>
> -Kees
>
>
> Kees Cook (15):

Thanks for help Kees, it's works now.
>    test_firmware: Test platform fw loading on non-EFI systems
>    selftest/firmware: Add selftest timeout in settings
>    firmware_loader: EFI firmware loader must handle pre-allocated buffer
>    fs/kernel_read_file: Remove FIRMWARE_PREALLOC_BUFFER enum
>    fs/kernel_read_file: Remove FIRMWARE_EFI_EMBEDDED enum
>    fs/kernel_read_file: Split into separate source file
>    fs/kernel_read_file: Remove redundant size argument
>    fs/kernel_read_file: Switch buffer size arg to size_t
>    fs/kernel_read_file: Add file_size output argument
>    LSM: Introduce kernel_post_load_data() hook
>    firmware_loader: Use security_post_load_data()
>    module: Call security_kernel_post_load_data()
>    LSM: Add "contents" flag to kernel_read_file hook
>    fs/kernel_file_read: Add "offset" arg for partial reads
>    firmware: Store opt_flags in fw_priv
>
> Scott Branden (4):
>    fs/kernel_read_file: Split into separate include file
>    IMA: Add support for file reads without contents
>    firmware: Add request_partial_firmware_into_buf()
>    test_firmware: Test partial read support
>
>   drivers/base/firmware_loader/fallback.c       |  19 +-
>   drivers/base/firmware_loader/fallback.h       |   5 +-
>   .../base/firmware_loader/fallback_platform.c  |  16 +-
>   drivers/base/firmware_loader/firmware.h       |   7 +-
>   drivers/base/firmware_loader/main.c           | 143 ++++++++++---
>   drivers/firmware/efi/embedded-firmware.c      |  21 +-
>   drivers/firmware/efi/embedded-firmware.h      |  19 ++
>   fs/Makefile                                   |   3 +-
>   fs/exec.c                                     | 132 +-----------
>   fs/kernel_read_file.c                         | 189 ++++++++++++++++++
>   include/linux/efi_embedded_fw.h               |  13 --
>   include/linux/firmware.h                      |  12 ++
>   include/linux/fs.h                            |  39 ----
>   include/linux/ima.h                           |  19 +-
>   include/linux/kernel_read_file.h              |  55 +++++
>   include/linux/lsm_hook_defs.h                 |   6 +-
>   include/linux/lsm_hooks.h                     |  12 ++
>   include/linux/security.h                      |  19 +-
>   kernel/kexec.c                                |   2 +-
>   kernel/kexec_file.c                           |  19 +-
>   kernel/module.c                               |  24 ++-
>   lib/test_firmware.c                           | 159 +++++++++++++--
>   security/integrity/digsig.c                   |   8 +-
>   security/integrity/ima/ima_fs.c               |  10 +-
>   security/integrity/ima/ima_main.c             |  70 +++++--
>   security/integrity/ima/ima_policy.c           |   1 +
>   security/loadpin/loadpin.c                    |  17 +-
>   security/security.c                           |  26 ++-
>   security/selinux/hooks.c                      |   8 +-
>   .../selftests/firmware/fw_filesystem.sh       |  91 +++++++++
>   tools/testing/selftests/firmware/settings     |   8 +
>   tools/testing/selftests/kselftest/runner.sh   |   6 +-
>   32 files changed, 860 insertions(+), 318 deletions(-)
>   create mode 100644 drivers/firmware/efi/embedded-firmware.h
>   create mode 100644 fs/kernel_read_file.c
>   create mode 100644 include/linux/kernel_read_file.h
>   create mode 100644 tools/testing/selftests/firmware/settings
>


^ permalink raw reply

* Re: [PATCH] watch_queue: Limit the number of watches a user can hold
From: Jarkko Sakkinen @ 2020-07-25  3:25 UTC (permalink / raw)
  To: David Howells
  Cc: torvalds, keyrings, linux-security-module, linux-fsdevel,
	linux-kernel
In-Reply-To: <159562904644.2287160.13294507067766261970.stgit@warthog.procyon.org.uk>

On Fri, Jul 24, 2020 at 11:17:26PM +0100, David Howells wrote:
> Impose a limit on the number of watches that a user can hold so that they
> can't use this mechanism to fill up all the available memory.
> 
> This is done by putting a counter in user_struct that's incremented when a
> watch is allocated and decreased when it is released.  If the number
> exceeds the RLIMIT_NOFILE limit, the watch is rejected with EAGAIN.
> 
> This can be tested by the following means:
> 
>  (1) Create a watch queue and attach it to fd 5 in the program given - in
>      this case, bash:
> 
> 	keyctl watch_session /tmp/nlog /tmp/gclog 5 bash
> 
>  (2) In the shell, set the maximum number of files to, say, 99:
> 
> 	ulimit -n 99
> 
>  (3) Add 200 keyrings:
> 
> 	for ((i=0; i<200; i++)); do keyctl newring a$i @s || break; done
> 
>  (4) Try to watch all of the keyrings:
> 
> 	for ((i=0; i<200; i++)); do echo $i; keyctl watch_add 5 %:a$i || break; done
> 
>      This should fail when the number of watches belonging to the user hits
>      99.
> 
>  (5) Remove all the keyrings and all of those watches should go away:
> 
> 	for ((i=0; i<200; i++)); do keyctl unlink %:a$i; done
> 
>  (6) Kill off the watch queue by exiting the shell spawned by
>      watch_session.
> 
> Fixes: c73be61cede5 ("pipe: Add general notification queue support")
> Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
> Signed-off-by: David Howells <dhowells@redhat.com>
> ---
> 
>  include/linux/sched/user.h |    3 +++
>  kernel/watch_queue.c       |    8 ++++++++
>  2 files changed, 11 insertions(+)
> 
> diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
> index 917d88edb7b9..a8ec3b6093fc 100644
> --- a/include/linux/sched/user.h
> +++ b/include/linux/sched/user.h
> @@ -36,6 +36,9 @@ struct user_struct {
>      defined(CONFIG_NET) || defined(CONFIG_IO_URING)
>  	atomic_long_t locked_vm;
>  #endif
> +#ifdef CONFIG_WATCH_QUEUE
> +	atomic_t nr_watches;	/* The number of watches this user currently has */
> +#endif
>  
>  	/* Miscellaneous per-user rate limit */
>  	struct ratelimit_state ratelimit;
> diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
> index f74020f6bd9d..0ef8f65bd2d7 100644
> --- a/kernel/watch_queue.c
> +++ b/kernel/watch_queue.c
> @@ -393,6 +393,7 @@ static void free_watch(struct rcu_head *rcu)
>  	struct watch *watch = container_of(rcu, struct watch, rcu);
>  
>  	put_watch_queue(rcu_access_pointer(watch->queue));
> +	atomic_dec(&watch->cred->user->nr_watches);
>  	put_cred(watch->cred);
>  }
>  
> @@ -452,6 +453,13 @@ int add_watch_to_object(struct watch *watch, struct watch_list *wlist)
>  	watch->cred = get_current_cred();
>  	rcu_assign_pointer(watch->watch_list, wlist);
>  
> +	if (atomic_inc_return(&watch->cred->user->nr_watches) >
> +	    task_rlimit(current, RLIMIT_NOFILE)) {
> +		atomic_dec(&watch->cred->user->nr_watches);
> +		put_cred(watch->cred);
> +		return -EAGAIN;
> +	}
> +
>  	spin_lock_bh(&wqueue->lock);
>  	kref_get(&wqueue->usage);
>  	kref_get(&watch->usage);
> 
> 

Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>

David, BTW, would it be possible to push keyrings to lore.kernel.org?

I don't have an archive for keyrings, which means that I cannot push
this forward.

/Jarkko

^ permalink raw reply

* [RFC PATCH bpf-next] bpf: POC on local_storage charge and uncharge map_ops
From: Martin KaFai Lau @ 2020-07-25  1:30 UTC (permalink / raw)
  To: KP Singh, KP Singh
  Cc: bpf, linux-kernel, linux-security-module, Alexei Starovoitov,
	Daniel Borkmann, Paul Turner, Jann Horn, Florent Revest
In-Reply-To: <20200723115032.460770-4-kpsingh@chromium.org>

It is a direct replacement of the patch 3 in discussion [1]
and to test out the idea on adding
map_local_storage_charge, map_local_storage_uncharge,
and map_owner_storage_ptr.

It is only compiler tested to demo the idea.

[1]: https://patchwork.ozlabs.org/project/netdev/patch/20200723115032.460770-4-kpsingh@chromium.org/

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 include/linux/bpf.h            |  10 ++
 include/net/bpf_sk_storage.h   |  51 +++++++
 include/uapi/linux/bpf.h       |   8 +-
 net/core/bpf_sk_storage.c      | 250 +++++++++++++++++++++------------
 tools/include/uapi/linux/bpf.h |   8 +-
 5 files changed, 236 insertions(+), 91 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 72221aea1c60..d4eab7ccbb51 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -33,6 +33,9 @@ struct btf;
 struct btf_type;
 struct exception_table_entry;
 struct seq_operations;
+struct bpf_local_storage;
+struct bpf_local_storage_map;
+struct bpf_local_storage_elem;
 
 extern struct idr btf_idr;
 extern spinlock_t btf_idr_lock;
@@ -93,6 +96,13 @@ struct bpf_map_ops {
 	__poll_t (*map_poll)(struct bpf_map *map, struct file *filp,
 			     struct poll_table_struct *pts);
 
+	/* Functions called by bpf_local_storage maps */
+	int (*map_local_storage_charge)(struct bpf_local_storage_map *smap,
+					void *owner, u32 size);
+	void (*map_local_storage_uncharge)(struct bpf_local_storage_map *smap,
+					   void *owner, u32 size);
+	struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(struct bpf_local_storage_map *smap,
+								   void *owner);
 	/* BTF name and id of struct allocated by map_alloc */
 	const char * const map_btf_name;
 	int *map_btf_id;
diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
index 950c5aaba15e..05b777950eb3 100644
--- a/include/net/bpf_sk_storage.h
+++ b/include/net/bpf_sk_storage.h
@@ -3,8 +3,15 @@
 #ifndef _BPF_SK_STORAGE_H
 #define _BPF_SK_STORAGE_H
 
+#include <linux/rculist.h>
+#include <linux/list.h>
+#include <linux/hash.h>
 #include <linux/types.h>
 #include <linux/spinlock.h>
+#include <linux/bpf.h>
+#include <net/sock.h>
+#include <uapi/linux/sock_diag.h>
+#include <uapi/linux/btf.h>
 
 struct sock;
 
@@ -34,6 +41,50 @@ u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache);
 void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
 				      u16 idx);
 
+/* Helper functions for bpf_local_storage */
+int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
+
+struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr);
+
+struct bpf_local_storage_data *
+bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
+			 struct bpf_local_storage_map *smap,
+			 bool cacheit_lockit);
+
+void bpf_local_storage_map_free(struct bpf_local_storage_map *smap);
+
+int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+				    const struct btf *btf,
+				    const struct btf_type *key_type,
+				    const struct btf_type *value_type);
+
+void bpf_selem_link_storage(struct bpf_local_storage *local_storage,
+			    struct bpf_local_storage_elem *selem);
+
+bool bpf_selem_unlink_storage(struct bpf_local_storage *local_storage,
+			      struct bpf_local_storage_elem *selem,
+			      bool uncharge_omem);
+
+void bpf_selem_unlink(struct bpf_local_storage_elem *selem);
+
+void bpf_selem_link_map(struct bpf_local_storage_map *smap,
+			struct bpf_local_storage_elem *selem);
+
+void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem);
+
+struct bpf_local_storage_elem *
+bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
+		bool charge_mem);
+
+int
+bpf_local_storage_alloc(void *owner,
+			struct bpf_local_storage_map *smap,
+			struct bpf_local_storage_elem *first_selem);
+
+struct bpf_local_storage_data *
+bpf_local_storage_update(void *owner, struct bpf_map *map, void *value,
+			 u64 map_flags);
+
 #ifdef CONFIG_BPF_SYSCALL
 int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
 struct bpf_sk_storage_diag *
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 54d0c886e3ba..b9d2e4792d08 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3630,9 +3630,13 @@ enum {
 	BPF_F_SYSCTL_BASE_NAME		= (1ULL << 0),
 };
 
-/* BPF_FUNC_sk_storage_get flags */
+/* BPF_FUNC_<local>_storage_get flags */
 enum {
-	BPF_SK_STORAGE_GET_F_CREATE	= (1ULL << 0),
+	BPF_LOCAL_STORAGE_GET_F_CREATE	= (1ULL << 0),
+	/* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility
+	 * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead.
+	 */
+	BPF_SK_STORAGE_GET_F_CREATE  = BPF_LOCAL_STORAGE_GET_F_CREATE,
 };
 
 /* BPF_FUNC_read_branch_records flags. */
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index aa3e3a47acb5..ec54b9d424c8 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -83,7 +83,7 @@ struct bpf_local_storage_elem {
 struct bpf_local_storage {
 	struct bpf_local_storage_data __rcu *cache[BPF_LOCAL_STORAGE_CACHE_SIZE];
 	struct hlist_head list; /* List of bpf_local_storage_elem */
-	struct sock *owner;	/* The object that owns the the above "list" of
+	void *owner;		/* The object that owns the the above "list" of
 				 * bpf_local_storage_elem.
 				 */
 	struct rcu_head rcu;
@@ -109,6 +109,33 @@ static int omem_charge(struct sock *sk, unsigned int size)
 	return -ENOMEM;
 }
 
+static int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size)
+{
+	struct bpf_map *map = &smap->map;
+
+	if (!map->ops->map_local_storage_charge)
+		return 0;
+
+	return map->ops->map_local_storage_charge(smap, owner, size);
+}
+
+static void mem_uncharge(struct bpf_local_storage_map *smap, void *owner,
+			 u32 size)
+{
+	struct bpf_map *map = &smap->map;
+
+	if (map->ops->map_local_storage_uncharge)
+		map->ops->map_local_storage_uncharge(smap, owner, size);
+}
+
+static struct bpf_local_storage __rcu **
+owner_storage(struct bpf_local_storage_map *smap, void *owner)
+{
+	struct bpf_map *map = &smap->map;
+
+	return map->ops->map_owner_storage_ptr(smap, owner);
+}
+
 static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem)
 {
 	return !hlist_unhashed(&selem->snode);
@@ -119,13 +146,13 @@ static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
 	return !hlist_unhashed(&selem->map_node);
 }
 
-static struct bpf_local_storage_elem *
-bpf_selem_alloc(struct bpf_local_storage_map *smap, struct sock *sk,
-		void *value, bool charge_omem)
+struct bpf_local_storage_elem *
+bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
+		void *value, bool charge_mem)
 {
 	struct bpf_local_storage_elem *selem;
 
-	if (charge_omem && omem_charge(sk, smap->elem_size))
+	if (charge_mem && mem_charge(smap, owner, smap->elem_size))
 		return NULL;
 
 	selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN);
@@ -135,40 +162,42 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, struct sock *sk,
 		return selem;
 	}
 
-	if (charge_omem)
-		atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
+	if (charge_mem)
+		mem_uncharge(smap, owner, smap->elem_size);
 
 	return NULL;
 }
 
-/* sk_storage->lock must be held and selem->sk_storage == sk_storage.
+/* local_storage->lock must be held and selem->sk_storage == sk_storage.
  * The caller must ensure selem->smap is still valid to be
  * dereferenced for its smap->elem_size and smap->cache_idx.
  */
-static bool bpf_selem_unlink_storage(struct bpf_local_storage *local_storage,
-				     struct bpf_local_storage_elem *selem,
-				     bool uncharge_omem)
+bool bpf_selem_unlink_storage(struct bpf_local_storage *local_storage,
+			      struct bpf_local_storage_elem *selem,
+			      bool uncharge_mem)
 {
 	struct bpf_local_storage_map *smap;
 	bool free_local_storage;
-	struct sock *sk;
+	void *owner;
 
 	smap = rcu_dereference(SDATA(selem)->smap);
-	sk = local_storage->owner;
+	owner = local_storage->owner;
 
-	/* All uncharging on sk->sk_omem_alloc must be done first.
-	 * sk may be freed once the last selem is unlinked from local_storage.
+	/* All uncharging on owner must be done first.
+	 * The owner may be freed once the last selem is unlinked from
+	 * local_storage.
 	 */
-	if (uncharge_omem)
-		atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
+	if (uncharge_mem)
+		mem_uncharge(smap, owner, smap->elem_size);
 
 	free_local_storage = hlist_is_singular_node(&selem->snode,
 						    &local_storage->list);
 	if (free_local_storage) {
-		atomic_sub(sizeof(struct bpf_local_storage), &sk->sk_omem_alloc);
+		mem_uncharge(smap, owner, sizeof(struct bpf_local_storage));
 		local_storage->owner = NULL;
-		/* After this RCU_INIT, sk may be freed and cannot be used */
-		RCU_INIT_POINTER(sk->sk_bpf_storage, NULL);
+
+		/* After this RCU_INIT, owner may be freed and cannot be used */
+		RCU_INIT_POINTER(*owner_storage(smap, owner), NULL);
 
 		/* local_storage is not freed now.  local_storage->lock is
 		 * still held and raw_spin_unlock_bh(&local_storage->lock)
@@ -214,14 +243,14 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
 		kfree_rcu(local_storage, rcu);
 }
 
-static void bpf_selem_link_storage(struct bpf_local_storage *local_storage,
-				   struct bpf_local_storage_elem *selem)
+void bpf_selem_link_storage(struct bpf_local_storage *local_storage,
+			    struct bpf_local_storage_elem *selem)
 {
 	RCU_INIT_POINTER(selem->local_storage, local_storage);
 	hlist_add_head(&selem->snode, &local_storage->list);
 }
 
-static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
+void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
 {
 	struct bpf_local_storage_map *smap;
 	struct bpf_local_storage_map_bucket *b;
@@ -238,8 +267,8 @@ static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
 	raw_spin_unlock_bh(&b->lock);
 }
 
-static void bpf_selem_link_map(struct bpf_local_storage_map *smap,
-			       struct bpf_local_storage_elem *selem)
+void bpf_selem_link_map(struct bpf_local_storage_map *smap,
+			struct bpf_local_storage_elem *selem)
 {
 	struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);
 
@@ -249,7 +278,7 @@ static void bpf_selem_link_map(struct bpf_local_storage_map *smap,
 	raw_spin_unlock_bh(&b->lock);
 }
 
-static void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
+void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
 {
 	/* Always unlink from map before unlinking from local_storage
 	 * because selem will be freed after successfully unlinked from
@@ -259,7 +288,7 @@ static void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
 	__bpf_selem_unlink_storage(selem);
 }
 
-static struct bpf_local_storage_data *
+struct bpf_local_storage_data *
 bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
 			 struct bpf_local_storage_map *smap,
 			 bool cacheit_lockit)
@@ -325,40 +354,45 @@ static int check_flags(const struct bpf_local_storage_data *old_sdata,
 	return 0;
 }
 
-static int sk_storage_alloc(struct sock *sk,
+int bpf_local_storage_alloc(void *owner,
 			    struct bpf_local_storage_map *smap,
 			    struct bpf_local_storage_elem *first_selem)
 {
-	struct bpf_local_storage *prev_sk_storage, *sk_storage;
+	struct bpf_local_storage *prev_storage, *storage;
+	struct bpf_local_storage **owner_storage_ptr;
 	int err;
 
-	err = omem_charge(sk, sizeof(*sk_storage));
+	err = mem_charge(smap, owner, sizeof(*storage));
 	if (err)
 		return err;
 
-	sk_storage = kzalloc(sizeof(*sk_storage), GFP_ATOMIC | __GFP_NOWARN);
-	if (!sk_storage) {
+	storage = kzalloc(sizeof(*storage), GFP_ATOMIC | __GFP_NOWARN);
+	if (!storage) {
 		err = -ENOMEM;
 		goto uncharge;
 	}
-	INIT_HLIST_HEAD(&sk_storage->list);
-	raw_spin_lock_init(&sk_storage->lock);
-	sk_storage->owner = sk;
 
-	bpf_selem_link_storage(sk_storage, first_selem);
+	INIT_HLIST_HEAD(&storage->list);
+	raw_spin_lock_init(&storage->lock);
+	storage->owner = owner;
+
+	bpf_selem_link_storage(storage, first_selem);
 	bpf_selem_link_map(smap, first_selem);
-	/* Publish sk_storage to sk.  sk->sk_lock cannot be acquired.
-	 * Hence, atomic ops is used to set sk->sk_bpf_storage
-	 * from NULL to the newly allocated sk_storage ptr.
+
+	owner_storage_ptr =
+		(struct bpf_local_storage **)owner_storage(smap, owner);
+	/* Publish storage to the owner.
+	 * Instead of using any lock of the kernel object (i.e. owner),
+	 * cmpxchg will work with any kernel object regardless what
+	 * the running context is, bh, irq...etc.
 	 *
-	 * From now on, the sk->sk_bpf_storage pointer is protected
-	 * by the sk_storage->lock.  Hence,  when freeing
-	 * the sk->sk_bpf_storage, the sk_storage->lock must
-	 * be held before setting sk->sk_bpf_storage to NULL.
+	 * From now on, the owner->storage pointer (e.g. sk->sk_bpf_storage)
+	 * is protected by the storage->lock.  Hence, when freeing
+	 * the owner->storage, the storage->lock must be held before
+	 * setting owner->storage ptr to NULL.
 	 */
-	prev_sk_storage = cmpxchg((struct bpf_local_storage **)&sk->sk_bpf_storage,
-				  NULL, sk_storage);
-	if (unlikely(prev_sk_storage)) {
+	prev_storage = cmpxchg(owner_storage_ptr, NULL, storage);
+	if (unlikely(prev_storage)) {
 		bpf_selem_unlink_map(first_selem);
 		err = -EAGAIN;
 		goto uncharge;
@@ -366,7 +400,7 @@ static int sk_storage_alloc(struct sock *sk,
 		/* Note that even first_selem was linked to smap's
 		 * bucket->list, first_selem can be freed immediately
 		 * (instead of kfree_rcu) because
-		 * bpf_sk_storage_map_free() does a
+		 * bpf_local_storage_map_free() does a
 		 * synchronize_rcu() before walking the bucket->list.
 		 * Hence, no one is accessing selem from the
 		 * bucket->list under rcu_read_lock().
@@ -376,8 +410,8 @@ static int sk_storage_alloc(struct sock *sk,
 	return 0;
 
 uncharge:
-	kfree(sk_storage);
-	atomic_sub(sizeof(*sk_storage), &sk->sk_omem_alloc);
+	kfree(storage);
+	mem_uncharge(smap, owner, sizeof(*storage));
 	return err;
 }
 
@@ -386,9 +420,9 @@ static int sk_storage_alloc(struct sock *sk,
  * Otherwise, it will become a leak (and other memory issues
  * during map destruction).
  */
-static struct bpf_local_storage_data *
-bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
-			 u64 map_flags)
+struct bpf_local_storage_data *
+bpf_local_storage_update(void *owner, struct bpf_map *map,
+			 void *value, u64 map_flags)
 {
 	struct bpf_local_storage_data *old_sdata = NULL;
 	struct bpf_local_storage_elem *selem;
@@ -403,21 +437,21 @@ bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
 		return ERR_PTR(-EINVAL);
 
 	smap = (struct bpf_local_storage_map *)map;
-	local_storage = rcu_dereference(sk->sk_bpf_storage);
+	local_storage = rcu_dereference(*owner_storage(smap, owner));
 	if (!local_storage || hlist_empty(&local_storage->list)) {
-		/* Very first elem for this object */
+		/* Very first elem for the owner */
 		err = check_flags(NULL, map_flags);
 		if (err)
 			return ERR_PTR(err);
 
-		selem = bpf_selem_alloc(smap, sk, value, true);
+		selem = bpf_selem_alloc(smap, owner, value, true);
 		if (!selem)
 			return ERR_PTR(-ENOMEM);
 
-		err = sk_storage_alloc(sk, smap, selem);
+		err = bpf_local_storage_alloc(owner, smap, selem);
 		if (err) {
 			kfree(selem);
-			atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
+			mem_uncharge(smap, owner, smap->elem_size);
 			return ERR_PTR(err);
 		}
 
@@ -429,8 +463,8 @@ bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
 		 * such that it can avoid taking the local_storage->lock
 		 * and changing the lists.
 		 */
-		old_sdata =
-			bpf_local_storage_lookup(local_storage, smap, false);
+		old_sdata = bpf_local_storage_lookup(local_storage, smap,
+						     false);
 		err = check_flags(old_sdata, map_flags);
 		if (err)
 			return ERR_PTR(err);
@@ -474,7 +508,7 @@ bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
 	 * old_sdata will not be uncharged later during
 	 * bpf_selem_unlink_storage().
 	 */
-	selem = bpf_selem_alloc(smap, sk, value, !old_sdata);
+	selem = bpf_selem_alloc(smap, owner, value, !old_sdata);
 	if (!selem) {
 		err = -ENOMEM;
 		goto unlock_err;
@@ -566,7 +600,7 @@ void bpf_sk_storage_free(struct sock *sk)
 	 * Thus, no elem can be added-to or deleted-from the
 	 * sk_storage->list by the bpf_prog or by the bpf-map's syscall.
 	 *
-	 * It is racing with bpf_sk_storage_map_free() alone
+	 * It is racing with bpf_local_storage_map_free() alone
 	 * when unlinking elem from the sk_storage->list and
 	 * the map's bucket->list.
 	 */
@@ -586,17 +620,12 @@ void bpf_sk_storage_free(struct sock *sk)
 		kfree_rcu(sk_storage, rcu);
 }
 
-static void bpf_local_storage_map_free(struct bpf_map *map)
+void bpf_local_storage_map_free(struct bpf_local_storage_map *smap)
 {
 	struct bpf_local_storage_elem *selem;
-	struct bpf_local_storage_map *smap;
 	struct bpf_local_storage_map_bucket *b;
 	unsigned int i;
 
-	smap = (struct bpf_local_storage_map *)map;
-
-	bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
-
 	/* Note that this map might be concurrently cloned from
 	 * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
 	 * RCU read section to finish before proceeding. New RCU
@@ -606,11 +635,12 @@ static void bpf_local_storage_map_free(struct bpf_map *map)
 
 	/* bpf prog and the userspace can no longer access this map
 	 * now.  No new selem (of this map) can be added
-	 * to the sk->sk_bpf_storage or to the map bucket's list.
+	 * to the bpf_local_storage or to the map bucket's list.
 	 *
 	 * The elem of this map can be cleaned up here
 	 * or
-	 * by bpf_sk_storage_free() during __sk_destruct().
+	 * by bpf_local_storage_free() during the destruction of the
+	 * owner object. eg. __sk_destruct.
 	 */
 	for (i = 0; i < (1U << smap->bucket_log); i++) {
 		b = &smap->buckets[i];
@@ -626,22 +656,31 @@ static void bpf_local_storage_map_free(struct bpf_map *map)
 		rcu_read_unlock();
 	}
 
-	/* bpf_sk_storage_free() may still need to access the map.
-	 * e.g. bpf_sk_storage_free() has unlinked selem from the map
+	/* bpf_local_storage_free() may still need to access the map.
+	 * e.g. bpf_local_storage_free() has unlinked selem from the map
 	 * which then made the above while((selem = ...)) loop
 	 * exited immediately.
 	 *
-	 * However, the bpf_sk_storage_free() still needs to access
+	 * However, the bpf_local_storage_free() still needs to access
 	 * the smap->elem_size to do the uncharging in
 	 * bpf_selem_unlink_storage().
 	 *
 	 * Hence, wait another rcu grace period for the
-	 * bpf_sk_storage_free() to finish.
+	 * bpf_local_storage_free() to finish.
 	 */
 	synchronize_rcu();
 
 	kvfree(smap->buckets);
-	kfree(map);
+	kfree(smap);
+}
+
+static void sk_storage_map_free(struct bpf_map *map)
+{
+	struct bpf_local_storage_map *smap;
+
+	smap = (struct bpf_local_storage_map *)map;
+	bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
+	bpf_local_storage_map_free(smap);
 }
 
 /* U16_MAX is much more than enough for sk local storage
@@ -653,7 +692,7 @@ static void bpf_local_storage_map_free(struct bpf_map *map)
 	       sizeof(struct bpf_local_storage_elem)),			\
 	      (U16_MAX - sizeof(struct bpf_local_storage_elem)))
 
-static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
+int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
 {
 	if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK ||
 	    !(attr->map_flags & BPF_F_NO_PREALLOC) ||
@@ -672,7 +711,7 @@ static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
 	return 0;
 }
 
-static struct bpf_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
+struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
 {
 	struct bpf_local_storage_map *smap;
 	unsigned int i;
@@ -710,9 +749,21 @@ static struct bpf_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
 		raw_spin_lock_init(&smap->buckets[i].lock);
 	}
 
-	smap->elem_size = sizeof(struct bpf_local_storage_elem) + attr->value_size;
-	smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache);
+	smap->elem_size =
+		sizeof(struct bpf_local_storage_elem) + attr->value_size;
+
+	return smap;
+}
+
+static struct bpf_map *sk_storage_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_local_storage_map *smap;
 
+	smap = bpf_local_storage_map_alloc(attr);
+	if (IS_ERR(smap))
+		return ERR_CAST(smap);
+
+	smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache);
 	return &smap->map;
 }
 
@@ -722,10 +773,10 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key,
 	return -ENOTSUPP;
 }
 
-static int bpf_sk_storage_map_check_btf(const struct bpf_map *map,
-					const struct btf *btf,
-					const struct btf_type *key_type,
-					const struct btf_type *value_type)
+int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+				    const struct btf *btf,
+				    const struct btf_type *key_type,
+				    const struct btf_type *value_type)
 {
 	u32 int_data;
 
@@ -856,7 +907,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
 			bpf_selem_link_map(smap, copy_selem);
 			bpf_selem_link_storage(new_sk_storage, copy_selem);
 		} else {
-			ret = sk_storage_alloc(newsk, smap, copy_selem);
+			ret = bpf_local_storage_alloc(newsk, smap, copy_selem);
 			if (ret) {
 				kfree(copy_selem);
 				atomic_sub(smap->elem_size,
@@ -925,18 +976,43 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
 	return -ENOENT;
 }
 
+static int sk_storage_charge(struct bpf_local_storage_map *smap,
+			     void *owner, u32 size)
+{
+	return omem_charge(owner, size);
+}
+
+static void sk_storage_uncharge(struct bpf_local_storage_map *smap,
+				void *owner, u32 size)
+{
+	struct sock *sk = owner;
+
+	atomic_sub(size, &sk->sk_omem_alloc);
+}
+
+static struct bpf_local_storage __rcu **
+sk_storage_ptr(struct bpf_local_storage_map *smap, void *owner)
+{
+	struct sock *sk = owner;
+
+	return &sk->sk_bpf_storage;
+}
+
 static int sk_storage_map_btf_id;
 const struct bpf_map_ops sk_storage_map_ops = {
-	.map_alloc_check = bpf_sk_storage_map_alloc_check,
-	.map_alloc = bpf_local_storage_map_alloc,
-	.map_free = bpf_local_storage_map_free,
+	.map_alloc_check = bpf_local_storage_map_alloc_check,
+	.map_alloc = sk_storage_map_alloc,
+	.map_free = sk_storage_map_free,
 	.map_get_next_key = notsupp_get_next_key,
 	.map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
 	.map_update_elem = bpf_fd_sk_storage_update_elem,
 	.map_delete_elem = bpf_fd_sk_storage_delete_elem,
-	.map_check_btf = bpf_sk_storage_map_check_btf,
+	.map_check_btf = bpf_local_storage_map_check_btf,
 	.map_btf_name = "bpf_local_storage_map",
 	.map_btf_id = &sk_storage_map_btf_id,
+	.map_local_storage_charge = sk_storage_charge,
+	.map_local_storage_uncharge = sk_storage_uncharge,
+	.map_owner_storage_ptr = sk_storage_ptr,
 };
 
 const struct bpf_func_proto bpf_sk_storage_get_proto = {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 54d0c886e3ba..b9d2e4792d08 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3630,9 +3630,13 @@ enum {
 	BPF_F_SYSCTL_BASE_NAME		= (1ULL << 0),
 };
 
-/* BPF_FUNC_sk_storage_get flags */
+/* BPF_FUNC_<local>_storage_get flags */
 enum {
-	BPF_SK_STORAGE_GET_F_CREATE	= (1ULL << 0),
+	BPF_LOCAL_STORAGE_GET_F_CREATE	= (1ULL << 0),
+	/* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility
+	 * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead.
+	 */
+	BPF_SK_STORAGE_GET_F_CREATE  = BPF_LOCAL_STORAGE_GET_F_CREATE,
 };
 
 /* BPF_FUNC_read_branch_records flags. */
-- 
2.24.1


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox