Linux Security Modules development
 help / color / mirror / Atom feed
* [RFC PATCH 06/14] keys: Add a notification facility [ver #3]
From: David Howells @ 2020-01-15 13:31 UTC (permalink / raw)
  To: torvalds
  Cc: dhowells, Greg Kroah-Hartman, Casey Schaufler, Stephen Smalley,
	nicolas.dichtel, raven, Christian Brauner, dhowells, keyrings,
	linux-usb, linux-block, linux-security-module, linux-fsdevel,
	linux-api, linux-security-module, linux-kernel
In-Reply-To: <157909503552.20155.3030058841911628518.stgit@warthog.procyon.org.uk>

Add a key/keyring change notification facility whereby notifications about
changes in key and keyring content and attributes can be received.

Firstly, an event queue needs to be created:

	pipe2(fds, O_NOTIFICATION_PIPE);
	ioctl(fds[1], IOC_WATCH_QUEUE_SET_SIZE, 256);

then a notification can be set up to report notifications via that queue:

	struct watch_notification_filter filter = {
		.nr_filters = 1,
		.filters = {
			[0] = {
				.type = WATCH_TYPE_KEY_NOTIFY,
				.subtype_filter[0] = UINT_MAX,
			},
		},
	};
	ioctl(fds[1], IOC_WATCH_QUEUE_SET_FILTER, &filter);
	keyctl_watch_key(KEY_SPEC_SESSION_KEYRING, fds[1], 0x01);

After that, records will be placed into the queue when events occur in
which keys are changed in some way.  Records are of the following format:

	struct key_notification {
		struct watch_notification watch;
		__u32	key_id;
		__u32	aux;
	} *n;

Where:

	n->watch.type will be WATCH_TYPE_KEY_NOTIFY.

	n->watch.subtype will indicate the type of event, such as
	NOTIFY_KEY_REVOKED.

	n->watch.info & WATCH_INFO_LENGTH will indicate the length of the
	record.

	n->watch.info & WATCH_INFO_ID will be the second argument to
	keyctl_watch_key(), shifted.

	n->key will be the ID of the affected key.

	n->aux will hold subtype-dependent information, such as the key
	being linked into the keyring specified by n->key in the case of
	NOTIFY_KEY_LINKED.

Note that it is permissible for event records to be of variable length -
or, at least, the length may be dependent on the subtype.  Note also that
the queue can be shared between multiple notifications of various types.

Signed-off-by: David Howells <dhowells@redhat.com>
---

 Documentation/security/keys/core.rst |   58 ++++++++++++++++++++
 include/linux/key.h                  |    3 +
 include/uapi/linux/keyctl.h          |    2 +
 include/uapi/linux/watch_queue.h     |   28 +++++++++-
 security/keys/Kconfig                |    9 +++
 security/keys/compat.c               |    3 +
 security/keys/gc.c                   |    5 ++
 security/keys/internal.h             |   30 ++++++++++
 security/keys/key.c                  |   38 ++++++++-----
 security/keys/keyctl.c               |   99 +++++++++++++++++++++++++++++++++-
 security/keys/keyring.c              |   20 ++++---
 security/keys/request_key.c          |    4 +
 12 files changed, 271 insertions(+), 28 deletions(-)

diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst
index d9b0b859018b..0154721b20b2 100644
--- a/Documentation/security/keys/core.rst
+++ b/Documentation/security/keys/core.rst
@@ -833,6 +833,7 @@ The keyctl syscall functions are:
      A process must have search permission on the key for this function to be
      successful.
 
+
   *  Compute a Diffie-Hellman shared secret or public key::
 
 	long keyctl(KEYCTL_DH_COMPUTE, struct keyctl_dh_params *params,
@@ -1026,6 +1027,63 @@ The keyctl syscall functions are:
      written into the output buffer.  Verification returns 0 on success.
 
 
+  *  Watch a key or keyring for changes::
+
+	long keyctl(KEYCTL_WATCH_KEY, key_serial_t key, int queue_fd,
+		    const struct watch_notification_filter *filter);
+
+     This will set or remove a watch for changes on the specified key or
+     keyring.
+
+     "key" is the ID of the key to be watched.
+
+     "queue_fd" is a file descriptor referring to an open "/dev/watch_queue"
+     which manages the buffer into which notifications will be delivered.
+
+     "filter" is either NULL to remove a watch or a filter specification to
+     indicate what events are required from the key.
+
+     See Documentation/watch_queue.rst for more information.
+
+     Note that only one watch may be emplaced for any particular { key,
+     queue_fd } combination.
+
+     Notification records look like::
+
+	struct key_notification {
+		struct watch_notification watch;
+		__u32	key_id;
+		__u32	aux;
+	};
+
+     In this, watch::type will be "WATCH_TYPE_KEY_NOTIFY" and subtype will be
+     one of::
+
+	NOTIFY_KEY_INSTANTIATED
+	NOTIFY_KEY_UPDATED
+	NOTIFY_KEY_LINKED
+	NOTIFY_KEY_UNLINKED
+	NOTIFY_KEY_CLEARED
+	NOTIFY_KEY_REVOKED
+	NOTIFY_KEY_INVALIDATED
+	NOTIFY_KEY_SETATTR
+
+     Where these indicate a key being instantiated/rejected, updated, a link
+     being made in a keyring, a link being removed from a keyring, a keyring
+     being cleared, a key being revoked, a key being invalidated or a key
+     having one of its attributes changed (user, group, perm, timeout,
+     restriction).
+
+     If a watched key is deleted, a basic watch_notification will be issued
+     with "type" set to WATCH_TYPE_META and "subtype" set to
+     watch_meta_removal_notification.  The watchpoint ID will be set in the
+     "info" field.
+
+     This needs to be configured by enabling:
+
+	"Provide key/keyring change notifications" (KEY_NOTIFICATIONS)
+
+
 Kernel Services
 ===============
 
diff --git a/include/linux/key.h b/include/linux/key.h
index 6cf8e71cf8b7..b99b40db08fc 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -176,6 +176,9 @@ struct key {
 		struct list_head graveyard_link;
 		struct rb_node	serial_node;
 	};
+#ifdef CONFIG_KEY_NOTIFICATIONS
+	struct watch_list	*watchers;	/* Entities watching this key for changes */
+#endif
 	struct rw_semaphore	sem;		/* change vs change sem */
 	struct key_user		*user;		/* owner of this key */
 	void			*security;	/* security data for this key */
diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h
index ed3d5893830d..4c8884eea808 100644
--- a/include/uapi/linux/keyctl.h
+++ b/include/uapi/linux/keyctl.h
@@ -69,6 +69,7 @@
 #define KEYCTL_RESTRICT_KEYRING		29	/* Restrict keys allowed to link to a keyring */
 #define KEYCTL_MOVE			30	/* Move keys between keyrings */
 #define KEYCTL_CAPABILITIES		31	/* Find capabilities of keyrings subsystem */
+#define KEYCTL_WATCH_KEY		32	/* Watch a key or ring of keys for changes */
 
 /* keyctl structures */
 struct keyctl_dh_params {
@@ -130,5 +131,6 @@ struct keyctl_pkey_params {
 #define KEYCTL_CAPS0_MOVE		0x80 /* KEYCTL_MOVE supported */
 #define KEYCTL_CAPS1_NS_KEYRING_NAME	0x01 /* Keyring names are per-user_namespace */
 #define KEYCTL_CAPS1_NS_KEY_TAG		0x02 /* Key indexing can include a namespace tag */
+#define KEYCTL_CAPS1_NOTIFICATIONS	0x04 /* Keys generate watchable notifications */
 
 #endif /*  _LINUX_KEYCTL_H */
diff --git a/include/uapi/linux/watch_queue.h b/include/uapi/linux/watch_queue.h
index 3a5790f1f05d..c3d8320b5d3a 100644
--- a/include/uapi/linux/watch_queue.h
+++ b/include/uapi/linux/watch_queue.h
@@ -13,7 +13,8 @@
 
 enum watch_notification_type {
 	WATCH_TYPE_META		= 0,	/* Special record */
-	WATCH_TYPE__NR		= 1
+	WATCH_TYPE_KEY_NOTIFY	= 1,	/* Key change event notification */
+	WATCH_TYPE__NR		= 2
 };
 
 enum watch_meta_notification_subtype {
@@ -75,4 +76,29 @@ struct watch_notification_removal {
 	__u64	id;		/* Type-dependent identifier */
 };
 
+/*
+ * Type of key/keyring change notification.
+ */
+enum key_notification_subtype {
+	NOTIFY_KEY_INSTANTIATED	= 0, /* Key was instantiated (aux is error code) */
+	NOTIFY_KEY_UPDATED	= 1, /* Key was updated */
+	NOTIFY_KEY_LINKED	= 2, /* Key (aux) was added to watched keyring */
+	NOTIFY_KEY_UNLINKED	= 3, /* Key (aux) was removed from watched keyring */
+	NOTIFY_KEY_CLEARED	= 4, /* Keyring was cleared */
+	NOTIFY_KEY_REVOKED	= 5, /* Key was revoked */
+	NOTIFY_KEY_INVALIDATED	= 6, /* Key was invalidated */
+	NOTIFY_KEY_SETATTR	= 7, /* Key's attributes got changed */
+};
+
+/*
+ * Key/keyring notification record.
+ * - watch.type = WATCH_TYPE_KEY_NOTIFY
+ * - watch.subtype = enum key_notification_type
+ */
+struct key_notification {
+	struct watch_notification watch;
+	__u32	key_id;		/* The key/keyring affected */
+	__u32	aux;		/* Per-type auxiliary data */
+};
+
 #endif /* _UAPI_LINUX_WATCH_QUEUE_H */
diff --git a/security/keys/Kconfig b/security/keys/Kconfig
index 47c041563d41..d4dc5ea208af 100644
--- a/security/keys/Kconfig
+++ b/security/keys/Kconfig
@@ -116,3 +116,12 @@ config KEY_DH_OPERATIONS
 	 in the kernel.
 
 	 If you are unsure as to whether this is required, answer N.
+
+config KEY_NOTIFICATIONS
+	bool "Provide key/keyring change notifications"
+	depends on KEYS && WATCH_QUEUE
+	help
+	  This option provides support for getting change notifications on keys
+	  and keyrings on which the caller has View permission.  This makes use
+	  of the /dev/watch_queue misc device to handle the notification
+	  buffer and provides KEYCTL_WATCH_KEY to enable/disable watches.
diff --git a/security/keys/compat.c b/security/keys/compat.c
index b975f8f11124..6ee9d8f6a4a5 100644
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@@ -156,6 +156,9 @@ COMPAT_SYSCALL_DEFINE5(keyctl, u32, option,
 	case KEYCTL_CAPABILITIES:
 		return keyctl_capabilities(compat_ptr(arg2), arg3);
 
+	case KEYCTL_WATCH_KEY:
+		return keyctl_watch_key(arg2, arg3, arg4);
+
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/security/keys/gc.c b/security/keys/gc.c
index 671dd730ecfc..3c90807476eb 100644
--- a/security/keys/gc.c
+++ b/security/keys/gc.c
@@ -131,6 +131,11 @@ static noinline void key_gc_unused_keys(struct list_head *keys)
 		kdebug("- %u", key->serial);
 		key_check(key);
 
+#ifdef CONFIG_KEY_NOTIFICATIONS
+		remove_watch_list(key->watchers, key->serial);
+		key->watchers = NULL;
+#endif
+
 		/* Throw away the key data if the key is instantiated */
 		if (state == KEY_IS_POSITIVE && key->type->destroy)
 			key->type->destroy(key);
diff --git a/security/keys/internal.h b/security/keys/internal.h
index ba3e2da14cef..2a230c665162 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -15,6 +15,7 @@
 #include <linux/task_work.h>
 #include <linux/keyctl.h>
 #include <linux/refcount.h>
+#include <linux/watch_queue.h>
 #include <linux/compat.h>
 
 struct iovec;
@@ -97,7 +98,8 @@ extern int __key_link_begin(struct key *keyring,
 			    const struct keyring_index_key *index_key,
 			    struct assoc_array_edit **_edit);
 extern int __key_link_check_live_key(struct key *keyring, struct key *key);
-extern void __key_link(struct key *key, struct assoc_array_edit **_edit);
+extern void __key_link(struct key *keyring, struct key *key,
+		       struct assoc_array_edit **_edit);
 extern void __key_link_end(struct key *keyring,
 			   const struct keyring_index_key *index_key,
 			   struct assoc_array_edit *edit);
@@ -181,6 +183,23 @@ extern int key_task_permission(const key_ref_t key_ref,
 			       const struct cred *cred,
 			       key_perm_t perm);
 
+static inline void notify_key(struct key *key,
+			      enum key_notification_subtype subtype, u32 aux)
+{
+#ifdef CONFIG_KEY_NOTIFICATIONS
+	struct key_notification n = {
+		.watch.type	= WATCH_TYPE_KEY_NOTIFY,
+		.watch.subtype	= subtype,
+		.watch.info	= watch_sizeof(n),
+		.key_id		= key_serial(key),
+		.aux		= aux,
+	};
+
+	post_watch_notification(key->watchers, &n.watch, current_cred(),
+				n.key_id);
+#endif
+}
+
 /*
  * Check to see whether permission is granted to use a key in the desired way.
  */
@@ -331,6 +350,15 @@ static inline long keyctl_pkey_e_d_s(int op,
 
 extern long keyctl_capabilities(unsigned char __user *_buffer, size_t buflen);
 
+#ifdef CONFIG_KEY_NOTIFICATIONS
+extern long keyctl_watch_key(key_serial_t, int, int);
+#else
+static inline long keyctl_watch_key(key_serial_t key_id, int watch_fd, int watch_id)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
 /*
  * Debugging key validation
  */
diff --git a/security/keys/key.c b/security/keys/key.c
index 764f4c57913e..83e8d7c4bb6f 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -443,6 +443,7 @@ static int __key_instantiate_and_link(struct key *key,
 			/* mark the key as being instantiated */
 			atomic_inc(&key->user->nikeys);
 			mark_key_instantiated(key, 0);
+			notify_key(key, NOTIFY_KEY_INSTANTIATED, 0);
 
 			if (test_and_clear_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags))
 				awaken = 1;
@@ -452,7 +453,7 @@ static int __key_instantiate_and_link(struct key *key,
 				if (test_bit(KEY_FLAG_KEEP, &keyring->flags))
 					set_bit(KEY_FLAG_KEEP, &key->flags);
 
-				__key_link(key, _edit);
+				__key_link(keyring, key, _edit);
 			}
 
 			/* disable the authorisation key */
@@ -600,6 +601,7 @@ int key_reject_and_link(struct key *key,
 		/* mark the key as being negatively instantiated */
 		atomic_inc(&key->user->nikeys);
 		mark_key_instantiated(key, -error);
+		notify_key(key, NOTIFY_KEY_INSTANTIATED, -error);
 		key->expiry = ktime_get_real_seconds() + timeout;
 		key_schedule_gc(key->expiry + key_gc_delay);
 
@@ -610,7 +612,7 @@ int key_reject_and_link(struct key *key,
 
 		/* and link it into the destination keyring */
 		if (keyring && link_ret == 0)
-			__key_link(key, &edit);
+			__key_link(keyring, key, &edit);
 
 		/* disable the authorisation key */
 		if (authkey)
@@ -763,9 +765,11 @@ static inline key_ref_t __key_update(key_ref_t key_ref,
 	down_write(&key->sem);
 
 	ret = key->type->update(key, prep);
-	if (ret == 0)
+	if (ret == 0) {
 		/* Updating a negative key positively instantiates it */
 		mark_key_instantiated(key, 0);
+		notify_key(key, NOTIFY_KEY_UPDATED, 0);
+	}
 
 	up_write(&key->sem);
 
@@ -1013,9 +1017,11 @@ int key_update(key_ref_t key_ref, const void *payload, size_t plen)
 	down_write(&key->sem);
 
 	ret = key->type->update(key, &prep);
-	if (ret == 0)
+	if (ret == 0) {
 		/* Updating a negative key positively instantiates it */
 		mark_key_instantiated(key, 0);
+		notify_key(key, NOTIFY_KEY_UPDATED, 0);
+	}
 
 	up_write(&key->sem);
 
@@ -1047,15 +1053,17 @@ void key_revoke(struct key *key)
 	 *   instantiated
 	 */
 	down_write_nested(&key->sem, 1);
-	if (!test_and_set_bit(KEY_FLAG_REVOKED, &key->flags) &&
-	    key->type->revoke)
-		key->type->revoke(key);
-
-	/* set the death time to no more than the expiry time */
-	time = ktime_get_real_seconds();
-	if (key->revoked_at == 0 || key->revoked_at > time) {
-		key->revoked_at = time;
-		key_schedule_gc(key->revoked_at + key_gc_delay);
+	if (!test_and_set_bit(KEY_FLAG_REVOKED, &key->flags)) {
+		notify_key(key, NOTIFY_KEY_REVOKED, 0);
+		if (key->type->revoke)
+			key->type->revoke(key);
+
+		/* set the death time to no more than the expiry time */
+		time = ktime_get_real_seconds();
+		if (key->revoked_at == 0 || key->revoked_at > time) {
+			key->revoked_at = time;
+			key_schedule_gc(key->revoked_at + key_gc_delay);
+		}
 	}
 
 	up_write(&key->sem);
@@ -1077,8 +1085,10 @@ void key_invalidate(struct key *key)
 
 	if (!test_bit(KEY_FLAG_INVALIDATED, &key->flags)) {
 		down_write_nested(&key->sem, 1);
-		if (!test_and_set_bit(KEY_FLAG_INVALIDATED, &key->flags))
+		if (!test_and_set_bit(KEY_FLAG_INVALIDATED, &key->flags)) {
+			notify_key(key, NOTIFY_KEY_INVALIDATED, 0);
 			key_schedule_gc_links();
+		}
 		up_write(&key->sem);
 	}
 }
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 9b898c969558..6610649514fb 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -37,7 +37,9 @@ static const unsigned char keyrings_capabilities[2] = {
 	       KEYCTL_CAPS0_MOVE
 	       ),
 	[1] = (KEYCTL_CAPS1_NS_KEYRING_NAME |
-	       KEYCTL_CAPS1_NS_KEY_TAG),
+	       KEYCTL_CAPS1_NS_KEY_TAG |
+	       (IS_ENABLED(CONFIG_KEY_NOTIFICATIONS)	? KEYCTL_CAPS1_NOTIFICATIONS : 0)
+	       ),
 };
 
 static int key_get_type_from_user(char *type,
@@ -970,6 +972,7 @@ long keyctl_chown_key(key_serial_t id, uid_t user, gid_t group)
 	if (group != (gid_t) -1)
 		key->gid = gid;
 
+	notify_key(key, NOTIFY_KEY_SETATTR, 0);
 	ret = 0;
 
 error_put:
@@ -1020,6 +1023,7 @@ long keyctl_setperm_key(key_serial_t id, key_perm_t perm)
 	/* if we're not the sysadmin, we can only change a key that we own */
 	if (capable(CAP_SYS_ADMIN) || uid_eq(key->uid, current_fsuid())) {
 		key->perm = perm;
+		notify_key(key, NOTIFY_KEY_SETATTR, 0);
 		ret = 0;
 	}
 
@@ -1411,10 +1415,12 @@ long keyctl_set_timeout(key_serial_t id, unsigned timeout)
 okay:
 	key = key_ref_to_ptr(key_ref);
 	ret = 0;
-	if (test_bit(KEY_FLAG_KEEP, &key->flags))
+	if (test_bit(KEY_FLAG_KEEP, &key->flags)) {
 		ret = -EPERM;
-	else
+	} else {
 		key_set_timeout(key, timeout);
+		notify_key(key, NOTIFY_KEY_SETATTR, 0);
+	}
 	key_put(key);
 
 error:
@@ -1688,6 +1694,90 @@ long keyctl_restrict_keyring(key_serial_t id, const char __user *_type,
 	return ret;
 }
 
+#ifdef CONFIG_KEY_NOTIFICATIONS
+/*
+ * Watch for changes to a key.
+ *
+ * The caller must have View permission to watch a key or keyring.
+ */
+long keyctl_watch_key(key_serial_t id, int watch_queue_fd, int watch_id)
+{
+	struct watch_queue *wqueue;
+	struct watch_list *wlist = NULL;
+	struct watch *watch = NULL;
+	struct key *key;
+	key_ref_t key_ref;
+	long ret;
+
+	if (watch_id < -1 || watch_id > 0xff)
+		return -EINVAL;
+
+	key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE, KEY_NEED_VIEW);
+	if (IS_ERR(key_ref))
+		return PTR_ERR(key_ref);
+	key = key_ref_to_ptr(key_ref);
+
+	wqueue = get_watch_queue(watch_queue_fd);
+	if (IS_ERR(wqueue)) {
+		ret = PTR_ERR(wqueue);
+		goto err_key;
+	}
+
+	if (watch_id >= 0) {
+		ret = -ENOMEM;
+		if (!key->watchers) {
+			wlist = kzalloc(sizeof(*wlist), GFP_KERNEL);
+			if (!wlist)
+				goto err_wqueue;
+			init_watch_list(wlist, NULL);
+		}
+
+		watch = kzalloc(sizeof(*watch), GFP_KERNEL);
+		if (!watch)
+			goto err_wlist;
+
+		init_watch(watch, wqueue);
+		watch->id	= key->serial;
+		watch->info_id	= (u32)watch_id << WATCH_INFO_ID__SHIFT;
+
+		ret = security_watch_key(key);
+		if (ret < 0)
+			goto err_watch;
+
+		down_write(&key->sem);
+		if (!key->watchers) {
+			key->watchers = wlist;
+			wlist = NULL;
+		}
+
+		ret = add_watch_to_object(watch, key->watchers);
+		up_write(&key->sem);
+
+		if (ret == 0)
+			watch = NULL;
+	} else {
+		ret = -EBADSLT;
+		if (key->watchers) {
+			down_write(&key->sem);
+			ret = remove_watch_from_object(key->watchers,
+						       wqueue, key_serial(key),
+						       false);
+			up_write(&key->sem);
+		}
+	}
+
+err_watch:
+	kfree(watch);
+err_wlist:
+	kfree(wlist);
+err_wqueue:
+	put_watch_queue(wqueue);
+err_key:
+	key_put(key);
+	return ret;
+}
+#endif /* CONFIG_KEY_NOTIFICATIONS */
+
 /*
  * Get keyrings subsystem capabilities.
  */
@@ -1857,6 +1947,9 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3,
 	case KEYCTL_CAPABILITIES:
 		return keyctl_capabilities((unsigned char __user *)arg2, (size_t)arg3);
 
+	case KEYCTL_WATCH_KEY:
+		return keyctl_watch_key((key_serial_t)arg2, (int)arg3, (int)arg4);
+
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index febf36c6ddc5..40a0dcdfda44 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -1060,12 +1060,14 @@ int keyring_restrict(key_ref_t keyring_ref, const char *type,
 	down_write(&keyring->sem);
 	down_write(&keyring_serialise_restrict_sem);
 
-	if (keyring->restrict_link)
+	if (keyring->restrict_link) {
 		ret = -EEXIST;
-	else if (keyring_detect_restriction_cycle(keyring, restrict_link))
+	} else if (keyring_detect_restriction_cycle(keyring, restrict_link)) {
 		ret = -EDEADLK;
-	else
+	} else {
 		keyring->restrict_link = restrict_link;
+		notify_key(keyring, NOTIFY_KEY_SETATTR, 0);
+	}
 
 	up_write(&keyring_serialise_restrict_sem);
 	up_write(&keyring->sem);
@@ -1366,12 +1368,14 @@ int __key_link_check_live_key(struct key *keyring, struct key *key)
  * holds at most one link to any given key of a particular type+description
  * combination.
  */
-void __key_link(struct key *key, struct assoc_array_edit **_edit)
+void __key_link(struct key *keyring, struct key *key,
+		struct assoc_array_edit **_edit)
 {
 	__key_get(key);
 	assoc_array_insert_set_object(*_edit, keyring_key_to_ptr(key));
 	assoc_array_apply_edit(*_edit);
 	*_edit = NULL;
+	notify_key(keyring, NOTIFY_KEY_LINKED, key_serial(key));
 }
 
 /*
@@ -1455,7 +1459,7 @@ int key_link(struct key *keyring, struct key *key)
 	if (ret == 0)
 		ret = __key_link_check_live_key(keyring, key);
 	if (ret == 0)
-		__key_link(key, &edit);
+		__key_link(keyring, key, &edit);
 
 error_end:
 	__key_link_end(keyring, &key->index_key, edit);
@@ -1487,7 +1491,7 @@ static int __key_unlink_begin(struct key *keyring, struct key *key,
 	struct assoc_array_edit *edit;
 
 	BUG_ON(*_edit != NULL);
-	
+
 	edit = assoc_array_delete(&keyring->keys, &keyring_assoc_array_ops,
 				  &key->index_key);
 	if (IS_ERR(edit))
@@ -1507,6 +1511,7 @@ static void __key_unlink(struct key *keyring, struct key *key,
 			 struct assoc_array_edit **_edit)
 {
 	assoc_array_apply_edit(*_edit);
+	notify_key(keyring, NOTIFY_KEY_UNLINKED, key_serial(key));
 	*_edit = NULL;
 	key_payload_reserve(keyring, keyring->datalen - KEYQUOTA_LINK_BYTES);
 }
@@ -1625,7 +1630,7 @@ int key_move(struct key *key,
 		goto error;
 
 	__key_unlink(from_keyring, key, &from_edit);
-	__key_link(key, &to_edit);
+	__key_link(to_keyring, key, &to_edit);
 error:
 	__key_link_end(to_keyring, &key->index_key, to_edit);
 	__key_unlink_end(from_keyring, key, from_edit);
@@ -1659,6 +1664,7 @@ int keyring_clear(struct key *keyring)
 	} else {
 		if (edit)
 			assoc_array_apply_edit(edit);
+		notify_key(keyring, NOTIFY_KEY_CLEARED, 0);
 		key_payload_reserve(keyring, 0);
 		ret = 0;
 	}
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 957b9e3e1492..e1b9f1a80676 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -418,7 +418,7 @@ static int construct_alloc_key(struct keyring_search_context *ctx,
 		goto key_already_present;
 
 	if (dest_keyring)
-		__key_link(key, &edit);
+		__key_link(dest_keyring, key, &edit);
 
 	mutex_unlock(&key_construction_mutex);
 	if (dest_keyring)
@@ -437,7 +437,7 @@ static int construct_alloc_key(struct keyring_search_context *ctx,
 	if (dest_keyring) {
 		ret = __key_link_check_live_key(dest_keyring, key);
 		if (ret == 0)
-			__key_link(key, &edit);
+			__key_link(dest_keyring, key, &edit);
 		__key_link_end(dest_keyring, &ctx->index_key, edit);
 		if (ret < 0)
 			goto link_check_failed;


^ permalink raw reply related

* [RFC PATCH 07/14] Add sample notification program [ver #3]
From: David Howells @ 2020-01-15 13:31 UTC (permalink / raw)
  To: torvalds
  Cc: dhowells, Greg Kroah-Hartman, Casey Schaufler, Stephen Smalley,
	nicolas.dichtel, raven, Christian Brauner, dhowells, keyrings,
	linux-usb, linux-block, linux-security-module, linux-fsdevel,
	linux-api, linux-security-module, linux-kernel
In-Reply-To: <157909503552.20155.3030058841911628518.stgit@warthog.procyon.org.uk>

The sample program is run like:

	./samples/watch_queue/watch_test

and watches "/" for mount changes and the current session keyring for key
changes:

	# keyctl add user a a @s
	1035096409
	# keyctl unlink 1035096409 @s

producing:

	# ./watch_test
	read() = 16
	NOTIFY[000]: ty=000001 sy=02 i=00000110
	KEY 2ffc2e5d change=2[linked] aux=1035096409
	read() = 16
	NOTIFY[000]: ty=000001 sy=02 i=00000110
	KEY 2ffc2e5d change=3[unlinked] aux=1035096409

Other events may be produced, such as with a failing disk:

	read() = 22
	NOTIFY[000]: ty=000003 sy=02 i=00000416
	USB 3-7.7 dev-reset e=0 r=0
	read() = 24
	NOTIFY[000]: ty=000002 sy=06 i=00000418
	BLOCK 00800050 e=6[critical medium] s=64000ef8

This corresponds to:

	blk_update_request: critical medium error, dev sdf, sector 1677725432 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 0

in dmesg.

Signed-off-by: David Howells <dhowells@redhat.com>
---

 samples/Kconfig                  |    6 +
 samples/Makefile                 |    1 
 samples/watch_queue/Makefile     |    7 +
 samples/watch_queue/watch_test.c |  183 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 197 insertions(+)
 create mode 100644 samples/watch_queue/Makefile
 create mode 100644 samples/watch_queue/watch_test.c

diff --git a/samples/Kconfig b/samples/Kconfig
index 9d236c346de5..5c31971a5745 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -190,5 +190,11 @@ config SAMPLE_INTEL_MEI
 	help
 	  Build a sample program to work with mei device.
 
+config SAMPLE_WATCH_QUEUE
+	bool "Build example /dev/watch_queue notification consumer"
+	depends on HEADERS_INSTALL
+	help
+	  Build example userspace program to use the new mount_notify(),
+	  sb_notify() syscalls and the KEYCTL_WATCH_KEY keyctl() function.
 
 endif # SAMPLES
diff --git a/samples/Makefile b/samples/Makefile
index 5ce50ef0f2b2..a80ab2dbaae7 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -23,3 +23,4 @@ obj-$(CONFIG_VIDEO_PCI_SKELETON)	+= v4l/
 obj-y					+= vfio-mdev/
 subdir-$(CONFIG_SAMPLE_VFS)		+= vfs
 obj-$(CONFIG_SAMPLE_INTEL_MEI)		+= mei/
+subdir-$(CONFIG_SAMPLE_WATCH_QUEUE)	+= watch_queue
diff --git a/samples/watch_queue/Makefile b/samples/watch_queue/Makefile
new file mode 100644
index 000000000000..eec00dd0a8df
--- /dev/null
+++ b/samples/watch_queue/Makefile
@@ -0,0 +1,7 @@
+# List of programs to build
+hostprogs-y := watch_test
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
+
+HOSTCFLAGS_watch_test.o += -I$(objtree)/usr/include
diff --git a/samples/watch_queue/watch_test.c b/samples/watch_queue/watch_test.c
new file mode 100644
index 000000000000..9bf60abf5c7e
--- /dev/null
+++ b/samples/watch_queue/watch_test.c
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Use /dev/watch_queue to watch for notifications.
+ *
+ * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <limits.h>
+#include <linux/watch_queue.h>
+#include <linux/unistd.h>
+#include <linux/keyctl.h>
+
+#ifndef KEYCTL_WATCH_KEY
+#define KEYCTL_WATCH_KEY -1
+#endif
+#ifndef __NR_watch_devices
+#define __NR_watch_devices -1
+#endif
+
+#define BUF_SIZE 256
+
+static long keyctl_watch_key(int key, int watch_fd, int watch_id)
+{
+	return syscall(__NR_keyctl, KEYCTL_WATCH_KEY, key, watch_fd, watch_id);
+}
+
+static const char *key_subtypes[256] = {
+	[NOTIFY_KEY_INSTANTIATED]	= "instantiated",
+	[NOTIFY_KEY_UPDATED]		= "updated",
+	[NOTIFY_KEY_LINKED]		= "linked",
+	[NOTIFY_KEY_UNLINKED]		= "unlinked",
+	[NOTIFY_KEY_CLEARED]		= "cleared",
+	[NOTIFY_KEY_REVOKED]		= "revoked",
+	[NOTIFY_KEY_INVALIDATED]	= "invalidated",
+	[NOTIFY_KEY_SETATTR]		= "setattr",
+};
+
+static void saw_key_change(struct watch_notification *n, size_t len)
+{
+	struct key_notification *k = (struct key_notification *)n;
+
+	if (len != sizeof(struct key_notification)) {
+		fprintf(stderr, "Incorrect key message length\n");
+		return;
+	}
+
+	printf("KEY %08x change=%u[%s] aux=%u\n",
+	       k->key_id, n->subtype, key_subtypes[n->subtype], k->aux);
+}
+
+/*
+ * Consume and display events.
+ */
+static void consumer(int fd)
+{
+	unsigned char buffer[4096], *p, *end;
+	union {
+		struct watch_notification n;
+		unsigned char buf1[128];
+	} n;
+	ssize_t buf_len;
+
+	for (;;) {
+		buf_len = read(fd, buffer, sizeof(buffer));
+		if (buf_len == -1) {
+			perror("read");
+			exit(1);
+		}
+
+		if (buf_len == 0) {
+			printf("-- END --\n");
+			return;
+		}
+
+		if (buf_len > sizeof(buffer)) {
+			fprintf(stderr, "Read buffer overrun: %zd\n", buf_len);
+			return;
+		}
+
+		printf("read() = %zd\n", buf_len);
+
+		p = buffer;
+		end = buffer + buf_len;
+		while (p < end) {
+			size_t largest, len;
+
+			largest = end - p;
+			if (largest > 128)
+				largest = 128;
+			if (largest < sizeof(struct watch_notification)) {
+				fprintf(stderr, "Short message header: %zu\n", largest);
+				return;
+			}
+			memcpy(&n, p, largest);
+
+			printf("NOTIFY[%03zx]: ty=%06x sy=%02x i=%08x\n",
+			       p - buffer, n.n.type, n.n.subtype, n.n.info);
+
+			len = n.n.info & WATCH_INFO_LENGTH;
+			if (len < sizeof(n.n) || len > largest) {
+				fprintf(stderr, "Bad message length: %zu/%zu\n", len, largest);
+				exit(1);
+			}
+
+			switch (n.n.type) {
+			case WATCH_TYPE_META:
+				switch (n.n.subtype) {
+				case WATCH_META_REMOVAL_NOTIFICATION:
+					printf("REMOVAL of watchpoint %08x\n",
+					       (n.n.info & WATCH_INFO_ID) >>
+					       WATCH_INFO_ID__SHIFT);
+					break;
+				default:
+					printf("other meta record\n");
+					break;
+				}
+				break;
+			case WATCH_TYPE_KEY_NOTIFY:
+				saw_key_change(&n.n, len);
+				break;
+			default:
+				printf("other type\n");
+				break;
+			}
+
+			p += len;
+		}
+	}
+}
+
+static struct watch_notification_filter filter = {
+	.nr_filters	= 1,
+	.filters = {
+		[0]	= {
+			.type			= WATCH_TYPE_KEY_NOTIFY,
+			.subtype_filter[0]	= UINT_MAX,
+		},
+	},
+};
+
+int main(int argc, char **argv)
+{
+	int pipefd[2], fd;
+
+	if (pipe2(pipefd, O_NOTIFICATION_PIPE) == -1) {
+		perror("pipe2");
+		exit(1);
+	}
+	fd = pipefd[0];
+
+	if (ioctl(fd, IOC_WATCH_QUEUE_SET_SIZE, BUF_SIZE) == -1) {
+		perror("/dev/watch_queue(size)");
+		exit(1);
+	}
+
+	if (ioctl(fd, IOC_WATCH_QUEUE_SET_FILTER, &filter) == -1) {
+		perror("/dev/watch_queue(filter)");
+		exit(1);
+	}
+
+	if (keyctl_watch_key(KEY_SPEC_SESSION_KEYRING, fd, 0x01) == -1) {
+		perror("keyctl");
+		exit(1);
+	}
+
+	if (keyctl_watch_key(KEY_SPEC_USER_KEYRING, fd, 0x02) == -1) {
+		perror("keyctl");
+		exit(1);
+	}
+
+	consumer(fd);
+	exit(0);
+}


^ permalink raw reply related

* [RFC PATCH 08/14] pipe: Allow buffers to be marked read-whole-or-error for notifications [ver #3]
From: David Howells @ 2020-01-15 13:31 UTC (permalink / raw)
  To: torvalds
  Cc: dhowells, Greg Kroah-Hartman, Casey Schaufler, Stephen Smalley,
	nicolas.dichtel, raven, Christian Brauner, dhowells, keyrings,
	linux-usb, linux-block, linux-security-module, linux-fsdevel,
	linux-api, linux-security-module, linux-kernel
In-Reply-To: <157909503552.20155.3030058841911628518.stgit@warthog.procyon.org.uk>

Allow a buffer to be marked such that read() must return the entire buffer
in one go or return ENOBUFS.  Multiple buffers can be amalgamated into a
single read, but a short read will occur if the next "whole" buffer won't
fit.

This is useful for watch queue notifications to make sure we don't split a
notification across multiple reads, especially given that we need to
fabricate an overrun record under some circumstances - and that isn't in
the buffers.

Signed-off-by: David Howells <dhowells@redhat.com>
---

 fs/pipe.c                        |    8 +++++++-
 include/linux/pipe_fs_i.h        |    1 +
 kernel/watch_queue.c             |    2 +-
 samples/watch_queue/watch_test.c |    2 +-
 4 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index 5352c07be47f..05d0b02ed08d 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -317,8 +317,14 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 			size_t written;
 			int error;
 
-			if (chars > total_len)
+			if (chars > total_len) {
+				if (buf->flags & PIPE_BUF_FLAG_WHOLE) {
+					if (ret == 0)
+						ret = -ENOBUFS;
+					break;
+				}
 				chars = total_len;
+			}
 
 			error = pipe_buf_confirm(pipe, buf);
 			if (error) {
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index cef70acd99bf..f86ae087aaca 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -8,6 +8,7 @@
 #define PIPE_BUF_FLAG_ATOMIC	0x02	/* was atomically mapped */
 #define PIPE_BUF_FLAG_GIFT	0x04	/* page is a gift */
 #define PIPE_BUF_FLAG_PACKET	0x08	/* read() as a packet */
+#define PIPE_BUF_FLAG_WHOLE	0x10	/* read() must return entire buffer or error */
 
 /**
  *	struct pipe_buffer - a linux kernel pipe buffer
diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
index e2e3344a2586..a01f2fed0983 100644
--- a/kernel/watch_queue.c
+++ b/kernel/watch_queue.c
@@ -115,7 +115,7 @@ static bool post_one_notification(struct watch_queue *wqueue,
 	buf->ops = &watch_queue_pipe_buf_ops;
 	buf->offset = offset;
 	buf->len = len;
-	buf->flags = 0;
+	buf->flags = PIPE_BUF_FLAG_WHOLE;
 	pipe->head = head + 1;
 
 	if (!test_and_clear_bit(note, wqueue->notes_bitmap)) {
diff --git a/samples/watch_queue/watch_test.c b/samples/watch_queue/watch_test.c
index 9bf60abf5c7e..924e13a49c37 100644
--- a/samples/watch_queue/watch_test.c
+++ b/samples/watch_queue/watch_test.c
@@ -63,7 +63,7 @@ static void saw_key_change(struct watch_notification *n, size_t len)
  */
 static void consumer(int fd)
 {
-	unsigned char buffer[4096], *p, *end;
+	unsigned char buffer[433], *p, *end;
 	union {
 		struct watch_notification n;
 		unsigned char buf1[128];


^ permalink raw reply related

* [RFC PATCH 09/14] pipe: Add notification lossage handling [ver #3]
From: David Howells @ 2020-01-15 13:31 UTC (permalink / raw)
  To: torvalds
  Cc: dhowells, Greg Kroah-Hartman, Casey Schaufler, Stephen Smalley,
	nicolas.dichtel, raven, Christian Brauner, dhowells, keyrings,
	linux-usb, linux-block, linux-security-module, linux-fsdevel,
	linux-api, linux-security-module, linux-kernel
In-Reply-To: <157909503552.20155.3030058841911628518.stgit@warthog.procyon.org.uk>

Add handling for loss of notifications by having read() insert a
loss-notification message after it has read the pipe buffer that was last
in the ring when the loss occurred.

Lossage can come about either by running out of notification descriptors or
by running out of space in the pipe ring.

Signed-off-by: David Howells <dhowells@redhat.com>
---

 fs/pipe.c                        |   28 ++++++++++++++++++++++++++++
 include/linux/pipe_fs_i.h        |    7 +++++++
 kernel/watch_queue.c             |    2 ++
 samples/watch_queue/watch_test.c |    3 +++
 4 files changed, 40 insertions(+)

diff --git a/fs/pipe.c b/fs/pipe.c
index 05d0b02ed08d..423aafca4338 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -311,6 +311,30 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 		unsigned int tail = pipe->tail;
 		unsigned int mask = pipe->ring_size - 1;
 
+#ifdef CONFIG_WATCH_QUEUE
+		if (pipe->note_loss) {
+			struct watch_notification n;
+
+			if (total_len < 8) {
+				if (ret == 0)
+					ret = -ENOBUFS;
+				break;
+			}
+
+			n.type = WATCH_TYPE_META;
+			n.subtype = WATCH_META_LOSS_NOTIFICATION;
+			n.info = watch_sizeof(n);
+			if (copy_to_iter(&n, sizeof(n), to) != sizeof(n)) {
+				if (ret == 0)
+					ret = -EFAULT;
+				break;
+			}
+			ret += sizeof(n);
+			total_len -= sizeof(n);
+			pipe->note_loss = false;
+		}
+#endif
+
 		if (!pipe_empty(head, tail)) {
 			struct pipe_buffer *buf = &pipe->bufs[tail & mask];
 			size_t chars = buf->len;
@@ -352,6 +376,10 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 			if (!buf->len) {
 				pipe_buf_release(pipe, buf);
 				spin_lock_irq(&pipe->wait.lock);
+#ifdef CONFIG_WATCH_QUEUE
+				if (buf->flags & PIPE_BUF_FLAG_LOSS)
+					pipe->note_loss = true;
+#endif
 				tail++;
 				pipe->tail = tail;
 				spin_unlock_irq(&pipe->wait.lock);
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index f86ae087aaca..810eb2b1efc6 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -9,6 +9,9 @@
 #define PIPE_BUF_FLAG_GIFT	0x04	/* page is a gift */
 #define PIPE_BUF_FLAG_PACKET	0x08	/* read() as a packet */
 #define PIPE_BUF_FLAG_WHOLE	0x10	/* read() must return entire buffer or error */
+#ifdef CONFIG_WATCH_QUEUE
+#define PIPE_BUF_FLAG_LOSS	0x20	/* Message loss happened after this buffer */
+#endif
 
 /**
  *	struct pipe_buffer - a linux kernel pipe buffer
@@ -33,6 +36,7 @@ struct pipe_buffer {
  *	@wait: reader/writer wait point in case of empty/full pipe
  *	@head: The point of buffer production
  *	@tail: The point of buffer consumption
+ *	@note_loss: The next read() should insert a data-lost message
  *	@max_usage: The maximum number of slots that may be used in the ring
  *	@ring_size: total number of buffers (should be a power of 2)
  *	@nr_accounted: The amount this pipe accounts for in user->pipe_bufs
@@ -55,6 +59,9 @@ struct pipe_inode_info {
 	unsigned int tail;
 	unsigned int max_usage;
 	unsigned int ring_size;
+#ifdef CONFIG_WATCH_QUEUE
+	bool note_loss;
+#endif
 	unsigned int nr_accounted;
 	unsigned int readers;
 	unsigned int writers;
diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
index a01f2fed0983..d48f422f391a 100644
--- a/kernel/watch_queue.c
+++ b/kernel/watch_queue.c
@@ -132,6 +132,8 @@ static bool post_one_notification(struct watch_queue *wqueue,
 	return done;
 
 lost:
+	buf = &pipe->bufs[(head - 1) & mask];
+	buf->flags |= PIPE_BUF_FLAG_LOSS;
 	goto out;
 }
 
diff --git a/samples/watch_queue/watch_test.c b/samples/watch_queue/watch_test.c
index 924e13a49c37..0eaff5dc04c3 100644
--- a/samples/watch_queue/watch_test.c
+++ b/samples/watch_queue/watch_test.c
@@ -120,6 +120,9 @@ static void consumer(int fd)
 					       (n.n.info & WATCH_INFO_ID) >>
 					       WATCH_INFO_ID__SHIFT);
 					break;
+				case WATCH_META_LOSS_NOTIFICATION:
+					printf("-- LOSS --\n");
+					break;
 				default:
 					printf("other meta record\n");
 					break;


^ permalink raw reply related

* [RFC PATCH 10/14] Add a general, global device notification watch list [ver #3]
From: David Howells @ 2020-01-15 13:32 UTC (permalink / raw)
  To: torvalds
  Cc: dhowells, Greg Kroah-Hartman, Casey Schaufler, Stephen Smalley,
	nicolas.dichtel, raven, Christian Brauner, dhowells, keyrings,
	linux-usb, linux-block, linux-security-module, linux-fsdevel,
	linux-api, linux-security-module, linux-kernel
In-Reply-To: <157909503552.20155.3030058841911628518.stgit@warthog.procyon.org.uk>

Create a general, global watch list that can be used for the posting of
device notification events, for such things as device attachment,
detachment and errors on sources such as block devices and USB devices.
This can be enabled with:

	CONFIG_DEVICE_NOTIFICATIONS

To add a watch on this list, an event queue must be created and configured:

        pipe2(fds, O_NOTIFICATION_PIPE);
        ioctl(fds[1], IOC_WATCH_QUEUE_SET_SIZE, 256);

and then a watch can be placed upon it using a system call:

        watch_devices(fds[1], 12, 0);

Unless the application wants to receive all events, it should employ
appropriate filters.  For example, to receive just USB notifications, it
could do:

	struct watch_notification_filter filter = {
		.nr_filters = 1,
		.filters = {
			[0] = {
				.type = WATCH_TYPE_USB_NOTIFY,
				.subtype_filter[0] = UINT_MAX;
			},
		},
	};
	ioctl(fds[1], IOC_WATCH_QUEUE_SET_FILTER, &filter);

Signed-off-by: David Howells <dhowells@redhat.com>
---

 Documentation/watch_queue.rst               |   22 ++++++-
 arch/alpha/kernel/syscalls/syscall.tbl      |    1 
 arch/arm/tools/syscall.tbl                  |    1 
 arch/arm64/include/asm/unistd.h             |    2 -
 arch/arm64/include/asm/unistd32.h           |    2 +
 arch/ia64/kernel/syscalls/syscall.tbl       |    1 
 arch/m68k/kernel/syscalls/syscall.tbl       |    1 
 arch/microblaze/kernel/syscalls/syscall.tbl |    1 
 arch/mips/kernel/syscalls/syscall_n32.tbl   |    1 
 arch/mips/kernel/syscalls/syscall_n64.tbl   |    1 
 arch/mips/kernel/syscalls/syscall_o32.tbl   |    1 
 arch/parisc/kernel/syscalls/syscall.tbl     |    1 
 arch/powerpc/kernel/syscalls/syscall.tbl    |    1 
 arch/s390/kernel/syscalls/syscall.tbl       |    1 
 arch/sh/kernel/syscalls/syscall.tbl         |    1 
 arch/sparc/kernel/syscalls/syscall.tbl      |    1 
 arch/x86/entry/syscalls/syscall_32.tbl      |    1 
 arch/x86/entry/syscalls/syscall_64.tbl      |    1 
 arch/xtensa/kernel/syscalls/syscall.tbl     |    1 
 drivers/base/Kconfig                        |    9 +++
 drivers/base/Makefile                       |    1 
 drivers/base/watch.c                        |   90 +++++++++++++++++++++++++++
 include/linux/device.h                      |    7 ++
 include/linux/syscalls.h                    |    1 
 include/uapi/asm-generic/unistd.h           |    4 +
 kernel/sys_ni.c                             |    1 
 26 files changed, 152 insertions(+), 3 deletions(-)
 create mode 100644 drivers/base/watch.c

diff --git a/Documentation/watch_queue.rst b/Documentation/watch_queue.rst
index d8f70282d247..ed592700be0e 100644
--- a/Documentation/watch_queue.rst
+++ b/Documentation/watch_queue.rst
@@ -223,6 +223,25 @@ The ``id`` is the ID of the source object (such as the serial number on a key).
 Only watches that have the same ID set in them will see this notification.
 
 
+Global Device Watch List
+========================
+
+There is a global watch list that hardware generated events, such as device
+connection, disconnection, failure and error can be posted upon.  It must be
+enabled using::
+
+	CONFIG_DEVICE_NOTIFICATIONS
+
+Watchpoints are set in userspace using the device_notify(2) system call.
+Within the kernel events are posted upon it using::
+
+	void post_device_notification(struct watch_notification *n, u64 id);
+
+where ``n`` is the formatted notification record to post.  ``id`` is an
+identifier that can be used to direct to specific watches, but it should be 0
+for general use on this queue.
+
+
 Watch Sources
 =============
 
@@ -238,7 +257,8 @@ Any particular buffer can be fed from multiple sources.  Sources include:
   * WATCH_TYPE_BLOCK_NOTIFY
 
     Notifications of this type indicate block layer events, such as I/O errors
-    or temporary link loss.  Watches of this type are set on a global queue.
+    or temporary link loss.  Watches of this type are set on the global device
+    watch list.
 
 
 Event Filtering
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 8e13b0b2928d..3b355b0996a1 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -475,3 +475,4 @@
 543	common	fspick				sys_fspick
 544	common	pidfd_open			sys_pidfd_open
 # 545 reserved for clone3
+546	common	watch_devices			sys_watch_devices
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 6da7dc4d79cc..0f080cf44cc9 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -449,3 +449,4 @@
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
 435	common	clone3				sys_clone3
+436	common	watch_devices			sys_watch_devices
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 2629a68b8724..368761302768 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -38,7 +38,7 @@
 #define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE + 5)
 #define __ARM_NR_COMPAT_END		(__ARM_NR_COMPAT_BASE + 0x800)
 
-#define __NR_compat_syscalls		436
+#define __NR_compat_syscalls		437
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 94ab29cf4f00..b5310789ce7a 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -879,6 +879,8 @@ __SYSCALL(__NR_fspick, sys_fspick)
 __SYSCALL(__NR_pidfd_open, sys_pidfd_open)
 #define __NR_clone3 435
 __SYSCALL(__NR_clone3, sys_clone3)
+#define __NR_watch_devices 436
+__SYSCALL(__NR_watch_devices, sys_watch_devices)
 
 /*
  * Please add new compat syscalls above this comment and update
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 36d5faf4c86c..2f33f5db2fed 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -356,3 +356,4 @@
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
 # 435 reserved for clone3
+436	common	watch_devices			sys_watch_devices
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index a88a285a0e5f..83e4e8784b88 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -435,3 +435,4 @@
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
 # 435 reserved for clone3
+436	common	watch_devices			sys_watch_devices
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 09b0cd7dab0a..9a70a3be3b7b 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -441,3 +441,4 @@
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
 435	common	clone3				sys_clone3
+436	common	watch_devices			sys_watch_devices
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index e7c5ab38e403..b39527fc32c9 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -374,3 +374,4 @@
 433	n32	fspick				sys_fspick
 434	n32	pidfd_open			sys_pidfd_open
 435	n32	clone3				__sys_clone3
+436	n32	watch_devices			sys_watch_devices
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index 13cd66581f3b..a7f0c5e71768 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -350,3 +350,4 @@
 433	n64	fspick				sys_fspick
 434	n64	pidfd_open			sys_pidfd_open
 435	n64	clone3				__sys_clone3
+436	n64	watch_devices			sys_watch_devices
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 353539ea4140..6f378288598c 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -423,3 +423,4 @@
 433	o32	fspick				sys_fspick
 434	o32	pidfd_open			sys_pidfd_open
 435	o32	clone3				__sys_clone3
+436	o32	watch_devices			sys_watch_devices
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 285ff516150c..b64bbafa5919 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -433,3 +433,4 @@
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
 435	common	clone3				sys_clone3_wrapper
+436	common	watch_devices			sys_watch_devices
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 43f736ed47f2..0a503239ab5c 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -517,3 +517,4 @@
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
 435	nospu	clone3				ppc_clone3
+436	common	watch_devices			sys_watch_devices
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 3054e9c035a3..19b43c0d928a 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -438,3 +438,4 @@
 433  common	fspick			sys_fspick			sys_fspick
 434  common	pidfd_open		sys_pidfd_open			sys_pidfd_open
 435  common	clone3			sys_clone3			sys_clone3
+436  common	watch_devices		sys_watch_devices		sys_watch_devices
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index b5ed26c4c005..b454e07c9372 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -438,3 +438,4 @@
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
 # 435 reserved for clone3
+436	common	watch_devices			sys_watch_devices
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 8c8cc7537fb2..8ef43c27457e 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -481,3 +481,4 @@
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
 # 435 reserved for clone3
+436	common	watch_devices			sys_watch_devices
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 15908eb9b17e..bdf7e845d1f8 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -440,3 +440,4 @@
 433	i386	fspick			sys_fspick			__ia32_sys_fspick
 434	i386	pidfd_open		sys_pidfd_open			__ia32_sys_pidfd_open
 435	i386	clone3			sys_clone3			__ia32_sys_clone3
+436	i386	watch_devices		sys_watch_devices		__ia32_sys_watch_devices
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index c29976eca4a8..29293d103829 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -357,6 +357,7 @@
 433	common	fspick			__x64_sys_fspick
 434	common	pidfd_open		__x64_sys_pidfd_open
 435	common	clone3			__x64_sys_clone3/ptregs
+436	common	watch_devices		__x64_sys_watch_devices
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 25f4de729a6d..243fa18b8d1e 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -406,3 +406,4 @@
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
 435	common	clone3				sys_clone3
+436	common	watch_devices			sys_watch_devices
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index c3b3b5c0b0da..d4ae1c1adf69 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -1,6 +1,15 @@
 # SPDX-License-Identifier: GPL-2.0
 menu "Generic Driver Options"
 
+config DEVICE_NOTIFICATIONS
+	bool "Provide device event notifications"
+	depends on WATCH_QUEUE
+	help
+	  This option provides support for getting hardware event notifications
+	  on devices, buses and interfaces.  This makes use of the
+	  /dev/watch_queue misc device to handle the notification buffer.
+	  device_notify(2) is used to set/remove watches.
+
 config UEVENT_HELPER
 	bool "Support for uevent helper"
 	help
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 157452080f3d..4db2e8f1a1f4 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -7,6 +7,7 @@ obj-y			:= component.o core.o bus.o dd.o syscore.o \
 			   attribute_container.o transport_class.o \
 			   topology.o container.o property.o cacheinfo.o \
 			   devcon.o swnode.o
+obj-$(CONFIG_DEVICE_NOTIFICATIONS) += watch.o
 obj-$(CONFIG_DEVTMPFS)	+= devtmpfs.o
 obj-y			+= power/
 obj-$(CONFIG_ISA_BUS_API)	+= isa.o
diff --git a/drivers/base/watch.c b/drivers/base/watch.c
new file mode 100644
index 000000000000..725aaa24275b
--- /dev/null
+++ b/drivers/base/watch.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Event notifications.
+ *
+ * Copyright (C) 2019 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/device.h>
+#include <linux/watch_queue.h>
+#include <linux/syscalls.h>
+#include <linux/init_task.h>
+#include <linux/security.h>
+
+/*
+ * Global queue for watching for device layer events.
+ */
+static struct watch_list device_watchers = {
+	.watchers	= HLIST_HEAD_INIT,
+	.lock		= __SPIN_LOCK_UNLOCKED(&device_watchers.lock),
+};
+
+static DEFINE_SPINLOCK(device_watchers_lock);
+
+/**
+ * post_device_notification - Post notification of a device event
+ * @n - The notification to post
+ * @id - The device ID
+ *
+ * Note that there's only a global queue to which all events are posted.  Might
+ * want to provide per-dev queues also.
+ */
+void post_device_notification(struct watch_notification *n, u64 id)
+{
+	post_watch_notification(&device_watchers, n, &init_cred, id);
+}
+EXPORT_SYMBOL(post_device_notification);
+
+/**
+ * sys_watch_devices - Watch for device events.
+ * @watch_fd: The watch queue to send notifications to.
+ * @watch_id: The watch ID to be placed in the notification (-1 to remove watch)
+ * @flags: Flags (reserved for future)
+ */
+SYSCALL_DEFINE3(watch_devices, int, watch_fd, int, watch_id, unsigned int, flags)
+{
+	struct watch_queue *wqueue;
+	struct watch *watch = NULL;
+	long ret = -ENOMEM;
+
+	if (watch_id < -1 || watch_id > 0xff || flags)
+		return -EINVAL;
+
+	wqueue = get_watch_queue(watch_fd);
+	if (IS_ERR(wqueue)) {
+		ret = PTR_ERR(wqueue);
+		goto err;
+	}
+
+	if (watch_id >= 0) {
+		watch = kzalloc(sizeof(*watch), GFP_KERNEL);
+		if (!watch)
+			goto err_wqueue;
+
+		init_watch(watch, wqueue);
+		watch->info_id = (u32)watch_id << WATCH_INFO_ID__SHIFT;
+
+		ret = security_watch_devices();
+		if (ret < 0)
+			goto err_watch;
+
+		spin_lock(&device_watchers_lock);
+		ret = add_watch_to_object(watch, &device_watchers);
+		spin_unlock(&device_watchers_lock);
+		if (ret == 0)
+			watch = NULL;
+	} else {
+		spin_lock(&device_watchers_lock);
+		ret = remove_watch_from_object(&device_watchers, wqueue, 0,
+					       false);
+		spin_unlock(&device_watchers_lock);
+	}
+
+err_watch:
+	kfree(watch);
+err_wqueue:
+	put_watch_queue(wqueue);
+err:
+	return ret;
+}
diff --git a/include/linux/device.h b/include/linux/device.h
index 96ff76731e93..f32de6466092 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -43,6 +43,7 @@ struct iommu_group;
 struct iommu_fwspec;
 struct dev_pin_info;
 struct iommu_param;
+struct watch_notification;
 
 struct bus_attribute {
 	struct attribute	attr;
@@ -1687,6 +1688,12 @@ void device_link_remove(void *consumer, struct device *supplier);
 void device_links_supplier_sync_state_pause(void);
 void device_links_supplier_sync_state_resume(void);
 
+#ifdef CONFIG_DEVICE_NOTIFICATIONS
+extern void post_device_notification(struct watch_notification *n, u64 id);
+#else
+static inline void post_device_notification(struct watch_notification *n, u64 id) {}
+#endif
+
 #ifndef dev_fmt
 #define dev_fmt(fmt) fmt
 #endif
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 2960dedcfde8..393661015a30 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1000,6 +1000,7 @@ asmlinkage long sys_fspick(int dfd, const char __user *path, unsigned int flags)
 asmlinkage long sys_pidfd_send_signal(int pidfd, int sig,
 				       siginfo_t __user *info,
 				       unsigned int flags);
+asmlinkage long sys_watch_devices(int watch_fd, int watch_id, unsigned int flags);
 
 /*
  * Architecture-specific system calls
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 1fc8faa6e973..4794d3c2afd7 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -850,9 +850,11 @@ __SYSCALL(__NR_pidfd_open, sys_pidfd_open)
 #define __NR_clone3 435
 __SYSCALL(__NR_clone3, sys_clone3)
 #endif
+#define __NR_watch_devices 436
+__SYSCALL(__NR_watch_devices, sys_watch_devices)
 
 #undef __NR_syscalls
-#define __NR_syscalls 436
+#define __NR_syscalls 437
 
 /*
  * 32 bit systems traditionally used different
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 3b69a560a7ac..0e9b275260f8 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -51,6 +51,7 @@ COND_SYSCALL_COMPAT(io_pgetevents);
 COND_SYSCALL(io_uring_setup);
 COND_SYSCALL(io_uring_enter);
 COND_SYSCALL(io_uring_register);
+COND_SYSCALL(watch_devices);
 
 /* fs/xattr.c */
 


^ permalink raw reply related

* [RFC PATCH 11/14] block: Add block layer notifications [ver #3]
From: David Howells @ 2020-01-15 13:32 UTC (permalink / raw)
  To: torvalds
  Cc: dhowells, Greg Kroah-Hartman, Casey Schaufler, Stephen Smalley,
	nicolas.dichtel, raven, Christian Brauner, dhowells, keyrings,
	linux-usb, linux-block, linux-security-module, linux-fsdevel,
	linux-api, linux-security-module, linux-kernel
In-Reply-To: <157909503552.20155.3030058841911628518.stgit@warthog.procyon.org.uk>

Add a block layer notification mechanism whereby notifications about
block-layer events such as I/O errors, can be reported to a monitoring
process asynchronously.

Firstly, an event queue needs to be created:

	pipe2(fds, O_NOTIFICATION_PIPE);
	ioctl(fds[1], IOC_WATCH_QUEUE_SET_SIZE, 256);

then a notification can be set up to report block notifications via that
queue:

	struct watch_notification_filter filter = {
		.nr_filters = 1,
		.filters = {
			[0] = {
				.type = WATCH_TYPE_BLOCK_NOTIFY,
				.subtype_filter[0] = UINT_MAX;
			},
		},
	};
	ioctl(fds[1], IOC_WATCH_QUEUE_SET_FILTER, &filter);
	watch_devices(fds[1], 12);

After that, records will be placed into the queue when, for example, errors
occur on a block device.  Records are of the following format:

	struct block_notification {
		struct watch_notification watch;
		__u64	dev;
		__u64	sector;
	} *n;

Where:

	n->watch.type will be WATCH_TYPE_BLOCK_NOTIFY

	n->watch.subtype will be the type of notification, such as
	NOTIFY_BLOCK_ERROR_CRITICAL_MEDIUM.

	n->watch.info & WATCH_INFO_LENGTH will indicate the length of the
	record.

	n->watch.info & WATCH_INFO_ID will be the second argument to
	watch_devices(), shifted.

	n->dev will be the device numbers munged together.

	n->sector will indicate the affected sector (if appropriate for the
	event).

Note that it is permissible for event records to be of variable length -
or, at least, the length may be dependent on the subtype.

Signed-off-by: David Howells <dhowells@redhat.com>
---

 Documentation/watch_queue.rst    |    4 +++-
 block/Kconfig                    |    9 +++++++++
 block/blk-core.c                 |   29 ++++++++++++++++++++++++++++
 include/linux/blkdev.h           |   15 ++++++++++++++
 include/uapi/linux/watch_queue.h |   30 ++++++++++++++++++++++++++++-
 samples/watch_queue/watch_test.c |   40 +++++++++++++++++++++++++++++++++++++-
 6 files changed, 124 insertions(+), 3 deletions(-)

diff --git a/Documentation/watch_queue.rst b/Documentation/watch_queue.rst
index ed592700be0e..f2299f631ae8 100644
--- a/Documentation/watch_queue.rst
+++ b/Documentation/watch_queue.rst
@@ -8,7 +8,9 @@ opened by userspace.  This can be used in conjunction with::
 
   * Key/keyring notifications
 
-  * General device event notifications
+  * General device event notifications, including::
+
+    * Block layer event notifications
 
 
 The notifications buffers can be enabled by:
diff --git a/block/Kconfig b/block/Kconfig
index c23094a14a2b..9fa6ce2177bc 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -181,6 +181,15 @@ config BLK_SED_OPAL
 	Enabling this option enables users to setup/unlock/lock
 	Locking ranges for SED devices using the Opal protocol.
 
+config BLK_NOTIFICATIONS
+	bool "Block layer event notifications"
+	depends on DEVICE_NOTIFICATIONS
+	help
+	  This option provides support for getting block layer event
+	  notifications.  This makes use of the /dev/watch_queue misc device to
+	  handle the notification buffer and provides the device_notify() system
+	  call to enable/disable watches.
+
 menu "Partition Types"
 
 source "block/partitions/Kconfig"
diff --git a/block/blk-core.c b/block/blk-core.c
index 089e890ab208..50a5de025d5e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -187,6 +187,22 @@ static const struct {
 	[BLK_STS_IOERR]		= { -EIO,	"I/O" },
 };
 
+#ifdef CONFIG_BLK_NOTIFICATIONS
+static const
+enum block_notification_type blk_notifications[ARRAY_SIZE(blk_errors)] = {
+	[BLK_STS_TIMEOUT]	= NOTIFY_BLOCK_ERROR_TIMEOUT,
+	[BLK_STS_NOSPC]		= NOTIFY_BLOCK_ERROR_NO_SPACE,
+	[BLK_STS_TRANSPORT]	= NOTIFY_BLOCK_ERROR_RECOVERABLE_TRANSPORT,
+	[BLK_STS_TARGET]	= NOTIFY_BLOCK_ERROR_CRITICAL_TARGET,
+	[BLK_STS_NEXUS]		= NOTIFY_BLOCK_ERROR_CRITICAL_NEXUS,
+	[BLK_STS_MEDIUM]	= NOTIFY_BLOCK_ERROR_CRITICAL_MEDIUM,
+	[BLK_STS_PROTECTION]	= NOTIFY_BLOCK_ERROR_PROTECTION,
+	[BLK_STS_RESOURCE]	= NOTIFY_BLOCK_ERROR_KERNEL_RESOURCE,
+	[BLK_STS_DEV_RESOURCE]	= NOTIFY_BLOCK_ERROR_DEVICE_RESOURCE,
+	[BLK_STS_IOERR]		= NOTIFY_BLOCK_ERROR_IO,
+};
+#endif
+
 blk_status_t errno_to_blk_status(int errno)
 {
 	int i;
@@ -227,6 +243,19 @@ static void print_req_error(struct request *req, blk_status_t status,
 		req->cmd_flags & ~REQ_OP_MASK,
 		req->nr_phys_segments,
 		IOPRIO_PRIO_CLASS(req->ioprio));
+
+#ifdef CONFIG_BLK_NOTIFICATIONS
+	if (blk_notifications[idx]) {
+		struct block_notification n = {
+			.watch.type	= WATCH_TYPE_BLOCK_NOTIFY,
+			.watch.subtype	= blk_notifications[idx],
+			.watch.info	= watch_sizeof(n),
+			.dev		= req->rq_disk ? disk_devt(req->rq_disk) : 0,
+			.sector		= blk_rq_pos(req),
+		};
+		post_block_notification(&n);
+	}
+#endif
 }
 
 static void req_bio_endio(struct request *rq, struct bio *bio,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 47eb22a3b7f9..3cd1853dbdac 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -27,6 +27,7 @@
 #include <linux/percpu-refcount.h>
 #include <linux/scatterlist.h>
 #include <linux/blkzoned.h>
+#include <linux/watch_queue.h>
 
 struct module;
 struct scsi_ioctl_command;
@@ -1770,6 +1771,20 @@ static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
 }
 #endif /* CONFIG_BLK_DEV_ZONED */
 
+#ifdef CONFIG_BLK_NOTIFICATIONS
+static inline void post_block_notification(struct block_notification *n)
+{
+	u64 id = 0; /* Might want to allow dev# here. */
+
+	post_device_notification(&n->watch, id);
+}
+#else
+static inline void post_block_notification(struct block_notification *n)
+{
+}
+#endif
+
+
 #else /* CONFIG_BLOCK */
 
 struct block_device;
diff --git a/include/uapi/linux/watch_queue.h b/include/uapi/linux/watch_queue.h
index c3d8320b5d3a..557771413242 100644
--- a/include/uapi/linux/watch_queue.h
+++ b/include/uapi/linux/watch_queue.h
@@ -14,7 +14,8 @@
 enum watch_notification_type {
 	WATCH_TYPE_META		= 0,	/* Special record */
 	WATCH_TYPE_KEY_NOTIFY	= 1,	/* Key change event notification */
-	WATCH_TYPE__NR		= 2
+	WATCH_TYPE_BLOCK_NOTIFY	= 2,	/* Block layer event notification */
+	WATCH_TYPE__NR		= 3
 };
 
 enum watch_meta_notification_subtype {
@@ -101,4 +102,31 @@ struct key_notification {
 	__u32	aux;		/* Per-type auxiliary data */
 };
 
+/*
+ * Type of block layer notification.
+ */
+enum block_notification_type {
+	NOTIFY_BLOCK_ERROR_TIMEOUT		= 1, /* Timeout error */
+	NOTIFY_BLOCK_ERROR_NO_SPACE		= 2, /* Critical space allocation error */
+	NOTIFY_BLOCK_ERROR_RECOVERABLE_TRANSPORT = 3, /* Recoverable transport error */
+	NOTIFY_BLOCK_ERROR_CRITICAL_TARGET	= 4, /* Critical target error */
+	NOTIFY_BLOCK_ERROR_CRITICAL_NEXUS	= 5, /* Critical nexus error */
+	NOTIFY_BLOCK_ERROR_CRITICAL_MEDIUM	= 6, /* Critical medium error */
+	NOTIFY_BLOCK_ERROR_PROTECTION		= 7, /* Protection error */
+	NOTIFY_BLOCK_ERROR_KERNEL_RESOURCE	= 8, /* Kernel resource error */
+	NOTIFY_BLOCK_ERROR_DEVICE_RESOURCE	= 9, /* Device resource error */
+	NOTIFY_BLOCK_ERROR_IO			= 10, /* Other I/O error */
+};
+
+/*
+ * Block layer notification record.
+ * - watch.type = WATCH_TYPE_BLOCK_NOTIFY
+ * - watch.subtype = enum block_notification_type
+ */
+struct block_notification {
+	struct watch_notification watch; /* WATCH_TYPE_BLOCK_NOTIFY */
+	__u64	dev;			/* Device number */
+	__u64	sector;			/* Affected sector */
+};
+
 #endif /* _UAPI_LINUX_WATCH_QUEUE_H */
diff --git a/samples/watch_queue/watch_test.c b/samples/watch_queue/watch_test.c
index 0eaff5dc04c3..f5260fb792d1 100644
--- a/samples/watch_queue/watch_test.c
+++ b/samples/watch_queue/watch_test.c
@@ -58,6 +58,32 @@ static void saw_key_change(struct watch_notification *n, size_t len)
 	       k->key_id, n->subtype, key_subtypes[n->subtype], k->aux);
 }
 
+static const char *block_subtypes[256] = {
+	[NOTIFY_BLOCK_ERROR_TIMEOUT]			= "timeout",
+	[NOTIFY_BLOCK_ERROR_NO_SPACE]			= "critical space allocation",
+	[NOTIFY_BLOCK_ERROR_RECOVERABLE_TRANSPORT]	= "recoverable transport",
+	[NOTIFY_BLOCK_ERROR_CRITICAL_TARGET]		= "critical target",
+	[NOTIFY_BLOCK_ERROR_CRITICAL_NEXUS]		= "critical nexus",
+	[NOTIFY_BLOCK_ERROR_CRITICAL_MEDIUM]		= "critical medium",
+	[NOTIFY_BLOCK_ERROR_PROTECTION]			= "protection",
+	[NOTIFY_BLOCK_ERROR_KERNEL_RESOURCE]		= "kernel resource",
+	[NOTIFY_BLOCK_ERROR_DEVICE_RESOURCE]		= "device resource",
+	[NOTIFY_BLOCK_ERROR_IO]				= "I/O",
+};
+
+static void saw_block_change(struct watch_notification *n, size_t len)
+{
+	struct block_notification *b = (struct block_notification *)n;
+
+	if (len < sizeof(struct block_notification))
+		return;
+
+	printf("BLOCK %08llx e=%u[%s] s=%llx\n",
+	       (unsigned long long)b->dev,
+	       n->subtype, block_subtypes[n->subtype],
+	       (unsigned long long)b->sector);
+}
+
 /*
  * Consume and display events.
  */
@@ -131,6 +157,9 @@ static void consumer(int fd)
 			case WATCH_TYPE_KEY_NOTIFY:
 				saw_key_change(&n.n, len);
 				break;
+			case WATCH_TYPE_BLOCK_NOTIFY:
+				saw_block_change(&n.n, len);
+				break;
 			default:
 				printf("other type\n");
 				break;
@@ -142,12 +171,16 @@ static void consumer(int fd)
 }
 
 static struct watch_notification_filter filter = {
-	.nr_filters	= 1,
+	.nr_filters	= 2,
 	.filters = {
 		[0]	= {
 			.type			= WATCH_TYPE_KEY_NOTIFY,
 			.subtype_filter[0]	= UINT_MAX,
 		},
+		[1]	= {
+			.type			= WATCH_TYPE_BLOCK_NOTIFY,
+			.subtype_filter[0]	= UINT_MAX,
+		},
 	},
 };
 
@@ -181,6 +214,11 @@ int main(int argc, char **argv)
 		exit(1);
 	}
 
+	if (syscall(__NR_watch_devices, fd, 0x04, 0) == -1) {
+		perror("watch_devices");
+		exit(1);
+	}
+
 	consumer(fd);
 	exit(0);
 }


^ permalink raw reply related

* [RFC PATCH 12/14] usb: Add USB subsystem notifications [ver #3]
From: David Howells @ 2020-01-15 13:32 UTC (permalink / raw)
  To: torvalds
  Cc: dhowells, Greg Kroah-Hartman, Casey Schaufler, Stephen Smalley,
	nicolas.dichtel, raven, Christian Brauner, dhowells, keyrings,
	linux-usb, linux-block, linux-security-module, linux-fsdevel,
	linux-api, linux-security-module, linux-kernel
In-Reply-To: <157909503552.20155.3030058841911628518.stgit@warthog.procyon.org.uk>

Add a USB subsystem notification mechanism whereby notifications about
hardware events such as device connection, disconnection, reset and I/O
errors, can be reported to a monitoring process asynchronously.

Firstly, an event queue needs to be created:

	pipe2(fds, O_NOTIFICATION_PIPE);
	ioctl(fds[1], IOC_WATCH_QUEUE_SET_SIZE, 256);

then a notification can be set up to report USB notifications via that
queue:

	struct watch_notification_filter filter = {
		.nr_filters = 1,
		.filters = {
			[0] = {
				.type = WATCH_TYPE_USB_NOTIFY,
				.subtype_filter[0] = UINT_MAX;
			},
		},
	};
	ioctl(fds[1], IOC_WATCH_QUEUE_SET_FILTER, &filter);
	notify_devices(fds[1], 12);

After that, messages will be placed into the queue when events occur on a
USB device or bus.  Messages are of the following format:

	struct usb_notification {
		struct watch_notification watch;
		__u32	error;
		__u32	reserved;
		__u8	name_len;
		__u8	name[0];
	} *n;

Where:

	n->watch.type will be WATCH_TYPE_USB_NOTIFY

	n->watch.subtype will be the type of notification, such as
	NOTIFY_USB_DEVICE_ADD.

	n->watch.info & WATCH_INFO_LENGTH will indicate the length of the
	message.

	n->watch.info & WATCH_INFO_ID will be the second argument to
	device_notify(), shifted.

	n->error and n->reserved are intended to convey information such as
	error codes, but are currently not used

	n->name_len and n->name convey the USB device name as an
	unterminated string.  This may be truncated - it is currently
	limited to a maximum 63 chars.

Note that it is permissible for messages to be of variable length - or, at
least, the length may be dependent on the subtype.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
cc: linux-usb@vger.kernel.org
---

 Documentation/watch_queue.rst    |    9 +++++++
 drivers/usb/core/Kconfig         |    9 +++++++
 drivers/usb/core/devio.c         |   47 ++++++++++++++++++++++++++++++++++++++
 drivers/usb/core/hub.c           |    4 +++
 include/linux/usb.h              |   18 +++++++++++++++
 include/uapi/linux/watch_queue.h |   28 ++++++++++++++++++++++-
 samples/watch_queue/watch_test.c |   29 +++++++++++++++++++++++
 7 files changed, 142 insertions(+), 2 deletions(-)

diff --git a/Documentation/watch_queue.rst b/Documentation/watch_queue.rst
index f2299f631ae8..5321a9cb1ab2 100644
--- a/Documentation/watch_queue.rst
+++ b/Documentation/watch_queue.rst
@@ -12,6 +12,8 @@ opened by userspace.  This can be used in conjunction with::
 
     * Block layer event notifications
 
+    * USB subsystem event notifications
+
 
 The notifications buffers can be enabled by:
 
@@ -262,6 +264,13 @@ Any particular buffer can be fed from multiple sources.  Sources include:
     or temporary link loss.  Watches of this type are set on the global device
     watch list.
 
+  * WATCH_TYPE_USB_NOTIFY
+
+    Notifications of this type indicate USB subsystem events, such as
+    attachment, removal, reset and I/O errors.  Separate events are generated
+    for buses and devices.  Watchpoints of this type are set on the global
+    device watch list.
+
 
 Event Filtering
 ===============
diff --git a/drivers/usb/core/Kconfig b/drivers/usb/core/Kconfig
index ecaacc8ed311..57e7b649e48b 100644
--- a/drivers/usb/core/Kconfig
+++ b/drivers/usb/core/Kconfig
@@ -102,3 +102,12 @@ config USB_AUTOSUSPEND_DELAY
 	  The default value Linux has always had is 2 seconds.  Change
 	  this value if you want a different delay and cannot modify
 	  the command line or module parameter.
+
+config USB_NOTIFICATIONS
+	bool "Provide USB hardware event notifications"
+	depends on USB && DEVICE_NOTIFICATIONS
+	help
+	  This option provides support for getting hardware event notifications
+	  on USB devices and interfaces.  This makes use of the
+	  /dev/watch_queue misc device to handle the notification buffer.
+	  device_notify(2) is used to set/remove watches.
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 12bb5722b420..3436a2bb6e98 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -41,6 +41,7 @@
 #include <linux/dma-mapping.h>
 #include <asm/byteorder.h>
 #include <linux/moduleparam.h>
+#include <linux/watch_queue.h>
 
 #include "usb.h"
 
@@ -2747,13 +2748,59 @@ static void usbdev_remove(struct usb_device *udev)
 	mutex_unlock(&usbfs_mutex);
 }
 
+#ifdef CONFIG_USB_NOTIFICATIONS
+static noinline void post_usb_notification(const char *devname,
+					   enum usb_notification_type subtype,
+					   u32 error)
+{
+	unsigned int name_len, n_len;
+	u64 id = 0; /* We can put a device ID here for separate dev watches */
+
+	struct {
+		struct usb_notification n;
+		char more_name[USB_NOTIFICATION_MAX_NAME_LEN -
+			       (sizeof(struct usb_notification) -
+				offsetof(struct usb_notification, name))];
+	} n;
+
+	name_len = strlen(devname);
+	name_len = min_t(size_t, name_len, USB_NOTIFICATION_MAX_NAME_LEN);
+	n_len = offsetof(struct usb_notification, name) + name_len;
+
+	memset(&n, 0, sizeof(n));
+	memcpy(n.n.name, devname, n_len);
+
+	n.n.watch.type		= WATCH_TYPE_USB_NOTIFY;
+	n.n.watch.subtype	= subtype;
+	n.n.watch.info		= n_len;
+	n.n.error		= error;
+	n.n.name_len		= name_len;
+
+	post_device_notification(&n.n.watch, id);
+}
+
+void post_usb_device_notification(const struct usb_device *udev,
+				  enum usb_notification_type subtype, u32 error)
+{
+	post_usb_notification(dev_name(&udev->dev), subtype, error);
+}
+
+void post_usb_bus_notification(const struct usb_bus *ubus,
+			       enum usb_notification_type subtype, u32 error)
+{
+	post_usb_notification(ubus->bus_name, subtype, error);
+}
+#endif
+
 static int usbdev_notify(struct notifier_block *self,
 			       unsigned long action, void *dev)
 {
 	switch (action) {
 	case USB_DEVICE_ADD:
+		post_usb_device_notification(dev, NOTIFY_USB_DEVICE_ADD, 0);
 		break;
 	case USB_DEVICE_REMOVE:
+		post_usb_device_notification(dev, NOTIFY_USB_DEVICE_REMOVE, 0);
 		usbdev_remove(dev);
 		break;
 	}
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index f229ad6952c0..eaf28eed51b0 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -30,6 +30,7 @@
 #include <linux/random.h>
 #include <linux/pm_qos.h>
 #include <linux/kobject.h>
+#include <linux/watch_queue.h>
 
 #include <linux/uaccess.h>
 #include <asm/byteorder.h>
@@ -4606,6 +4607,9 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
 				(udev->config) ? "reset" : "new", speed,
 				devnum, driver_name);
 
+	if (udev->config)
+		post_usb_device_notification(udev, NOTIFY_USB_DEVICE_RESET, 0);
+
 	/* Set up TT records, if needed  */
 	if (hdev->tt) {
 		udev->tt = hdev->tt;
diff --git a/include/linux/usb.h b/include/linux/usb.h
index e656e7b4b1e4..93fa0666f95a 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -26,6 +26,7 @@
 struct usb_device;
 struct usb_driver;
 struct wusb_dev;
+enum usb_notification_type;
 
 /*-------------------------------------------------------------------------*/
 
@@ -2015,6 +2016,23 @@ extern void usb_led_activity(enum usb_led_event ev);
 static inline void usb_led_activity(enum usb_led_event ev) {}
 #endif
 
+/*
+ * Notification functions.
+ */
+#ifdef CONFIG_USB_NOTIFICATIONS
+extern void post_usb_device_notification(const struct usb_device *udev,
+					 enum usb_notification_type subtype,
+					 u32 error);
+extern void post_usb_bus_notification(const struct usb_bus *ubus,
+				      enum usb_notification_type subtype,
+				      u32 error);
+#else
+static inline void post_usb_device_notification(const struct usb_device *udev,
+						unsigned int subtype, u32 error) {}
+static inline void post_usb_bus_notification(const struct usb_bus *ubus,
+					     unsigned int subtype, u32 error) {}
+#endif
+
 #endif  /* __KERNEL__ */
 
 #endif
diff --git a/include/uapi/linux/watch_queue.h b/include/uapi/linux/watch_queue.h
index 557771413242..ad1ae229674a 100644
--- a/include/uapi/linux/watch_queue.h
+++ b/include/uapi/linux/watch_queue.h
@@ -15,7 +15,8 @@ enum watch_notification_type {
 	WATCH_TYPE_META		= 0,	/* Special record */
 	WATCH_TYPE_KEY_NOTIFY	= 1,	/* Key change event notification */
 	WATCH_TYPE_BLOCK_NOTIFY	= 2,	/* Block layer event notification */
-	WATCH_TYPE__NR		= 3
+	WATCH_TYPE_USB_NOTIFY	= 3,	/* USB subsystem event notification */
+	WATCH_TYPE__NR		= 4
 };
 
 enum watch_meta_notification_subtype {
@@ -129,4 +130,29 @@ struct block_notification {
 	__u64	sector;			/* Affected sector */
 };
 
+/*
+ * Type of USB layer notification.
+ */
+enum usb_notification_type {
+	NOTIFY_USB_DEVICE_ADD		= 0, /* USB device added */
+	NOTIFY_USB_DEVICE_REMOVE	= 1, /* USB device removed */
+	NOTIFY_USB_DEVICE_RESET		= 2, /* USB device reset */
+	NOTIFY_USB_DEVICE_ERROR		= 3, /* USB device error */
+};
+
+/*
+ * USB subsystem notification record.
+ * - watch.type = WATCH_TYPE_USB_NOTIFY
+ * - watch.subtype = enum usb_notification_type
+ */
+struct usb_notification {
+	struct watch_notification watch; /* WATCH_TYPE_USB_NOTIFY */
+	__u32	error;
+	__u32	reserved;
+	__u8	name_len;		/* Length of device name */
+	__u8	name[0];		/* Device name (padded to __u64, truncated at 63 chars) */
+};
+
+#define USB_NOTIFICATION_MAX_NAME_LEN 63
+
 #endif /* _UAPI_LINUX_WATCH_QUEUE_H */
diff --git a/samples/watch_queue/watch_test.c b/samples/watch_queue/watch_test.c
index f5260fb792d1..e4d47dfcc5d7 100644
--- a/samples/watch_queue/watch_test.c
+++ b/samples/watch_queue/watch_test.c
@@ -84,6 +84,26 @@ static void saw_block_change(struct watch_notification *n, size_t len)
 	       (unsigned long long)b->sector);
 }
 
+static const char *usb_subtypes[256] = {
+	[NOTIFY_USB_DEVICE_ADD]		= "dev-add",
+	[NOTIFY_USB_DEVICE_REMOVE]	= "dev-remove",
+	[NOTIFY_USB_DEVICE_RESET]	= "dev-reset",
+	[NOTIFY_USB_DEVICE_ERROR]	= "dev-error",
+};
+
+static void saw_usb_event(struct watch_notification *n, size_t len)
+{
+	struct usb_notification *u = (struct usb_notification *)n;
+
+	if (len < sizeof(struct usb_notification))
+		return;
+
+	printf("USB %*.*s %s e=%x r=%x\n",
+	       u->name_len, u->name_len, u->name,
+	       usb_subtypes[n->subtype],
+	       u->error, u->reserved);
+}
+
 /*
  * Consume and display events.
  */
@@ -160,6 +180,9 @@ static void consumer(int fd)
 			case WATCH_TYPE_BLOCK_NOTIFY:
 				saw_block_change(&n.n, len);
 				break;
+			case WATCH_TYPE_USB_NOTIFY:
+				saw_usb_event(&n.n, len);
+				break;
 			default:
 				printf("other type\n");
 				break;
@@ -171,7 +194,7 @@ static void consumer(int fd)
 }
 
 static struct watch_notification_filter filter = {
-	.nr_filters	= 2,
+	.nr_filters	= 3,
 	.filters = {
 		[0]	= {
 			.type			= WATCH_TYPE_KEY_NOTIFY,
@@ -181,6 +204,10 @@ static struct watch_notification_filter filter = {
 			.type			= WATCH_TYPE_BLOCK_NOTIFY,
 			.subtype_filter[0]	= UINT_MAX,
 		},
+		[2]	= {
+			.type			= WATCH_TYPE_USB_NOTIFY,
+			.subtype_filter[0]	= UINT_MAX,
+		},
 	},
 };
 


^ permalink raw reply related

* [RFC PATCH 13/14] selinux: Implement the watch_key security hook [ver #3]
From: David Howells @ 2020-01-15 13:32 UTC (permalink / raw)
  To: torvalds
  Cc: dhowells, Greg Kroah-Hartman, Casey Schaufler, Stephen Smalley,
	nicolas.dichtel, raven, Christian Brauner, dhowells, keyrings,
	linux-usb, linux-block, linux-security-module, linux-fsdevel,
	linux-api, linux-security-module, linux-kernel
In-Reply-To: <157909503552.20155.3030058841911628518.stgit@warthog.procyon.org.uk>

Implement the watch_key security hook to make sure that a key grants the
caller View permission in order to set a watch on a key.

For the moment, the watch_devices security hook is left unimplemented as
it's not obvious what the object should be since the queue is global and
didn't previously exist.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Stephen Smalley <sds@tycho.nsa.gov>
---

 security/selinux/hooks.c |   14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 116b4d644f68..d838d1b58d88 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -6587,6 +6587,17 @@ static int selinux_key_getsecurity(struct key *key, char **_buffer)
 	*_buffer = context;
 	return rc;
 }
+
+#ifdef CONFIG_KEY_NOTIFICATIONS
+static int selinux_watch_key(struct key *key)
+{
+	struct key_security_struct *ksec = key->security;
+	u32 sid = current_sid();
+
+	return avc_has_perm(&selinux_state,
+			    sid, ksec->sid, SECCLASS_KEY, KEY_NEED_VIEW, NULL);
+}
+#endif
 #endif
 
 #ifdef CONFIG_SECURITY_INFINIBAND
@@ -7081,6 +7092,9 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(key_free, selinux_key_free),
 	LSM_HOOK_INIT(key_permission, selinux_key_permission),
 	LSM_HOOK_INIT(key_getsecurity, selinux_key_getsecurity),
+#ifdef CONFIG_KEY_NOTIFICATIONS
+	LSM_HOOK_INIT(watch_key, selinux_watch_key),
+#endif
 #endif
 
 #ifdef CONFIG_AUDIT


^ permalink raw reply related

* [RFC PATCH 14/14] smack: Implement the watch_key and post_notification hooks [ver #3]
From: David Howells @ 2020-01-15 13:32 UTC (permalink / raw)
  To: torvalds
  Cc: dhowells, Greg Kroah-Hartman, Casey Schaufler, Stephen Smalley,
	nicolas.dichtel, raven, Christian Brauner, dhowells, keyrings,
	linux-usb, linux-block, linux-security-module, linux-fsdevel,
	linux-api, linux-security-module, linux-kernel
In-Reply-To: <157909503552.20155.3030058841911628518.stgit@warthog.procyon.org.uk>

Implement the watch_key security hook in Smack to make sure that a key
grants the caller Read permission in order to set a watch on a key.

Also implement the post_notification security hook to make sure that the
notification source is granted Write permission by the watch queue.

For the moment, the watch_devices security hook is left unimplemented as
it's not obvious what the object should be since the queue is global and
didn't previously exist.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
---

 include/linux/lsm_audit.h  |    1 +
 security/smack/smack_lsm.c |   82 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
index 915330abf6e5..734d67889826 100644
--- a/include/linux/lsm_audit.h
+++ b/include/linux/lsm_audit.h
@@ -74,6 +74,7 @@ struct common_audit_data {
 #define LSM_AUDIT_DATA_FILE	12
 #define LSM_AUDIT_DATA_IBPKEY	13
 #define LSM_AUDIT_DATA_IBENDPORT 14
+#define LSM_AUDIT_DATA_NOTIFICATION 15
 	union 	{
 		struct path path;
 		struct dentry *dentry;
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index ecea41ce919b..71b6f37d49c1 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -4273,7 +4273,7 @@ static int smack_key_permission(key_ref_t key_ref,
 	if (tkp == NULL)
 		return -EACCES;
 
-	if (smack_privileged_cred(CAP_MAC_OVERRIDE, cred))
+	if (smack_privileged(CAP_MAC_OVERRIDE))
 		return 0;
 
 #ifdef CONFIG_AUDIT
@@ -4319,8 +4319,81 @@ static int smack_key_getsecurity(struct key *key, char **_buffer)
 	return length;
 }
 
+
+#ifdef CONFIG_KEY_NOTIFICATIONS
+/**
+ * smack_watch_key - Smack access to watch a key for notifications.
+ * @key: The key to be watched
+ *
+ * Return 0 if the @watch->cred has permission to read from the key object and
+ * an error otherwise.
+ */
+static int smack_watch_key(struct key *key)
+{
+	struct smk_audit_info ad;
+	struct smack_known *tkp = smk_of_current();
+	int rc;
+
+	if (key == NULL)
+		return -EINVAL;
+	/*
+	 * If the key hasn't been initialized give it access so that
+	 * it may do so.
+	 */
+	if (key->security == NULL)
+		return 0;
+	/*
+	 * This should not occur
+	 */
+	if (tkp == NULL)
+		return -EACCES;
+
+	if (smack_privileged_cred(CAP_MAC_OVERRIDE, current_cred()))
+		return 0;
+
+#ifdef CONFIG_AUDIT
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_KEY);
+	ad.a.u.key_struct.key = key->serial;
+	ad.a.u.key_struct.key_desc = key->description;
+#endif
+	rc = smk_access(tkp, key->security, MAY_READ, &ad);
+	rc = smk_bu_note("key watch", tkp, key->security, MAY_READ, rc);
+	return rc;
+}
+#endif /* CONFIG_KEY_NOTIFICATIONS */
 #endif /* CONFIG_KEYS */
 
+#ifdef CONFIG_WATCH_QUEUE
+/**
+ * smack_post_notification - Smack access to post a notification to a queue
+ * @w_cred: The credentials of the watcher.
+ * @cred: The credentials of the event source (may be NULL).
+ * @n: The notification message to be posted.
+ */
+static int smack_post_notification(const struct cred *w_cred,
+				   const struct cred *cred,
+				   struct watch_notification *n)
+{
+	struct smk_audit_info ad;
+	struct smack_known *subj, *obj;
+	int rc;
+
+	/* Always let maintenance notifications through. */
+	if (n->type == WATCH_TYPE_META)
+		return 0;
+
+	if (!cred)
+		return 0;
+	subj = smk_of_task(smack_cred(cred));
+	obj = smk_of_task(smack_cred(w_cred));
+
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NOTIFICATION);
+	rc = smk_access(subj, obj, MAY_WRITE, &ad);
+	rc = smk_bu_note("notification", subj, obj, MAY_WRITE, rc);
+	return rc;
+}
+#endif /* CONFIG_WATCH_QUEUE */
+
 /*
  * Smack Audit hooks
  *
@@ -4709,8 +4782,15 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(key_free, smack_key_free),
 	LSM_HOOK_INIT(key_permission, smack_key_permission),
 	LSM_HOOK_INIT(key_getsecurity, smack_key_getsecurity),
+#ifdef CONFIG_KEY_NOTIFICATIONS
+	LSM_HOOK_INIT(watch_key, smack_watch_key),
+#endif
 #endif /* CONFIG_KEYS */
 
+#ifdef CONFIG_WATCH_QUEUE
+	LSM_HOOK_INIT(post_notification, smack_post_notification),
+#endif
+
  /* Audit hooks */
 #ifdef CONFIG_AUDIT
 	LSM_HOOK_INIT(audit_rule_init, smack_audit_rule_init),


^ permalink raw reply related

* Re: Perf Data on LSM in v5.3
From: Stephen Smalley @ 2020-01-15 13:40 UTC (permalink / raw)
  To: Wenhui Zhang, John Johansen
  Cc: Casey Schaufler, Casey Schaufler, James Morris,
	linux-security-module, SELinux, Kees Cook, penguin-kernel,
	Paul Moore
In-Reply-To: <CAOSEQ1rBu+wRzgk_Jh2RsZpf8Lv1+WUi-Pte-EsBMphnEr4SsQ@mail.gmail.com>

On 1/14/20 8:00 PM, Wenhui Zhang wrote:
> Hi, John:
> 
> It seems like, the MAC hooks are default to*return 0 or empty void 
> hooks* if CONFIG_SECURITY, CONFIG_SECURITY_NETWORK , 
> CONFIG_PAGE_TABLE_ISOLATION, CONFIG_SECURITY_INFINIBAND, 
> CONFIG_SECURITY_PATH, CONFIG_INTEL_TXT,
> CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR, 
> CONFIG_HARDENED_USERCOPY, CONFIG_HARDENED_USERCOPY_FALLBACK *are NOT set*.
> 
> If HOOKs are "return 0 or empty void hooks", MAC is not enabled.
> In runtime of fs-benchmarks, if CONFIG_DEFAULT_SECURITY_DAC=y, then 
> capability is enabled.
> 
> Please correct me if I am wrong.
> 
> For the first test, wo-sec is tested with:
> # CONFIG_SECURITY_DMESG_RESTRICT is not set
> # CONFIG_SECURITY is not set
> # CONFIG_SECURITYFS is not set
> # CONFIG_PAGE_TABLE_ISOLATION is not set
> # CONFIG_INTEL_TXT is not set
> CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y
> # CONFIG_HARDENED_USERCOPY is not set
> CONFIG_FORTIFY_SOURCE=y
> # CONFIG_STATIC_USERMODEHELPER is not set
> CONFIG_DEFAULT_SECURITY_DAC=y
> 
> 
> For the second test, w-sec is tested with:
> # CONFIG_SECURITY_DMESG_RESTRICT is not set
> CONFIG_SECURITY=y
> CONFIG_SECURITYFS=y
> # CONFIG_SECURITY_NETWORK is not set
> CONFIG_PAGE_TABLE_ISOLATION=y
> CONFIG_SECURITY_INFINIBAND=y
> CONFIG_SECURITY_PATH=y
> CONFIG_INTEL_TXT=y
> CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y
> CONFIG_HARDENED_USERCOPY=y
> CONFIG_HARDENED_USERCOPY_FALLBACK=y
> # CONFIG_HARDENED_USERCOPY_PAGESPAN is not set
> CONFIG_FORTIFY_SOURCE=y
> # CONFIG_STATIC_USERMODEHELPER is not set
> # CONFIG_SECURITY_SMACK is not set
> # CONFIG_SECURITY_TOMOYO is not set
> # CONFIG_SECURITY_APPARMOR is not set
> # CONFIG_SECURITY_LOADPIN is not set
> # CONFIG_SECURITY_YAMA is not set
> # CONFIG_SECURITY_SAFESETID is not set
> # CONFIG_INTEGRITY is not set
> CONFIG_DEFAULT_SECURITY_DAC=y
> # 
> CONFIG_LSM="yama,loadpin,safesetid,integrity,apparmor,selinux,smack,tomoyo"

Your configs should only differ with respect to CONFIG_SECURITY* if you 
want to evaluate LSM, SELinux, etc overheads.  PAGE_TABLE_ISOLATION, 
INTEL_TXT, and HARDENED_USERCOPY are not relevant to LSM itself.

Also, what benchmarks are you using?  Your own home-grown ones, a set of 
open source standard benchmarks (if so, which ones?).  You should 
include both micro and macro benchmarks in your suite.

How stable are your results?  What kind of variance / standard deviation 
are you seeing?

It is hard to get meaningful, reliable performance measurements so going 
down this road is not to be done lightly.

^ permalink raw reply

* Re: [PATCH bpf-next v1 00/13] MAC and Audit policy using eBPF (KRSI)
From: Stephen Smalley @ 2020-01-15 13:59 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: KP Singh, James Morris, Kees Cook, Casey Schaufler, open list,
	bpf, linux-security-module, Alexei Starovoitov, Daniel Borkmann,
	Thomas Garnier, Michael Halcrow, Paul Turner, Brendan Gregg,
	Jann Horn, Matthew Garrett, Christian Brauner,
	Mickaël Salaün, Florent Revest, Brendan Jackman,
	Martin KaFai Lau, Song Liu, Yonghong Song, Serge E. Hallyn,
	Mauro Carvalho Chehab, David S. Miller, Greg Kroah-Hartman,
	Nicolas Ferre, Stanislav Fomichev, Quentin Monnet, Andrey Ignatov,
	Joe Stringer, Paul Moore
In-Reply-To: <20200115024830.4ogd3mi5jy5hwr2v@ast-mbp.dhcp.thefacebook.com>

On 1/14/20 9:48 PM, Alexei Starovoitov wrote:
> On Tue, Jan 14, 2020 at 12:42:22PM -0500, Stephen Smalley wrote:
>> On 1/14/20 11:54 AM, Stephen Smalley wrote:
>>> On 1/10/20 12:53 PM, Alexei Starovoitov wrote:
>>>> On Fri, Jan 10, 2020 at 04:27:58PM +0100, KP Singh wrote:
>>>>> On 09-Jan 14:47, Stephen Smalley wrote:
>>>>>> On 1/9/20 2:43 PM, KP Singh wrote:
>>>>>>> On 10-Jan 06:07, James Morris wrote:
>>>>>>>> On Thu, 9 Jan 2020, Stephen Smalley wrote:
>>>>>>>>
>>>>>>>>> On 1/9/20 1:11 PM, James Morris wrote:
>>>>>>>>>> On Wed, 8 Jan 2020, Stephen Smalley wrote:
>>>>>>>>>>
>>>>>>>>>>> The cover letter subject line and the
>>>>>>>>>>> Kconfig help text refer to it as a
>>>>>>>>>>> BPF-based "MAC and Audit policy".  It
>>>>>>>>>>> has an enforce config option that
>>>>>>>>>>> enables the bpf programs to deny access,
>>>>>>>>>>> providing access control. IIRC,
>>>>>>>>>>> in
>>>>>>>>>>> the earlier discussion threads, the BPF
>>>>>>>>>>> maintainers suggested that Smack
>>>>>>>>>>> and
>>>>>>>>>>> other LSMs could be entirely
>>>>>>>>>>> re-implemented via it in the future, and
>>>>>>>>>>> that
>>>>>>>>>>> such an implementation would be more optimal.
>>>>>>>>>>
>>>>>>>>>> In this case, the eBPF code is similar to a
>>>>>>>>>> kernel module, rather than a
>>>>>>>>>> loadable policy file.  It's a loadable
>>>>>>>>>> mechanism, rather than a policy, in
>>>>>>>>>> my view.
>>>>>>>>>
>>>>>>>>> I thought you frowned on dynamically loadable
>>>>>>>>> LSMs for both security and
>>>>>>>>> correctness reasons?
>>>>>>>
>>>>>>> Based on the feedback from the lists we've updated the design for v2.
>>>>>>>
>>>>>>> In v2, LSM hook callbacks are allocated dynamically using BPF
>>>>>>> trampolines, appended to a separate security_hook_heads and run
>>>>>>> only after the statically allocated hooks.
>>>>>>>
>>>>>>> The security_hook_heads for all the other LSMs (SELinux, AppArmor etc)
>>>>>>> still remains __lsm_ro_after_init and cannot be modified. We are still
>>>>>>> working on v2 (not ready for review yet) but the general idea can be
>>>>>>> seen here:
>>>>>>>
>>>>>>>       https://github.com/sinkap/linux-krsi/blob/patch/v1/trampoline_prototype/security/bpf/lsm.c
>>>>>>>
>>>>>>>
>>>>>>>>
>>>>>>>> Evaluating the security impact of this is the next
>>>>>>>> step. My understanding
>>>>>>>> is that eBPF via BTF is constrained to read only access to hook
>>>>>>>> parameters, and that its behavior would be entirely restrictive.
>>>>>>>>
>>>>>>>> I'd like to understand the security impact more
>>>>>>>> fully, though.  Can the
>>>>>>>> eBPF code make arbitrary writes to the kernel, or
>>>>>>>> read anything other than
>>>>>>>> the correctly bounded LSM hook parameters?
>>>>>>>>
>>>>>>>
>>>>>>> As mentioned, the BPF verifier does not allow writes to BTF types.
>>>>>>>
>>>>>>>>> And a traditional security module would necessarily fall
>>>>>>>>> under GPL; is the eBPF code required to be
>>>>>>>>> likewise?  If not, KRSI is a
>>>>>>>>> gateway for proprietary LSMs...
>>>>>>>>
>>>>>>>> Right, we do not want this to be a GPL bypass.
>>>>>>>
>>>>>>> This is not intended to be a GPL bypass and the BPF verifier checks
>>>>>>> for license compatibility of the loaded program with GPL.
>>>>>>
>>>>>> IIUC, it checks that the program is GPL compatible if it
>>>>>> uses a function
>>>>>> marked GPL-only.  But what specifically is marked GPL-only
>>>>>> that is required
>>>>>> for eBPF programs using KRSI?
>>>>>
>>>>> Good point! If no-one objects, I can add it to the BPF_PROG_TYPE_LSM
>>>>> specific verification for the v2 of the patch-set which would require
>>>>> all BPF-LSM programs to be GPL.
>>>>
>>>> I don't think it's a good idea to enforce license on the program.
>>>> The kernel doesn't do it for modules.
>>>> For years all of BPF tracing progs were GPL because they have to use
>>>> GPL-ed helpers to do anything meaningful.
>>>> So for KRSI just make sure that all helpers are GPL-ed as well.
>>>
>>> IIUC, the example eBPF code included in this patch series showed a
>>> program that used a GPL-only helper for the purpose of reporting event
>>> output to userspace. But it could have just as easily omitted the use of
>>> that helper and still implemented its own arbitrary access control model
>>> on the LSM hooks to which it attached.  It seems like the question is
>>> whether the kernel developers are ok with exposing the entire LSM hook
>>> interface and all the associated data structures to non-GPLd code,
>>> irrespective of what helpers it may or may not use.
>>
>> Also, to be clear, while kernel modules aren't necessarily GPL, prior to
>> this patch series, all Linux security modules were necessarily GPLd in order
>> to use the LSM interface.
> 
> Because they use securityfs_create_file() GPL-ed api, right?
> but not because module license is enforced.

No, securityfs was a later addition and is not required by all LSMs 
either.  Originally LSMs had to register their hooks via 
register_security(), which was intentionally EXPORT_SYMBOL_GPL() to 
avoid exposing the LSM interface to non-GPLd modules because there were 
significant concerns with doing so when LSM was first merged.  Then in 
20510f2f4e2dabb0ff6c13901807627ec9452f98 ("security: Convert LSM into a 
static interface"), the ability for loadable modules to use 
register_security() at all was removed, limiting its use to built-in 
modules.  In commit b1d9e6b0646d0e5ee5d9050bd236b6c65d66faef ("LSM: 
Switch to lists of hooks"), register_security() was replaced by 
security_add_hooks(), but this was likewise not exported for use by 
modules and could only be used by built-in code.  The bpf LSM is 
providing a shim that allows eBPF code to attach to these hooks that 
would otherwise not be exposed to non-GPLd code, so if the bpf LSM does 
not require the eBPF programs to also be GPLd, then that is a change 
from current practice.

>> So allowing non-GPL eBPF-based LSMs would be a
>> change.
> 
> I don't see it this way. seccomp progs technically unlicensed. Yet they can
> disallow any syscall. Primitive KRSI progs like
> int bpf-prog(void*) { return REJECT; }
> would be able to do selectively disable a syscall with an overhead acceptable
> in production systems (unlike seccomp). I want this use case to be available to
> people. It's a bait, because to do real progs people would need to GPL them.
> Key helpers bpf_perf_event_output, bpf_ktime_get_ns, bpf_trace_printk are all
> GPL-ed. It may look that most networking helpers are not-GPL, but real life is
> different. To debug programs bpf_trace_printk() is necessary. To have
> communication with user space bpf_perf_event_output() is necssary. To measure
> anything or implement timestamps bpf_ktime_get_ns() is necessary. So today all
> meaninful bpf programs are GPL. Those that are not GPL probably exist, but
> they're toy programs. Hence I have zero concerns about GPL bypass coming from
> tracing, networking, and, in the future, KRSI progs too.

You have more confidence than I do about that.  I would anticipate 
developers of out-of-tree LSMs latching onto this bpf LSM and using it 
to avoid GPL.  I don't see that any of those helpers are truly needed to 
implement an access control model.

^ permalink raw reply

* Re: Perf Data on LSM in v5.3
From: Stephen Smalley @ 2020-01-15 14:06 UTC (permalink / raw)
  To: Wenhui Zhang, Casey Schaufler
  Cc: Casey Schaufler, James Morris, linux-security-module, SELinux,
	Kees Cook, John Johansen, penguin-kernel, Paul Moore
In-Reply-To: <CAOSEQ1qipfe0Juz+4V9FgebAPDDXePd29s8=G1pFtHGqx4Sedg@mail.gmail.com>

On 1/14/20 7:14 PM, Wenhui Zhang wrote:
> Hi, Casey:
> 
> Nope, I did not test without CONFIG_SECURITY for v 5.3. (I could give it 
> a try later this week, please let me know if you need this data)
> However I did this test for v4.18.20, afterwards i switched  to v5.3 as 
> my base code.
> 
> I am attaching the three results to this email for your reference for 
> v4.18.20.
>   -- without_sec is without CONFIG_SECURITY
> -- with_sec_disable_all is with CONFIG_SECURITY, however no submodule is 
> CONFIG
> -- selinux is with CONFIG_SECURITY, and CONFIG integrity and selinux 
> only, however no policy enabled

Don't enable integrity if you want to evaluate just LSM/SELinux 
overheads.  Also not sure what kind of behavior you get from SELinux 
with no policy loaded; it wasn't designed to be used that way beyond 
early initialization up to the point where init/systemd loads policy. 
Better comparisons would be running standard benchmarks on e.g. Fedora 
with SELinux disabled versus enabled as well as with LSM completely 
disabled.  Then you'd be evaluating SELinux with a policy in enforcing 
mode on a distro that actually supports it.  Similarly, evaluating 
AppArmor perf is best done on a distro that supports it and provides a 
policy, e.g. Ubuntu or latest Debian.

> 
> One interesting fact generated from this test is that, selinux and 
> integrity CONFIG introduces about 20% performance downgrade for readdir.

Would have to see the actual benchmark code, complete kernel config, and 
kernel version to evaluate that result meaningfully.

BTW, it would be interesting to evaluating the LSM overhead alone (i.e. 
CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y but all other 
CONFIG_SECURITY*=n) before and after the switch to LSM hook lists aka 
stacking support.  Don't think we ever saw micro benchmark data for that 
change IIRC.

> 
> without_sec 
> <https://drive.google.com/drive/folders/1TuUB1JT5bijG-hNvN1Dti7DyFIXM3u_g>
> 
> with_sec_disable_all 
> <https://drive.google.com/drive/folders/1bWrQ-dTSn1p05hVyvIUIAE4hkKgUp6D->
> 
> selinux 
> <https://drive.google.com/drive/folders/1132zzrw42XH8tbNgYvd44LuocgIw4Wq6>
> 
> 
> 
> On Tue, Jan 14, 2020 at 6:59 PM Casey Schaufler <casey@schaufler-ca.com 
> <mailto:casey@schaufler-ca.com>> wrote:
> 
>     On 1/14/2020 1:15 PM, Wenhui Zhang wrote:
>      >
>      > On Tue, Jan 14, 2020 at 4:08 PM Casey Schaufler
>     <casey@schaufler-ca.com <mailto:casey@schaufler-ca.com>
>     <mailto:casey@schaufler-ca.com <mailto:casey@schaufler-ca.com>>> wrote:
>      >
>      >     On 1/14/2020 12:15 PM, Wenhui Zhang wrote:
>      >     > Hi, Casey:
>      >     >
>      >     > I just performed a performance check on
>      >     > 1. v5.3 with DAC only, and
>      >     > 2. v5.3 with DAC and MAC framework, an empty-policy enabled
>     in sub-modules(e.g. selinux)
>      >
>     This is great. Do you have data for a system without CONFIG_SECURITY?
> 
> 
> 
> 
> -- 
> V/R,
> 
> Wenhui Zhang
> 
> Email: wenhui@gwmail.gwu.edu <mailto:wenhui@gwmail.gwu.edu>
>             Telephone: 1-(703) 424 3193
> 
> 
> 
> 
> 
> 


^ permalink raw reply

* Re: Perf Data on LSM in v5.3
From: Stephen Smalley @ 2020-01-15 14:09 UTC (permalink / raw)
  To: Wenhui Zhang, John Johansen
  Cc: Casey Schaufler, Casey Schaufler, James Morris,
	linux-security-module, SELinux, Kees Cook, penguin-kernel,
	Paul Moore
In-Reply-To: <e7cfc960-32fb-7712-b21c-37999cf29430@tycho.nsa.gov>

On 1/15/20 8:40 AM, Stephen Smalley wrote:
> On 1/14/20 8:00 PM, Wenhui Zhang wrote:
>> Hi, John:
>>
>> It seems like, the MAC hooks are default to*return 0 or empty void 
>> hooks* if CONFIG_SECURITY, CONFIG_SECURITY_NETWORK , 
>> CONFIG_PAGE_TABLE_ISOLATION, CONFIG_SECURITY_INFINIBAND, 
>> CONFIG_SECURITY_PATH, CONFIG_INTEL_TXT,
>> CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR, 
>> CONFIG_HARDENED_USERCOPY, CONFIG_HARDENED_USERCOPY_FALLBACK *are NOT 
>> set*.
>>
>> If HOOKs are "return 0 or empty void hooks", MAC is not enabled.
>> In runtime of fs-benchmarks, if CONFIG_DEFAULT_SECURITY_DAC=y, then 
>> capability is enabled.
>>
>> Please correct me if I am wrong.
>>
>> For the first test, wo-sec is tested with:
>> # CONFIG_SECURITY_DMESG_RESTRICT is not set
>> # CONFIG_SECURITY is not set
>> # CONFIG_SECURITYFS is not set
>> # CONFIG_PAGE_TABLE_ISOLATION is not set
>> # CONFIG_INTEL_TXT is not set
>> CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y
>> # CONFIG_HARDENED_USERCOPY is not set
>> CONFIG_FORTIFY_SOURCE=y
>> # CONFIG_STATIC_USERMODEHELPER is not set
>> CONFIG_DEFAULT_SECURITY_DAC=y
>>
>>
>> For the second test, w-sec is tested with:
>> # CONFIG_SECURITY_DMESG_RESTRICT is not set
>> CONFIG_SECURITY=y
>> CONFIG_SECURITYFS=y
>> # CONFIG_SECURITY_NETWORK is not set
>> CONFIG_PAGE_TABLE_ISOLATION=y
>> CONFIG_SECURITY_INFINIBAND=y
>> CONFIG_SECURITY_PATH=y
>> CONFIG_INTEL_TXT=y
>> CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y
>> CONFIG_HARDENED_USERCOPY=y
>> CONFIG_HARDENED_USERCOPY_FALLBACK=y
>> # CONFIG_HARDENED_USERCOPY_PAGESPAN is not set
>> CONFIG_FORTIFY_SOURCE=y
>> # CONFIG_STATIC_USERMODEHELPER is not set
>> # CONFIG_SECURITY_SMACK is not set
>> # CONFIG_SECURITY_TOMOYO is not set
>> # CONFIG_SECURITY_APPARMOR is not set
>> # CONFIG_SECURITY_LOADPIN is not set
>> # CONFIG_SECURITY_YAMA is not set
>> # CONFIG_SECURITY_SAFESETID is not set
>> # CONFIG_INTEGRITY is not set
>> CONFIG_DEFAULT_SECURITY_DAC=y
>> # 
>> CONFIG_LSM="yama,loadpin,safesetid,integrity,apparmor,selinux,smack,tomoyo" 
>>
> 
> Your configs should only differ with respect to CONFIG_SECURITY* if you 
> want to evaluate LSM, SELinux, etc overheads.  PAGE_TABLE_ISOLATION, 
> INTEL_TXT, and HARDENED_USERCOPY are not relevant to LSM itself.
> 
> Also, what benchmarks are you using?  Your own home-grown ones, a set of 
> open source standard benchmarks (if so, which ones?).  You should 
> include both micro and macro benchmarks in your suite.
> 
> How stable are your results?  What kind of variance / standard deviation 
> are you seeing?
> 
> It is hard to get meaningful, reliable performance measurements so going 
> down this road is not to be done lightly.

Also, I note that you don't enable CONFIG_SECURITY_NETWORK above.  That 
means you aren't including the base LSM overhead for the networking 
security hooks.  So if you then compare that against SELinux (which 
requires CONFIG_SECURITY_NETWORK), you are going to end up attributing 
the cost of both the LSM overhead and SELinux overhead all to SELinux. 
If you truly want to isolate the base LSM overhead, you need to enable 
all the hooks.


^ permalink raw reply

* Re: [PATCH bpf-next v1 00/13] MAC and Audit policy using eBPF (KRSI)
From: Greg Kroah-Hartman @ 2020-01-15 14:09 UTC (permalink / raw)
  To: Stephen Smalley
  Cc: Alexei Starovoitov, KP Singh, James Morris, Kees Cook,
	Casey Schaufler, open list, bpf, linux-security-module,
	Alexei Starovoitov, Daniel Borkmann, Thomas Garnier,
	Michael Halcrow, Paul Turner, Brendan Gregg, Jann Horn,
	Matthew Garrett, Christian Brauner, Mickaël Salaün,
	Florent Revest, Brendan Jackman, Martin KaFai Lau, Song Liu,
	Yonghong Song, Serge E. Hallyn, Mauro Carvalho Chehab,
	David S. Miller, Nicolas Ferre, Stanislav Fomichev,
	Quentin Monnet, Andrey Ignatov, Joe Stringer, Paul Moore
In-Reply-To: <38a82df5-7610-efe1-d6cd-76f6f68c6110@tycho.nsa.gov>

On Wed, Jan 15, 2020 at 08:59:08AM -0500, Stephen Smalley wrote:
> On 1/14/20 9:48 PM, Alexei Starovoitov wrote:
> > On Tue, Jan 14, 2020 at 12:42:22PM -0500, Stephen Smalley wrote:
> > > On 1/14/20 11:54 AM, Stephen Smalley wrote:
> > > > On 1/10/20 12:53 PM, Alexei Starovoitov wrote:
> > > > > On Fri, Jan 10, 2020 at 04:27:58PM +0100, KP Singh wrote:
> > > > > > On 09-Jan 14:47, Stephen Smalley wrote:
> > > > > > > On 1/9/20 2:43 PM, KP Singh wrote:
> > > > > > > > On 10-Jan 06:07, James Morris wrote:
> > > > > > > > > On Thu, 9 Jan 2020, Stephen Smalley wrote:
> > > > > > > > > 
> > > > > > > > > > On 1/9/20 1:11 PM, James Morris wrote:
> > > > > > > > > > > On Wed, 8 Jan 2020, Stephen Smalley wrote:
> > > > > > > > > > > 
> > > > > > > > > > > > The cover letter subject line and the
> > > > > > > > > > > > Kconfig help text refer to it as a
> > > > > > > > > > > > BPF-based "MAC and Audit policy".  It
> > > > > > > > > > > > has an enforce config option that
> > > > > > > > > > > > enables the bpf programs to deny access,
> > > > > > > > > > > > providing access control. IIRC,
> > > > > > > > > > > > in
> > > > > > > > > > > > the earlier discussion threads, the BPF
> > > > > > > > > > > > maintainers suggested that Smack
> > > > > > > > > > > > and
> > > > > > > > > > > > other LSMs could be entirely
> > > > > > > > > > > > re-implemented via it in the future, and
> > > > > > > > > > > > that
> > > > > > > > > > > > such an implementation would be more optimal.
> > > > > > > > > > > 
> > > > > > > > > > > In this case, the eBPF code is similar to a
> > > > > > > > > > > kernel module, rather than a
> > > > > > > > > > > loadable policy file.  It's a loadable
> > > > > > > > > > > mechanism, rather than a policy, in
> > > > > > > > > > > my view.
> > > > > > > > > > 
> > > > > > > > > > I thought you frowned on dynamically loadable
> > > > > > > > > > LSMs for both security and
> > > > > > > > > > correctness reasons?
> > > > > > > > 
> > > > > > > > Based on the feedback from the lists we've updated the design for v2.
> > > > > > > > 
> > > > > > > > In v2, LSM hook callbacks are allocated dynamically using BPF
> > > > > > > > trampolines, appended to a separate security_hook_heads and run
> > > > > > > > only after the statically allocated hooks.
> > > > > > > > 
> > > > > > > > The security_hook_heads for all the other LSMs (SELinux, AppArmor etc)
> > > > > > > > still remains __lsm_ro_after_init and cannot be modified. We are still
> > > > > > > > working on v2 (not ready for review yet) but the general idea can be
> > > > > > > > seen here:
> > > > > > > > 
> > > > > > > >       https://github.com/sinkap/linux-krsi/blob/patch/v1/trampoline_prototype/security/bpf/lsm.c
> > > > > > > > 
> > > > > > > > 
> > > > > > > > > 
> > > > > > > > > Evaluating the security impact of this is the next
> > > > > > > > > step. My understanding
> > > > > > > > > is that eBPF via BTF is constrained to read only access to hook
> > > > > > > > > parameters, and that its behavior would be entirely restrictive.
> > > > > > > > > 
> > > > > > > > > I'd like to understand the security impact more
> > > > > > > > > fully, though.  Can the
> > > > > > > > > eBPF code make arbitrary writes to the kernel, or
> > > > > > > > > read anything other than
> > > > > > > > > the correctly bounded LSM hook parameters?
> > > > > > > > > 
> > > > > > > > 
> > > > > > > > As mentioned, the BPF verifier does not allow writes to BTF types.
> > > > > > > > 
> > > > > > > > > > And a traditional security module would necessarily fall
> > > > > > > > > > under GPL; is the eBPF code required to be
> > > > > > > > > > likewise?  If not, KRSI is a
> > > > > > > > > > gateway for proprietary LSMs...
> > > > > > > > > 
> > > > > > > > > Right, we do not want this to be a GPL bypass.
> > > > > > > > 
> > > > > > > > This is not intended to be a GPL bypass and the BPF verifier checks
> > > > > > > > for license compatibility of the loaded program with GPL.
> > > > > > > 
> > > > > > > IIUC, it checks that the program is GPL compatible if it
> > > > > > > uses a function
> > > > > > > marked GPL-only.  But what specifically is marked GPL-only
> > > > > > > that is required
> > > > > > > for eBPF programs using KRSI?
> > > > > > 
> > > > > > Good point! If no-one objects, I can add it to the BPF_PROG_TYPE_LSM
> > > > > > specific verification for the v2 of the patch-set which would require
> > > > > > all BPF-LSM programs to be GPL.
> > > > > 
> > > > > I don't think it's a good idea to enforce license on the program.
> > > > > The kernel doesn't do it for modules.
> > > > > For years all of BPF tracing progs were GPL because they have to use
> > > > > GPL-ed helpers to do anything meaningful.
> > > > > So for KRSI just make sure that all helpers are GPL-ed as well.
> > > > 
> > > > IIUC, the example eBPF code included in this patch series showed a
> > > > program that used a GPL-only helper for the purpose of reporting event
> > > > output to userspace. But it could have just as easily omitted the use of
> > > > that helper and still implemented its own arbitrary access control model
> > > > on the LSM hooks to which it attached.  It seems like the question is
> > > > whether the kernel developers are ok with exposing the entire LSM hook
> > > > interface and all the associated data structures to non-GPLd code,
> > > > irrespective of what helpers it may or may not use.
> > > 
> > > Also, to be clear, while kernel modules aren't necessarily GPL, prior to
> > > this patch series, all Linux security modules were necessarily GPLd in order
> > > to use the LSM interface.
> > 
> > Because they use securityfs_create_file() GPL-ed api, right?
> > but not because module license is enforced.
> 
> No, securityfs was a later addition and is not required by all LSMs either.
> Originally LSMs had to register their hooks via register_security(), which
> was intentionally EXPORT_SYMBOL_GPL() to avoid exposing the LSM interface to
> non-GPLd modules because there were significant concerns with doing so when
> LSM was first merged.  Then in 20510f2f4e2dabb0ff6c13901807627ec9452f98
> ("security: Convert LSM into a static interface"), the ability for loadable
> modules to use register_security() at all was removed, limiting its use to
> built-in modules.  In commit b1d9e6b0646d0e5ee5d9050bd236b6c65d66faef ("LSM:
> Switch to lists of hooks"), register_security() was replaced by
> security_add_hooks(), but this was likewise not exported for use by modules
> and could only be used by built-in code.  The bpf LSM is providing a shim
> that allows eBPF code to attach to these hooks that would otherwise not be
> exposed to non-GPLd code, so if the bpf LSM does not require the eBPF
> programs to also be GPLd, then that is a change from current practice.
> 
> > > So allowing non-GPL eBPF-based LSMs would be a
> > > change.
> > 
> > I don't see it this way. seccomp progs technically unlicensed. Yet they can
> > disallow any syscall. Primitive KRSI progs like
> > int bpf-prog(void*) { return REJECT; }
> > would be able to do selectively disable a syscall with an overhead acceptable
> > in production systems (unlike seccomp). I want this use case to be available to
> > people. It's a bait, because to do real progs people would need to GPL them.
> > Key helpers bpf_perf_event_output, bpf_ktime_get_ns, bpf_trace_printk are all
> > GPL-ed. It may look that most networking helpers are not-GPL, but real life is
> > different. To debug programs bpf_trace_printk() is necessary. To have
> > communication with user space bpf_perf_event_output() is necssary. To measure
> > anything or implement timestamps bpf_ktime_get_ns() is necessary. So today all
> > meaninful bpf programs are GPL. Those that are not GPL probably exist, but
> > they're toy programs. Hence I have zero concerns about GPL bypass coming from
> > tracing, networking, and, in the future, KRSI progs too.
> 
> You have more confidence than I do about that.  I would anticipate
> developers of out-of-tree LSMs latching onto this bpf LSM and using it to
> avoid GPL.  I don't see that any of those helpers are truly needed to
> implement an access control model.

Yeah, I'm with Stephen here, this should be explicitly marked for
GPL-only bpf code to prevent anyone from trying to route around the LSM
apis we have today.  We have enough problem with companies trying to do
that as-is, let's not give them any other ways to abuse our license.

thanks,

greg k-h

^ permalink raw reply

* Re: Perf Data on LSM in v5.3
From: Stephen Smalley @ 2020-01-15 15:42 UTC (permalink / raw)
  To: Wenhui Zhang
  Cc: John Johansen, Casey Schaufler, Casey Schaufler, James Morris,
	linux-security-module, SELinux, Kees Cook, penguin-kernel,
	Paul Moore
In-Reply-To: <1479ac1a-b957-f907-b983-c0bcefd51457@tycho.nsa.gov>

On 1/15/20 10:34 AM, Stephen Smalley wrote:
> On 1/15/20 10:21 AM, Wenhui Zhang wrote:
>>
>> On Wed, Jan 15, 2020 at 9:08 AM Stephen Smalley <sds@tycho.nsa.gov 
>> <mailto:sds@tycho.nsa.gov>> wrote:
>>
>>     On 1/15/20 8:40 AM, Stephen Smalley wrote:
>>      > On 1/14/20 8:00 PM, Wenhui Zhang wrote:
>>      >> Hi, John:
>>      >>
>>      >> It seems like, the MAC hooks are default to*return 0 or empty 
>> void
>>      >> hooks* if CONFIG_SECURITY, CONFIG_SECURITY_NETWORK ,
>>      >> CONFIG_PAGE_TABLE_ISOLATION, CONFIG_SECURITY_INFINIBAND,
>>      >> CONFIG_SECURITY_PATH, CONFIG_INTEL_TXT,
>>      >> CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR,
>>      >> CONFIG_HARDENED_USERCOPY, CONFIG_HARDENED_USERCOPY_FALLBACK *are
>>     NOT
>>      >> set*.
>>      >>
>>      >> If HOOKs are "return 0 or empty void hooks", MAC is not enabled.
>>      >> In runtime of fs-benchmarks, if CONFIG_DEFAULT_SECURITY_DAC=y, 
>> then
>>      >> capability is enabled.
>>      >>
>>      >> Please correct me if I am wrong.
>>      >>
>>      >> For the first test, wo-sec is tested with:
>>      >> # CONFIG_SECURITY_DMESG_RESTRICT is not set
>>      >> # CONFIG_SECURITY is not set
>>      >> # CONFIG_SECURITYFS is not set
>>      >> # CONFIG_PAGE_TABLE_ISOLATION is not set
>>      >> # CONFIG_INTEL_TXT is not set
>>      >> CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y
>>      >> # CONFIG_HARDENED_USERCOPY is not set
>>      >> CONFIG_FORTIFY_SOURCE=y
>>      >> # CONFIG_STATIC_USERMODEHELPER is not set
>>      >> CONFIG_DEFAULT_SECURITY_DAC=y
>>      >>
>>      >>
>>      >> For the second test, w-sec is tested with:
>>      >> # CONFIG_SECURITY_DMESG_RESTRICT is not set
>>      >> CONFIG_SECURITY=y
>>      >> CONFIG_SECURITYFS=y
>>      >> # CONFIG_SECURITY_NETWORK is not set
>>      >> CONFIG_PAGE_TABLE_ISOLATION=y
>>      >> CONFIG_SECURITY_INFINIBAND=y
>>      >> CONFIG_SECURITY_PATH=y
>>      >> CONFIG_INTEL_TXT=y
>>      >> CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y
>>      >> CONFIG_HARDENED_USERCOPY=y
>>      >> CONFIG_HARDENED_USERCOPY_FALLBACK=y
>>      >> # CONFIG_HARDENED_USERCOPY_PAGESPAN is not set
>>      >> CONFIG_FORTIFY_SOURCE=y
>>      >> # CONFIG_STATIC_USERMODEHELPER is not set
>>      >> # CONFIG_SECURITY_SMACK is not set
>>      >> # CONFIG_SECURITY_TOMOYO is not set
>>      >> # CONFIG_SECURITY_APPARMOR is not set
>>      >> # CONFIG_SECURITY_LOADPIN is not set
>>      >> # CONFIG_SECURITY_YAMA is not set
>>      >> # CONFIG_SECURITY_SAFESETID is not set
>>      >> # CONFIG_INTEGRITY is not set
>>      >> CONFIG_DEFAULT_SECURITY_DAC=y
>>      >> #
>>      >>
>>     
>> CONFIG_LSM="yama,loadpin,safesetid,integrity,apparmor,selinux,smack,tomoyo" 
>>
>>
>>      >>
>>      >
>>      > Your configs should only differ with respect to CONFIG_SECURITY*
>>     if you
>>      > want to evaluate LSM, SELinux, etc overheads.  
>> PAGE_TABLE_ISOLATION,
>>      > INTEL_TXT, and HARDENED_USERCOPY are not relevant to LSM itself.
>>      >
>>      > Also, what benchmarks are you using?  Your own home-grown ones, a
>>     set of
>>      > open source standard benchmarks (if so, which ones?).  You should
>>      > include both micro and macro benchmarks in your suite.
>>      >
>>      > How stable are your results?  What kind of variance / standard
>>     deviation
>>      > are you seeing?
>>      >
>>      > It is hard to get meaningful, reliable performance measurements
>>     so going
>>      > down this road is not to be done lightly.
>>
>>     Also, I note that you don't enable CONFIG_SECURITY_NETWORK above.  
>> That
>>     means you aren't including the base LSM overhead for the networking
>>     security hooks.  So if you then compare that against SELinux (which
>>     requires CONFIG_SECURITY_NETWORK), you are going to end up 
>> attributing
>>     the cost of both the LSM overhead and SELinux overhead all to 
>> SELinux.
>>     If you truly want to isolate the base LSM overhead, you need to 
>> enable
>>     all the hooks.
>>
>> I will give it a try for enabling CONFIG_SECURITY_NETWORK later this 
>> week, however I wonder if this would affect the test results that much.
>> I am testing with LMBench 2.5 , with focusing on filesystem unit 
>> tests, however not network stack at this time.
>> My understanding of why this result is so different from previous 
>> paper 20 years ago, is that the Bottleneck changes.
>> As Chris was testing with 4 cores , each 700MHz CPU, and 128MB memory, 
>> with HDD (latency is about 20,000,000 ns for sequential read).
>> The  Bottleneck of accessing files w/ MAC are mostly on I/O.
>> However hardware setup is different now,  we have much larger and 
>> faster memory (better prefetching as well), with SSD (latency is about 
>> 49,000 ns for sequential read). , while CPU speed is not increasing as 
>> much as that of I/O.
>> The Bottleneck of accessing files w/ MAC are mostly on CPU now.
> 
> Don't know if lmbench is still a good benchmark and I recall struggling 
> with it even back then to get stable results.
> 
> Could be bottleneck changes, could be the fact that your kernel config 
> changes aren't limited to CONFIG_SECURITY* (e.g. PTI introduces 
> non-trivial overheads), could be changes to LSM since that time (e.g. 
> stacking support, moving security_ calls out-of-line, more hooks, ...), 
> could be that running SELinux w/o policy is flooding the system logs 
> with warnings or other messages since it wasn't really designed to be 
> used that way past initialization.  Lots of options, can't tell without 
> more info on your details.

I'd think that these days one would leverage perf and/or lkp for Linux 
kernel performance measurements, not lmbench.



^ permalink raw reply

* [PATCH v4] ima: ima/lsm policy rule loading logic bug fixes
From: Janne Karhunen @ 2020-01-15 15:42 UTC (permalink / raw)
  To: linux-integrity, linux-security-module, zohar
  Cc: Janne Karhunen, Casey Schaufler, Konsta Karsisto

Keep the ima policy rules around from the beginning even if they appear
invalid at the time of loading, as they may become active after an lsm
policy load. However, loading a custom IMA policy with unknown LSM
labels is only safe after we have transitioned from the "built-in"
policy rules to a custom IMA policy.

Patch also fixes the rule re-use during the lsm policy reload and makes
some prints a bit more human readable.

Changelog:
v4:
- Do not allow the initial policy load refer to non-existing lsm rules.
v3:
- Fix too wide policy rule matching for non-initialized LSMs
v2:
- Fix log prints

Fixes: b16942455193 ("ima: use the lsm policy update notifier")
Cc: Casey Schaufler <casey@schaufler-ca.com>
Reported-by: Mimi Zohar <zohar@linux.ibm.com>
Reviewed-by: Mimi Zohar <zohar@linux.ibm.com>
Signed-off-by: Janne Karhunen <janne.karhunen@gmail.com>
Signed-off-by: Konsta Karsisto <konsta.karsisto@gmail.com>
---
 security/integrity/ima/ima_policy.c | 44 +++++++++++++++++------------
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index a4dde9d575b2..76dc9b3dd0fc 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -265,7 +265,7 @@ static void ima_lsm_free_rule(struct ima_rule_entry *entry)
 static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry)
 {
 	struct ima_rule_entry *nentry;
-	int i, result;
+	int i;
 
 	nentry = kmalloc(sizeof(*nentry), GFP_KERNEL);
 	if (!nentry)
@@ -279,7 +279,7 @@ static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry)
 	memset(nentry->lsm, 0, FIELD_SIZEOF(struct ima_rule_entry, lsm));
 
 	for (i = 0; i < MAX_LSM_RULES; i++) {
-		if (!entry->lsm[i].rule)
+		if (!entry->lsm[i].args_p)
 			continue;
 
 		nentry->lsm[i].type = entry->lsm[i].type;
@@ -288,13 +288,13 @@ static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry)
 		if (!nentry->lsm[i].args_p)
 			goto out_err;
 
-		result = security_filter_rule_init(nentry->lsm[i].type,
-						   Audit_equal,
-						   nentry->lsm[i].args_p,
-						   &nentry->lsm[i].rule);
-		if (result == -EINVAL)
-			pr_warn("ima: rule for LSM \'%d\' is undefined\n",
-				entry->lsm[i].type);
+		security_filter_rule_init(nentry->lsm[i].type,
+					  Audit_equal,
+					  nentry->lsm[i].args_p,
+					  &nentry->lsm[i].rule);
+		if (!nentry->lsm[i].rule)
+			pr_warn("rule for LSM \'%s\' is undefined\n",
+				(char *)entry->lsm[i].args_p);
 	}
 	return nentry;
 
@@ -331,7 +331,7 @@ static void ima_lsm_update_rules(void)
 	list_for_each_entry_safe(entry, e, &ima_policy_rules, list) {
 		needs_update = 0;
 		for (i = 0; i < MAX_LSM_RULES; i++) {
-			if (entry->lsm[i].rule) {
+			if (entry->lsm[i].args_p) {
 				needs_update = 1;
 				break;
 			}
@@ -341,8 +341,7 @@ static void ima_lsm_update_rules(void)
 
 		result = ima_lsm_update_rule(entry);
 		if (result) {
-			pr_err("ima: lsm rule update error %d\n",
-				result);
+			pr_err("lsm rule update error %d\n", result);
 			return;
 		}
 	}
@@ -403,7 +402,7 @@ static bool ima_match_keyring(struct ima_rule_entry *rule,
 }
 
 /**
- * ima_match_rules - determine whether an inode matches the measure rule.
+ * ima_match_rules - determine whether an inode matches the policy rule.
  * @rule: a pointer to a rule
  * @inode: a pointer to an inode
  * @cred: a pointer to a credentials structure for user validation
@@ -466,9 +465,12 @@ static bool ima_match_rules(struct ima_rule_entry *rule, struct inode *inode,
 		int rc = 0;
 		u32 osid;
 
-		if (!rule->lsm[i].rule)
-			continue;
-
+		if (!rule->lsm[i].rule) {
+			if (!rule->lsm[i].args_p)
+				continue;
+			else
+				return false;
+		}
 		switch (i) {
 		case LSM_OBJ_USER:
 		case LSM_OBJ_ROLE:
@@ -880,8 +882,14 @@ static int ima_lsm_rule_init(struct ima_rule_entry *entry,
 					   entry->lsm[lsm_rule].args_p,
 					   &entry->lsm[lsm_rule].rule);
 	if (!entry->lsm[lsm_rule].rule) {
-		kfree(entry->lsm[lsm_rule].args_p);
-		return -EINVAL;
+		pr_warn("rule for LSM \'%s\' is undefined\n",
+			(char *)entry->lsm[lsm_rule].args_p);
+
+		if (ima_rules == &ima_default_rules) {
+			kfree(entry->lsm[lsm_rule].args_p);
+			result = -EINVAL;
+		} else
+			result = 0;
 	}
 
 	return result;
-- 
2.17.1


^ permalink raw reply related

* Re: Perf Data on LSM in v5.3
From: Stephen Smalley @ 2020-01-15 15:34 UTC (permalink / raw)
  To: Wenhui Zhang
  Cc: John Johansen, Casey Schaufler, Casey Schaufler, James Morris,
	linux-security-module, SELinux, Kees Cook, penguin-kernel,
	Paul Moore
In-Reply-To: <CAOSEQ1o3nhY-svtsFSSv+M=V+NchxmBbhY-FvqoTzJgMnZ1ydw@mail.gmail.com>

On 1/15/20 10:21 AM, Wenhui Zhang wrote:
> 
> On Wed, Jan 15, 2020 at 9:08 AM Stephen Smalley <sds@tycho.nsa.gov 
> <mailto:sds@tycho.nsa.gov>> wrote:
> 
>     On 1/15/20 8:40 AM, Stephen Smalley wrote:
>      > On 1/14/20 8:00 PM, Wenhui Zhang wrote:
>      >> Hi, John:
>      >>
>      >> It seems like, the MAC hooks are default to*return 0 or empty void
>      >> hooks* if CONFIG_SECURITY, CONFIG_SECURITY_NETWORK ,
>      >> CONFIG_PAGE_TABLE_ISOLATION, CONFIG_SECURITY_INFINIBAND,
>      >> CONFIG_SECURITY_PATH, CONFIG_INTEL_TXT,
>      >> CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR,
>      >> CONFIG_HARDENED_USERCOPY, CONFIG_HARDENED_USERCOPY_FALLBACK *are
>     NOT
>      >> set*.
>      >>
>      >> If HOOKs are "return 0 or empty void hooks", MAC is not enabled.
>      >> In runtime of fs-benchmarks, if CONFIG_DEFAULT_SECURITY_DAC=y, then
>      >> capability is enabled.
>      >>
>      >> Please correct me if I am wrong.
>      >>
>      >> For the first test, wo-sec is tested with:
>      >> # CONFIG_SECURITY_DMESG_RESTRICT is not set
>      >> # CONFIG_SECURITY is not set
>      >> # CONFIG_SECURITYFS is not set
>      >> # CONFIG_PAGE_TABLE_ISOLATION is not set
>      >> # CONFIG_INTEL_TXT is not set
>      >> CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y
>      >> # CONFIG_HARDENED_USERCOPY is not set
>      >> CONFIG_FORTIFY_SOURCE=y
>      >> # CONFIG_STATIC_USERMODEHELPER is not set
>      >> CONFIG_DEFAULT_SECURITY_DAC=y
>      >>
>      >>
>      >> For the second test, w-sec is tested with:
>      >> # CONFIG_SECURITY_DMESG_RESTRICT is not set
>      >> CONFIG_SECURITY=y
>      >> CONFIG_SECURITYFS=y
>      >> # CONFIG_SECURITY_NETWORK is not set
>      >> CONFIG_PAGE_TABLE_ISOLATION=y
>      >> CONFIG_SECURITY_INFINIBAND=y
>      >> CONFIG_SECURITY_PATH=y
>      >> CONFIG_INTEL_TXT=y
>      >> CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y
>      >> CONFIG_HARDENED_USERCOPY=y
>      >> CONFIG_HARDENED_USERCOPY_FALLBACK=y
>      >> # CONFIG_HARDENED_USERCOPY_PAGESPAN is not set
>      >> CONFIG_FORTIFY_SOURCE=y
>      >> # CONFIG_STATIC_USERMODEHELPER is not set
>      >> # CONFIG_SECURITY_SMACK is not set
>      >> # CONFIG_SECURITY_TOMOYO is not set
>      >> # CONFIG_SECURITY_APPARMOR is not set
>      >> # CONFIG_SECURITY_LOADPIN is not set
>      >> # CONFIG_SECURITY_YAMA is not set
>      >> # CONFIG_SECURITY_SAFESETID is not set
>      >> # CONFIG_INTEGRITY is not set
>      >> CONFIG_DEFAULT_SECURITY_DAC=y
>      >> #
>      >>
>     CONFIG_LSM="yama,loadpin,safesetid,integrity,apparmor,selinux,smack,tomoyo"
> 
>      >>
>      >
>      > Your configs should only differ with respect to CONFIG_SECURITY*
>     if you
>      > want to evaluate LSM, SELinux, etc overheads.  PAGE_TABLE_ISOLATION,
>      > INTEL_TXT, and HARDENED_USERCOPY are not relevant to LSM itself.
>      >
>      > Also, what benchmarks are you using?  Your own home-grown ones, a
>     set of
>      > open source standard benchmarks (if so, which ones?).  You should
>      > include both micro and macro benchmarks in your suite.
>      >
>      > How stable are your results?  What kind of variance / standard
>     deviation
>      > are you seeing?
>      >
>      > It is hard to get meaningful, reliable performance measurements
>     so going
>      > down this road is not to be done lightly.
> 
>     Also, I note that you don't enable CONFIG_SECURITY_NETWORK above.  That
>     means you aren't including the base LSM overhead for the networking
>     security hooks.  So if you then compare that against SELinux (which
>     requires CONFIG_SECURITY_NETWORK), you are going to end up attributing
>     the cost of both the LSM overhead and SELinux overhead all to SELinux.
>     If you truly want to isolate the base LSM overhead, you need to enable
>     all the hooks.
> 
> I will give it a try for enabling CONFIG_SECURITY_NETWORK later this 
> week, however I wonder if this would affect the test results that much.
> I am testing with LMBench 2.5 , with focusing on filesystem unit tests, 
> however not network stack at this time.
> My understanding of why this result is so different from previous paper 
> 20 years ago, is that the Bottleneck changes.
> As Chris was testing with 4 cores , each 700MHz CPU, and 128MB memory, 
> with HDD (latency is about 20,000,000 ns for sequential read).
> The  Bottleneck of accessing files w/ MAC are mostly on I/O.
> However hardware setup is different now,  we have much larger and faster 
> memory (better prefetching as well), with SSD (latency is about 49,000 
> ns for sequential read). , while CPU speed is not increasing as much as 
> that of I/O.
> The Bottleneck of accessing files w/ MAC are mostly on CPU now.

Don't know if lmbench is still a good benchmark and I recall struggling 
with it even back then to get stable results.

Could be bottleneck changes, could be the fact that your kernel config 
changes aren't limited to CONFIG_SECURITY* (e.g. PTI introduces 
non-trivial overheads), could be changes to LSM since that time (e.g. 
stacking support, moving security_ calls out-of-line, more hooks, ...), 
could be that running SELinux w/o policy is flooding the system logs 
with warnings or other messages since it wasn't really designed to be 
used that way past initialization.  Lots of options, can't tell without 
more info on your details.



^ permalink raw reply

* Re: Perf Data on LSM in v5.3
From: Stephen Smalley @ 2020-01-15 16:04 UTC (permalink / raw)
  To: Wenhui Zhang
  Cc: John Johansen, Casey Schaufler, Casey Schaufler, James Morris,
	linux-security-module, SELinux, Kees Cook, penguin-kernel,
	Paul Moore
In-Reply-To: <CAOSEQ1o6+uL-ATjQ_YXaJP9KxFTS3_b_bzeO7M8eiKbCB9dsyQ@mail.gmail.com>

On 1/15/20 10:59 AM, Wenhui Zhang wrote:
> 
> 
> On Wed, Jan 15, 2020 at 10:41 AM Stephen Smalley <sds@tycho.nsa.gov 
> <mailto:sds@tycho.nsa.gov>> wrote:
> 
>     On 1/15/20 10:34 AM, Stephen Smalley wrote:
>      > On 1/15/20 10:21 AM, Wenhui Zhang wrote:
>      >>
>      >> On Wed, Jan 15, 2020 at 9:08 AM Stephen Smalley
>     <sds@tycho.nsa.gov <mailto:sds@tycho.nsa.gov>
>      >> <mailto:sds@tycho.nsa.gov <mailto:sds@tycho.nsa.gov>>> wrote:
>      >>
>      >>     On 1/15/20 8:40 AM, Stephen Smalley wrote:
>      >>      > On 1/14/20 8:00 PM, Wenhui Zhang wrote:
>      >>      >> Hi, John:
>      >>      >>
>      >>      >> It seems like, the MAC hooks are default to*return 0 or
>     empty
>      >> void
>      >>      >> hooks* if CONFIG_SECURITY, CONFIG_SECURITY_NETWORK ,
>      >>      >> CONFIG_PAGE_TABLE_ISOLATION, CONFIG_SECURITY_INFINIBAND,
>      >>      >> CONFIG_SECURITY_PATH, CONFIG_INTEL_TXT,
>      >>      >> CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR,
>      >>      >>
>     CONFIG_HARDENED_USERCOPY, CONFIG_HARDENED_USERCOPY_FALLBACK *are
>      >>     NOT
>      >>      >> set*.
>      >>      >>
>      >>      >> If HOOKs are "return 0 or empty void hooks", MAC is not
>     enabled.
>      >>      >> In runtime of fs-benchmarks,
>     if CONFIG_DEFAULT_SECURITY_DAC=y,
>      >> then
>      >>      >> capability is enabled.
>      >>      >>
>      >>      >> Please correct me if I am wrong.
>      >>      >>
>      >>      >> For the first test, wo-sec is tested with:
>      >>      >> # CONFIG_SECURITY_DMESG_RESTRICT is not set
>      >>      >> # CONFIG_SECURITY is not set
>      >>      >> # CONFIG_SECURITYFS is not set
>      >>      >> # CONFIG_PAGE_TABLE_ISOLATION is not set
>      >>      >> # CONFIG_INTEL_TXT is not set
>      >>      >> CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y
>      >>      >> # CONFIG_HARDENED_USERCOPY is not set
>      >>      >> CONFIG_FORTIFY_SOURCE=y
>      >>      >> # CONFIG_STATIC_USERMODEHELPER is not set
>      >>      >> CONFIG_DEFAULT_SECURITY_DAC=y
>      >>      >>
>      >>      >>
>      >>      >> For the second test, w-sec is tested with:
>      >>      >> # CONFIG_SECURITY_DMESG_RESTRICT is not set
>      >>      >> CONFIG_SECURITY=y
>      >>      >> CONFIG_SECURITYFS=y
>      >>      >> # CONFIG_SECURITY_NETWORK is not set
>      >>      >> CONFIG_PAGE_TABLE_ISOLATION=y
>      >>      >> CONFIG_SECURITY_INFINIBAND=y
>      >>      >> CONFIG_SECURITY_PATH=y
>      >>      >> CONFIG_INTEL_TXT=y
>      >>      >> CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y
>      >>      >> CONFIG_HARDENED_USERCOPY=y
>      >>      >> CONFIG_HARDENED_USERCOPY_FALLBACK=y
>      >>      >> # CONFIG_HARDENED_USERCOPY_PAGESPAN is not set
>      >>      >> CONFIG_FORTIFY_SOURCE=y
>      >>      >> # CONFIG_STATIC_USERMODEHELPER is not set
>      >>      >> # CONFIG_SECURITY_SMACK is not set
>      >>      >> # CONFIG_SECURITY_TOMOYO is not set
>      >>      >> # CONFIG_SECURITY_APPARMOR is not set
>      >>      >> # CONFIG_SECURITY_LOADPIN is not set
>      >>      >> # CONFIG_SECURITY_YAMA is not set
>      >>      >> # CONFIG_SECURITY_SAFESETID is not set
>      >>      >> # CONFIG_INTEGRITY is not set
>      >>      >> CONFIG_DEFAULT_SECURITY_DAC=y
>      >>      >> #
>      >>      >>
>      >>
>      >>
>     CONFIG_LSM="yama,loadpin,safesetid,integrity,apparmor,selinux,smack,tomoyo"
> 
>      >>
>      >>
>      >>      >>
>      >>      >
>      >>      > Your configs should only differ with respect to
>     CONFIG_SECURITY*
>      >>     if you
>      >>      > want to evaluate LSM, SELinux, etc overheads.
>      >> PAGE_TABLE_ISOLATION,
>      >>      > INTEL_TXT, and HARDENED_USERCOPY are not relevant to LSM
>     itself.
>      >>      >
>      >>      > Also, what benchmarks are you using?  Your own home-grown
>     ones, a
>      >>     set of
>      >>      > open source standard benchmarks (if so, which ones?). 
>     You should
>      >>      > include both micro and macro benchmarks in your suite.
>      >>      >
>      >>      > How stable are your results?  What kind of variance /
>     standard
>      >>     deviation
>      >>      > are you seeing?
>      >>      >
>      >>      > It is hard to get meaningful, reliable performance
>     measurements
>      >>     so going
>      >>      > down this road is not to be done lightly.
>      >>
>      >>     Also, I note that you don't enable CONFIG_SECURITY_NETWORK
>     above.
>      >> That
>      >>     means you aren't including the base LSM overhead for the
>     networking
>      >>     security hooks.  So if you then compare that against SELinux
>     (which
>      >>     requires CONFIG_SECURITY_NETWORK), you are going to end up
>      >> attributing
>      >>     the cost of both the LSM overhead and SELinux overhead all to
>      >> SELinux.
>      >>     If you truly want to isolate the base LSM overhead, you need to
>      >> enable
>      >>     all the hooks.
>      >>
>      >> I will give it a try for enabling CONFIG_SECURITY_NETWORK later
>     this
>      >> week, however I wonder if this would affect the test results
>     that much.
>      >> I am testing with LMBench 2.5 , with focusing on filesystem unit
>      >> tests, however not network stack at this time.
>      >> My understanding of why this result is so different from previous
>      >> paper 20 years ago, is that the Bottleneck changes.
>      >> As Chris was testing with 4 cores , each 700MHz CPU, and 128MB
>     memory,
>      >> with HDD (latency is about 20,000,000 ns for sequential read).
>      >> The  Bottleneck of accessing files w/ MAC are mostly on I/O.
>      >> However hardware setup is different now,  we have much larger and
>      >> faster memory (better prefetching as well), with SSD (latency is
>     about
>      >> 49,000 ns for sequential read). , while CPU speed is not
>     increasing as
>      >> much as that of I/O.
>      >> The Bottleneck of accessing files w/ MAC are mostly on CPU now.
>      >
>      > Don't know if lmbench is still a good benchmark and I recall
>     struggling
>      > with it even back then to get stable results.
>      >
>      > Could be bottleneck changes, could be the fact that your kernel
>     config
>      > changes aren't limited to CONFIG_SECURITY* (e.g. PTI introduces
>      > non-trivial overheads), could be changes to LSM since that time
>     (e.g.
>      > stacking support, moving security_ calls out-of-line, more hooks,
>     ...),
>      > could be that running SELinux w/o policy is flooding the system logs
>      > with warnings or other messages since it wasn't really designed
>     to be
>      > used that way past initialization.  Lots of options, can't tell
>     without
>      > more info on your details.
> 
>     I'd think that these days one would leverage perf and/or lkp for Linux
>     kernel performance measurements, not lmbench.
> 
> 
> Thanks so much, I will give it a try for lkp and let you know how it goes.
> Maybe later next week or this weekend, we should have the results.

Ok, please make sure your kernel configs are truly comparable (i.e. no 
differences other than the right set of CONFIG_SECURITY* options), that 
all of the same LSM hooks are enabled for comparing LSM-only versus 
SELinux (i.e. CONFIG_SECURITY and CONFIG_SECURITY_NETWORK both enabled), 
and consider using a distribution that actually supports SELinux out of 
the box (e.g. Fedora) so that you can properly test SELinux with a 
policy loaded in enforcing mode.  Similarly if you want to do the same 
for AppArmor, except for it you'll need to enable CONFIG_SECURITY_PATH 
as well for the pathname-based hooks and you'll want to use Ubuntu or 
latest Debian to get a working policy.

^ permalink raw reply

* Re: Perf Data on LSM in v5.3
From: Stephen Smalley @ 2020-01-15 16:48 UTC (permalink / raw)
  To: Wenhui Zhang
  Cc: John Johansen, Casey Schaufler, Casey Schaufler, James Morris,
	linux-security-module, SELinux, Kees Cook, penguin-kernel,
	Paul Moore
In-Reply-To: <CAOSEQ1qPCtdsaieuXtWDEBEZAyddvLTNn8VDAJ-JWKeAP5PYsA@mail.gmail.com>

On 1/15/20 10:56 AM, Wenhui Zhang wrote:
> 
> 
> On Wed, Jan 15, 2020 at 10:33 AM Stephen Smalley <sds@tycho.nsa.gov 
> <mailto:sds@tycho.nsa.gov>> wrote:
>     Could be bottleneck changes, could be the fact that your kernel config
>     changes aren't limited to CONFIG_SECURITY* (e.g. PTI introduces
>     non-trivial overheads), could be changes to LSM since that time (e.g.
>     stacking support, moving security_ calls out-of-line, more hooks, ...),
>     could be that running SELinux w/o policy is flooding the system logs
>     with warnings or other messages since it wasn't really designed to be
>     used that way past initialization.  Lots of options, can't tell without
>     more info on your details.
> 
<snip>
> 
> Yup, seems like there used to be 29 hooks back 20 years ago, and now we 
> have 214 hooks with more placement locations for each.
> I am still struggling with the white-list based stacking logic, please 
> correct me if my understanding is wrong.

I don't think that's entirely accurate, since I believe the benchmarking 
performed for that paper was done using the LSM kernel patch (which was 
still out-of-tree at the time) which had a larger set of hooks than just 
29.  The hooks were incrementally merged to mainline but that wasn't the 
basis for the benchmarks IIRC. It is true however that the set of hooks 
and their callers has grown over time.

> I thought black-list  (i.e. firewall alike) logic might be easier to 
> understand and reasoning about the performance.
> By adding an off-line policy conflict/shadowing/redundant checker (i.e. 
> FIREMAN alike) should solve the rest of the issue.

Not sure I follow, but blacklisting is not a good approach for enforcing 
MAC security goals.

^ permalink raw reply

* [PATCH bpf-next v2 03/10] bpf: lsm: Introduce types for eBPF based LSM
From: KP Singh @ 2020-01-15 17:13 UTC (permalink / raw)
  To: linux-kernel, bpf, linux-security-module
  Cc: Alexei Starovoitov, Daniel Borkmann, James Morris, Kees Cook,
	Thomas Garnier, Michael Halcrow, Paul Turner, Brendan Gregg,
	Jann Horn, Matthew Garrett, Christian Brauner,
	Mickaël Salaün, Florent Revest, Brendan Jackman,
	Martin KaFai Lau, Song Liu, Yonghong Song, Serge E. Hallyn,
	Mauro Carvalho Chehab, David S. Miller, Greg Kroah-Hartman,
	Nicolas Ferre, Stanislav Fomichev, Quentin Monnet, Andrey Ignatov,
	Joe Stringer
In-Reply-To: <20200115171333.28811-1-kpsingh@chromium.org>

From: KP Singh <kpsingh@google.com>

A new eBPF program type BPF_PROG_TYPE_LSM with an
expected attach type of BPF_LSM_MAC. Attachment to LSM hooks is not
implemented in this patch.

On defining the types for the program, the macros expect that
<prog_name>_prog_ops and <prog_name>_verifier_ops exist. This is
implicitly required by the macro:

  BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm, ...)

Signed-off-by: KP Singh <kpsingh@google.com>
---
 include/linux/bpf_types.h      |  4 ++++
 include/uapi/linux/bpf.h       |  2 ++
 kernel/bpf/syscall.c           |  6 ++++++
 security/bpf/Makefile          |  2 +-
 security/bpf/ops.c             | 28 ++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  2 ++
 tools/lib/bpf/libbpf_probes.c  |  1 +
 7 files changed, 44 insertions(+), 1 deletion(-)
 create mode 100644 security/bpf/ops.c

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 9f326e6ef885..2f5b054500d7 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -68,6 +68,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport,
 #if defined(CONFIG_BPF_JIT)
 BPF_PROG_TYPE(BPF_PROG_TYPE_STRUCT_OPS, bpf_struct_ops,
 	      void *, void *)
+#ifdef CONFIG_SECURITY_BPF
+BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm,
+	       void *, void *)
+#endif /* CONFIG_SECURITY_BPF */
 #endif
 
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 52966e758fe5..b6a725a8a21d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -176,6 +176,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_CGROUP_SOCKOPT,
 	BPF_PROG_TYPE_TRACING,
 	BPF_PROG_TYPE_STRUCT_OPS,
+	BPF_PROG_TYPE_LSM,
 };
 
 enum bpf_attach_type {
@@ -205,6 +206,7 @@ enum bpf_attach_type {
 	BPF_TRACE_RAW_TP,
 	BPF_TRACE_FENTRY,
 	BPF_TRACE_FEXIT,
+	BPF_LSM_MAC,
 	__MAX_BPF_ATTACH_TYPE
 };
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f9db72a96ec0..2945739618c9 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2151,6 +2151,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	case BPF_LIRC_MODE2:
 		ptype = BPF_PROG_TYPE_LIRC_MODE2;
 		break;
+	case BPF_LSM_MAC:
+		ptype = BPF_PROG_TYPE_LSM;
+		break;
 	case BPF_FLOW_DISSECTOR:
 		ptype = BPF_PROG_TYPE_FLOW_DISSECTOR;
 		break;
@@ -2182,6 +2185,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	case BPF_PROG_TYPE_LIRC_MODE2:
 		ret = lirc_prog_attach(attr, prog);
 		break;
+	case BPF_PROG_TYPE_LSM:
+		ret = -EINVAL;
+		break;
 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
 		ret = skb_flow_dissector_bpf_prog_attach(attr, prog);
 		break;
diff --git a/security/bpf/Makefile b/security/bpf/Makefile
index 26a0ab6f99b7..c78a8a056e7e 100644
--- a/security/bpf/Makefile
+++ b/security/bpf/Makefile
@@ -2,4 +2,4 @@
 #
 # Copyright 2019 Google LLC.
 
-obj-$(CONFIG_SECURITY_BPF) := lsm.o
+obj-$(CONFIG_SECURITY_BPF) := lsm.o ops.o
diff --git a/security/bpf/ops.c b/security/bpf/ops.c
new file mode 100644
index 000000000000..81c2bd9c0495
--- /dev/null
+++ b/security/bpf/ops.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2019 Google LLC.
+ */
+
+#include <linux/filter.h>
+#include <linux/bpf.h>
+
+const struct bpf_prog_ops lsm_prog_ops = {
+};
+
+static const struct bpf_func_proto *get_bpf_func_proto(
+	enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	switch (func_id) {
+	case BPF_FUNC_map_lookup_elem:
+		return &bpf_map_lookup_elem_proto;
+	case BPF_FUNC_get_current_pid_tgid:
+		return &bpf_get_current_pid_tgid_proto;
+	default:
+		return NULL;
+	}
+}
+
+const struct bpf_verifier_ops lsm_verifier_ops = {
+	.get_func_proto = get_bpf_func_proto,
+};
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 52966e758fe5..b6a725a8a21d 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -176,6 +176,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_CGROUP_SOCKOPT,
 	BPF_PROG_TYPE_TRACING,
 	BPF_PROG_TYPE_STRUCT_OPS,
+	BPF_PROG_TYPE_LSM,
 };
 
 enum bpf_attach_type {
@@ -205,6 +206,7 @@ enum bpf_attach_type {
 	BPF_TRACE_RAW_TP,
 	BPF_TRACE_FENTRY,
 	BPF_TRACE_FEXIT,
+	BPF_LSM_MAC,
 	__MAX_BPF_ATTACH_TYPE
 };
 
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 8cc992bc532a..2314889369cc 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -107,6 +107,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
 	case BPF_PROG_TYPE_TRACING:
 	case BPF_PROG_TYPE_STRUCT_OPS:
+	case BPF_PROG_TYPE_LSM:
 	default:
 		break;
 	}
-- 
2.20.1


^ permalink raw reply related

* [PATCH bpf-next v2 05/10] bpf: lsm: BTF API for LSM hooks
From: KP Singh @ 2020-01-15 17:13 UTC (permalink / raw)
  To: linux-kernel, bpf, linux-security-module
  Cc: Alexei Starovoitov, Daniel Borkmann, James Morris, Kees Cook,
	Thomas Garnier, Michael Halcrow, Paul Turner, Brendan Gregg,
	Jann Horn, Matthew Garrett, Christian Brauner,
	Mickaël Salaün, Florent Revest, Brendan Jackman,
	Martin KaFai Lau, Song Liu, Yonghong Song, Serge E. Hallyn,
	Mauro Carvalho Chehab, David S. Miller, Greg Kroah-Hartman,
	Nicolas Ferre, Stanislav Fomichev, Quentin Monnet, Andrey Ignatov,
	Joe Stringer
In-Reply-To: <20200115171333.28811-1-kpsingh@chromium.org>

From: KP Singh <kpsingh@google.com>

The BTF API provides information required by the BPF verifier to
attach eBPF programs to the LSM hooks by using the BTF information of
two types:

- struct security_hook_heads: This type provides the offset which
  a new dynamically allocated security hook must be attached to.
- union security_list_options: This provides the information about the
  function prototype required by the hook.

When the program is loaded:

- The verifier receives the index of a member in struct
  security_hook_heads to which a program must be attached as
  prog->aux->lsm_hook_index. The index is one-based for better
  verification.
- bpf_lsm_type_by_index is used to determine the func_proto of
  the LSM hook and updates prog->aux->attach_func_proto
- bpf_lsm_head_by_index is used to determine the hlist_head to which
  the BPF program must be attached.

Signed-off-by: KP Singh <kpsingh@google.com>
---
 include/linux/bpf_lsm.h |  12 +++++
 security/bpf/Kconfig    |   1 +
 security/bpf/hooks.c    | 104 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 117 insertions(+)

diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index 9883cf25241c..a9b4f7b41c65 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -19,6 +19,8 @@ extern struct security_hook_heads bpf_lsm_hook_heads;
 
 int bpf_lsm_srcu_read_lock(void);
 void bpf_lsm_srcu_read_unlock(int idx);
+const struct btf_type *bpf_lsm_type_by_index(struct btf *btf, u32 offset);
+const struct btf_member *bpf_lsm_head_by_index(struct btf *btf, u32 id);
 
 #define CALL_BPF_LSM_VOID_HOOKS(FUNC, ...)			\
 	do {							\
@@ -65,6 +67,16 @@ static inline int bpf_lsm_srcu_read_lock(void)
 	return 0;
 }
 static inline void bpf_lsm_srcu_read_unlock(int idx) {}
+static inline const struct btf_type *bpf_lsm_type_by_index(
+	struct btf *btf, u32 index)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+static inline const struct btf_member *bpf_lsm_head_by_index(
+	struct btf *btf, u32 id)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
 
 #endif /* CONFIG_SECURITY_BPF */
 
diff --git a/security/bpf/Kconfig b/security/bpf/Kconfig
index 595e4ad597ae..9438d899b618 100644
--- a/security/bpf/Kconfig
+++ b/security/bpf/Kconfig
@@ -7,6 +7,7 @@ config SECURITY_BPF
 	depends on SECURITY
 	depends on BPF_SYSCALL
 	depends on SRCU
+	depends on DEBUG_INFO_BTF
 	help
 	  This enables instrumentation of the security hooks with
 	  eBPF programs.
diff --git a/security/bpf/hooks.c b/security/bpf/hooks.c
index b123d9cb4cd4..82725611693d 100644
--- a/security/bpf/hooks.c
+++ b/security/bpf/hooks.c
@@ -5,6 +5,8 @@
  */
 
 #include <linux/bpf_lsm.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
 #include <linux/srcu.h>
 
 DEFINE_STATIC_SRCU(security_hook_srcu);
@@ -18,3 +20,105 @@ void bpf_lsm_srcu_read_unlock(int idx)
 {
 	return srcu_read_unlock(&security_hook_srcu, idx);
 }
+
+static inline int validate_hlist_head(struct btf *btf, u32 type_id)
+{
+	s32 hlist_id;
+
+	hlist_id = btf_find_by_name_kind(btf, "hlist_head", BTF_KIND_STRUCT);
+	if (hlist_id < 0 || hlist_id != type_id)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* Find the BTF representation of the security_hook_heads member for a member
+ * with a given index in struct security_hook_heads.
+ */
+const struct btf_member *bpf_lsm_head_by_index(struct btf *btf, u32 index)
+{
+	const struct btf_member *member;
+	const struct btf_type *t;
+	u32 off, i;
+	int ret;
+
+	t = btf_type_by_name_kind(btf, "security_hook_heads", BTF_KIND_STRUCT);
+	if (WARN_ON_ONCE(IS_ERR(t)))
+		return ERR_CAST(t);
+
+	for_each_member(i, t, member) {
+		/* We've found the id requested and need to check the
+		 * the following:
+		 *
+		 * - Is it at a valid alignment for struct hlist_head?
+		 *
+		 * - Is it a valid hlist_head struct?
+		 */
+		if (index == i) {
+			off = btf_member_bit_offset(t, member);
+			if (off % 8)
+				/* valid c code cannot generate such btf */
+				return ERR_PTR(-EINVAL);
+			off /= 8;
+
+			if (off % __alignof__(struct hlist_head))
+				return ERR_PTR(-EINVAL);
+
+			ret = validate_hlist_head(btf, member->type);
+			if (ret < 0)
+				return ERR_PTR(ret);
+
+			return member;
+		}
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+
+/* Given an index of a member in security_hook_heads return the
+ * corresponding type for the LSM hook. The members of the union
+ * security_list_options have the same name as the security_hook_heads which
+ * is ensured by the LSM_HOOK_INIT macro defined in include/linux/lsm_hooks.h
+ */
+const struct btf_type *bpf_lsm_type_by_index(struct btf *btf, u32 index)
+{
+	const struct btf_member *member, *hook_head = NULL;
+	const struct btf_type *t, *hook_type = NULL;
+	u32 i;
+
+	hook_head = bpf_lsm_head_by_index(btf, index);
+	if (IS_ERR(hook_head))
+		return ERR_PTR(PTR_ERR(hook_head));
+
+	t = btf_type_by_name_kind(btf, "security_list_options", BTF_KIND_UNION);
+	if (WARN_ON_ONCE(IS_ERR(t)))
+		return ERR_CAST(t);
+
+	for_each_member(i, t, member) {
+		if (hook_head->name_off == member->name_off) {
+			/* There should be only one member with the same name
+			 * as the LSM hook. This should never really happen
+			 * and either indicates malformed BTF or someone trying
+			 * trick the LSM.
+			 */
+			if (WARN_ON(hook_type))
+				return ERR_PTR(-EINVAL);
+
+			hook_type = btf_type_by_id(btf, member->type);
+			if (unlikely(!hook_type))
+				return ERR_PTR(-EINVAL);
+
+			if (!btf_type_is_ptr(hook_type))
+				return ERR_PTR(-EINVAL);
+		}
+	}
+
+	if (!hook_type)
+		return ERR_PTR(-ENOENT);
+
+	t = btf_type_by_id(btf, hook_type->type);
+	if (unlikely(!t))
+		return ERR_PTR(-EINVAL);
+
+	return t;
+}
-- 
2.20.1


^ permalink raw reply related

* [PATCH bpf-next v2 09/10] bpf: lsm: Add selftests for BPF_PROG_TYPE_LSM
From: KP Singh @ 2020-01-15 17:13 UTC (permalink / raw)
  To: linux-kernel, bpf, linux-security-module
  Cc: Alexei Starovoitov, Daniel Borkmann, James Morris, Kees Cook,
	Thomas Garnier, Michael Halcrow, Paul Turner, Brendan Gregg,
	Jann Horn, Matthew Garrett, Christian Brauner,
	Mickaël Salaün, Florent Revest, Brendan Jackman,
	Martin KaFai Lau, Song Liu, Yonghong Song, Serge E. Hallyn,
	Mauro Carvalho Chehab, David S. Miller, Greg Kroah-Hartman,
	Nicolas Ferre, Stanislav Fomichev, Quentin Monnet, Andrey Ignatov,
	Joe Stringer
In-Reply-To: <20200115171333.28811-1-kpsingh@chromium.org>

From: KP Singh <kpsingh@google.com>

* Load a BPF program that audits mprotect calls
* Attach the program to the "file_mprotect" LSM hook
* Initialize the perf events buffer and poll for audit events
* Do an mprotect on some memory allocated on the heap
* Verify if the audit event was received

Signed-off-by: KP Singh <kpsingh@google.com>
---
 MAINTAINERS                                   |  2 +
 tools/testing/selftests/bpf/lsm_helpers.h     | 19 ++++++
 .../bpf/prog_tests/lsm_mprotect_audit.c       | 58 +++++++++++++++++++
 .../selftests/bpf/progs/lsm_mprotect_audit.c  | 48 +++++++++++++++
 4 files changed, 127 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/lsm_helpers.h
 create mode 100644 tools/testing/selftests/bpf/prog_tests/lsm_mprotect_audit.c
 create mode 100644 tools/testing/selftests/bpf/progs/lsm_mprotect_audit.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 02d7e05e9b75..5d553c2e7452 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3210,6 +3210,8 @@ L:	bpf@vger.kernel.org
 S:	Maintained
 F:	security/bpf/
 F:	include/linux/bpf_lsm.h
+F:	tools/testing/selftests/bpf/progs/lsm_mprotect_audit.c
+F:	tools/testing/selftests/bpf/prog_tests/lsm_mprotect_audit.c
 
 BROADCOM B44 10/100 ETHERNET DRIVER
 M:	Michael Chan <michael.chan@broadcom.com>
diff --git a/tools/testing/selftests/bpf/lsm_helpers.h b/tools/testing/selftests/bpf/lsm_helpers.h
new file mode 100644
index 000000000000..8bad08f77654
--- /dev/null
+++ b/tools/testing/selftests/bpf/lsm_helpers.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright 2019 Google LLC.
+ */
+#ifndef _LSM_HELPERS_H
+#define _LSM_HELPERS_H
+
+struct lsm_mprotect_audit_result {
+	/* This ensures that the LSM Hook only monitors the PID requested
+	 * by the loader
+	 */
+	__u32 monitored_pid;
+	/* The number of mprotect calls for the monitored PID.
+	 */
+	__u32 mprotect_count;
+};
+
+#endif /* _LSM_HELPERS_H */
diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_mprotect_audit.c b/tools/testing/selftests/bpf/prog_tests/lsm_mprotect_audit.c
new file mode 100644
index 000000000000..ff90b874eafc
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lsm_mprotect_audit.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2019 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <malloc.h>
+#include "lsm_helpers.h"
+#include "lsm_mprotect_audit.skel.h"
+
+int heap_mprotect(void)
+{
+	void *buf;
+	long sz;
+
+	sz = sysconf(_SC_PAGESIZE);
+	if (sz < 0)
+		return sz;
+
+	buf = memalign(sz, 2 * sz);
+	if (buf == NULL)
+		return -ENOMEM;
+
+	return mprotect(buf, sz, PROT_READ | PROT_EXEC);
+}
+
+void test_lsm_mprotect_audit(void)
+{
+	struct lsm_mprotect_audit_result *result;
+	struct lsm_mprotect_audit *skel = NULL;
+	int err, duration = 0;
+
+	skel = lsm_mprotect_audit__open_and_load();
+	if (CHECK(!skel, "skel_load", "lsm_mprotect_audit skeleton failed\n"))
+		goto close_prog;
+
+	err = lsm_mprotect_audit__attach(skel);
+	if (CHECK(err, "attach", "lsm_mprotect_audit attach failed: %d\n", err))
+		goto close_prog;
+
+	result = &skel->bss->result;
+	result->monitored_pid = getpid();
+
+	err = heap_mprotect();
+	if (CHECK(err < 0, "heap_mprotect", "err %d errno %d\n", err, errno))
+		goto close_prog;
+
+	/* Make sure mprotect_audit program was triggered
+	 * and detected an mprotect on the heap.
+	 */
+	CHECK_FAIL(result->mprotect_count != 1);
+
+close_prog:
+	lsm_mprotect_audit__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/lsm_mprotect_audit.c b/tools/testing/selftests/bpf/progs/lsm_mprotect_audit.c
new file mode 100644
index 000000000000..f4569b418616
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lsm_mprotect_audit.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2019 Google LLC.
+ */
+
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include "bpf_helpers.h"
+#include "bpf_trace_helpers.h"
+#include "lsm_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct lsm_mprotect_audit_result result = {
+	.mprotect_count = 0,
+	.monitored_pid = 0,
+};
+
+/*
+ * Define some of the structs used in the BPF program.
+ * Only the field names and their sizes need to be the
+ * same as the kernel type, the order is irrelevant.
+ */
+struct mm_struct {
+	unsigned long start_brk, brk;
+} __attribute__((preserve_access_index));
+
+struct vm_area_struct {
+	unsigned long vm_start, vm_end;
+	struct mm_struct *vm_mm;
+} __attribute__((preserve_access_index));
+
+SEC("lsm/file_mprotect")
+int BPF_PROG(mprotect_audit, struct vm_area_struct *vma,
+	     unsigned long reqprot, unsigned long prot)
+{
+	__u32 pid = bpf_get_current_pid_tgid();
+	int is_heap = 0;
+
+	is_heap = (vma->vm_start >= vma->vm_mm->start_brk &&
+		   vma->vm_end <= vma->vm_mm->brk);
+
+	if (is_heap && result.monitored_pid == pid)
+		result.mprotect_count++;
+
+	return 0;
+}
-- 
2.20.1


^ permalink raw reply related

* [PATCH bpf-next v2 10/10] bpf: lsm: Add Documentation
From: KP Singh @ 2020-01-15 17:13 UTC (permalink / raw)
  To: linux-kernel, bpf, linux-security-module
  Cc: Alexei Starovoitov, Daniel Borkmann, James Morris, Kees Cook,
	Thomas Garnier, Michael Halcrow, Paul Turner, Brendan Gregg,
	Jann Horn, Matthew Garrett, Christian Brauner,
	Mickaël Salaün, Florent Revest, Brendan Jackman,
	Martin KaFai Lau, Song Liu, Yonghong Song, Serge E. Hallyn,
	Mauro Carvalho Chehab, David S. Miller, Greg Kroah-Hartman,
	Nicolas Ferre, Stanislav Fomichev, Quentin Monnet, Andrey Ignatov,
	Joe Stringer
In-Reply-To: <20200115171333.28811-1-kpsingh@chromium.org>

From: KP Singh <kpsingh@google.com>

Document how eBPF programs (BPF_PROG_TYPE_LSM) can be loaded and
attached (BPF_LSM_MAC) to the LSM hooks.

Signed-off-by: KP Singh <kpsingh@google.com>
---
 Documentation/security/bpf.rst   | 150 +++++++++++++++++++++++++++++++
 Documentation/security/index.rst |   1 +
 MAINTAINERS                      |   1 +
 3 files changed, 152 insertions(+)
 create mode 100644 Documentation/security/bpf.rst

diff --git a/Documentation/security/bpf.rst b/Documentation/security/bpf.rst
new file mode 100644
index 000000000000..4d115c07c370
--- /dev/null
+++ b/Documentation/security/bpf.rst
@@ -0,0 +1,150 @@
+.. SPDX-License-Identifier: GPL-2.0+
+.. Copyright 2019 Google LLC.
+
+==========================
+eBPF Linux Security Module
+==========================
+
+This LSM allows runtime instrumentation of the LSM hooks by privileged users to
+implement system-wide MAC (Mandatory Access Control) and Audit policies using
+eBPF. The LSM is privileged and stackable and requires both ``CAP_SYS_ADMIN``
+and ``CAP_MAC_ADMIN`` for the loading of BPF programs and modification of MAC
+policies respectively.
+
+eBPF Programs
+==============
+
+`eBPF (extended BPF) <https://cilium.readthedocs.io/en/latest/bpf>`_ is a
+virtual machine-like construct in the Linux Kernel allowing the execution of
+verifiable, just-in-time compiled byte code at various points in the Kernel.
+
+The eBPF LSM adds a new type, ``BPF_PROG_TYPE_LSM``, of eBPF programs which
+have the following characteristics:
+
+	* Multiple eBPF programs can be attached to the same LSM hook
+	* The programs are always run after the static hooks (i.e. the ones
+	  registered by SELinux, AppArmor, Smack etc.)
+	* LSM hooks can return an ``-EPERM`` to indicate the decision of the
+	  MAC policy being enforced or simply be used for auditing
+	* If ``CONFIG_SECURITY_BPF_ENFORCE`` is enabled and a non-zero error
+	  code is returned from the BPF program, no further BPF programs for the hook are executed
+	* Allowing the eBPF programs to be attached to all the LSM hooks by
+	  making :doc:`/bpf/btf` type information available for all LSM hooks
+	  and allowing the BPF verifier to perform runtime relocations and
+	  validation on the programs
+
+Structure
+---------
+
+The example shows an eBPF program that can be attached to the ``file_mprotect``
+LSM hook:
+
+.. c:function:: int file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot);
+
+eBPF programs that use :doc:`/bpf/btf` do not need to include kernel headers
+for accessing information from the attached eBPF program's context. They can
+simply declare the structures in the eBPF program and only specify the fields
+that need to be accessed.
+
+.. code-block:: c
+
+	struct mm_struct {
+		unsigned long start_brk, brk, start_stack;
+	} __attribute__((preserve_access_index));
+
+	struct vm_area_struct {
+		unsigned long start_brk, brk, start_stack;
+		unsigned long vm_start, vm_end;
+		struct mm_struct *vm_mm;
+	} __attribute__((preserve_access_index));
+
+
+.. note:: Only the size and the names of the fields must match the type in the
+	  kernel and the order of the fields is irrelevant.
+
+The eBPF programs can be declared using macros similar to the ``BPF_TRACE_<N>``
+macros defined in `tools/testing/selftests/bpf/bpf_trace_helpers.h`_. In this
+example:
+
+	* The LSM hook takes 3 args so we use ``BPF_TRACE_3``
+	* ``"lsm/file_mprotect"`` indicates the LSM hook that the program must
+	  be attached to
+	* ``mprotect_audit`` is the name of the eBPF program
+
+.. code-block:: c
+
+        SEC("lsm/file_mprotect")
+        int BPF_PROG(mprotect_audit, struct vm_area_struct *vma,
+                     unsigned long reqprot, unsigned long prot)
+	{
+		int is_heap;
+
+		is_heap = (vma->vm_start >= vma->vm_mm->start_brk &&
+			   vma->vm_end <= vma->vm_mm->brk);
+
+		/*
+		 * Return an -EPERM or write information to the perf events buffer
+		 * for auditing
+		 */
+	}
+
+The ``__attribute__((preserve_access_index))`` is a clang feature that allows
+the BPF verifier to update the offsets for the access at runtime using the
+:doc:`/bpf/btf` information. Since the BPF verifier is aware of the types, it
+also validates all the accesses made to the various types in the eBPF program.
+
+Loading
+-------
+
+eBPP programs can be loaded with the :manpage:`bpf(2)` syscall's
+``BPF_PROG_LOAD`` operation or more simply by using the the libbpf helper
+``bpf_prog_load_xattr``:
+
+
+.. code-block:: c
+
+	struct bpf_prog_load_attr attr = {
+		.file = "./prog.o",
+	};
+	struct bpf_object *prog_obj;
+	struct bpf_program *prog;
+	int prog_fd;
+
+	bpf_prog_load_xattr(&attr, &prog_obj, &prog_fd);
+
+Attachment to LSM Hooks
+-----------------------
+
+The LSM allows attachment of eBPF programs as LSM hooks using :manpage:`bpf(2)`
+syscall's ``BPF_PROG_ATTACH`` operation or more simply by
+using the libbpf helper ``bpf_program__attach_lsm``. In the code shown below
+``prog`` is the eBPF program loaded using ``BPF_PROG_LOAD``:
+
+.. code-block:: c
+
+	struct bpf_link *link;
+
+	link = bpf_program__attach_lsm(prog);
+
+The program can be detached from the LSM hook by *destroying* the ``link``
+link returned by ``bpf_program__attach_lsm``:
+
+.. code-block:: c
+
+	link->destroy();
+
+Examples
+--------
+
+An example eBPF program can be found in
+`tools/testing/selftests/bpf/progs/lsm_mprotect_audit.c`_ and the corresponding
+userspace code in
+`tools/testing/selftests/bpf/prog_tests/lsm_mprotect_audit.c`_
+
+.. Links
+.. _tools/testing/selftests/bpf/bpf_trace_helpers.h:
+   https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/tools/testing/selftests/selftests/bpf/bpf_trace_helpers.h
+.. _tools/testing/selftests/bpf/progs/lsm_mprotect_audit.c:
+   https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/tools/testing/selftests/bpf/progs/lsm_mprotect_audit.c
+.. _tools/testing/selftests/bpf/prog_tests/lsm_mprotect_audit.c:
+   https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/tools/testing/selftests/bpf/prog_tests/lsm_mprotect_audit.c
diff --git a/Documentation/security/index.rst b/Documentation/security/index.rst
index fc503dd689a7..844463df4547 100644
--- a/Documentation/security/index.rst
+++ b/Documentation/security/index.rst
@@ -5,6 +5,7 @@ Security Documentation
 .. toctree::
    :maxdepth: 1
 
+   bpf
    credentials
    IMA-templates
    keys/index
diff --git a/MAINTAINERS b/MAINTAINERS
index 5d553c2e7452..dd4c4ee151b0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3212,6 +3212,7 @@ F:	security/bpf/
 F:	include/linux/bpf_lsm.h
 F:	tools/testing/selftests/bpf/progs/lsm_mprotect_audit.c
 F:	tools/testing/selftests/bpf/prog_tests/lsm_mprotect_audit.c
+F:	Documentation/security/bpf.rst
 
 BROADCOM B44 10/100 ETHERNET DRIVER
 M:	Michael Chan <michael.chan@broadcom.com>
-- 
2.20.1


^ permalink raw reply related

* [PATCH bpf-next v2 08/10] tools/libbpf: Add support for BPF_PROG_TYPE_LSM
From: KP Singh @ 2020-01-15 17:13 UTC (permalink / raw)
  To: linux-kernel, bpf, linux-security-module
  Cc: Alexei Starovoitov, Daniel Borkmann, James Morris, Kees Cook,
	Thomas Garnier, Michael Halcrow, Paul Turner, Brendan Gregg,
	Jann Horn, Matthew Garrett, Christian Brauner,
	Mickaël Salaün, Florent Revest, Brendan Jackman,
	Martin KaFai Lau, Song Liu, Yonghong Song, Serge E. Hallyn,
	Mauro Carvalho Chehab, David S. Miller, Greg Kroah-Hartman,
	Nicolas Ferre, Stanislav Fomichev, Quentin Monnet, Andrey Ignatov,
	Joe Stringer
In-Reply-To: <20200115171333.28811-1-kpsingh@chromium.org>

From: KP Singh <kpsingh@google.com>

* Add functionality in libbpf to attach eBPF program to LSM hooks
* Lookup the index of the LSM hook in security_hook_heads and pass it in
  attr->lsm_hook_index

Signed-off-by: KP Singh <kpsingh@google.com>
---
 tools/lib/bpf/bpf.c      |   6 +-
 tools/lib/bpf/bpf.h      |   1 +
 tools/lib/bpf/libbpf.c   | 143 ++++++++++++++++++++++++++++++++++-----
 tools/lib/bpf/libbpf.h   |   4 ++
 tools/lib/bpf/libbpf.map |   3 +
 5 files changed, 138 insertions(+), 19 deletions(-)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 500afe478e94..b138d98ff862 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -235,7 +235,10 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 	memset(&attr, 0, sizeof(attr));
 	attr.prog_type = load_attr->prog_type;
 	attr.expected_attach_type = load_attr->expected_attach_type;
-	if (attr.prog_type == BPF_PROG_TYPE_STRUCT_OPS) {
+
+	if (attr.prog_type == BPF_PROG_TYPE_LSM) {
+		attr.lsm_hook_index = load_attr->lsm_hook_index;
+	} else if (attr.prog_type == BPF_PROG_TYPE_STRUCT_OPS) {
 		attr.attach_btf_id = load_attr->attach_btf_id;
 	} else if (attr.prog_type == BPF_PROG_TYPE_TRACING) {
 		attr.attach_btf_id = load_attr->attach_btf_id;
@@ -244,6 +247,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 		attr.prog_ifindex = load_attr->prog_ifindex;
 		attr.kern_version = load_attr->kern_version;
 	}
+
 	attr.insn_cnt = (__u32)load_attr->insns_cnt;
 	attr.insns = ptr_to_u64(load_attr->insns);
 	attr.license = ptr_to_u64(load_attr->license);
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 56341d117e5b..54458a102939 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -86,6 +86,7 @@ struct bpf_load_program_attr {
 		__u32 prog_ifindex;
 		__u32 attach_btf_id;
 	};
+	__u32 lsm_hook_index;
 	__u32 prog_btf_fd;
 	__u32 func_info_rec_size;
 	const void *func_info;
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 0c229f00a67e..60737559a9a6 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -229,6 +229,7 @@ struct bpf_program {
 	enum bpf_attach_type expected_attach_type;
 	__u32 attach_btf_id;
 	__u32 attach_prog_fd;
+	__u32 lsm_hook_index;
 	void *func_info;
 	__u32 func_info_rec_size;
 	__u32 func_info_cnt;
@@ -4886,7 +4887,10 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	load_attr.insns = insns;
 	load_attr.insns_cnt = insns_cnt;
 	load_attr.license = license;
-	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
+
+	if (prog->type == BPF_PROG_TYPE_LSM) {
+		load_attr.lsm_hook_index = prog->lsm_hook_index;
+	} else if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
 		load_attr.attach_btf_id = prog->attach_btf_id;
 	} else if (prog->type == BPF_PROG_TYPE_TRACING) {
 		load_attr.attach_prog_fd = prog->attach_prog_fd;
@@ -4895,6 +4899,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 		load_attr.kern_version = kern_version;
 		load_attr.prog_ifindex = prog->prog_ifindex;
 	}
+
 	/* if .BTF.ext was loaded, kernel supports associated BTF for prog */
 	if (prog->obj->btf_ext)
 		btf_fd = bpf_object__btf_fd(prog->obj);
@@ -4967,9 +4972,11 @@ static int libbpf_find_attach_btf_id(const char *name,
 				     enum bpf_attach_type attach_type,
 				     __u32 attach_prog_fd);
 
+static __s32 btf__find_lsm_hook_index(const char *name);
+
 int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
 {
-	int err = 0, fd, i, btf_id;
+	int err = 0, fd, i, btf_id, index;
 
 	if (prog->type == BPF_PROG_TYPE_TRACING) {
 		btf_id = libbpf_find_attach_btf_id(prog->section_name,
@@ -4980,6 +4987,13 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
 		prog->attach_btf_id = btf_id;
 	}
 
+	if (prog->type == BPF_PROG_TYPE_LSM) {
+		index = btf__find_lsm_hook_index(prog->section_name);
+		if (index < 0)
+			return index;
+		prog->lsm_hook_index = index;
+	}
+
 	if (prog->instances.nr < 0 || !prog->instances.fds) {
 		if (prog->preprocessor) {
 			pr_warn("Internal error: can't load program '%s'\n",
@@ -6207,6 +6221,7 @@ bool bpf_program__is_##NAME(const struct bpf_program *prog)	\
 }								\
 
 BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
+BPF_PROG_TYPE_FNS(lsm, BPF_PROG_TYPE_LSM);
 BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
 BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
 BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
@@ -6272,6 +6287,8 @@ static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
 				      struct bpf_program *prog);
 static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
 				     struct bpf_program *prog);
+static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
+				   struct bpf_program *prog);
 
 struct bpf_sec_def {
 	const char *sec;
@@ -6315,12 +6332,17 @@ static const struct bpf_sec_def section_defs[] = {
 		.expected_attach_type = BPF_TRACE_FEXIT,
 		.is_attach_btf = true,
 		.attach_fn = attach_trace),
+	SEC_DEF("lsm/", LSM,
+		.expected_attach_type = BPF_LSM_MAC,
+		.attach_fn = attach_lsm),
 	BPF_PROG_SEC("xdp",			BPF_PROG_TYPE_XDP),
 	BPF_PROG_SEC("perf_event",		BPF_PROG_TYPE_PERF_EVENT),
 	BPF_PROG_SEC("lwt_in",			BPF_PROG_TYPE_LWT_IN),
 	BPF_PROG_SEC("lwt_out",			BPF_PROG_TYPE_LWT_OUT),
 	BPF_PROG_SEC("lwt_xmit",		BPF_PROG_TYPE_LWT_XMIT),
 	BPF_PROG_SEC("lwt_seg6local",		BPF_PROG_TYPE_LWT_SEG6LOCAL),
+	BPF_PROG_BTF("lsm/",			BPF_PROG_TYPE_LSM,
+						BPF_LSM_MAC),
 	BPF_APROG_SEC("cgroup_skb/ingress",	BPF_PROG_TYPE_CGROUP_SKB,
 						BPF_CGROUP_INET_INGRESS),
 	BPF_APROG_SEC("cgroup_skb/egress",	BPF_PROG_TYPE_CGROUP_SKB,
@@ -6576,32 +6598,80 @@ static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj,
 	return -EINVAL;
 }
 
-#define BTF_PREFIX "btf_trace_"
+#define BTF_TRACE_PREFIX "btf_trace_"
+
+static inline int btf__find_by_prefix_kind(struct btf *btf, const char *name,
+					   const char *prefix, __u32 kind)
+{
+	char btf_type_name[128];
+
+	snprintf(btf_type_name, sizeof(btf_type_name), "%s%s", prefix, name);
+	return btf__find_by_name_kind(btf, btf_type_name, kind);
+}
+
+static __s32 btf__find_lsm_hook_index(const char *name)
+{
+	struct btf *btf = bpf_find_kernel_btf();
+	const struct bpf_sec_def *sec_def;
+	const struct btf_type *hl_type;
+	struct btf_member *m;
+	__u16 vlen;
+	__s32 hl_id;
+	int j;
+
+	sec_def = find_sec_def(name);
+	if (!sec_def)
+		return -ESRCH;
+
+	name += sec_def->len;
+
+	hl_id = btf__find_by_name_kind(btf, "security_hook_heads",
+				       BTF_KIND_STRUCT);
+	if (hl_id < 0) {
+		pr_debug("security_hook_heads cannot be found in BTF\n");
+		return hl_id;
+	}
+
+	hl_type = btf__type_by_id(btf, hl_id);
+	if (!hl_type) {
+		pr_warn("Can't find type for security_hook_heads: %u\n", hl_id);
+		return -EINVAL;
+	}
+
+	m = btf_members(hl_type);
+	vlen = btf_vlen(hl_type);
+
+	for (j = 0; j < vlen; j++) {
+		if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
+			return j + 1;
+		m++;
+	}
+
+	pr_warn("Cannot find offset for %s in security_hook_heads\n", name);
+	return -ENOENT;
+}
+
 int libbpf_find_vmlinux_btf_id(const char *name,
 			       enum bpf_attach_type attach_type)
 {
 	struct btf *btf = bpf_find_kernel_btf();
-	char raw_tp_btf[128] = BTF_PREFIX;
-	char *dst = raw_tp_btf + sizeof(BTF_PREFIX) - 1;
-	const char *btf_name;
 	int err = -EINVAL;
-	__u32 kind;
 
 	if (IS_ERR(btf)) {
 		pr_warn("vmlinux BTF is not found\n");
 		return -EINVAL;
 	}
 
-	if (attach_type == BPF_TRACE_RAW_TP) {
-		/* prepend "btf_trace_" prefix per kernel convention */
-		strncat(dst, name, sizeof(raw_tp_btf) - sizeof(BTF_PREFIX));
-		btf_name = raw_tp_btf;
-		kind = BTF_KIND_TYPEDEF;
-	} else {
-		btf_name = name;
-		kind = BTF_KIND_FUNC;
-	}
-	err = btf__find_by_name_kind(btf, btf_name, kind);
+	if (attach_type == BPF_TRACE_RAW_TP)
+		err = btf__find_by_prefix_kind(btf, name, BTF_TRACE_PREFIX,
+					       BTF_KIND_TYPEDEF);
+	else
+		err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
+
+	/* err = 0 means void / UNKNOWN which is treated as an error */
+	if (err == 0)
+		err = -EINVAL;
+
 	btf__free(btf);
 	return err;
 }
@@ -6630,7 +6700,7 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
 	}
 	err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
 	btf__free(btf);
-	if (err <= 0) {
+	if (err < 0) {
 		pr_warn("%s is not found in prog's BTF\n", name);
 		goto out;
 	}
@@ -7395,6 +7465,43 @@ static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
 	return bpf_program__attach_trace(prog);
 }
 
+struct bpf_link *bpf_program__attach_lsm(struct bpf_program *prog)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link_fd *link;
+	int prog_fd, pfd;
+
+	prog_fd = bpf_program__fd(prog);
+	if (prog_fd < 0) {
+		pr_warn("program '%s': can't attach before loaded\n",
+			bpf_program__title(prog, false));
+		return ERR_PTR(-EINVAL);
+	}
+
+	link = calloc(1, sizeof(*link));
+	if (!link)
+		return ERR_PTR(-ENOMEM);
+	link->link.detach = &bpf_link__detach_fd;
+
+	pfd = bpf_prog_attach(prog_fd, 0, BPF_LSM_MAC,
+			      BPF_F_ALLOW_OVERRIDE);
+	if (pfd < 0) {
+		pfd = -errno;
+		pr_warn("program '%s': failed to attach: %s\n",
+			bpf_program__title(prog, false),
+			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(pfd);
+	}
+	link->fd = pfd;
+	return (struct bpf_link *)link;
+}
+
+static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
+				   struct bpf_program *prog)
+{
+	return bpf_program__attach_lsm(prog);
+}
+
 struct bpf_link *bpf_program__attach(struct bpf_program *prog)
 {
 	const struct bpf_sec_def *sec_def;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 01639f9a1062..a97e709a29e6 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -241,6 +241,8 @@ LIBBPF_API struct bpf_link *
 bpf_program__attach_trace(struct bpf_program *prog);
 struct bpf_map;
 LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_lsm(struct bpf_program *prog);
 struct bpf_insn;
 
 /*
@@ -318,6 +320,7 @@ LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_lsm(struct bpf_program *prog);
 
 LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog);
 LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
@@ -339,6 +342,7 @@ LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_lsm(const struct bpf_program *prog);
 
 /*
  * No need for __attribute__((packed)), all members of 'bpf_map_def'
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index a19f04e6e3d9..3da0452ce679 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -227,4 +227,7 @@ LIBBPF_0.0.7 {
 		bpf_program__is_struct_ops;
 		bpf_program__set_struct_ops;
 		btf__align_of;
+		bpf_program__is_lsm;
+		bpf_program__set_lsm;
+		bpf_program__attach_lsm;
 } LIBBPF_0.0.6;
-- 
2.20.1


^ permalink raw reply related

* [PATCH bpf-next v2 06/10] bpf: lsm: Implement attach, detach and execution
From: KP Singh @ 2020-01-15 17:13 UTC (permalink / raw)
  To: linux-kernel, bpf, linux-security-module
  Cc: Alexei Starovoitov, Daniel Borkmann, James Morris, Kees Cook,
	Thomas Garnier, Michael Halcrow, Paul Turner, Brendan Gregg,
	Jann Horn, Matthew Garrett, Christian Brauner,
	Mickaël Salaün, Florent Revest, Brendan Jackman,
	Martin KaFai Lau, Song Liu, Yonghong Song, Serge E. Hallyn,
	Mauro Carvalho Chehab, David S. Miller, Greg Kroah-Hartman,
	Nicolas Ferre, Stanislav Fomichev, Quentin Monnet, Andrey Ignatov,
	Joe Stringer
In-Reply-To: <20200115171333.28811-1-kpsingh@chromium.org>

From: KP Singh <kpsingh@google.com>

JITed BPF programs are used by the BPF LSM as dynamically allocated
security hooks. arch_bpf_prepare_trampoline handles the
arch_bpf_prepare_trampoline generates code to handle conversion of the
signature of the hook to the BPF context and allows the BPF program to
be called directly as a C function.

The following permissions are required to attach a program to a hook:

- CAP_SYS_ADMIN to load the program
- CAP_MAC_ADMIN to attach it (i.e. to update the security policy)

When the program is loaded (BPF_PROG_LOAD), the verifier receives the
index (one-based) of the member in the security_hook_heads struct in the
prog->aux->lsm_hook_index and uses the BTF API provided by the LSM to:

- Populate the name of the hook in prog->aux->attach_func_name and
  the prototype in prog->aux->attach_func_proto.
- Verify if the offset is valid for a type struct hlist_head.
- The program is verified for accesses based on the attach_func_proto
  similar to raw_tp BPF programs.

When an attachment (BPF_PROG_ATTACH) is requested:

- The information required to set-up of a callback is populated in the
  struct bpf_lsm_list.
- A new callback and a bpf_lsm_hook is allocated and the address of
  the hook is set to the be the address of the allocated callback.
- The attachment returns an anonymous O_CLOEXEC fd which detaches the
  program on close.

Signed-off-by: KP Singh <kpsingh@google.com>
---
 include/linux/bpf.h            |   4 +
 include/linux/bpf_lsm.h        |  15 +++
 include/uapi/linux/bpf.h       |   4 +
 kernel/bpf/syscall.c           |  47 ++++++-
 kernel/bpf/verifier.c          |  74 +++++++++--
 security/bpf/Kconfig           |   1 +
 security/bpf/Makefile          |   2 +
 security/bpf/hooks.c           | 230 +++++++++++++++++++++++++++++++--
 security/bpf/include/bpf_lsm.h |  75 +++++++++++
 security/bpf/lsm.c             |  60 +++++++++
 security/bpf/ops.c             |   2 +
 tools/include/uapi/linux/bpf.h |   4 +
 12 files changed, 494 insertions(+), 24 deletions(-)
 create mode 100644 security/bpf/include/bpf_lsm.h

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index aed2bc39d72b..5ed4780c2091 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -599,6 +599,10 @@ struct bpf_prog_aux {
 	u32 func_cnt; /* used by non-func prog as the number of func progs */
 	u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
 	u32 attach_btf_id; /* in-kernel BTF type id to attach to */
+	/* Index (one-based) of the hlist_head in security_hook_heads to which
+	 * the program must be attached.
+	 */
+	u32 lsm_hook_index;
 	struct bpf_prog *linked_prog;
 	bool verifier_zext; /* Zero extensions has been inserted by verifier. */
 	bool offload_requested;
diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index a9b4f7b41c65..8c86672437f0 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -19,8 +19,11 @@ extern struct security_hook_heads bpf_lsm_hook_heads;
 
 int bpf_lsm_srcu_read_lock(void);
 void bpf_lsm_srcu_read_unlock(int idx);
+int bpf_lsm_verify_prog(const struct bpf_prog *prog);
 const struct btf_type *bpf_lsm_type_by_index(struct btf *btf, u32 offset);
 const struct btf_member *bpf_lsm_head_by_index(struct btf *btf, u32 id);
+int bpf_lsm_attach(struct bpf_prog *prog);
+int bpf_lsm_detach(struct bpf_prog *prog);
 
 #define CALL_BPF_LSM_VOID_HOOKS(FUNC, ...)			\
 	do {							\
@@ -67,6 +70,10 @@ static inline int bpf_lsm_srcu_read_lock(void)
 	return 0;
 }
 static inline void bpf_lsm_srcu_read_unlock(int idx) {}
+static inline int bpf_lsm_verify_prog(const struct bpf_prog *prog)
+{
+	return -EOPNOTSUPP;
+}
 static inline const struct btf_type *bpf_lsm_type_by_index(
 	struct btf *btf, u32 index)
 {
@@ -77,6 +84,14 @@ static inline const struct btf_member *bpf_lsm_head_by_index(
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
+static inline int bpf_lsm_attach(struct bpf_prog *prog)
+{
+	return -EOPNOTSUPP;
+}
+static inline int bpf_lsm_detach(struct bpf_prog *prog)
+{
+	return -EOPNOTSUPP;
+}
 
 #endif /* CONFIG_SECURITY_BPF */
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b6a725a8a21d..fa5513cb3332 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -448,6 +448,10 @@ union bpf_attr {
 		__u32		line_info_cnt;	/* number of bpf_line_info records */
 		__u32		attach_btf_id;	/* in-kernel BTF type id to attach to */
 		__u32		attach_prog_fd; /* 0 to attach to vmlinux */
+		/* Index (one-based) of the hlist_head in security_hook_heads to
+		 * which the program must be attached.
+		 */
+		__u32		lsm_hook_index;
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2945739618c9..a54eb1032fdd 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4,6 +4,7 @@
 #include <linux/bpf.h>
 #include <linux/bpf_trace.h>
 #include <linux/bpf_lirc.h>
+#include <linux/bpf_lsm.h>
 #include <linux/btf.h>
 #include <linux/syscalls.h>
 #include <linux/slab.h>
@@ -1751,7 +1752,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
 }
 
 /* last field in 'union bpf_attr' used by this command */
-#define	BPF_PROG_LOAD_LAST_FIELD attach_prog_fd
+#define	BPF_PROG_LOAD_LAST_FIELD lsm_hook_index
 
 static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 {
@@ -1805,6 +1806,10 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 
 	prog->expected_attach_type = attr->expected_attach_type;
 	prog->aux->attach_btf_id = attr->attach_btf_id;
+
+	if (type == BPF_PROG_TYPE_LSM)
+		prog->aux->lsm_hook_index = attr->lsm_hook_index;
+
 	if (attr->attach_prog_fd) {
 		struct bpf_prog *tgt_prog;
 
@@ -1970,6 +1975,44 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
 	return err;
 }
 
+static int bpf_lsm_prog_release(struct inode *inode, struct file *filp)
+{
+	struct bpf_prog *prog = filp->private_data;
+
+	WARN_ON_ONCE(bpf_lsm_detach(prog));
+	bpf_prog_put(prog);
+	return 0;
+}
+
+static const struct file_operations bpf_lsm_prog_fops = {
+	.release	= bpf_lsm_prog_release,
+	.read		= bpf_dummy_read,
+	.write		= bpf_dummy_write,
+};
+
+static int bpf_lsm_prog_attach(struct bpf_prog *prog)
+{
+	int ret;
+
+	if (prog->expected_attach_type != BPF_LSM_MAC)
+		return -EINVAL;
+
+	/* The attach increments the references to the program which is
+	 * decremented on detach as a part of bpf_lsm_hook_free.
+	 */
+	ret = bpf_lsm_attach(prog);
+	if (ret)
+		return ret;
+
+	ret = anon_inode_getfd("bpf-lsm-prog", &bpf_lsm_prog_fops,
+				prog, O_CLOEXEC);
+	if (ret < 0) {
+		bpf_lsm_detach(prog);
+		return ret;
+	}
+	return 0;
+}
+
 struct bpf_raw_tracepoint {
 	struct bpf_raw_event_map *btp;
 	struct bpf_prog *prog;
@@ -2186,7 +2229,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 		ret = lirc_prog_attach(attr, prog);
 		break;
 	case BPF_PROG_TYPE_LSM:
-		ret = -EINVAL;
+		ret = bpf_lsm_prog_attach(prog);
 		break;
 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
 		ret = skb_flow_dissector_bpf_prog_attach(attr, prog);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ca17dccc17ba..10f44c3b15ec 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -19,6 +19,7 @@
 #include <linux/sort.h>
 #include <linux/perf_event.h>
 #include <linux/ctype.h>
+#include <linux/bpf_lsm.h>
 
 #include "disasm.h"
 
@@ -6393,8 +6394,9 @@ static int check_return_code(struct bpf_verifier_env *env)
 	struct tnum range = tnum_range(0, 1);
 	int err;
 
-	/* The struct_ops func-ptr's return type could be "void" */
-	if (env->prog->type == BPF_PROG_TYPE_STRUCT_OPS &&
+	/* LSM and struct_ops func-ptr's return type could be "void" */
+	if ((env->prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
+	     env->prog->type == BPF_PROG_TYPE_LSM) &&
 	    !prog->aux->attach_func_proto->type)
 		return 0;
 
@@ -9734,7 +9736,51 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
 	return 0;
 }
 
-static int check_attach_btf_id(struct bpf_verifier_env *env)
+/* BPF_PROG_TYPE_LSM programs pass the member index of the LSM hook in the
+ * security_hook_heads as the lsm_hook_index. The verifier determines the
+ * name and the prototype for the LSM hook using the information in
+ * security_list_options, validates if the offset is a valid hlist_head, and
+ * updates the attach_btf_id to the byte offset in the security_hook_heads
+ * struct.
+ */
+static inline int check_attach_btf_id_lsm(struct bpf_verifier_env *env)
+{
+	struct bpf_prog *prog = env->prog;
+	u32 index = prog->aux->lsm_hook_index;
+	const struct btf_member *head;
+	const struct btf_type *t;
+	const char *tname;
+	int ret;
+
+	ret = bpf_lsm_verify_prog(prog);
+	if (ret < 0)
+		return -EINVAL;
+
+	t = bpf_lsm_type_by_index(btf_vmlinux, index);
+	if (!t) {
+		verbose(env, "unable to find security_list_option for index %u in security_hook_heads\n", index);
+		return -EINVAL;
+	}
+
+	if (!btf_type_is_func_proto(t))
+		return -EINVAL;
+
+	head = bpf_lsm_head_by_index(btf_vmlinux, index);
+	if (IS_ERR(head)) {
+		verbose(env, "no security_hook_heads index = %u\n", index);
+		return PTR_ERR(head);
+	}
+
+	tname = btf_name_by_offset(btf_vmlinux, head->name_off);
+	if (!tname || !tname[0])
+		return -EINVAL;
+
+	prog->aux->attach_func_name = tname;
+	prog->aux->attach_func_proto = t;
+	return 0;
+}
+
+static int check_attach_btf_id_tracing(struct bpf_verifier_env *env)
 {
 	struct bpf_prog *prog = env->prog;
 	struct bpf_prog *tgt_prog = prog->aux->linked_prog;
@@ -9749,12 +9795,6 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 	long addr;
 	u64 key;
 
-	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
-		return check_struct_ops_btf_id(env);
-
-	if (prog->type != BPF_PROG_TYPE_TRACING)
-		return 0;
-
 	if (!btf_id) {
 		verbose(env, "Tracing programs must provide btf_id\n");
 		return -EINVAL;
@@ -9895,6 +9935,22 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 	}
 }
 
+static int check_attach_btf_id(struct bpf_verifier_env *env)
+{
+	struct bpf_prog *prog = env->prog;
+
+	switch (prog->type) {
+	case BPF_PROG_TYPE_TRACING:
+		return check_attach_btf_id_tracing(env);
+	case BPF_PROG_TYPE_STRUCT_OPS:
+		return check_struct_ops_btf_id(env);
+	case BPF_PROG_TYPE_LSM:
+		return check_attach_btf_id_lsm(env);
+	default:
+		return 0;
+	}
+}
+
 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 	      union bpf_attr __user *uattr)
 {
diff --git a/security/bpf/Kconfig b/security/bpf/Kconfig
index 9438d899b618..a915c549f4b8 100644
--- a/security/bpf/Kconfig
+++ b/security/bpf/Kconfig
@@ -8,6 +8,7 @@ config SECURITY_BPF
 	depends on BPF_SYSCALL
 	depends on SRCU
 	depends on DEBUG_INFO_BTF
+	depends on BPF_JIT && HAVE_EBPF_JIT
 	help
 	  This enables instrumentation of the security hooks with
 	  eBPF programs.
diff --git a/security/bpf/Makefile b/security/bpf/Makefile
index c526927c337d..748b9b7d4bc7 100644
--- a/security/bpf/Makefile
+++ b/security/bpf/Makefile
@@ -3,3 +3,5 @@
 # Copyright 2019 Google LLC.
 
 obj-$(CONFIG_SECURITY_BPF) := lsm.o ops.o hooks.o
+
+ccflags-y := -I$(srctree)/security/bpf -I$(srctree)/security/bpf/include
diff --git a/security/bpf/hooks.c b/security/bpf/hooks.c
index 82725611693d..4e71da0e8e9e 100644
--- a/security/bpf/hooks.c
+++ b/security/bpf/hooks.c
@@ -6,9 +6,14 @@
 
 #include <linux/bpf_lsm.h>
 #include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
 #include <linux/btf.h>
 #include <linux/srcu.h>
 
+#include "bpf_lsm.h"
+
+#define SECURITY_LIST_HEAD(off) ((void *)&bpf_lsm_hook_heads + off)
+
 DEFINE_STATIC_SRCU(security_hook_srcu);
 
 int bpf_lsm_srcu_read_lock(void)
@@ -32,21 +37,36 @@ static inline int validate_hlist_head(struct btf *btf, u32 type_id)
 	return 0;
 }
 
+int bpf_lsm_verify_prog(const struct bpf_prog *prog)
+{
+	u32 index = prog->aux->lsm_hook_index;
+	struct bpf_verifier_log log = {};
+
+	if (!prog->gpl_compatible) {
+		bpf_log(&log,
+			"LSM programs must have a GPL compatible license\n");
+		return -EINVAL;
+	}
+
+	if (index < 1 || index > bpf_lsm_info.num_hooks) {
+		bpf_log(&log, "lsm_hook_index should be between 1 and %lu\n",
+			bpf_lsm_info.num_hooks);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /* Find the BTF representation of the security_hook_heads member for a member
  * with a given index in struct security_hook_heads.
  */
 const struct btf_member *bpf_lsm_head_by_index(struct btf *btf, u32 index)
 {
 	const struct btf_member *member;
-	const struct btf_type *t;
 	u32 off, i;
 	int ret;
 
-	t = btf_type_by_name_kind(btf, "security_hook_heads", BTF_KIND_STRUCT);
-	if (WARN_ON_ONCE(IS_ERR(t)))
-		return ERR_CAST(t);
-
-	for_each_member(i, t, member) {
+	for_each_member(i, bpf_lsm_info.btf_hook_heads, member) {
 		/* We've found the id requested and need to check the
 		 * the following:
 		 *
@@ -54,8 +74,9 @@ const struct btf_member *bpf_lsm_head_by_index(struct btf *btf, u32 index)
 		 *
 		 * - Is it a valid hlist_head struct?
 		 */
-		if (index == i) {
-			off = btf_member_bit_offset(t, member);
+		if (index == i + 1) {
+			off = btf_member_bit_offset(
+				bpf_lsm_info.btf_hook_heads, member);
 			if (off % 8)
 				/* valid c code cannot generate such btf */
 				return ERR_PTR(-EINVAL);
@@ -90,11 +111,7 @@ const struct btf_type *bpf_lsm_type_by_index(struct btf *btf, u32 index)
 	if (IS_ERR(hook_head))
 		return ERR_PTR(PTR_ERR(hook_head));
 
-	t = btf_type_by_name_kind(btf, "security_list_options", BTF_KIND_UNION);
-	if (WARN_ON_ONCE(IS_ERR(t)))
-		return ERR_CAST(t);
-
-	for_each_member(i, t, member) {
+	for_each_member(i, bpf_lsm_info.btf_hook_types, member) {
 		if (hook_head->name_off == member->name_off) {
 			/* There should be only one member with the same name
 			 * as the LSM hook. This should never really happen
@@ -122,3 +139,190 @@ const struct btf_type *bpf_lsm_type_by_index(struct btf *btf, u32 index)
 
 	return t;
 }
+
+static void *bpf_lsm_get_func_addr(struct security_hook_list *s,
+				   const char *name)
+{
+	const struct btf_member *member;
+	void *addr = NULL;
+	s32 i;
+
+	for_each_member(i, bpf_lsm_info.btf_hook_types, member) {
+		if (!strncmp(btf_name_by_offset(btf_vmlinux, member->name_off),
+				name, strlen(name) + 1)) {
+			/* There should be only one member with the same name
+			 * as the LSM hook.
+			 */
+			if (WARN_ON(addr))
+				return ERR_PTR(-EINVAL);
+			addr = (void *)&s->hook + member->offset / 8;
+		}
+	}
+
+	if (!addr)
+		return ERR_PTR(-ENOENT);
+	return addr;
+}
+
+static struct bpf_lsm_list *bpf_lsm_list_lookup(struct bpf_prog *prog)
+{
+	u32 index = prog->aux->lsm_hook_index;
+	struct bpf_verifier_log bpf_log = {};
+	const struct btf_member *head;
+	struct bpf_lsm_list *list;
+	int ret = 0;
+
+	list = &bpf_lsm_info.hook_lists[index - 1];
+
+	mutex_lock(&list->mutex);
+
+	if (list->initialized)
+		goto unlock;
+
+	list->attach_type = prog->aux->attach_func_proto;
+
+	ret = btf_distill_func_proto(&bpf_log, btf_vmlinux, list->attach_type,
+				     prog->aux->attach_func_name,
+				     &list->func_model);
+	if (ret)
+		goto unlock;
+
+	head = bpf_lsm_head_by_index(btf_vmlinux, index);
+	if (IS_ERR(head)) {
+		ret = PTR_ERR(head);
+		goto unlock;
+	}
+
+	list->security_list_head = SECURITY_LIST_HEAD(head->offset / 8);
+	list->initialized = true;
+unlock:
+	mutex_unlock(&list->mutex);
+	if (ret)
+		return ERR_PTR(ret);
+	return list;
+}
+
+static struct bpf_lsm_hook *bpf_lsm_hook_alloc(
+	struct bpf_lsm_list *list, struct bpf_prog *prog)
+{
+	struct bpf_lsm_hook *hook;
+	void *image;
+	int ret = 0;
+
+	image = bpf_jit_alloc_exec(PAGE_SIZE);
+	if (!image)
+		return ERR_PTR(-ENOMEM);
+
+	set_vm_flush_reset_perms(image);
+
+	ret = arch_prepare_bpf_trampoline(image, image + PAGE_SIZE,
+		&list->func_model, 0, &prog, 1, NULL, 0, NULL);
+	if (ret < 0) {
+		ret = -EINVAL;
+		goto error;
+	}
+
+	hook = kzalloc(sizeof(struct bpf_lsm_hook), GFP_KERNEL);
+	if (!hook) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	hook->image = image;
+	hook->prog = prog;
+	bpf_prog_inc(prog);
+	return hook;
+error:
+	bpf_jit_free_exec(image);
+	return ERR_PTR(ret);
+}
+
+static void bpf_lsm_hook_free(struct bpf_lsm_hook *tr)
+{
+	if (!tr)
+		return;
+
+	if (tr->prog)
+		bpf_prog_put(tr->prog);
+
+	bpf_jit_free_exec(tr->image);
+	kfree(tr);
+}
+
+int bpf_lsm_attach(struct bpf_prog *prog)
+{
+	struct bpf_lsm_hook *hook;
+	struct bpf_lsm_list *list;
+	void **addr;
+	int ret = 0;
+
+	/* Only CAP_MAC_ADMIN users are allowed to make changes to LSM hooks
+	 */
+	if (!capable(CAP_MAC_ADMIN))
+		return -EPERM;
+
+	if (!bpf_lsm_info.initialized)
+		return -EBUSY;
+
+	list = bpf_lsm_list_lookup(prog);
+	if (IS_ERR(list))
+		return PTR_ERR(list);
+
+	hook = bpf_lsm_hook_alloc(list, prog);
+	if (IS_ERR(hook))
+		return PTR_ERR(hook);
+
+	hook->sec_hook.head = list->security_list_head;
+	addr = bpf_lsm_get_func_addr(&hook->sec_hook,
+				     prog->aux->attach_func_name);
+	if (IS_ERR(addr)) {
+		ret = PTR_ERR(addr);
+		goto error;
+	}
+
+	*addr = hook->image;
+
+	mutex_lock(&list->mutex);
+	hlist_add_tail_rcu(&hook->sec_hook.list, hook->sec_hook.head);
+	mutex_unlock(&list->mutex);
+	return 0;
+
+error:
+	bpf_lsm_hook_free(hook);
+	return ret;
+}
+
+int bpf_lsm_detach(struct bpf_prog *prog)
+{
+	struct security_hook_list *sec_hook;
+	struct bpf_lsm_hook *hook = NULL;
+	struct bpf_lsm_list *list;
+	struct hlist_node *n;
+
+	/* Only CAP_MAC_ADMIN users are allowed to make changes to LSM hooks
+	 */
+	if (!capable(CAP_MAC_ADMIN))
+		return -EPERM;
+
+	if (!bpf_lsm_info.initialized)
+		return -EBUSY;
+
+	list = &bpf_lsm_info.hook_lists[prog->aux->lsm_hook_index - 1];
+
+	mutex_lock(&list->mutex);
+	hlist_for_each_entry_safe(sec_hook, n, list->security_list_head, list) {
+		hook = container_of(sec_hook, struct bpf_lsm_hook, sec_hook);
+		if (hook->prog == prog) {
+			hlist_del_rcu(&hook->sec_hook.list);
+			break;
+		}
+	}
+	mutex_unlock(&list->mutex);
+	/* call_rcu is not used directly as module_memfree cannot run from an
+	 * softirq context. The best way would be to schedule this on a work
+	 * queue.
+	 */
+	synchronize_srcu(&security_hook_srcu);
+	bpf_lsm_hook_free(hook);
+	return 0;
+}
diff --git a/security/bpf/include/bpf_lsm.h b/security/bpf/include/bpf_lsm.h
new file mode 100644
index 000000000000..849f7f64d7f2
--- /dev/null
+++ b/security/bpf/include/bpf_lsm.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _BPF_LSM_H
+#define _BPF_LSM_H
+
+#include <linux/filter.h>
+#include <linux/lsm_hooks.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
+
+
+struct bpf_lsm_hook {
+	/* The security_hook_list is initialized dynamically. These are
+	 * initialized in static LSMs by LSM_HOOK_INIT.
+	 */
+	struct security_hook_list sec_hook;
+	/* The BPF program for which this hook was allocated, this is used upon
+	 * detachment to find the hook corresponding to a program.
+	 */
+	struct bpf_prog *prog;
+	/* The address of the allocated function */
+	void *image;
+};
+
+/* The list represents the list of hooks attached to a particular
+ * security_list_head and contains information required for attaching and
+ * detaching BPF Programs.
+ */
+struct bpf_lsm_list {
+	/* Used on the first attached BPF program to populate the remaining
+	 * information
+	 */
+	bool initialized;
+	/* This mutex is used to serialize accesses to all the fields in
+	 * this structure.
+	 */
+	struct mutex mutex;
+	/* The BTF type for this hook.
+	 */
+	const struct btf_type *attach_type;
+	/* func_model for the setup of the callback.
+	 */
+	struct btf_func_model func_model;
+	/* The list of functions currently associated with the LSM hook.
+	 */
+	struct list_head callback_list;
+	/* The head to which the allocated hooks must be attached to.
+	 */
+	struct hlist_head *security_list_head;
+};
+
+struct bpf_lsm_info {
+	/* Dynamic Hooks can only be attached after the LSM is initialized.
+	 */
+	bool initialized;
+	/* The number of hooks is calculated at runtime using the BTF
+	 * information of the struct security_hook_heads.
+	 */
+	size_t num_hooks;
+	/* The hook_lists is allocated during __init and mutexes for each
+	 * allocated on __init, the remaining initialization happens when a
+	 * BPF program is attached to the list.
+	 */
+	struct bpf_lsm_list *hook_lists;
+	/* BTF type for security_hook_heads populated at init.
+	 */
+	const struct btf_type *btf_hook_heads;
+	/* BTF type for security_list_options populated at init.
+	 */
+	const struct btf_type *btf_hook_types;
+};
+
+extern struct bpf_lsm_info bpf_lsm_info;
+
+#endif /* _BPF_LSM_H */
diff --git a/security/bpf/lsm.c b/security/bpf/lsm.c
index d4ea6aa9ddb8..3bab187e7574 100644
--- a/security/bpf/lsm.c
+++ b/security/bpf/lsm.c
@@ -7,6 +7,8 @@
 #include <linux/bpf_lsm.h>
 #include <linux/lsm_hooks.h>
 
+#include "bpf_lsm.h"
+
 /* This is only for internal hooks, always statically shipped as part of the
  * BPF LSM. Statically defined hooks are appended to the security_hook_heads
  * which is common for LSMs and R/O after init.
@@ -19,6 +21,64 @@ static struct security_hook_list lsm_hooks[] __lsm_ro_after_init = {};
  */
 struct security_hook_heads bpf_lsm_hook_heads;
 
+/* Security hooks registered dynamically by the BPF LSM and must be accessed
+ * by holding bpf_lsm_srcu_read_lock and bpf_lsm_srcu_read_unlock.
+ */
+struct bpf_lsm_info bpf_lsm_info;
+
+static __init int init_lsm_info(void)
+{
+	const struct btf_type *t;
+	size_t num_hooks;
+	int i;
+
+	if (!btf_vmlinux)
+		/* No need to grab any locks because we are still in init */
+		btf_vmlinux = btf_parse_vmlinux();
+
+	if (IS_ERR(btf_vmlinux)) {
+		pr_err("btf_vmlinux is malformed\n");
+		return PTR_ERR(btf_vmlinux);
+	}
+
+	t = btf_type_by_name_kind(btf_vmlinux, "security_hook_heads",
+				  BTF_KIND_STRUCT);
+	if (WARN_ON(IS_ERR(t)))
+		return PTR_ERR(t);
+
+	num_hooks = btf_type_vlen(t);
+	if (num_hooks <= 0)
+		return -EINVAL;
+
+	bpf_lsm_info.num_hooks = num_hooks;
+	bpf_lsm_info.btf_hook_heads = t;
+
+	t = btf_type_by_name_kind(btf_vmlinux, "security_list_options",
+				  BTF_KIND_UNION);
+	if (WARN_ON(IS_ERR(t)))
+		return PTR_ERR(t);
+
+	bpf_lsm_info.btf_hook_types = t;
+
+	bpf_lsm_info.hook_lists = kcalloc(num_hooks,
+		sizeof(struct bpf_lsm_list), GFP_KERNEL);
+	if (!bpf_lsm_info.hook_lists)
+		return -ENOMEM;
+
+	/* The mutex needs to be initialized at init as it must be held
+	 * when mutating the list. The rest of the information in the list
+	 * is populated lazily when the first LSM hook callback is appeneded
+	 * to the list.
+	 */
+	for (i = 0; i < num_hooks; i++)
+		mutex_init(&bpf_lsm_info.hook_lists[i].mutex);
+
+	bpf_lsm_info.initialized = true;
+	return 0;
+}
+
+late_initcall(init_lsm_info);
+
 static int __init lsm_init(void)
 {
 	security_add_hooks(lsm_hooks, ARRAY_SIZE(lsm_hooks), "bpf");
diff --git a/security/bpf/ops.c b/security/bpf/ops.c
index 81c2bd9c0495..9a26633ac6f3 100644
--- a/security/bpf/ops.c
+++ b/security/bpf/ops.c
@@ -6,6 +6,7 @@
 
 #include <linux/filter.h>
 #include <linux/bpf.h>
+#include <linux/btf.h>
 
 const struct bpf_prog_ops lsm_prog_ops = {
 };
@@ -25,4 +26,5 @@ static const struct bpf_func_proto *get_bpf_func_proto(
 
 const struct bpf_verifier_ops lsm_verifier_ops = {
 	.get_func_proto = get_bpf_func_proto,
+	.is_valid_access = btf_ctx_access,
 };
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b6a725a8a21d..fa5513cb3332 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -448,6 +448,10 @@ union bpf_attr {
 		__u32		line_info_cnt;	/* number of bpf_line_info records */
 		__u32		attach_btf_id;	/* in-kernel BTF type id to attach to */
 		__u32		attach_prog_fd; /* 0 to attach to vmlinux */
+		/* Index (one-based) of the hlist_head in security_hook_heads to
+		 * which the program must be attached.
+		 */
+		__u32		lsm_hook_index;
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
-- 
2.20.1


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox