Linux Security Modules development
 help / color / mirror / Atom feed
* [PATCH v6 03/10] proc: move /proc/{self|thread-self} dentries to proc_fs_info
From: Alexey Gladkov @ 2019-12-25 12:51 UTC (permalink / raw)
  To: LKML, Kernel Hardening, Linux API, Linux FS Devel,
	Linux Security Module
  Cc: Akinobu Mita, Alexander Viro, Alexey Dobriyan, Alexey Gladkov,
	Andrew Morton, Andy Lutomirski, Daniel Micay, Djalal Harouni,
	Dmitry V . Levin, Eric W . Biederman, Greg Kroah-Hartman,
	Ingo Molnar, J . Bruce Fields, Jeff Layton, Jonathan Corbet,
	Kees Cook, Linus Torvalds, Oleg Nesterov, Solar Designer,
	Stephen Rothwell
In-Reply-To: <20191225125151.1950142-1-gladkov.alexey@gmail.com>

This is a preparation patch that moves /proc/{self|thread-self} dentries
to be stored inside procfs fs_info struct instead of making them per pid
namespace. Since we want to support multiple procfs instances we need to
make sure that these dentries are also per-superblock instead of
per-pidns, unmounting a private procfs won't clash with other procfs
mounts.

Cc: Kees Cook <keescook@chromium.org>
Cc: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Djalal Harouni <tixxdz@gmail.com>
Signed-off-by: Alexey Gladkov <gladkov.alexey@gmail.com>
---
 fs/proc/base.c                | 5 +++--
 fs/proc/root.c                | 8 ++++----
 fs/proc/self.c                | 4 ++--
 fs/proc/thread_self.c         | 6 +++---
 include/linux/pid_namespace.h | 4 +---
 include/linux/proc_fs.h       | 2 ++
 6 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 672e71c52dbd..1eb366ad8b06 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3316,6 +3316,7 @@ static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter ite
 int proc_pid_readdir(struct file *file, struct dir_context *ctx)
 {
 	struct tgid_iter iter;
+	struct proc_fs_info *fs_info = proc_sb_info(file_inode(file)->i_sb);
 	struct pid_namespace *ns = proc_pid_ns(file_inode(file));
 	loff_t pos = ctx->pos;
 
@@ -3323,13 +3324,13 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
 		return 0;
 
 	if (pos == TGID_OFFSET - 2) {
-		struct inode *inode = d_inode(ns->proc_self);
+		struct inode *inode = d_inode(fs_info->proc_self);
 		if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
 			return 0;
 		ctx->pos = pos = pos + 1;
 	}
 	if (pos == TGID_OFFSET - 1) {
-		struct inode *inode = d_inode(ns->proc_thread_self);
+		struct inode *inode = d_inode(fs_info->proc_thread_self);
 		if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
 			return 0;
 		ctx->pos = pos = pos + 1;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index d449f095f0f7..637e26cc795e 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -210,10 +210,10 @@ static void proc_kill_sb(struct super_block *sb)
 {
 	struct proc_fs_info *fs_info = proc_sb_info(sb);
 
-	if (fs_info->pid_ns->proc_self)
-		dput(fs_info->pid_ns->proc_self);
-	if (fs_info->pid_ns->proc_thread_self)
-		dput(fs_info->pid_ns->proc_thread_self);
+	if (fs_info->proc_self)
+		dput(fs_info->proc_self);
+	if (fs_info->proc_thread_self)
+		dput(fs_info->proc_thread_self);
 	kill_anon_super(sb);
 	put_pid_ns(fs_info->pid_ns);
 	kfree(fs_info);
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 57c0a1047250..846fc2b7c8a8 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -36,7 +36,7 @@ static unsigned self_inum __ro_after_init;
 int proc_setup_self(struct super_block *s)
 {
 	struct inode *root_inode = d_inode(s->s_root);
-	struct pid_namespace *ns = proc_pid_ns(root_inode);
+	struct proc_fs_info *fs_info = proc_sb_info(s);
 	struct dentry *self;
 	int ret = -ENOMEM;
 	
@@ -62,7 +62,7 @@ int proc_setup_self(struct super_block *s)
 	if (ret)
 		pr_err("proc_fill_super: can't allocate /proc/self\n");
 	else
-		ns->proc_self = self;
+		fs_info->proc_self = self;
 
 	return ret;
 }
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index f61ae53533f5..2493cbbdfa6f 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -36,7 +36,7 @@ static unsigned thread_self_inum __ro_after_init;
 int proc_setup_thread_self(struct super_block *s)
 {
 	struct inode *root_inode = d_inode(s->s_root);
-	struct pid_namespace *ns = proc_pid_ns(root_inode);
+	struct proc_fs_info *fs_info = proc_sb_info(s);
 	struct dentry *thread_self;
 	int ret = -ENOMEM;
 
@@ -60,9 +60,9 @@ int proc_setup_thread_self(struct super_block *s)
 	inode_unlock(root_inode);
 
 	if (ret)
-		pr_err("proc_fill_super: can't allocate /proc/thread_self\n");
+		pr_err("proc_fill_super: can't allocate /proc/thread-self\n");
 	else
-		ns->proc_thread_self = thread_self;
+		fs_info->proc_thread_self = thread_self;
 
 	return ret;
 }
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 49538b172483..f91a8bf6e09e 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -31,9 +31,7 @@ struct pid_namespace {
 	unsigned int level;
 	struct pid_namespace *parent;
 #ifdef CONFIG_PROC_FS
-	struct vfsmount *proc_mnt;
-	struct dentry *proc_self;
-	struct dentry *proc_thread_self;
+	struct vfsmount *proc_mnt; /* Internal proc mounted during each new pidns */
 #endif
 #ifdef CONFIG_BSD_PROCESS_ACCT
 	struct fs_pin *bacct;
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 6ef09e01bf10..fa44c2348e52 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -14,6 +14,8 @@ struct seq_operations;
 
 struct proc_fs_info {
 	struct pid_namespace *pid_ns;
+	struct dentry *proc_self;        /* For /proc/self */
+	struct dentry *proc_thread_self; /* For /proc/thread-self */
 };
 
 #ifdef CONFIG_PROC_FS
-- 
2.24.1


^ permalink raw reply related

* [PATCH v6 04/10] proc: move hide_pid, pid_gid from pid_namespace to proc_fs_info
From: Alexey Gladkov @ 2019-12-25 12:51 UTC (permalink / raw)
  To: LKML, Kernel Hardening, Linux API, Linux FS Devel,
	Linux Security Module
  Cc: Akinobu Mita, Alexander Viro, Alexey Dobriyan, Alexey Gladkov,
	Andrew Morton, Andy Lutomirski, Daniel Micay, Djalal Harouni,
	Dmitry V . Levin, Eric W . Biederman, Greg Kroah-Hartman,
	Ingo Molnar, J . Bruce Fields, Jeff Layton, Jonathan Corbet,
	Kees Cook, Linus Torvalds, Oleg Nesterov, Solar Designer,
	Stephen Rothwell
In-Reply-To: <20191225125151.1950142-1-gladkov.alexey@gmail.com>

This is a preparation patch that moves hide_pid and pid_gid parameters
to be stored inside procfs fs_info struct instead of making them per pid
namespace. Since we want to support multiple procfs instances we need to
make sure that all proc-specific parameters are also per-superblock.

Signed-off-by: Alexey Gladkov <gladkov.alexey@gmail.com>
---
 fs/proc/base.c                | 18 +++++++++---------
 fs/proc/inode.c               |  9 ++++-----
 fs/proc/root.c                | 10 ++++++++--
 include/linux/pid_namespace.h |  8 --------
 include/linux/proc_fs.h       |  9 +++++++++
 5 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1eb366ad8b06..caca1929fee1 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -695,13 +695,13 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr)
  * May current process learn task's sched/cmdline info (for hide_pid_min=1)
  * or euid/egid (for hide_pid_min=2)?
  */
-static bool has_pid_permissions(struct pid_namespace *pid,
+static bool has_pid_permissions(struct proc_fs_info *fs_info,
 				 struct task_struct *task,
 				 int hide_pid_min)
 {
-	if (pid->hide_pid < hide_pid_min)
+	if (fs_info->hide_pid < hide_pid_min)
 		return true;
-	if (in_group_p(pid->pid_gid))
+	if (in_group_p(fs_info->pid_gid))
 		return true;
 	return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
 }
@@ -709,18 +709,18 @@ static bool has_pid_permissions(struct pid_namespace *pid,
 
 static int proc_pid_permission(struct inode *inode, int mask)
 {
-	struct pid_namespace *pid = proc_pid_ns(inode);
+	struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
 	struct task_struct *task;
 	bool has_perms;
 
 	task = get_proc_task(inode);
 	if (!task)
 		return -ESRCH;
-	has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS);
+	has_perms = has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS);
 	put_task_struct(task);
 
 	if (!has_perms) {
-		if (pid->hide_pid == HIDEPID_INVISIBLE) {
+		if (fs_info->hide_pid == HIDEPID_INVISIBLE) {
 			/*
 			 * Let's make getdents(), stat(), and open()
 			 * consistent with each other.  If a process
@@ -1784,7 +1784,7 @@ int pid_getattr(const struct path *path, struct kstat *stat,
 		u32 request_mask, unsigned int query_flags)
 {
 	struct inode *inode = d_inode(path->dentry);
-	struct pid_namespace *pid = proc_pid_ns(inode);
+	struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
 	struct task_struct *task;
 
 	generic_fillattr(inode, stat);
@@ -1794,7 +1794,7 @@ int pid_getattr(const struct path *path, struct kstat *stat,
 	rcu_read_lock();
 	task = pid_task(proc_pid(inode), PIDTYPE_PID);
 	if (task) {
-		if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) {
+		if (!has_pid_permissions(fs_info, task, HIDEPID_INVISIBLE)) {
 			rcu_read_unlock();
 			/*
 			 * This doesn't prevent learning whether PID exists,
@@ -3344,7 +3344,7 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
 		unsigned int len;
 
 		cond_resched();
-		if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE))
+		if (!has_pid_permissions(fs_info, iter.task, HIDEPID_INVISIBLE))
 			continue;
 
 		len = snprintf(name, sizeof(name), "%u", iter.tgid);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index b631608dfbcc..b90c233e5968 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -105,12 +105,11 @@ void __init proc_init_kmemcache(void)
 static int proc_show_options(struct seq_file *seq, struct dentry *root)
 {
 	struct proc_fs_info *fs_info = proc_sb_info(root->d_sb);
-	struct pid_namespace *pid = fs_info->pid_ns;
 
-	if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID))
-		seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid));
-	if (pid->hide_pid != HIDEPID_OFF)
-		seq_printf(seq, ",hidepid=%u", pid->hide_pid);
+	if (!gid_eq(fs_info->pid_gid, GLOBAL_ROOT_GID))
+		seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, fs_info->pid_gid));
+	if (fs_info->hide_pid != HIDEPID_OFF)
+		seq_printf(seq, ",hidepid=%u", fs_info->hide_pid);
 
 	return 0;
 }
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 637e26cc795e..1ca47d446aa4 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -89,10 +89,16 @@ static void proc_apply_options(struct super_block *s,
 {
 	struct proc_fs_context *ctx = fc->fs_private;
 
+	if (pid_ns->proc_mnt) {
+		struct proc_fs_info *fs_info = proc_sb_info(pid_ns->proc_mnt->mnt_sb);
+		ctx->fs_info->pid_gid = fs_info->pid_gid;
+		ctx->fs_info->hide_pid = fs_info->hide_pid;
+	}
+
 	if (ctx->mask & (1 << Opt_gid))
-		pid_ns->pid_gid = make_kgid(user_ns, ctx->gid);
+		ctx->fs_info->pid_gid = make_kgid(user_ns, ctx->gid);
 	if (ctx->mask & (1 << Opt_hidepid))
-		pid_ns->hide_pid = ctx->hidepid;
+		ctx->fs_info->hide_pid = ctx->hidepid;
 }
 
 static int proc_fill_super(struct super_block *s, struct fs_context *fc)
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index f91a8bf6e09e..66f47f1afe0d 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -15,12 +15,6 @@
 
 struct fs_pin;
 
-enum { /* definitions for pid_namespace's hide_pid field */
-	HIDEPID_OFF	  = 0,
-	HIDEPID_NO_ACCESS = 1,
-	HIDEPID_INVISIBLE = 2,
-};
-
 struct pid_namespace {
 	struct kref kref;
 	struct idr idr;
@@ -39,8 +33,6 @@ struct pid_namespace {
 	struct user_namespace *user_ns;
 	struct ucounts *ucounts;
 	struct work_struct proc_work;
-	kgid_t pid_gid;
-	int hide_pid;
 	int reboot;	/* group exit code if this pidns was rebooted */
 	struct ns_common ns;
 } __randomize_layout;
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index fa44c2348e52..05ecf4e8923f 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -12,10 +12,19 @@ struct proc_dir_entry;
 struct seq_file;
 struct seq_operations;
 
+/* definitions for hide_pid field */
+enum {
+	HIDEPID_OFF	  = 0,
+	HIDEPID_NO_ACCESS = 1,
+	HIDEPID_INVISIBLE = 2,
+};
+
 struct proc_fs_info {
 	struct pid_namespace *pid_ns;
 	struct dentry *proc_self;        /* For /proc/self */
 	struct dentry *proc_thread_self; /* For /proc/thread-self */
+	kgid_t pid_gid;
+	int hide_pid;
 };
 
 #ifdef CONFIG_PROC_FS
-- 
2.24.1


^ permalink raw reply related

* [PATCH v6 09/10] proc: add option to mount only a pids subset
From: Alexey Gladkov @ 2019-12-25 12:51 UTC (permalink / raw)
  To: LKML, Kernel Hardening, Linux API, Linux FS Devel,
	Linux Security Module
  Cc: Akinobu Mita, Alexander Viro, Alexey Dobriyan, Alexey Gladkov,
	Andrew Morton, Andy Lutomirski, Daniel Micay, Djalal Harouni,
	Dmitry V . Levin, Eric W . Biederman, Greg Kroah-Hartman,
	Ingo Molnar, J . Bruce Fields, Jeff Layton, Jonathan Corbet,
	Kees Cook, Linus Torvalds, Oleg Nesterov, Solar Designer,
	Stephen Rothwell
In-Reply-To: <20191225125151.1950142-1-gladkov.alexey@gmail.com>

This allows to hide all files and directories in the procfs that are not
related to tasks.

Signed-off-by: Alexey Gladkov <gladkov.alexey@gmail.com>
---
 fs/proc/generic.c       | 20 ++++++++++++++++++++
 fs/proc/inode.c         |  8 ++++++++
 fs/proc/root.c          | 14 ++++++++++++++
 include/linux/proc_fs.h | 26 ++++++++++++++++++++++++++
 4 files changed, 68 insertions(+)

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 64e9ee1b129e..2fa919a5544d 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -267,6 +267,11 @@ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry,
 struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
 		unsigned int flags)
 {
+	struct proc_fs_info *fs_info = proc_sb_info(dir->i_sb);
+
+	if (proc_fs_pidonly(fs_info) == PROC_PIDONLY_ON)
+		return ERR_PTR(-ENOENT);
+
 	return proc_lookup_de(dir, dentry, PDE(dir));
 }
 
@@ -323,10 +328,24 @@ int proc_readdir_de(struct file *file, struct dir_context *ctx,
 int proc_readdir(struct file *file, struct dir_context *ctx)
 {
 	struct inode *inode = file_inode(file);
+	struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
+
+	if (proc_fs_pidonly(fs_info) == PROC_PIDONLY_ON)
+		return 1;
 
 	return proc_readdir_de(file, ctx, PDE(inode));
 }
 
+static int proc_dir_open(struct inode *inode, struct file *file)
+{
+	struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
+
+	if (proc_fs_pidonly(fs_info) == PROC_PIDONLY_ON)
+		return -ENOENT;
+
+	return 0;
+}
+
 /*
  * These are the generic /proc directory operations. They
  * use the in-memory "struct proc_dir_entry" tree to parse
@@ -336,6 +355,7 @@ static const struct file_operations proc_dir_operations = {
 	.llseek			= generic_file_llseek,
 	.read			= generic_read_dir,
 	.iterate_shared		= proc_readdir,
+	.open			= proc_dir_open,
 };
 
 /*
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 70b722fb8811..fe9d20da3862 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -107,6 +107,7 @@ static int proc_show_options(struct seq_file *seq, struct dentry *root)
 	struct proc_fs_info *fs_info = proc_sb_info(root->d_sb);
 	int hidepid = proc_fs_hide_pid(fs_info);
 	kgid_t gid = proc_fs_pid_gid(fs_info);
+	int pidonly = proc_fs_pidonly(fs_info);
 
 	if (!gid_eq(gid, GLOBAL_ROOT_GID))
 		seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, gid));
@@ -114,6 +115,9 @@ static int proc_show_options(struct seq_file *seq, struct dentry *root)
 	if (hidepid != HIDEPID_OFF)
 		seq_printf(seq, ",hidepid=%u", hidepid);
 
+	if (pidonly != PROC_PIDONLY_OFF)
+		seq_printf(seq, ",pidonly=%u", pidonly);
+
 	return 0;
 }
 
@@ -333,12 +337,16 @@ proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
 
 static int proc_reg_open(struct inode *inode, struct file *file)
 {
+	struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
 	struct proc_dir_entry *pde = PDE(inode);
 	int rv = 0;
 	typeof_member(struct file_operations, open) open;
 	typeof_member(struct file_operations, release) release;
 	struct pde_opener *pdeo;
 
+	if (proc_fs_pidonly(fs_info) == PROC_PIDONLY_ON)
+		return -ENOENT;
+
 	/*
 	 * Ensure that
 	 * 1) PDE's ->release hook will be called no matter what
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 3c7b29140293..fc26c22a5906 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -34,16 +34,19 @@ struct proc_fs_context {
 	unsigned int		mask;
 	int			hidepid;
 	int			gid;
+	int			pidonly;
 };
 
 enum proc_param {
 	Opt_gid,
 	Opt_hidepid,
+	Opt_pidonly,
 };
 
 static const struct fs_parameter_spec proc_param_specs[] = {
 	fsparam_u32("gid",	Opt_gid),
 	fsparam_u32("hidepid",	Opt_hidepid),
+	fsparam_u32("pidonly",	Opt_pidonly),
 	{}
 };
 
@@ -74,6 +77,13 @@ static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
 			return invalf(fc, "proc: hidepid value must be between 0 and 3.\n");
 		break;
 
+	case Opt_pidonly:
+		ctx->pidonly = result.uint_32;
+		if (ctx->pidonly < PROC_PIDONLY_OFF ||
+		    ctx->pidonly > PROC_PIDONLY_ON)
+			return invalf(fc, "proc: pidonly value must be 0 or 1.\n");
+		break;
+
 	default:
 		return -EINVAL;
 	}
@@ -94,6 +104,7 @@ static void proc_apply_options(struct proc_fs_info *fs_info,
 
 		proc_fs_set_pid_gid(fs_info, proc_fs_pid_gid(pidns_fs_info));
 		proc_fs_set_hide_pid(fs_info, proc_fs_hide_pid(pidns_fs_info));
+		proc_fs_set_pidonly(fs_info, proc_fs_pidonly(pidns_fs_info));
 	}
 
 	if (ctx->mask & (1 << Opt_gid))
@@ -101,6 +112,9 @@ static void proc_apply_options(struct proc_fs_info *fs_info,
 
 	if (ctx->mask & (1 << Opt_hidepid))
 		proc_fs_set_hide_pid(fs_info, ctx->hidepid);
+
+	if (ctx->mask & (1 << Opt_pidonly))
+		proc_fs_set_pidonly(fs_info, ctx->pidonly);
 }
 
 static int proc_fill_super(struct super_block *s, struct fs_context *fc)
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 83c87ea65505..ef1c0d9712b0 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -20,6 +20,12 @@ enum {
 	HIDEPID_NOT_PTRACABLE = 3, /* Limit pids to only ptracable pids */
 };
 
+/* definitions for proc mount option pidonly */
+enum {
+	PROC_PIDONLY_OFF = 0,
+	PROC_PIDONLY_ON  = 1,
+};
+
 struct proc_fs_info {
 	struct list_head pidns_entry;    /* Node in procfs_mounts of a pidns */
 	struct super_block *m_super;
@@ -28,6 +34,7 @@ struct proc_fs_info {
 	struct dentry *proc_thread_self; /* For /proc/thread-self */
 	kgid_t pid_gid;
 	int hide_pid;
+	int pidonly;
 };
 
 #ifdef CONFIG_PROC_FS
@@ -39,6 +46,11 @@ static inline struct proc_fs_info *proc_sb_info(struct super_block *sb)
 	return sb->s_fs_info;
 }
 
+static inline void proc_fs_set_pidonly(struct proc_fs_info *fs_info, int value)
+{
+	fs_info->pidonly = value;
+}
+
 static inline void proc_fs_set_hide_pid(struct proc_fs_info *fs_info, int hide_pid)
 {
 	fs_info->hide_pid = hide_pid;
@@ -49,6 +61,11 @@ static inline void proc_fs_set_pid_gid(struct proc_fs_info *fs_info, kgid_t gid)
 	fs_info->pid_gid = gid;
 }
 
+static inline int proc_fs_pidonly(struct proc_fs_info *fs_info)
+{
+	return fs_info->pidonly;
+}
+
 static inline int proc_fs_hide_pid(struct proc_fs_info *fs_info)
 {
 	return fs_info->hide_pid;
@@ -134,6 +151,10 @@ static inline struct proc_fs_info *proc_sb_info(struct super_block *sb)
 	return NULL;
 }
 
+static inline void proc_fs_set_pidonly(struct proc_fs_info *fs_info, int value)
+{
+}
+
 static inline void proc_fs_set_hide_pid(struct proc_fs_info *fs_info, int hide_pid)
 {
 }
@@ -142,6 +163,11 @@ static inline void proc_fs_set_pid_gid(struct proc_info_fs *fs_info, kgid_t gid)
 {
 }
 
+static inline void proc_fs_pidonly(struct proc_fs_info *fs_info)
+{
+	return PROC_PIDONLY_OFF;
+}
+
 static inline int proc_fs_hide_pid(struct proc_fs_info *fs_info)
 {
 	return 0;
-- 
2.24.1


^ permalink raw reply related

* [PATCH v6 06/10] proc: support mounting procfs instances inside same pid namespace
From: Alexey Gladkov @ 2019-12-25 12:51 UTC (permalink / raw)
  To: LKML, Kernel Hardening, Linux API, Linux FS Devel,
	Linux Security Module
  Cc: Akinobu Mita, Alexander Viro, Alexey Dobriyan, Alexey Gladkov,
	Andrew Morton, Andy Lutomirski, Daniel Micay, Djalal Harouni,
	Dmitry V . Levin, Eric W . Biederman, Greg Kroah-Hartman,
	Ingo Molnar, J . Bruce Fields, Jeff Layton, Jonathan Corbet,
	Kees Cook, Linus Torvalds, Oleg Nesterov, Solar Designer,
	Stephen Rothwell
In-Reply-To: <20191225125151.1950142-1-gladkov.alexey@gmail.com>

This patch allows to have multiple procfs instances inside the
same pid namespace. The aim here is lightweight sandboxes, and to allow
that we have to modernize procfs internals.

1) The main aim of this work is to have on embedded systems one
supervisor for apps. Right now we have some lightweight sandbox support,
however if we create pid namespacess we have to manages all the
processes inside too, where our goal is to be able to run a bunch of
apps each one inside its own mount namespace without being able to
notice each other. We only want to use mount namespaces, and we want
procfs to behave more like a real mount point.

2) Linux Security Modules have multiple ptrace paths inside some
subsystems, however inside procfs, the implementation does not guarantee
that the ptrace() check which triggers the security_ptrace_check() hook
will always run. We have the 'hidepid' mount option that can be used to
force the ptrace_may_access() check inside has_pid_permissions() to run.
The problem is that 'hidepid' is per pid namespace and not attached to
the mount point, any remount or modification of 'hidepid' will propagate
to all other procfs mounts.

This also does not allow to support Yama LSM easily in desktop and user
sessions. Yama ptrace scope which restricts ptrace and some other
syscalls to be allowed only on inferiors, can be updated to have a
per-task context, where the context will be inherited during fork(),
clone() and preserved across execve(). If we support multiple private
procfs instances, then we may force the ptrace_may_access() on
/proc/<pids>/ to always run inside that new procfs instances. This will
allow to specifiy on user sessions if we should populate procfs with
pids that the user can ptrace or not.

By using Yama ptrace scope, some restricted users will only be able to see
inferiors inside /proc, they won't even be able to see their other
processes. Some software like Chromium, Firefox's crash handler, Wine
and others are already using Yama to restrict which processes can be
ptracable. With this change this will give the possibility to restrict
/proc/<pids>/ but more importantly this will give desktop users a
generic and usuable way to specifiy which users should see all processes
and which users can not.

Side notes:
* This covers the lack of seccomp where it is not able to parse
arguments, it is easy to install a seccomp filter on direct syscalls
that operate on pids, however /proc/<pid>/ is a Linux ABI using
filesystem syscalls. With this change LSMs should be able to analyze
open/read/write/close...

In the new patchset version I removed the 'newinstance' option
as Eric W. Biederman suggested.

Cc: Kees Cook <keescook@chromium.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Alexey Gladkov <gladkov.alexey@gmail.com>
---
 fs/proc/root.c | 41 ++++++++++++++++++-----------------------
 1 file changed, 18 insertions(+), 23 deletions(-)

diff --git a/fs/proc/root.c b/fs/proc/root.c
index efd76c004e86..5d5cba4c899b 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -82,7 +82,7 @@ static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
 	return 0;
 }
 
-static void proc_apply_options(struct super_block *s,
+static void proc_apply_options(struct proc_fs_info *fs_info,
 			       struct fs_context *fc,
 			       struct pid_namespace *pid_ns,
 			       struct user_namespace *user_ns)
@@ -90,15 +90,17 @@ static void proc_apply_options(struct super_block *s,
 	struct proc_fs_context *ctx = fc->fs_private;
 
 	if (pid_ns->proc_mnt) {
-		struct proc_fs_info *fs_info = proc_sb_info(pid_ns->proc_mnt->mnt_sb);
-		proc_fs_set_pid_gid(ctx->fs_info, proc_fs_pid_gid(fs_info));
-		proc_fs_set_hide_pid(ctx->fs_info, proc_fs_hide_pid(fs_info));
+		struct proc_fs_info *pidns_fs_info = proc_sb_info(pid_ns->proc_mnt->mnt_sb);
+
+		proc_fs_set_pid_gid(fs_info, proc_fs_pid_gid(pidns_fs_info));
+		proc_fs_set_hide_pid(fs_info, proc_fs_hide_pid(pidns_fs_info));
 	}
 
 	if (ctx->mask & (1 << Opt_gid))
-		proc_fs_set_pid_gid(ctx->fs_info, make_kgid(user_ns, ctx->gid));
+		proc_fs_set_pid_gid(fs_info, make_kgid(user_ns, ctx->gid));
+
 	if (ctx->mask & (1 << Opt_hidepid))
-		proc_fs_set_hide_pid(ctx->fs_info, ctx->hidepid);
+		proc_fs_set_hide_pid(fs_info, ctx->hidepid);
 }
 
 static int proc_fill_super(struct super_block *s, struct fs_context *fc)
@@ -108,7 +110,7 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc)
 	struct inode *root_inode;
 	int ret;
 
-	proc_apply_options(s, fc, pid_ns, current_user_ns());
+	proc_apply_options(ctx->fs_info, fc, pid_ns, current_user_ns());
 
 	/* User space would break if executables or devices appear on proc */
 	s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV;
@@ -118,6 +120,7 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc)
 	s->s_magic = PROC_SUPER_MAGIC;
 	s->s_op = &proc_sops;
 	s->s_time_gran = 1;
+	s->s_fs_info = ctx->fs_info;
 
 	/*
 	 * procfs isn't actually a stacking filesystem; however, there is
@@ -157,15 +160,13 @@ static int proc_reconfigure(struct fs_context *fc)
 
 	sync_filesystem(sb);
 
-	proc_apply_options(sb, fc, pid, current_user_ns());
+	proc_apply_options(fs_info, fc, pid, current_user_ns());
 	return 0;
 }
 
 static int proc_get_tree(struct fs_context *fc)
 {
-	struct proc_fs_context *ctx = fc->fs_private;
-
-	return get_tree_keyed(fc, proc_fill_super, ctx->fs_info);
+	return get_tree_nodev(fc, proc_fill_super);
 }
 
 static void proc_fs_context_free(struct fs_context *fc)
@@ -186,25 +187,19 @@ static const struct fs_context_operations proc_fs_context_ops = {
 static int proc_init_fs_context(struct fs_context *fc)
 {
 	struct proc_fs_context *ctx;
-	struct pid_namespace *pid_ns;
 
 	ctx = kzalloc(sizeof(struct proc_fs_context), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
-	pid_ns = get_pid_ns(task_active_pid_ns(current));
-
-	if (!pid_ns->proc_mnt) {
-		ctx->fs_info = kzalloc(sizeof(struct proc_fs_info), GFP_KERNEL);
-		if (!ctx->fs_info) {
-			kfree(ctx);
-			return -ENOMEM;
-		}
-		ctx->fs_info->pid_ns = pid_ns;
-	} else {
-		ctx->fs_info = proc_sb_info(pid_ns->proc_mnt->mnt_sb);
+	ctx->fs_info = kzalloc(sizeof(struct proc_fs_info), GFP_KERNEL);
+	if (!ctx->fs_info) {
+		kfree(ctx);
+		return -ENOMEM;
 	}
 
+	ctx->fs_info->pid_ns = get_pid_ns(task_active_pid_ns(current));
+
 	put_user_ns(fc->user_ns);
 	fc->user_ns = get_user_ns(ctx->fs_info->pid_ns->user_ns);
 	fc->fs_private = ctx;
-- 
2.24.1


^ permalink raw reply related

* [PATCH v6 08/10] proc: instantiate only pids that we can ptrace on 'hidepid=3' mount option
From: Alexey Gladkov @ 2019-12-25 12:51 UTC (permalink / raw)
  To: LKML, Kernel Hardening, Linux API, Linux FS Devel,
	Linux Security Module
  Cc: Akinobu Mita, Alexander Viro, Alexey Dobriyan, Alexey Gladkov,
	Andrew Morton, Andy Lutomirski, Daniel Micay, Djalal Harouni,
	Dmitry V . Levin, Eric W . Biederman, Greg Kroah-Hartman,
	Ingo Molnar, J . Bruce Fields, Jeff Layton, Jonathan Corbet,
	Kees Cook, Linus Torvalds, Oleg Nesterov, Solar Designer,
	Stephen Rothwell
In-Reply-To: <20191225125151.1950142-1-gladkov.alexey@gmail.com>

If "hidepid=3" mount option is set then do not instantiate pids that
we can not ptrace. "hidepid=3" means that procfs should only contain
pids that the caller can ptrace.

Cc: Kees Cook <keescook@chromium.org>
Cc: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Djalal Harouni <tixxdz@gmail.com>
Signed-off-by: Alexey Gladkov <gladkov.alexey@gmail.com>
---
 fs/proc/base.c          | 15 +++++++++++++++
 fs/proc/root.c          |  4 ++--
 include/linux/proc_fs.h |  1 +
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index f4f1bcb28603..b55d205a7f6e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -699,6 +699,14 @@ static bool has_pid_permissions(struct proc_fs_info *fs_info,
 				 struct task_struct *task,
 				 int hide_pid_min)
 {
+	/*
+	 * If 'hidpid' mount option is set force a ptrace check,
+	 * we indicate that we are using a filesystem syscall
+	 * by passing PTRACE_MODE_READ_FSCREDS
+	 */
+	if (proc_fs_hide_pid(fs_info) == HIDEPID_NOT_PTRACABLE)
+		return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
+
 	if (proc_fs_hide_pid(fs_info) < hide_pid_min)
 		return true;
 	if (in_group_p(proc_fs_pid_gid(fs_info)))
@@ -3274,7 +3282,14 @@ struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags)
 	if (!task)
 		goto out;
 
+	/* Limit procfs to only ptracable tasks */
+	if (proc_fs_hide_pid(fs_info) == HIDEPID_NOT_PTRACABLE) {
+		if (!has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS))
+			goto out_put_task;
+	}
+
 	result = proc_pid_instantiate(dentry, task, NULL);
+out_put_task:
 	put_task_struct(task);
 out:
 	return result;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 3bb8df360cf7..3c7b29140293 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -70,8 +70,8 @@ static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
 	case Opt_hidepid:
 		ctx->hidepid = result.uint_32;
 		if (ctx->hidepid < HIDEPID_OFF ||
-		    ctx->hidepid > HIDEPID_INVISIBLE)
-			return invalf(fc, "proc: hidepid value must be between 0 and 2.\n");
+		    ctx->hidepid > HIDEPID_NOT_PTRACABLE)
+			return invalf(fc, "proc: hidepid value must be between 0 and 3.\n");
 		break;
 
 	default:
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index e349fcafd729..83c87ea65505 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -17,6 +17,7 @@ enum {
 	HIDEPID_OFF	  = 0,
 	HIDEPID_NO_ACCESS = 1,
 	HIDEPID_INVISIBLE = 2,
+	HIDEPID_NOT_PTRACABLE = 3, /* Limit pids to only ptracable pids */
 };
 
 struct proc_fs_info {
-- 
2.24.1


^ permalink raw reply related

* [PATCH v6 10/10] docs: proc: add documentation for "hidepid=3" and "pidonly" options and new mount behavior
From: Alexey Gladkov @ 2019-12-25 12:51 UTC (permalink / raw)
  To: LKML, Kernel Hardening, Linux API, Linux FS Devel,
	Linux Security Module
  Cc: Akinobu Mita, Alexander Viro, Alexey Dobriyan, Alexey Gladkov,
	Andrew Morton, Andy Lutomirski, Daniel Micay, Djalal Harouni,
	Dmitry V . Levin, Eric W . Biederman, Greg Kroah-Hartman,
	Ingo Molnar, J . Bruce Fields, Jeff Layton, Jonathan Corbet,
	Kees Cook, Linus Torvalds, Oleg Nesterov, Solar Designer,
	Stephen Rothwell
In-Reply-To: <20191225125151.1950142-1-gladkov.alexey@gmail.com>

Signed-off-by: Alexey Gladkov <gladkov.alexey@gmail.com>
---
 Documentation/filesystems/proc.txt | 53 ++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 99ca040e3f90..6a62ae20a181 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -50,6 +50,8 @@ Table of Contents
   4	Configuring procfs
   4.1	Mount options
 
+  5	Filesystem behavior
+
 ------------------------------------------------------------------------------
 Preface
 ------------------------------------------------------------------------------
@@ -2021,6 +2023,7 @@ The following mount options are supported:
 
 	hidepid=	Set /proc/<pid>/ access mode.
 	gid=		Set the group authorized to learn processes information.
+	pidonly=	Show only task related subset of procfs.
 
 hidepid=0 means classic mode - everybody may access all /proc/<pid>/ directories
 (default).
@@ -2042,6 +2045,56 @@ information about running processes, whether some daemon runs with elevated
 privileges, whether other user runs some sensitive program, whether other users
 run any program at all, etc.
 
+hidepid=3 means that procfs should only contain /proc/<pid>/ directories
+that the caller can ptrace.
+
 gid= defines a group authorized to learn processes information otherwise
 prohibited by hidepid=.  If you use some daemon like identd which needs to learn
 information about processes information, just add identd to this group.
+
+The pidonly=1 hides all top level files and directories in the procfs that
+are not related to tasks.
+
+------------------------------------------------------------------------------
+5 Filesystem behavior
+------------------------------------------------------------------------------
+
+Originally, before the advent of pid namepsace, procfs was a global file
+system. It means that there was only one procfs instance in the system.
+
+When pid namespace was added, a separate procfs instance was mounted in
+each pid namespace. So, procfs mount options are global among all
+mountpoints within the same namespace.
+
+# grep ^proc /proc/mounts
+proc /proc proc rw,relatime,hidepid=2 0 0
+
+# strace -e mount mount -o hidepid=1 -t proc proc /tmp/proc
+mount("proc", "/tmp/proc", "proc", 0, "hidepid=1") = 0
++++ exited with 0 +++
+
+# grep ^proc /proc/mounts
+proc /proc proc rw,relatime,hidepid=2 0 0
+proc /tmp/proc proc rw,relatime,hidepid=2 0 0
+
+and only after remounting procfs mount options will change at all
+mountpoints.
+
+# mount -o remount,hidepid=1 -t proc proc /tmp/proc
+
+# grep ^proc /proc/mounts
+proc /proc proc rw,relatime,hidepid=1 0 0
+proc /tmp/proc proc rw,relatime,hidepid=1 0 0
+
+This behavior is different from the behavior of other filesystems.
+
+The new procfs behavior is more like other filesystems. Each procfs mount
+creates a new procfs instance. Mount options affect own procfs instance.
+It means that it became possible to have several procfs instances
+displaying tasks with different filtering options in one pid namespace.
+
+# mount -o hidepid=2 -t proc proc /proc
+# mount -o hidepid=1 -t proc proc /tmp/proc
+# grep ^proc /proc/mounts
+proc /proc proc rw,relatime,hidepid=2 0 0
+proc /tmp/proc proc rw,relatime,hidepid=1 0 0
-- 
2.24.1


^ permalink raw reply related

* Re: [PATCH v13 02/25] LSM: Create and manage the lsmblob data structure.
From: Mickaël Salaün @ 2019-12-25 20:34 UTC (permalink / raw)
  To: Casey Schaufler, casey.schaufler, jmorris, linux-security-module,
	selinux
  Cc: keescook, john.johansen, penguin-kernel, paul, sds
In-Reply-To: <20191224235939.7483-3-casey@schaufler-ca.com>


On 25/12/2019 00:59, Casey Schaufler wrote:
> When more than one security module is exporting data to
> audit and networking sub-systems a single 32 bit integer
> is no longer sufficient to represent the data. Add a
> structure to be used instead.
> 
> The lsmblob structure is currently an array of
> u32 "secids". There is an entry for each of the
> security modules built into the system that would
> use secids if active. The system assigns the module
> a "slot" when it registers hooks. If modules are
> compiled in but not registered there will be unused
> slots.
> 
> A new lsm_id structure, which contains the name
> of the LSM and its slot number, is created. There
> is an instance for each LSM, which assigns the name
> and passes it to the infrastructure to set the slot.
> 
> Reviewed-by: John Johansen <john.johansen@canonical.com>
> Acked-by: Stephen Smalley <sds@tycho.nsa.gov>
> Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
> ---
>   include/linux/lsm_hooks.h    | 12 ++++++--
>   include/linux/security.h     | 58 ++++++++++++++++++++++++++++++++++++
>   security/apparmor/lsm.c      |  7 ++++-
>   security/commoncap.c         |  7 ++++-
>   security/loadpin/loadpin.c   |  8 ++++-
>   security/lockdown/lockdown.c |  7 ++++-
>   security/safesetid/lsm.c     |  8 ++++-
>   security/security.c          | 28 +++++++++++++----
>   security/selinux/hooks.c     |  8 ++++-
>   security/smack/smack_lsm.c   |  7 ++++-
>   security/tomoyo/tomoyo.c     |  8 ++++-
>   security/yama/yama_lsm.c     |  7 ++++-
>   12 files changed, 148 insertions(+), 17 deletions(-)
> 
> diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
> index c2b1af29a8f0..7eb808cde051 100644
> --- a/include/linux/lsm_hooks.h
> +++ b/include/linux/lsm_hooks.h
> @@ -2077,6 +2077,14 @@ struct security_hook_heads {
>   #endif
>   } __randomize_layout;
>   
> +/*
> + * Information that identifies a security module.
> + */
> +struct lsm_id {
> +	const char	*lsm;	/* Name of the LSM */
> +	int		slot;	/* Slot in lsmblob if one is allocated */
> +};
> +
>   /*
>    * Security module hook list structure.
>    * For use with generic list macros for common operations.
> @@ -2085,7 +2093,7 @@ struct security_hook_list {
>   	struct hlist_node		list;
>   	struct hlist_head		*head;
>   	union security_list_options	hook;
> -	char				*lsm;
> +	struct lsm_id			*lsmid;
>   } __randomize_layout;
>   
>   /*
> @@ -2114,7 +2122,7 @@ extern struct security_hook_heads security_hook_heads;
>   extern char *lsm_names;
>   
>   extern void security_add_hooks(struct security_hook_list *hooks, int count,
> -				char *lsm);
> +			       struct lsm_id *lsmid);
>   
>   #define LSM_FLAG_LEGACY_MAJOR	BIT(0)
>   #define LSM_FLAG_EXCLUSIVE	BIT(1)
> diff --git a/include/linux/security.h b/include/linux/security.h
> index 3e8d4bacd59d..b74dc70088ca 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -128,6 +128,64 @@ enum lockdown_reason {
>   	LOCKDOWN_CONFIDENTIALITY_MAX,
>   };
>   
> +/*
> + * Data exported by the security modules
> + *
> + * Any LSM that provides secid or secctx based hooks must be included.
> + */
> +#define LSMBLOB_ENTRIES ( \
> +	(IS_ENABLED(CONFIG_SECURITY_SELINUX) ? 1 : 0) + \
> +	(IS_ENABLED(CONFIG_SECURITY_SMACK) ? 1 : 0) + \
> +	(IS_ENABLED(CONFIG_SECURITY_APPARMOR) ? 1 : 0))
> +
> +struct lsmblob {
> +	u32     secid[LSMBLOB_ENTRIES];
> +};
> +
> +#define LSMBLOB_INVALID		-1	/* Not a valid LSM slot number */
> +#define LSMBLOB_NEEDED		-2	/* Slot requested on initialization */
> +#define LSMBLOB_NOT_NEEDED	-3	/* Slot not requested */
> +
> +/**
> + * lsmblob_init - initialize an lsmblob structure.
> + * @blob: Pointer to the data to initialize
> + * @secid: The initial secid value
> + *
> + * Set all secid for all modules to the specified value.
> + */
> +static inline void lsmblob_init(struct lsmblob *blob, u32 secid)
> +{
> +	int i;
> +
> +	for (i = 0; i < LSMBLOB_ENTRIES; i++)
> +		blob->secid[i] = secid;
> +}
> +
> +/**
> + * lsmblob_is_set - report if there is an value in the lsmblob
> + * @blob: Pointer to the exported LSM data
> + *
> + * Returns true if there is a secid set, false otherwise
> + */
> +static inline bool lsmblob_is_set(struct lsmblob *blob)
> +{
> +	struct lsmblob empty = {};
> +
> +	return !!memcmp(blob, &empty, sizeof(*blob));
> +}
> +
> +/**
> + * lsmblob_equal - report if the two lsmblob's are equal
> + * @bloba: Pointer to one LSM data
> + * @blobb: Pointer to the other LSM data
> + *
> + * Returns true if all entries in the two are equal, false otherwise
> + */
> +static inline bool lsmblob_equal(struct lsmblob *bloba, struct lsmblob *blobb)
> +{
> +	return !memcmp(bloba, blobb, sizeof(*bloba));
> +}
> +
>   /* These functions are in security/commoncap.c */
>   extern int cap_capable(const struct cred *cred, struct user_namespace *ns,
>   		       int cap, unsigned int opts);
> diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
> index 61b24f4eb355..146d75e5e021 100644
> --- a/security/apparmor/lsm.c
> +++ b/security/apparmor/lsm.c
> @@ -1147,6 +1147,11 @@ struct lsm_blob_sizes apparmor_blob_sizes __lsm_ro_after_init = {
>   	.lbs_sock = sizeof(struct aa_sk_ctx),
>   };
>   
> +static struct lsm_id apparmor_lsmid __lsm_ro_after_init = {
> +	.lsm  = "apparmor",
> +	.slot = LSMBLOB_NEEDED
> +};
> +
>   static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
>   	LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check),
>   	LSM_HOOK_INIT(ptrace_traceme, apparmor_ptrace_traceme),
> @@ -1847,7 +1852,7 @@ static int __init apparmor_init(void)
>   		goto buffers_out;
>   	}
>   	security_add_hooks(apparmor_hooks, ARRAY_SIZE(apparmor_hooks),
> -				"apparmor");
> +				&apparmor_lsmid);
>   
>   	/* Report that AppArmor successfully initialized */
>   	apparmor_initialized = 1;
> diff --git a/security/commoncap.c b/security/commoncap.c
> index f4ee0ae106b2..9dcfd2a0e891 100644
> --- a/security/commoncap.c
> +++ b/security/commoncap.c
> @@ -1339,6 +1339,11 @@ int cap_mmap_file(struct file *file, unsigned long reqprot,
>   
>   #ifdef CONFIG_SECURITY
>   
> +static struct lsm_id capability_lsmid __lsm_ro_after_init = {
> +	.lsm  = "capability",
> +	.slot = LSMBLOB_NOT_NEEDED
> +};
> +
>   static struct security_hook_list capability_hooks[] __lsm_ro_after_init = {
>   	LSM_HOOK_INIT(capable, cap_capable),
>   	LSM_HOOK_INIT(settime, cap_settime),
> @@ -1363,7 +1368,7 @@ static struct security_hook_list capability_hooks[] __lsm_ro_after_init = {
>   static int __init capability_init(void)
>   {
>   	security_add_hooks(capability_hooks, ARRAY_SIZE(capability_hooks),
> -				"capability");
> +			   &capability_lsmid);
>   	return 0;
>   }
>   
> diff --git a/security/loadpin/loadpin.c b/security/loadpin/loadpin.c
> index ee5cb944f4ad..86317e78899f 100644
> --- a/security/loadpin/loadpin.c
> +++ b/security/loadpin/loadpin.c
> @@ -180,6 +180,11 @@ static int loadpin_load_data(enum kernel_load_data_id id)
>   	return loadpin_read_file(NULL, (enum kernel_read_file_id) id);
>   }
>   
> +static struct lsm_id loadpin_lsmid __lsm_ro_after_init = {
> +	.lsm  = "loadpin",
> +	.slot = LSMBLOB_NOT_NEEDED
> +};
> +
>   static struct security_hook_list loadpin_hooks[] __lsm_ro_after_init = {
>   	LSM_HOOK_INIT(sb_free_security, loadpin_sb_free_security),
>   	LSM_HOOK_INIT(kernel_read_file, loadpin_read_file),
> @@ -227,7 +232,8 @@ static int __init loadpin_init(void)
>   	pr_info("ready to pin (currently %senforcing)\n",
>   		enforce ? "" : "not ");
>   	parse_exclude();
> -	security_add_hooks(loadpin_hooks, ARRAY_SIZE(loadpin_hooks), "loadpin");
> +	security_add_hooks(loadpin_hooks, ARRAY_SIZE(loadpin_hooks),
> +			   &loadpin_lsmid);
>   	return 0;
>   }
>   
> diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
> index b2f87015d6e9..91662325e450 100644
> --- a/security/lockdown/lockdown.c
> +++ b/security/lockdown/lockdown.c
> @@ -102,6 +102,11 @@ static struct security_hook_list lockdown_hooks[] __lsm_ro_after_init = {
>   	LSM_HOOK_INIT(locked_down, lockdown_is_locked_down),
>   };
>   
> +static struct lsm_id lockdown_lsmid __lsm_ro_after_init = {
> +	.lsm = "landlock",

Working on it but not there yet ;)

^ permalink raw reply

* Re: [PATCH v6 05/10] proc: add helpers to set and get proc hidepid and gid mount options
From: kbuild test robot @ 2019-12-25 23:06 UTC (permalink / raw)
  To: Alexey Gladkov
  Cc: kbuild-all, LKML, Kernel Hardening, Linux API, Linux FS Devel,
	Linux Security Module, Akinobu Mita, Alexander Viro,
	Alexey Dobriyan, Alexey Gladkov, Andrew Morton, Andy Lutomirski,
	Daniel Micay, Djalal Harouni, Dmitry V . Levin,
	Eric W . Biederman, Greg Kroah-Hartman, Ingo Molnar,
	J . Bruce Fields, Jeff Layton, Jonathan Corbet, Kees Cook,
	Linus Torvalds, Oleg Nesterov, Solar Designer, Stephen Rothwell
In-Reply-To: <20191225125151.1950142-6-gladkov.alexey@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 7592 bytes --]

Hi Alexey,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linux/master]
[also build test ERROR on lwn/docs-next linus/master v5.5-rc3 next-20191220]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Alexey-Gladkov/proc-modernize-proc-to-support-multiple-private-instances/20191226-060818
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 1522d9da40bdfe502c91163e6d769332897201fa
config: i386-tinyconfig (attached as .config)
compiler: gcc-7 (Debian 7.5.0-3) 7.5.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All error/warnings (new ones prefixed by >>):

   ld: init/do_mounts.o: in function `proc_fs_pid_gid':
>> do_mounts.c:(.text+0x5): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: arch/x86/kernel/setup.o: in function `proc_fs_pid_gid':
   setup.c:(.text+0x3): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: arch/x86/kernel/e820.o: in function `proc_fs_pid_gid':
   e820.c:(.text+0xb1): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: arch/x86/kernel/fpu/xstate.o: in function `proc_fs_pid_gid':
   xstate.c:(.text+0x36): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: arch/x86/kernel/reboot.o: in function `proc_fs_pid_gid':
   reboot.c:(.text+0x1): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: arch/x86/mm/init_32.o: in function `proc_fs_pid_gid':
   init_32.c:(.text+0x0): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: arch/x86/mm/fault.o: in function `proc_fs_pid_gid':
   fault.c:(.text+0x908): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: arch/x86/mm/ioremap.o: in function `proc_fs_pid_gid':
   ioremap.c:(.text+0x277): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: kernel/fork.o: in function `proc_fs_pid_gid':
   fork.c:(.text+0x539): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: kernel/exec_domain.o: in function `proc_fs_pid_gid':
   exec_domain.c:(.text+0x0): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: kernel/cpu.o: in function `proc_fs_pid_gid':
   cpu.c:(.text+0x104): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: kernel/exit.o: in function `proc_fs_pid_gid':
   exit.c:(.text+0x22c): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: kernel/resource.o: in function `proc_fs_pid_gid':
   resource.c:(.text+0x362): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: kernel/sysctl.o: in function `proc_fs_pid_gid':
   sysctl.c:(.text+0x0): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: kernel/signal.o: in function `proc_fs_pid_gid':
   signal.c:(.text+0x55b): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: kernel/sched/core.o: in function `proc_fs_pid_gid':
   core.c:(.text+0x2e4): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: kernel/sched/loadavg.o: in function `proc_fs_pid_gid':
   loadavg.c:(.text+0x0): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: kernel/sched/clock.o: in function `proc_fs_pid_gid':
   clock.c:(.text+0x0): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: kernel/sched/cputime.o: in function `proc_fs_pid_gid':
   cputime.c:(.text+0x0): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: kernel/sched/idle.o: in function `proc_fs_pid_gid':
   idle.c:(.text+0x2c): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: kernel/sched/fair.o: in function `proc_fs_pid_gid':
   fair.c:(.text+0x8cb): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: kernel/sched/rt.o: in function `proc_fs_pid_gid':
   rt.c:(.text+0x703): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: kernel/sched/deadline.o: in function `proc_fs_pid_gid':
   deadline.c:(.text+0xb02): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: kernel/sched/wait.o: in function `proc_fs_pid_gid':
   wait.c:(.text+0x15c): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: kernel/sched/wait_bit.o: in function `proc_fs_pid_gid':
   wait_bit.c:(.text+0x9d): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: kernel/sched/swait.o: in function `proc_fs_pid_gid':
   swait.c:(.text+0x4): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: kernel/sched/completion.o: in function `proc_fs_pid_gid':
   completion.c:(.text+0x4): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: kernel/time/timer_list.o: in function `proc_fs_pid_gid':
   timer_list.c:(.text+0x12): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: kernel/dma.o: in function `proc_fs_pid_gid':
   dma.c:(.text+0x0): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: mm/vmstat.o: in function `proc_fs_pid_gid':
   vmstat.c:(.text+0x0): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: mm/slab_common.o: in function `proc_fs_pid_gid':
   slab_common.c:(.text+0x0): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: mm/vmalloc.o: in function `proc_fs_pid_gid':
   vmalloc.c:(.text+0x4fd): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: fs/filesystems.o: in function `proc_fs_pid_gid':
   filesystems.c:(.text+0x36): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
   ld: drivers/char/misc.o: in function `proc_fs_pid_gid':
   misc.c:(.text+0xc4): multiple definition of `proc_fs_pid_gid'; init/main.o:main.c:(.text+0x19): first defined here
--
   In file included from init/main.c:18:0:
>> include/linux/proc_fs.h:138:47: warning: 'struct proc_info_fs' declared inside parameter list will not be visible outside of this definition or declaration
    static inline void proc_fs_set_pid_gid(struct proc_info_fs *fs_info, kgid_t gid)
                                                  ^~~~~~~~~~~~

---
0-DAY kernel test infrastructure                 Open Source Technology Center
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 7285 bytes --]

^ permalink raw reply

* Re: [PATCH v6 09/10] proc: add option to mount only a pids subset
From: kbuild test robot @ 2019-12-25 23:08 UTC (permalink / raw)
  To: Alexey Gladkov
  Cc: kbuild-all, LKML, Kernel Hardening, Linux API, Linux FS Devel,
	Linux Security Module, Akinobu Mita, Alexander Viro,
	Alexey Dobriyan, Alexey Gladkov, Andrew Morton, Andy Lutomirski,
	Daniel Micay, Djalal Harouni, Dmitry V . Levin,
	Eric W . Biederman, Greg Kroah-Hartman, Ingo Molnar,
	J . Bruce Fields, Jeff Layton, Jonathan Corbet, Kees Cook,
	Linus Torvalds, Oleg Nesterov, Solar Designer, Stephen Rothwell
In-Reply-To: <20191225125151.1950142-10-gladkov.alexey@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 2340 bytes --]

Hi Alexey,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linux/master]
[cannot apply to lwn/docs-next linus/master v5.5-rc3 next-20191220]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Alexey-Gladkov/proc-modernize-proc-to-support-multiple-private-instances/20191226-060818
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 1522d9da40bdfe502c91163e6d769332897201fa
config: i386-tinyconfig (attached as .config)
compiler: gcc-7 (Debian 7.5.0-3) 7.5.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   In file included from include/linux/sunrpc/stats.h:13:0,
                    from include/linux/sunrpc/clnt.h:22,
                    from include/linux/nfs_fs.h:32,
                    from init/do_mounts.c:23:
   include/linux/proc_fs.h:162:47: warning: 'struct proc_info_fs' declared inside parameter list will not be visible outside of this definition or declaration
    static inline void proc_fs_set_pid_gid(struct proc_info_fs *fs_info, kgid_t gid)
                                                  ^~~~~~~~~~~~
   include/linux/proc_fs.h: In function 'proc_fs_pidonly':
>> include/linux/proc_fs.h:168:9: warning: 'return' with a value, in function returning void
     return PROC_PIDONLY_OFF;
            ^~~~~~~~~~~~~~~~
   include/linux/proc_fs.h:166:20: note: declared here
    static inline void proc_fs_pidonly(struct proc_fs_info *fs_info)
                       ^~~~~~~~~~~~~~~

vim +/return +168 include/linux/proc_fs.h

   161	
 > 162	static inline void proc_fs_set_pid_gid(struct proc_info_fs *fs_info, kgid_t gid)
   163	{
   164	}
   165	
   166	static inline void proc_fs_pidonly(struct proc_fs_info *fs_info)
   167	{
 > 168		return PROC_PIDONLY_OFF;
   169	}
   170	

---
0-DAY kernel test infrastructure                 Open Source Technology Center
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 7285 bytes --]

^ permalink raw reply

* Re: [PATCH] security: apparmor: Fix a possible sleep-in-atomic-context bug in find_attach()
From: Al Viro @ 2019-12-26 22:30 UTC (permalink / raw)
  To: Jia-Ju Bai
  Cc: john.johansen, jmorris, serge, linux-security-module,
	linux-kernel
In-Reply-To: <20191217131220.11613-1-baijiaju1990@gmail.com>

On Tue, Dec 17, 2019 at 09:12:20PM +0800, Jia-Ju Bai wrote:
> The kernel may sleep while holding a RCU lock.
> The function call path (from bottom to top) in Linux 4.19 is:
> 
> security/apparmor/domain.c, 331: 
> 	vfs_getxattr_alloc(GFP_KERNEL) in aa_xattrs_match
> security/apparmor/domain.c, 425: 
> 	aa_xattrs_match in __attach_match
> security/apparmor/domain.c, 485: 
> 	__attach_match in find_attach
> security/apparmor/domain.c, 484:
>     rcu_read_lock in find_attach
> 
> vfs_getxattr_alloc(GFP_KERNEL) can sleep at runtime.
> 
> To fix this possible bug, GFP_KERNEL is replaced with GFP_ATOMIC for
> vfs_getxattr_alloc().
> 
> This bug is found by a static analysis tool STCheck written by myself. 
> 
> Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
> ---
>  security/apparmor/domain.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
> index 9be7ccb8379e..60b54ce57d1f 100644
> --- a/security/apparmor/domain.c
> +++ b/security/apparmor/domain.c
> @@ -325,7 +325,7 @@ static int aa_xattrs_match(const struct linux_binprm *bprm,
>  
>  	for (i = 0; i < profile->xattr_count; i++) {
>  		size = vfs_getxattr_alloc(d, profile->xattrs[i], &value,
> -					  value_size, GFP_KERNEL);
> +					  value_size, GFP_ATOMIC);

<stares>

How can that possibly make any sense?  We are, by the look of it, trying
to read extended attributes of some sort here.  How the hell can that
possibly hope to be non-blocking?

<goes to read>
Yup, vfs_getxattr_alloc() does call xattr ->get() method, which certainly
can cause IO.  GFP_ATOMIC affects only the allocation done in
vfs_getxattr_alloc() itself, ->get() call doesn't even see it.

AFAICS, that "fix" is pure cargo-culting; if that code *can* be called in
non-blocking context, we are fucked, GFP_ATOMIC or no GFP_ATOMIC.

Let's look at your call chain...  find_attach() calls __attach_match()
under rcu_read_lock().  Yes, it does, and by the look of the function
itself it does expect to be called thus.

Call of aa_xattrs_match() in there.  Present, no obvious dropping/regaining
rcu_read_lock() around it.  Conditional upon perm & MAY_EXEC, but that
doesn't seem to be provably excludable by the arguments of this particular
call.  And yes, aa_xattrs_match() is very obviously blocking.

So you've caught a real bug, but the "fix" doesn't fix anything whatsoever;
reading xattrs *does* block, no matter what.

By the look at git blame, we have commit 8e51f9087f4024d20f70f4d9831e1f45d8088331
Author: Matthew Garrett <mjg59@google.com>
Date:   Thu Feb 8 12:37:19 2018 -0800

    apparmor: Add support for attaching profiles via xattr, presence and value
    
    Make it possible to tie Apparmor profiles to the presence of one or more
    extended attributes, and optionally their values. An example usecase for
    this is to automatically transition to a more privileged Apparmor profile
    if an executable has a valid IMA signature, which can then be appraised
    by the IMA subsystem.
    
    Signed-off-by: Matthew Garrett <mjg59@google.com>
    Signed-off-by: John Johansen <john.johansen@canonical.com>

to thank for that.  And by the time of that commit the call chain already
existed, complete with rcu_read_lock().

AFAICS, it really is buggered by design - you can't read xattrs in such
context.  Again, GFP_ATOMIC is useless here - the problem is not in
allocation, it's IO, possibly over network, etc. 

Morever, *anything* that passes GFP_ATOMIC to vfs_getxattr_alloc() is
deeply suspect - it's almost certainly cargo-culting in attempt to
do inherently blocking operation in non-blocking context.  <greps>
No GFP_ATOMIC (thankfully), but there's a bunch of GFP_NOFS and
I really wonder if _those_ make any sense here...

^ permalink raw reply

* [GIT PULL] tomoyo fixes for 5.5
From: Tetsuo Handa @ 2019-12-30 11:31 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-security-module

Hello Linus,

This is my first time for sending pull requests. It seems that most people
create a tag signed with GPG key but a few people send pull requests on
master branch without signing with GPG key. Did I follow necessary steps?
---
The following changes since commit 6794862a16ef41f753abd75c03a152836e4c8028:

  Merge tag 'for-5.5-rc1-kconfig-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux (2019-12-09 12:14:31 -0800)

are available in the git repository at:

  git://git.osdn.net/gitroot/tomoyo/tomoyo-test1.git master

for you to fetch changes up to 6bd5ce6089b561f5392460bfb654dea89356ab1b:

  tomoyo: Suppress RCU warning at list_for_each_entry_rcu(). (2019-12-16 23:02:27 +0900)

----------------------------------------------------------------
Tetsuo Handa (2):
      tomoyo: Don't use nifty names on sockets.
      tomoyo: Suppress RCU warning at list_for_each_entry_rcu().

 security/tomoyo/common.c   |  9 ++++++---
 security/tomoyo/domain.c   | 15 ++++++++++-----
 security/tomoyo/group.c    |  9 ++++++---
 security/tomoyo/realpath.c | 32 +-------------------------------
 security/tomoyo/util.c     |  6 ++++--
 5 files changed, 27 insertions(+), 44 deletions(-)

^ permalink raw reply

* Re: [PATCH bpf-next v1 00/13] MAC and Audit policy using eBPF (KRSI)
From: KP Singh @ 2019-12-30 14:58 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: linux-kernel, bpf, linux-security-module, Alexei Starovoitov,
	Daniel Borkmann, James Morris, Kees Cook, Thomas Garnier,
	Michael Halcrow, Paul Turner, Brendan Gregg, Jann Horn,
	Matthew Garrett, Christian Brauner, Mickaël Salaün,
	Florent Revest, Brendan Jackman, Martin KaFai Lau, Song Liu,
	Yonghong Song, Serge E. Hallyn, Mauro Carvalho Chehab,
	David S. Miller, Greg Kroah-Hartman, Nicolas Ferre,
	Stanislav Fomichev, Quentin Monnet, Andrey Ignatov, Joe Stringer
In-Reply-To: <20191222012722.gdqhppxpfmqfqbld@ast-mbp.dhcp.thefacebook.com>

On 21-Dez 17:27, Alexei Starovoitov wrote:
> On Fri, Dec 20, 2019 at 04:41:55PM +0100, KP Singh wrote:
> > // Declare the eBPF program mprotect_audit which attaches to
> > // to the file_mprotect LSM hook and accepts three arguments.
> > BPF_TRACE_3("lsm/file_mprotect", mprotect_audit,
> > 	    struct vm_area_struct *, vma,
> > 	    unsigned long, reqprot, unsigned long, prot
> > {
> > 	unsigned long vm_start = _(vma->vm_start);
> > 	return 0;
> > }
> 

Hi Alexei,

Thanks for the feedback. This is really helpful!

> I think the only sore point of the patchset is:
> security/bpf/include/hooks.h   | 1015 ++++++++++++++++++++++++++++++++
> With bpf trampoline this type of 'kernel types -> bpf types' converters
> are no longer necessary. Please take a look at tcp-congestion-control patchset:
> https://patchwork.ozlabs.org/cover/1214417/
> Instead of doing similar thing (like your patch 1 plus patch 6) it's using
> trampoline to provide bpf based congestion control callbacks into tcp stack.
> The same trampoline-based mechanism can be reused by bpf_lsm.
> Then all manual work of doing BPF_LSM_HOOK(...) for every hook won't be
> necessary. It will also prove the point that attaching BPF to raw LSM hooks
> doesn't freeze them into stable abi.

Really cool!

I looked into how BPF trampolines are being used in tracing and the
new STRUCT_OPS patchset and was able protoype
(https://github.com/sinkap/linux-krsi/tree/patch/v1/trampoline_prototype,
not ready for review yet) which:

* Gets rid of security/bpf/include/hooks.h and all of the static
  macro magic essentially making the LSM ~truly instrumentable~ at
  runtime.
* Gets rid of the generation of any new types as we already have
  all the BTF information in the kernel in the following two types:

struct security_hook_heads {
        .
        .
        struct hlist_head file_mprotect;   <- Append the callback at this offset
        .
        .
};

and

union security_list_options {
	int (*file_mprotect)(struct vm_area_struct *vma, unsigned long reqprot,
				unsigned long prot);
};

Which is the same type as the typedef that's currently being generated
, i.e. lsm_btf_file_mprotect

In the current prototype, libbpf converts the name of the hook into an
offset into the security_hook_heads and the verifier does the
following when a program is loaded:

* Verifies the offset and the type at the offset (struct hlist_head).
* Resolves the func_proto (by looking up the type in
  security_list_options) and updates prog->aux with the name and
  func_proto which are then verified similar to raw_tp programs with
  btf_ctx_access.

On attachment:

* A trampoline is created and appended to the security_hook_heads
  for the BPF LSM.
* An anonymous FD is returned and the attachment is conditional on the
  references to FD (as suggested and similar to fentry/fexit tracing
  programs).

This implies that the BPF programs are "the LSM hook" as opposed to
being executed inside a statically defined hook body which requires
mutable LSM hooks for which I was able to re-use some of ideas in
Sargun's patch:

https://lore.kernel.org/lkml/20180408065916.GA2832@ircssh-2.c.rugged-nimbus-611.internal/

to maintain a separate security_hook_heads struct for dynamically
added LSM hooks by the BPF LSM which are executed after all the
statically defined hooks.

> Longer program names are supplied via btf's func_info.
> It feels that:
> cat /sys/kernel/security/bpf/process_execution
> env_dumper__v2
> is reinventing the wheel. bpftool is the main introspection tool.
> It can print progs attached to perf, cgroup, networking. I think it's better to
> stay consistent and do the same with bpf-lsm.

I agree, based on the new feedback, I don't think we need securityFS
attachment points anymore. I was able to get rid of it completely.

> 
> Another issue is in proposed attaching method:
> hook_fd = open("/sys/kernel/security/bpf/process_execution");
> sys_bpf(attach, prog_fd, hook_fd);
> With bpf tracing we moved to FD-based attaching, because permanent attaching is
> problematic in production. We're going to provide FD-based api to attach to
> networking as well, because xdp/tc/cgroup prog attaching suffers from the same
> production issues. Mainly with permanent attaching there is no ownership of
> attachment. Everything is global and permanent. It's not clear what
> process/script suppose to detach/cleanup. I suggest bpf-lsm use FD-based
> attaching from the beginning. Take a look at raw_tp/tp_btf/fentry/fexit style
> of attaching. All of them return FD which represents what libbpf calls
> 'bpf_link' concept. Once refcnt of that FD goes to zero that link (attachment)
> is destroyed and program is detached _by the kernel_. To make such links
> permanent the application can pin them in bpffs. The pinning patches haven't
> landed yet, but the concept of the link is quite powerful and much more
> production friendly than permanent attaching.

I like this. This also means we don't immediately need the handling of
duplicate names so I dropped that bit of the patch as well and updated
the attachment to use this mechanism.

> bpf-lsm will still be able to attach multiple progs to the same hook and
> see what is attached via bpftool.
> 
> The rest looks good. Thank you for working on it.

There are some choices we need to make here from an API perspective:

* Should we "repurpose" attr->attach_btf_id and use it as an offset
  into security_hook_heads or add a new attribute
  (e.g lsm_hook_offset) for the offset or use name of the LSM hook
  (e.g. lsm_hook_name).
* Since we don't have the files in securityFS, the attachment does not
  have a target_fd. Should we add a new type of BPF command?
  e.g. LSM_HOOK_OPEN?

I will clean up the prototype, incorporate some of the other feedback
received, and send a v2.

Wishing everyone a very Happy New Year!

- KP


^ permalink raw reply

* Re: [PATCH bpf-next v1 00/13] MAC and Audit policy using eBPF (KRSI)
From: KP Singh @ 2019-12-30 15:04 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: KP Singh, open list, bpf, linux-security-module,
	Alexei Starovoitov, Daniel Borkmann, James Morris, Kees Cook,
	Thomas Garnier, Michael Halcrow, Paul Turner, Brendan Gregg,
	Jann Horn, Matthew Garrett, Christian Brauner,
	Mickaël Salaün, Florent Revest, Brendan Jackman,
	Martin KaFai Lau, Song Liu, Yonghong Song, Serge E. Hallyn,
	Mauro Carvalho Chehab, David S. Miller, Greg Kroah-Hartman,
	Nicolas Ferre, Stanislav Fomichev, Quentin Monnet, Andrey Ignatov,
	Joe Stringer
In-Reply-To: <CAEf4BzYiUZtSJKh-UBL0jwyo6d=Cne2YtEyGU8ONykmSUSsuNA@mail.gmail.com>

On 23-Dec 22:51, Andrii Nakryiko wrote:
> On Fri, Dec 20, 2019 at 7:42 AM KP Singh <kpsingh@chromium.org> wrote:
> >
> > From: KP Singh <kpsingh@google.com>
> >
> > This patch series is a continuation of the KRSI RFC
> > (https://lore.kernel.org/bpf/20190910115527.5235-1-kpsingh@chromium.org/)
> >
> 
> [...]
> 
> > # Usage Examples
> >
> > A simple example and some documentation is included in the patchset.
> >
> > In order to better illustrate the capabilities of the framework some
> > more advanced prototype code has also been published separately:
> >
> > * Logging execution events (including environment variables and arguments):
> > https://github.com/sinkap/linux-krsi/blob/patch/v1/examples/samples/bpf/lsm_audit_env.c
> > * Detecting deletion of running executables:
> > https://github.com/sinkap/linux-krsi/blob/patch/v1/examples/samples/bpf/lsm_detect_exec_unlink.c
> > * Detection of writes to /proc/<pid>/mem:
> > https://github.com/sinkap/linux-krsi/blob/patch/v1/examples/samples/bpf/lsm_audit_env.c
> 
> Are you planning on submitting these examples for inclusion into
> samples/bpf or selftests/bpf? It would be great to have more examples
> and we can review and suggest nicer ways to go about writing them
> (e.g., BPF skeleton and global data Alexei mentioned earlier).

Eventually, yes and in selftest/bpf.

But these examples depend on using security blobs and some non-atomic
calls in the BPF helpers which are not handled as a part of the
initial patch-set.

Once we have the initial framework finalized, I will update the
examples and the helpers they are based on and send these separate
patch-sets on the list for review.

- KP

> 
> >
> > We have updated Google's internal telemetry infrastructure and have
> > started deploying this LSM on our Linux Workstations. This gives us more
> > confidence in the real-world applications of such a system.
> >
> > KP Singh (13):
> >   bpf: Refactor BPF_EVENT context macros to its own header.
> >   bpf: lsm: Add a skeleton and config options
> >   bpf: lsm: Introduce types for eBPF based LSM
> >   bpf: lsm: Allow btf_id based attachment for LSM hooks
> >   tools/libbpf: Add support in libbpf for BPF_PROG_TYPE_LSM
> >   bpf: lsm: Init Hooks and create files in securityfs
> >   bpf: lsm: Implement attach, detach and execution.
> >   bpf: lsm: Show attached program names in hook read handler.
> >   bpf: lsm: Add a helper function bpf_lsm_event_output
> >   bpf: lsm: Handle attachment of the same program
> >   tools/libbpf: Add bpf_program__attach_lsm
> >   bpf: lsm: Add selftests for BPF_PROG_TYPE_LSM
> >   bpf: lsm: Add Documentation
> >
> 
> [...]

^ permalink raw reply

* Re: [PATCH bpf-next v1 09/13] bpf: lsm: Add a helper function bpf_lsm_event_output
From: KP Singh @ 2019-12-30 15:11 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: KP Singh, open list, bpf, linux-security-module,
	Alexei Starovoitov, Daniel Borkmann, James Morris, Kees Cook,
	Thomas Garnier, Michael Halcrow, Paul Turner, Brendan Gregg,
	Jann Horn, Matthew Garrett, Christian Brauner,
	Mickaël Salaün, Florent Revest, Brendan Jackman,
	Martin KaFai Lau, Song Liu, Yonghong Song, Serge E. Hallyn,
	Mauro Carvalho Chehab, David S. Miller, Greg Kroah-Hartman,
	Nicolas Ferre, Stanislav Fomichev, Quentin Monnet, Andrey Ignatov,
	Joe Stringer
In-Reply-To: <CAEf4BzYgcez2G1qJW9saJmzfeYirGdH58aAcUk-+YTJF6vyOuQ@mail.gmail.com>

On 23-Dec 22:36, Andrii Nakryiko wrote:
> On Fri, Dec 20, 2019 at 7:43 AM KP Singh <kpsingh@chromium.org> wrote:
> >
> > From: KP Singh <kpsingh@google.com>
> >
> > This helper is similar to bpf_perf_event_output except that
> > it does need a ctx argument which is more usable in the
> > BTF based LSM programs where the context is converted to
> > the signature of the attacthed BTF type.
> >
> > An example usage of this function would be:
> >
> > struct {
> >          __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
> >          __uint(key_size, sizeof(int));
> >          __uint(value_size, sizeof(u32));
> > } perf_map SEC(".maps");
> >
> > BPF_TRACE_1(bpf_prog1, "lsm/bprm_check_security,
> >             struct linux_binprm *, bprm)
> > {
> >         char buf[BUF_SIZE];
> >         int len;
> >         u64 flags = BPF_F_CURRENT_CPU;
> >
> >         /* some logic that fills up buf with len data */
> >         len = fill_up_buf(buf);
> >         if (len < 0)
> >                 return len;
> >         if (len > BU)
> >                 return 0;
> >
> >         bpf_lsm_event_output(&perf_map, flags, buf, len);
> 
> This seems to be generally useful and not LSM-specific, so maybe name
> it more generically as bpf_event_output instead?

Agreed, I am happy to rename this.

> 
> I'm also curious why we needed both bpf_perf_event_output and
> bpf_perf_event_output_raw_tp, if it could be done as simply as you did
> it here. What's different between those three and why your
> bpf_lsm_event_output doesn't need pt_regs passed into them?

That's because my implementation uses the following function from
bpf_trace.c:

u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)

This does not require a pt_regs argument and handles fetching them
internally:

	regs = this_cpu_ptr(&bpf_pt_regs.regs[nest_level - 1]);

	perf_fetch_caller_regs(regs);
	perf_sample_data_init(sd, 0, 0);
	sd->raw = &raw;

	ret = __bpf_perf_event_output(regs, map, flags, sd);

- KP

> 
> >         return 0;
> > }
> >
> > Signed-off-by: KP Singh <kpsingh@google.com>
> > ---
> >  include/uapi/linux/bpf.h       | 10 +++++++++-
> >  kernel/bpf/verifier.c          |  1 +
> >  security/bpf/ops.c             | 21 +++++++++++++++++++++
> >  tools/include/uapi/linux/bpf.h | 10 +++++++++-
> >  4 files changed, 40 insertions(+), 2 deletions(-)
> >
> 
> [...]

^ permalink raw reply

* Re: [PATCH bpf-next v1 06/13] bpf: lsm: Init Hooks and create files in securityfs
From: KP Singh @ 2019-12-30 15:37 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: KP Singh, open list, bpf, linux-security-module,
	Alexei Starovoitov, Daniel Borkmann, James Morris, Kees Cook,
	Thomas Garnier, Michael Halcrow, Paul Turner, Brendan Gregg,
	Jann Horn, Matthew Garrett, Christian Brauner,
	Mickaël Salaün, Florent Revest, Brendan Jackman,
	Martin KaFai Lau, Song Liu, Yonghong Song, Serge E. Hallyn,
	Mauro Carvalho Chehab, David S. Miller, Greg Kroah-Hartman,
	Nicolas Ferre, Stanislav Fomichev, Quentin Monnet, Andrey Ignatov,
	Joe Stringer
In-Reply-To: <CAEf4BzZ+wMTjghpr4=e5AY9xeFjvm-Rc+JooJzJstBW1r73z4A@mail.gmail.com>

On 23-Dec 22:28, Andrii Nakryiko wrote:
> On Fri, Dec 20, 2019 at 7:43 AM KP Singh <kpsingh@chromium.org> wrote:
> >
> > From: KP Singh <kpsingh@google.com>
> >
> > The LSM creates files in securityfs for each hook registered with the
> > LSM.
> >
> >     /sys/kernel/security/bpf/<h_name>
> >
> > The list of LSM hooks are maintained in an internal header "hooks.h"
> > Eventually, this list should either be defined collectively in
> > include/linux/lsm_hooks.h or auto-generated from it.
> >
> > * Creation of a file for the hook in the securityfs.
> > * Allocation of a bpf_lsm_hook data structure which stores
> >   a pointer to the dentry of the newly created file in securityfs.
> > * Creation of a typedef for the hook so that BTF information
> >   can be generated for the LSM hooks to:
> >
> >   - Make them "Compile Once, Run Everywhere".
> >   - Pass the right arguments when the attached programs are run.
> >   - Verify the accesses made by the program by using the BTF
> >     information.
> >
> > Signed-off-by: KP Singh <kpsingh@google.com>
> > ---
> >  include/linux/bpf_lsm.h        |   12 +
> >  security/bpf/Makefile          |    4 +-
> >  security/bpf/include/bpf_lsm.h |   63 ++
> >  security/bpf/include/fs.h      |   23 +
> >  security/bpf/include/hooks.h   | 1015 ++++++++++++++++++++++++++++++++
> >  security/bpf/lsm.c             |  138 ++++-
> >  security/bpf/lsm_fs.c          |   82 +++
> >  7 files changed, 1333 insertions(+), 4 deletions(-)
> >  create mode 100644 include/linux/bpf_lsm.h
> >  create mode 100644 security/bpf/include/bpf_lsm.h
> >  create mode 100644 security/bpf/include/fs.h
> >  create mode 100644 security/bpf/include/hooks.h
> >  create mode 100644 security/bpf/lsm_fs.c
> >
> 
> [...]
> 
> > +
> > +/*
> > + * The hooks can have an int or void return type, these macros allow having a
> > + * single implementation of DEFINE_LSM_HOOK irrespective of the return type.
> > + */
> > +#define LSM_HOOK_RET(ret, x) LSM_HOOK_RET_##ret(x)
> > +#define LSM_HOOK_RET_int(x) x
> > +#define LSM_HOOK_RET_void(x)
> > +
> > +/*
> > + * This macro defines the body of a LSM hook which runs the eBPF programs that
> > + * are attached to the hook and returns the error code from the eBPF programs if
> > + * the return type of the hook is int.
> > + */
> > +#define DEFINE_LSM_HOOK(hook, ret, proto, args)                                \
> > +typedef ret (*lsm_btf_##hook)(proto);                                  \
> > +static ret bpf_lsm_##hook(proto)                                       \
> > +{                                                                      \
> > +       return LSM_HOOK_RET(ret, LSM_RUN_PROGS(hook##_type, args));     \
> >  }
> 
> I'm probably missing something, but according to LSM_HOOK_RET
> definition for when ret==void, bpf_lsm_##hook will be a noop and won't
> call any BPF program. Did I miss some additional macro magic?
> 

Good catch! You're right. These macros will not be there in v2 as
we move to using trampolines based callbacks.

> >
> > +/*
> > + * Define the body of each of the LSM hooks defined in hooks.h.
> > + */
> > +#define BPF_LSM_HOOK(hook, ret, args, proto) \
> > +       DEFINE_LSM_HOOK(hook, ret, BPF_LSM_ARGS(args), BPF_LSM_ARGS(proto))
> > +#include "hooks.h"
> > +#undef BPF_LSM_HOOK
> > +#undef DEFINE_LSM_HOOK
> > +
> > +/*
> > + * Initialize the bpf_lsm_hooks_list for each of the hooks defined in hooks.h.
> > + * The list contains information for each of the hook and can be indexed by the
> > + * its type to initialize security FS, attach, detach and execute eBPF programs
> > + * for the hook.
> > + */
> > +struct bpf_lsm_hook bpf_lsm_hooks_list[] = {
> > +       #define BPF_LSM_HOOK(h, ...)                                    \
> > +               [h##_type] = {                                          \
> > +                       .h_type = h##_type,                             \
> > +                       .mutex = __MUTEX_INITIALIZER(                   \
> > +                               bpf_lsm_hooks_list[h##_type].mutex),    \
> > +                       .name = #h,                                     \
> > +                       .btf_hook_func =                                \
> > +                               (void *)(lsm_btf_##h)(bpf_lsm_##h),     \
> 
> this btf_hook_func, is it assigned just so that type information for
> bpf_lsm_xxx typedefs are preserved, is that right? It doesn't seem to
> be ever called or read. If I'm not missing anything, check out
> Martin's latest STRUCT_OPS patch set. He defines EMIT_TYPE_INFO(type)
> macro, which will ensure that BTF for specified type is emitted into
> vmlinux BTF, without actually using any extra space, defining extra
> fields or static variables, etc. I suggest using the same for the
> cleanest result.
> 
> One more thing regarding lsm_bpf_ typedefs. Currently you are defining
> them as a pointer to func_proto, matching LSM hook. There is an
> alternative approach, which has few benefits over using func_proto. If
> instead you define a struct, where each argument of func prototype is
> represented as 8-byte aligned field, this will contain all the
> necessary information for BPF verifier to do its job (just like
> func_proto). But in addition to that, when vmlinux.h is generated, it
> will contain a nice struct bpf_lsm_<hook_name> with correct structure
> to be used **directly** in BPF program, as a single context argument.
> So with vmlinux.h, users won't have to re-define all the argument
> types and names in their BPF_TRACE_x definition. Let me provide
> concrete example from your cover letter. This is what you provide as
> an example:

Is this also doable for the new approach suggsted by Alexei
and prototyped in?

https://lore.kernel.org/bpf/CAEf4BzYiUZtSJKh-UBL0jwyo6d=Cne2YtEyGU8ONykmSUSsuNA@mail.gmail.com/T/#m7c7ec0e7d8e803c6c357495d9eea59028a67cac6

which uses trampolines. The new approach gets rid of any type
generation and macros in security/bpf/lsm_hooks.h. Maybe the
btf_vmlinux can be augmented at runtime to generate context struct
upon attachment?

> 
> BPF_TRACE_3("lsm/file_mprotect", mprotect_audit,
>             struct vm_area_struct *, vma,
>             unsigned long, reqprot, unsigned long, prot) {...}
> 
> on kernel side, you'll have:
> 
> typedef int (*bpf_lsm_file_mprotect)(struct vm_area_struct *vma,
>                                      unsigned long reqprot,
>                                      unsigned long prot);
> 
> So you can see that user has to go and copy/paste all the arguments
> and their types and paste them in this verbose BPF_TRACE_3 macro to
> define correct BPF program.
> 
> Now, imagine that instead of typedef above, we define equivalent struct:
> 
> struct bpf_lsm_file_mprotect {
>     struct vm_area_struct *vma;
>     unsigned long reqprot;
>     unsigned long prot;
> };
> 
> This type will get dumped into vmlinux.h, which can be used from BPF
> user code as such:
> 
> SEC("lsm/file_mprotect")
> int mprotect_audito(struct bpf_lsm_file_mprotect *ctx)
> {
>     ... here you can use ctx->vma, ctx->reqprot, ctx->prot ...
> }
> 
> 
> Meanwhile, there will be just minimal changes to BPF verifier to use
> such struct instead of func_proto for verification of LSM programs.
> 
> We currently have similar issue with raw_tp programs and I've been
> thinking about switching that to structs instead of func_proto, so we
> might as well coordinate that and reuse the same logic in BPF
> verifier.
> 
> Thoughts?

Thanks for the explanation!

Using structs is definitely better if we chose to go with static type
generation.

- KP

> 
> 
> 
> > +               },
> > +       #include "hooks.h"
> > +       #undef BPF_LSM_HOOK
> > +};
> > +
> 
> [...]

^ permalink raw reply

* Re: [PATCH bpf-next v1 06/13] bpf: lsm: Init Hooks and create files in securityfs
From: Andrii Nakryiko @ 2019-12-30 18:52 UTC (permalink / raw)
  To: KP Singh
  Cc: open list, bpf, linux-security-module, Alexei Starovoitov,
	Daniel Borkmann, James Morris, Kees Cook, Thomas Garnier,
	Michael Halcrow, Paul Turner, Brendan Gregg, Jann Horn,
	Matthew Garrett, Christian Brauner, Mickaël Salaün,
	Florent Revest, Brendan Jackman, Martin KaFai Lau, Song Liu,
	Yonghong Song, Serge E. Hallyn, Mauro Carvalho Chehab,
	David S. Miller, Greg Kroah-Hartman, Nicolas Ferre,
	Stanislav Fomichev, Quentin Monnet, Andrey Ignatov, Joe Stringer
In-Reply-To: <20191230153711.GD70684@google.com>

On Mon, Dec 30, 2019 at 7:37 AM KP Singh <kpsingh@chromium.org> wrote:
>
> On 23-Dec 22:28, Andrii Nakryiko wrote:
> > On Fri, Dec 20, 2019 at 7:43 AM KP Singh <kpsingh@chromium.org> wrote:
> > >
> > > From: KP Singh <kpsingh@google.com>
> > >
> > > The LSM creates files in securityfs for each hook registered with the
> > > LSM.
> > >
> > >     /sys/kernel/security/bpf/<h_name>
> > >
> > > The list of LSM hooks are maintained in an internal header "hooks.h"
> > > Eventually, this list should either be defined collectively in
> > > include/linux/lsm_hooks.h or auto-generated from it.
> > >
> > > * Creation of a file for the hook in the securityfs.
> > > * Allocation of a bpf_lsm_hook data structure which stores
> > >   a pointer to the dentry of the newly created file in securityfs.
> > > * Creation of a typedef for the hook so that BTF information
> > >   can be generated for the LSM hooks to:
> > >
> > >   - Make them "Compile Once, Run Everywhere".
> > >   - Pass the right arguments when the attached programs are run.
> > >   - Verify the accesses made by the program by using the BTF
> > >     information.
> > >
> > > Signed-off-by: KP Singh <kpsingh@google.com>
> > > ---
> > >  include/linux/bpf_lsm.h        |   12 +
> > >  security/bpf/Makefile          |    4 +-
> > >  security/bpf/include/bpf_lsm.h |   63 ++
> > >  security/bpf/include/fs.h      |   23 +
> > >  security/bpf/include/hooks.h   | 1015 ++++++++++++++++++++++++++++++++
> > >  security/bpf/lsm.c             |  138 ++++-
> > >  security/bpf/lsm_fs.c          |   82 +++
> > >  7 files changed, 1333 insertions(+), 4 deletions(-)
> > >  create mode 100644 include/linux/bpf_lsm.h
> > >  create mode 100644 security/bpf/include/bpf_lsm.h
> > >  create mode 100644 security/bpf/include/fs.h
> > >  create mode 100644 security/bpf/include/hooks.h
> > >  create mode 100644 security/bpf/lsm_fs.c
> > >
> >
> > [...]
> >
> > > +
> > > +/*
> > > + * The hooks can have an int or void return type, these macros allow having a
> > > + * single implementation of DEFINE_LSM_HOOK irrespective of the return type.
> > > + */
> > > +#define LSM_HOOK_RET(ret, x) LSM_HOOK_RET_##ret(x)
> > > +#define LSM_HOOK_RET_int(x) x
> > > +#define LSM_HOOK_RET_void(x)
> > > +
> > > +/*
> > > + * This macro defines the body of a LSM hook which runs the eBPF programs that
> > > + * are attached to the hook and returns the error code from the eBPF programs if
> > > + * the return type of the hook is int.
> > > + */
> > > +#define DEFINE_LSM_HOOK(hook, ret, proto, args)                                \
> > > +typedef ret (*lsm_btf_##hook)(proto);                                  \
> > > +static ret bpf_lsm_##hook(proto)                                       \
> > > +{                                                                      \
> > > +       return LSM_HOOK_RET(ret, LSM_RUN_PROGS(hook##_type, args));     \
> > >  }
> >
> > I'm probably missing something, but according to LSM_HOOK_RET
> > definition for when ret==void, bpf_lsm_##hook will be a noop and won't
> > call any BPF program. Did I miss some additional macro magic?
> >
>
> Good catch! You're right. These macros will not be there in v2 as
> we move to using trampolines based callbacks.

Cool, no worries.

>
> > >
> > > +/*
> > > + * Define the body of each of the LSM hooks defined in hooks.h.
> > > + */
> > > +#define BPF_LSM_HOOK(hook, ret, args, proto) \
> > > +       DEFINE_LSM_HOOK(hook, ret, BPF_LSM_ARGS(args), BPF_LSM_ARGS(proto))
> > > +#include "hooks.h"
> > > +#undef BPF_LSM_HOOK
> > > +#undef DEFINE_LSM_HOOK
> > > +
> > > +/*
> > > + * Initialize the bpf_lsm_hooks_list for each of the hooks defined in hooks.h.
> > > + * The list contains information for each of the hook and can be indexed by the
> > > + * its type to initialize security FS, attach, detach and execute eBPF programs
> > > + * for the hook.
> > > + */
> > > +struct bpf_lsm_hook bpf_lsm_hooks_list[] = {
> > > +       #define BPF_LSM_HOOK(h, ...)                                    \
> > > +               [h##_type] = {                                          \
> > > +                       .h_type = h##_type,                             \
> > > +                       .mutex = __MUTEX_INITIALIZER(                   \
> > > +                               bpf_lsm_hooks_list[h##_type].mutex),    \
> > > +                       .name = #h,                                     \
> > > +                       .btf_hook_func =                                \
> > > +                               (void *)(lsm_btf_##h)(bpf_lsm_##h),     \
> >
> > this btf_hook_func, is it assigned just so that type information for
> > bpf_lsm_xxx typedefs are preserved, is that right? It doesn't seem to
> > be ever called or read. If I'm not missing anything, check out
> > Martin's latest STRUCT_OPS patch set. He defines EMIT_TYPE_INFO(type)
> > macro, which will ensure that BTF for specified type is emitted into
> > vmlinux BTF, without actually using any extra space, defining extra
> > fields or static variables, etc. I suggest using the same for the
> > cleanest result.
> >
> > One more thing regarding lsm_bpf_ typedefs. Currently you are defining
> > them as a pointer to func_proto, matching LSM hook. There is an
> > alternative approach, which has few benefits over using func_proto. If
> > instead you define a struct, where each argument of func prototype is
> > represented as 8-byte aligned field, this will contain all the
> > necessary information for BPF verifier to do its job (just like
> > func_proto). But in addition to that, when vmlinux.h is generated, it
> > will contain a nice struct bpf_lsm_<hook_name> with correct structure
> > to be used **directly** in BPF program, as a single context argument.
> > So with vmlinux.h, users won't have to re-define all the argument
> > types and names in their BPF_TRACE_x definition. Let me provide
> > concrete example from your cover letter. This is what you provide as
> > an example:
>
> Is this also doable for the new approach suggsted by Alexei
> and prototyped in?
>
> https://lore.kernel.org/bpf/CAEf4BzYiUZtSJKh-UBL0jwyo6d=Cne2YtEyGU8ONykmSUSsuNA@mail.gmail.com/T/#m7c7ec0e7d8e803c6c357495d9eea59028a67cac6
>
> which uses trampolines. The new approach gets rid of any type
> generation and macros in security/bpf/lsm_hooks.h. Maybe the
> btf_vmlinux can be augmented at runtime to generate context struct
> upon attachment?

If it doesn't generate "controllable" types (and seems like existing
types are not readily usable as well), then we can't use vmlinux's BTF
as is. But we can augment vmlinux.h header during generation time,
based on naming convention and extra post-processing of vmlinux BTF.
That's sort of a point of special `format core` mode in bpftool, which
we currently discuss on mailing list as well, see [0].

  [0] https://lore.kernel.org/bpf/CAEf4BzY4ffWaeFckPuqNGNAU1uBG3TmTK+CjY1LVa2G+RGz=cA@mail.gmail.com/T/#u


>
> >
> > BPF_TRACE_3("lsm/file_mprotect", mprotect_audit,
> >             struct vm_area_struct *, vma,
> >             unsigned long, reqprot, unsigned long, prot) {...}
> >
> > on kernel side, you'll have:
> >
> > typedef int (*bpf_lsm_file_mprotect)(struct vm_area_struct *vma,
> >                                      unsigned long reqprot,
> >                                      unsigned long prot);
> >
> > So you can see that user has to go and copy/paste all the arguments
> > and their types and paste them in this verbose BPF_TRACE_3 macro to
> > define correct BPF program.
> >
> > Now, imagine that instead of typedef above, we define equivalent struct:
> >
> > struct bpf_lsm_file_mprotect {
> >     struct vm_area_struct *vma;
> >     unsigned long reqprot;
> >     unsigned long prot;
> > };
> >
> > This type will get dumped into vmlinux.h, which can be used from BPF
> > user code as such:
> >
> > SEC("lsm/file_mprotect")
> > int mprotect_audito(struct bpf_lsm_file_mprotect *ctx)
> > {
> >     ... here you can use ctx->vma, ctx->reqprot, ctx->prot ...
> > }
> >
> >
> > Meanwhile, there will be just minimal changes to BPF verifier to use
> > such struct instead of func_proto for verification of LSM programs.
> >
> > We currently have similar issue with raw_tp programs and I've been
> > thinking about switching that to structs instead of func_proto, so we
> > might as well coordinate that and reuse the same logic in BPF
> > verifier.
> >
> > Thoughts?
>
> Thanks for the explanation!
>
> Using structs is definitely better if we chose to go with static type
> generation.
>

Yes, that's what I think is preferable for tp_btf programs as well.

> - KP
>
> >
> >
> >
> > > +               },
> > > +       #include "hooks.h"
> > > +       #undef BPF_LSM_HOOK
> > > +};
> > > +
> >
> > [...]

^ permalink raw reply

* Re: [PATCH bpf-next v1 09/13] bpf: lsm: Add a helper function bpf_lsm_event_output
From: Andrii Nakryiko @ 2019-12-30 18:56 UTC (permalink / raw)
  To: KP Singh
  Cc: open list, bpf, linux-security-module, Alexei Starovoitov,
	Daniel Borkmann, James Morris, Kees Cook, Thomas Garnier,
	Michael Halcrow, Paul Turner, Brendan Gregg, Jann Horn,
	Matthew Garrett, Christian Brauner, Mickaël Salaün,
	Florent Revest, Brendan Jackman, Martin KaFai Lau, Song Liu,
	Yonghong Song, Serge E. Hallyn, Mauro Carvalho Chehab,
	David S. Miller, Greg Kroah-Hartman, Nicolas Ferre,
	Stanislav Fomichev, Quentin Monnet, Andrey Ignatov, Joe Stringer
In-Reply-To: <20191230151135.GC70684@google.com>

On Mon, Dec 30, 2019 at 7:11 AM KP Singh <kpsingh@chromium.org> wrote:
>
> On 23-Dec 22:36, Andrii Nakryiko wrote:
> > On Fri, Dec 20, 2019 at 7:43 AM KP Singh <kpsingh@chromium.org> wrote:
> > >
> > > From: KP Singh <kpsingh@google.com>
> > >
> > > This helper is similar to bpf_perf_event_output except that
> > > it does need a ctx argument which is more usable in the
> > > BTF based LSM programs where the context is converted to
> > > the signature of the attacthed BTF type.
> > >
> > > An example usage of this function would be:
> > >
> > > struct {
> > >          __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
> > >          __uint(key_size, sizeof(int));
> > >          __uint(value_size, sizeof(u32));
> > > } perf_map SEC(".maps");
> > >
> > > BPF_TRACE_1(bpf_prog1, "lsm/bprm_check_security,
> > >             struct linux_binprm *, bprm)
> > > {
> > >         char buf[BUF_SIZE];
> > >         int len;
> > >         u64 flags = BPF_F_CURRENT_CPU;
> > >
> > >         /* some logic that fills up buf with len data */
> > >         len = fill_up_buf(buf);
> > >         if (len < 0)
> > >                 return len;
> > >         if (len > BU)
> > >                 return 0;
> > >
> > >         bpf_lsm_event_output(&perf_map, flags, buf, len);
> >
> > This seems to be generally useful and not LSM-specific, so maybe name
> > it more generically as bpf_event_output instead?
>
> Agreed, I am happy to rename this.
>
> >
> > I'm also curious why we needed both bpf_perf_event_output and
> > bpf_perf_event_output_raw_tp, if it could be done as simply as you did
> > it here. What's different between those three and why your
> > bpf_lsm_event_output doesn't need pt_regs passed into them?
>
> That's because my implementation uses the following function from
> bpf_trace.c:
>
> u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
>                      void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
>
> This does not require a pt_regs argument and handles fetching them
> internally:
>
>         regs = this_cpu_ptr(&bpf_pt_regs.regs[nest_level - 1]);
>
>         perf_fetch_caller_regs(regs);
>         perf_sample_data_init(sd, 0, 0);
>         sd->raw = &raw;
>
>         ret = __bpf_perf_event_output(regs, map, flags, sd);
>
> - KP

Yeah, I saw that bit. I guess I'm confused why we couldn't do the same
for, say, raw_tracepoint case. Now Jiri Olsa is adding another similar
helper doing its own storage of pt_regs. If all of them can share the
same (even if with bigger nest_level) array of pt_regs, that would
great.

>
> >
> > >         return 0;
> > > }
> > >
> > > Signed-off-by: KP Singh <kpsingh@google.com>
> > > ---
> > >  include/uapi/linux/bpf.h       | 10 +++++++++-
> > >  kernel/bpf/verifier.c          |  1 +
> > >  security/bpf/ops.c             | 21 +++++++++++++++++++++
> > >  tools/include/uapi/linux/bpf.h | 10 +++++++++-
> > >  4 files changed, 40 insertions(+), 2 deletions(-)
> > >
> >
> > [...]

^ permalink raw reply

* Re: [PATCH bpf-next v1 00/13] MAC and Audit policy using eBPF (KRSI)
From: Andrii Nakryiko @ 2019-12-30 18:58 UTC (permalink / raw)
  To: KP Singh
  Cc: open list, bpf, linux-security-module, Alexei Starovoitov,
	Daniel Borkmann, James Morris, Kees Cook, Thomas Garnier,
	Michael Halcrow, Paul Turner, Brendan Gregg, Jann Horn,
	Matthew Garrett, Christian Brauner, Mickaël Salaün,
	Florent Revest, Brendan Jackman, Martin KaFai Lau, Song Liu,
	Yonghong Song, Serge E. Hallyn, Mauro Carvalho Chehab,
	David S. Miller, Greg Kroah-Hartman, Nicolas Ferre,
	Stanislav Fomichev, Quentin Monnet, Andrey Ignatov, Joe Stringer
In-Reply-To: <20191230150424.GB70684@google.com>

On Mon, Dec 30, 2019 at 7:04 AM KP Singh <kpsingh@chromium.org> wrote:
>
> On 23-Dec 22:51, Andrii Nakryiko wrote:
> > On Fri, Dec 20, 2019 at 7:42 AM KP Singh <kpsingh@chromium.org> wrote:
> > >
> > > From: KP Singh <kpsingh@google.com>
> > >
> > > This patch series is a continuation of the KRSI RFC
> > > (https://lore.kernel.org/bpf/20190910115527.5235-1-kpsingh@chromium.org/)
> > >
> >
> > [...]
> >
> > > # Usage Examples
> > >
> > > A simple example and some documentation is included in the patchset.
> > >
> > > In order to better illustrate the capabilities of the framework some
> > > more advanced prototype code has also been published separately:
> > >
> > > * Logging execution events (including environment variables and arguments):
> > > https://github.com/sinkap/linux-krsi/blob/patch/v1/examples/samples/bpf/lsm_audit_env.c
> > > * Detecting deletion of running executables:
> > > https://github.com/sinkap/linux-krsi/blob/patch/v1/examples/samples/bpf/lsm_detect_exec_unlink.c
> > > * Detection of writes to /proc/<pid>/mem:
> > > https://github.com/sinkap/linux-krsi/blob/patch/v1/examples/samples/bpf/lsm_audit_env.c
> >
> > Are you planning on submitting these examples for inclusion into
> > samples/bpf or selftests/bpf? It would be great to have more examples
> > and we can review and suggest nicer ways to go about writing them
> > (e.g., BPF skeleton and global data Alexei mentioned earlier).
>
> Eventually, yes and in selftest/bpf.
>
> But these examples depend on using security blobs and some non-atomic
> calls in the BPF helpers which are not handled as a part of the
> initial patch-set.
>
> Once we have the initial framework finalized, I will update the
> examples and the helpers they are based on and send these separate
> patch-sets on the list for review.

Great! The reason I was asking is that once they are in selftests, it
would be nice to switch them to use all the latest BPF usability
improvements to make code cleaner and have it as another good example
of modern BPF program. Like use BTF-defined maps, BPF skeleton,
vmlinux.h, etc. We can go over this when the time comes, though :)


>
> - KP
>
> >
> > >
> > > We have updated Google's internal telemetry infrastructure and have
> > > started deploying this LSM on our Linux Workstations. This gives us more
> > > confidence in the real-world applications of such a system.
> > >
> > > KP Singh (13):
> > >   bpf: Refactor BPF_EVENT context macros to its own header.
> > >   bpf: lsm: Add a skeleton and config options
> > >   bpf: lsm: Introduce types for eBPF based LSM
> > >   bpf: lsm: Allow btf_id based attachment for LSM hooks
> > >   tools/libbpf: Add support in libbpf for BPF_PROG_TYPE_LSM
> > >   bpf: lsm: Init Hooks and create files in securityfs
> > >   bpf: lsm: Implement attach, detach and execution.
> > >   bpf: lsm: Show attached program names in hook read handler.
> > >   bpf: lsm: Add a helper function bpf_lsm_event_output
> > >   bpf: lsm: Handle attachment of the same program
> > >   tools/libbpf: Add bpf_program__attach_lsm
> > >   bpf: lsm: Add selftests for BPF_PROG_TYPE_LSM
> > >   bpf: lsm: Add Documentation
> > >
> >
> > [...]

^ permalink raw reply

* Re: [PATCH bpf-next v1 00/13] MAC and Audit policy using eBPF (KRSI)
From: Andrii Nakryiko @ 2019-12-30 19:14 UTC (permalink / raw)
  To: KP Singh
  Cc: Alexei Starovoitov, open list, bpf, linux-security-module,
	Alexei Starovoitov, Daniel Borkmann, James Morris, Kees Cook,
	Thomas Garnier, Michael Halcrow, Paul Turner, Brendan Gregg,
	Jann Horn, Matthew Garrett, Christian Brauner,
	Mickaël Salaün, Florent Revest, Brendan Jackman,
	Martin KaFai Lau, Song Liu, Yonghong Song, Serge E. Hallyn,
	Mauro Carvalho Chehab, David S. Miller, Greg Kroah-Hartman,
	Nicolas Ferre, Stanislav Fomichev, Quentin Monnet, Andrey Ignatov,
	Joe Stringer
In-Reply-To: <20191230145846.GA70684@google.com>

On Mon, Dec 30, 2019 at 6:59 AM KP Singh <kpsingh@chromium.org> wrote:
>
> On 21-Dez 17:27, Alexei Starovoitov wrote:
> > On Fri, Dec 20, 2019 at 04:41:55PM +0100, KP Singh wrote:
> > > // Declare the eBPF program mprotect_audit which attaches to
> > > // to the file_mprotect LSM hook and accepts three arguments.
> > > BPF_TRACE_3("lsm/file_mprotect", mprotect_audit,
> > >         struct vm_area_struct *, vma,
> > >         unsigned long, reqprot, unsigned long, prot
> > > {
> > >     unsigned long vm_start = _(vma->vm_start);
> > >     return 0;
> > > }
> >
>
> Hi Alexei,
>
> Thanks for the feedback. This is really helpful!
>
> > I think the only sore point of the patchset is:
> > security/bpf/include/hooks.h   | 1015 ++++++++++++++++++++++++++++++++
> > With bpf trampoline this type of 'kernel types -> bpf types' converters
> > are no longer necessary. Please take a look at tcp-congestion-control patchset:
> > https://patchwork.ozlabs.org/cover/1214417/
> > Instead of doing similar thing (like your patch 1 plus patch 6) it's using
> > trampoline to provide bpf based congestion control callbacks into tcp stack.
> > The same trampoline-based mechanism can be reused by bpf_lsm.
> > Then all manual work of doing BPF_LSM_HOOK(...) for every hook won't be
> > necessary. It will also prove the point that attaching BPF to raw LSM hooks
> > doesn't freeze them into stable abi.
>
> Really cool!
>
> I looked into how BPF trampolines are being used in tracing and the
> new STRUCT_OPS patchset and was able protoype
> (https://github.com/sinkap/linux-krsi/tree/patch/v1/trampoline_prototype,
> not ready for review yet) which:
>
> * Gets rid of security/bpf/include/hooks.h and all of the static
>   macro magic essentially making the LSM ~truly instrumentable~ at
>   runtime.
> * Gets rid of the generation of any new types as we already have
>   all the BTF information in the kernel in the following two types:
>
> struct security_hook_heads {
>         .
>         .
>         struct hlist_head file_mprotect;   <- Append the callback at this offset
>         .
>         .
> };
>
> and
>
> union security_list_options {
>         int (*file_mprotect)(struct vm_area_struct *vma, unsigned long reqprot,
>                                 unsigned long prot);
> };
>
> Which is the same type as the typedef that's currently being generated
> , i.e. lsm_btf_file_mprotect
>
> In the current prototype, libbpf converts the name of the hook into an
> offset into the security_hook_heads and the verifier does the
> following when a program is loaded:
>
> * Verifies the offset and the type at the offset (struct hlist_head).
> * Resolves the func_proto (by looking up the type in
>   security_list_options) and updates prog->aux with the name and
>   func_proto which are then verified similar to raw_tp programs with
>   btf_ctx_access.
>
> On attachment:
>
> * A trampoline is created and appended to the security_hook_heads
>   for the BPF LSM.
> * An anonymous FD is returned and the attachment is conditional on the
>   references to FD (as suggested and similar to fentry/fexit tracing
>   programs).
>
> This implies that the BPF programs are "the LSM hook" as opposed to
> being executed inside a statically defined hook body which requires
> mutable LSM hooks for which I was able to re-use some of ideas in
> Sargun's patch:
>
> https://lore.kernel.org/lkml/20180408065916.GA2832@ircssh-2.c.rugged-nimbus-611.internal/
>
> to maintain a separate security_hook_heads struct for dynamically
> added LSM hooks by the BPF LSM which are executed after all the
> statically defined hooks.
>
> > Longer program names are supplied via btf's func_info.
> > It feels that:
> > cat /sys/kernel/security/bpf/process_execution
> > env_dumper__v2
> > is reinventing the wheel. bpftool is the main introspection tool.
> > It can print progs attached to perf, cgroup, networking. I think it's better to
> > stay consistent and do the same with bpf-lsm.
>
> I agree, based on the new feedback, I don't think we need securityFS
> attachment points anymore. I was able to get rid of it completely.
>
> >
> > Another issue is in proposed attaching method:
> > hook_fd = open("/sys/kernel/security/bpf/process_execution");
> > sys_bpf(attach, prog_fd, hook_fd);
> > With bpf tracing we moved to FD-based attaching, because permanent attaching is
> > problematic in production. We're going to provide FD-based api to attach to
> > networking as well, because xdp/tc/cgroup prog attaching suffers from the same
> > production issues. Mainly with permanent attaching there is no ownership of
> > attachment. Everything is global and permanent. It's not clear what
> > process/script suppose to detach/cleanup. I suggest bpf-lsm use FD-based
> > attaching from the beginning. Take a look at raw_tp/tp_btf/fentry/fexit style
> > of attaching. All of them return FD which represents what libbpf calls
> > 'bpf_link' concept. Once refcnt of that FD goes to zero that link (attachment)
> > is destroyed and program is detached _by the kernel_. To make such links
> > permanent the application can pin them in bpffs. The pinning patches haven't
> > landed yet, but the concept of the link is quite powerful and much more
> > production friendly than permanent attaching.
>
> I like this. This also means we don't immediately need the handling of
> duplicate names so I dropped that bit of the patch as well and updated
> the attachment to use this mechanism.
>
> > bpf-lsm will still be able to attach multiple progs to the same hook and
> > see what is attached via bpftool.
> >
> > The rest looks good. Thank you for working on it.
>
> There are some choices we need to make here from an API perspective:
>
> * Should we "repurpose" attr->attach_btf_id and use it as an offset
>   into security_hook_heads or add a new attribute
>   (e.g lsm_hook_offset) for the offset or use name of the LSM hook
>   (e.g. lsm_hook_name).

I think setting this to member index inside union
security_list_options will be better? Or member index inside struct
security_hook_heads. Seems like kernel will have to "join" those two
anyways, right (one for type info, another for trampoline)? Offset is
less convenient either way.

> * Since we don't have the files in securityFS, the attachment does not
>   have a target_fd. Should we add a new type of BPF command?
>   e.g. LSM_HOOK_OPEN?

Semantics of LSM program seems closer to fentry/fexit/raw_tp, so maybe
instead use BPF_RAW_TRACEPOINT_OPEN command? On libbpf side it's all
going to be abstracted behind bpf_program__attach() anyways.

>
> I will clean up the prototype, incorporate some of the other feedback
> received, and send a v2.
>
> Wishing everyone a very Happy New Year!

Thanks, you too!

>
> - KP
>

^ permalink raw reply

* Re: [PATCH bpf-next v1 00/13] MAC and Audit policy using eBPF (KRSI)
From: Kees Cook @ 2019-12-30 19:15 UTC (permalink / raw)
  To: KP Singh
  Cc: Casey Schaufler, open list, bpf, linux-security-module,
	Alexei Starovoitov, Daniel Borkmann, James Morris, Thomas Garnier,
	Michael Halcrow, Paul Turner, Brendan Gregg, Jann Horn,
	Matthew Garrett, Christian Brauner, Mickaël Salaün,
	Florent Revest, Brendan Jackman, Martin KaFai Lau, Song Liu,
	Yonghong Song, Serge E. Hallyn, Mauro Carvalho Chehab,
	David S. Miller, Greg Kroah-Hartman, Nicolas Ferre,
	Stanislav Fomichev, Quentin Monnet, Andrey Ignatov, Joe Stringer
In-Reply-To: <CACYkzJ5nYh7eGuru4vQ=2ZWumGPszBRbgqxmhd4WQRXktAUKkQ@mail.gmail.com>

On Fri, Dec 20, 2019 at 06:38:45PM +0100, KP Singh wrote:
> Hi Casey,
> 
> Thanks for taking a look!
> 
> On Fri, Dec 20, 2019 at 6:17 PM Casey Schaufler <casey@schaufler-ca.com> wrote:
> >
> > On 12/20/2019 7:41 AM, KP Singh wrote:
> > > From: KP Singh <kpsingh@google.com>
> > >
> > > This patch series is a continuation of the KRSI RFC
> > > (https://lore.kernel.org/bpf/20190910115527.5235-1-kpsingh@chromium.org/)
> > >
> > > # Motivation
> > >
> > > Google does rich analysis of runtime security data collected from
> > > internal Linux deployments (corporate devices and servers) to detect and
> > > thwart threats in real-time. Currently, this is done in custom kernel
> > > modules but we would like to replace this with something that's upstream
> > > and useful to others.
> > >
> > > The current kernel infrastructure for providing telemetry (Audit, Perf
> > > etc.) is disjoint from access enforcement (i.e. LSMs).  Augmenting the
> > > information provided by audit requires kernel changes to audit, its
> > > policy language and user-space components. Furthermore, building a MAC
> > > policy based on the newly added telemetry data requires changes to
> > > various LSMs and their respective policy languages.
> > >
> > > This patchset proposes a new stackable and privileged LSM which allows
> > > the LSM hooks to be implemented using eBPF. This facilitates a unified
> > > and dynamic (not requiring re-compilation of the kernel) audit and MAC
> > > policy.
> > >
> > > # Why an LSM?
> > >
> > > Linux Security Modules target security behaviours rather than the
> > > kernel's API. For example, it's easy to miss out a newly added system
> > > call for executing processes (eg. execve, execveat etc.) but the LSM
> > > framework ensures that all process executions trigger the relevant hooks
> > > irrespective of how the process was executed.
> > >
> > > Allowing users to implement LSM hooks at runtime also benefits the LSM
> > > eco-system by enabling a quick feedback loop from the security community
> > > about the kind of behaviours that the LSM Framework should be targeting.
> > >
> > > # How does it work?
> > >
> > > The LSM introduces a new eBPF (https://docs.cilium.io/en/v1.6/bpf/)
> > > program type, BPF_PROG_TYPE_LSM, which can only be attached to a LSM
> > > hook.  All LSM hooks are exposed as files in securityfs. Attachment
> > > requires CAP_SYS_ADMIN for loading eBPF programs and CAP_MAC_ADMIN for
> > > modifying MAC policies.
> > >
> > > The eBPF programs are passed the same arguments as the LSM hooks and
> > > executed in the body of the hook.
> >
> > This effectively exposes the LSM hooks as external APIs.
> > It would mean that we can't change or delete them. That
> > would be bad.
> 
> Perhaps this should have been clearer, we *do not* want to make LSM hooks
> a stable API and expect the eBPF programs to adapt when such changes occur.
> 
> Based on our comparison with the previous approach, this still ends up
> being a better trade-off (w.r.t. maintenance) when compared to adding
> specific helpers or verifier logic for  each new hook or field that
> needs to be exposed.

Given the discussion around tracing and stable ABI at the last kernel
summit, Linus's mandate is mainly around "every day users" and not
around these system-builder-sensitive cases where everyone has a strong
expectation to rebuild their policy when the kernel changes. i.e. it's
not "powertop", which was Linus's example of "and then everyone running
Fedora breaks".

So, while I know we've tried in the past to follow the letter of the
law, it seems Linus really expects this only to be followed when it will
have "real world" impact on unsuspecting end users.

Obviously James Morris has the final say here, but as I understand it,
it is fine to expose these here for the same reasons it's fine to expose
the (ever changing) tracepoints and BPF hooks.

-Kees

-- 
Kees Cook

^ permalink raw reply

* Re: [PATCH bpf-next v1 06/13] bpf: lsm: Init Hooks and create files in securityfs
From: Kees Cook @ 2019-12-30 19:20 UTC (permalink / raw)
  To: KP Singh
  Cc: Andrii Nakryiko, open list, bpf, linux-security-module,
	Alexei Starovoitov, Daniel Borkmann, James Morris, Thomas Garnier,
	Michael Halcrow, Paul Turner, Brendan Gregg, Jann Horn,
	Matthew Garrett, Christian Brauner, Mickaël Salaün,
	Florent Revest, Brendan Jackman, Martin KaFai Lau, Song Liu,
	Yonghong Song, Serge E. Hallyn, Mauro Carvalho Chehab,
	David S. Miller, Greg Kroah-Hartman, Nicolas Ferre,
	Stanislav Fomichev, Quentin Monnet, Andrey Ignatov, Joe Stringer
In-Reply-To: <20191230153711.GD70684@google.com>

On Mon, Dec 30, 2019 at 04:37:11PM +0100, KP Singh wrote:
> On 23-Dec 22:28, Andrii Nakryiko wrote:
> > On Fri, Dec 20, 2019 at 7:43 AM KP Singh <kpsingh@chromium.org> wrote:
> > [...]
> 
> Good catch! You're right. These macros will not be there in v2 as
> we move to using trampolines based callbacks.

Speaking of which -- is the BPF trampoline code correctly designed to be
W^X?

-- 
Kees Cook

^ permalink raw reply

* Re: [PATCH bpf-next v1 04/13] bpf: lsm: Allow btf_id based attachment for LSM hooks
From: KP Singh @ 2019-12-30 19:22 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: open list, bpf, linux-security-module, Alexei Starovoitov,
	Daniel Borkmann, James Morris, Kees Cook, Thomas Garnier,
	Michael Halcrow, Paul Turner, Brendan Gregg, Jann Horn,
	Matthew Garrett, Christian Brauner, Mickaël Salaün,
	Florent Revest, Brendan Jackman, Martin KaFai Lau, Song Liu,
	Yonghong Song, Serge E. Hallyn, Mauro Carvalho Chehab,
	David S. Miller, Greg Kroah-Hartman, Nicolas Ferre,
	Stanislav Fomichev, Quentin Monnet, Andrey Ignatov, Joe Stringer
In-Reply-To: <CAEf4BzaJ7YdSofV9-_D5zGC4GrwRvdPY3xyx7p+1rPD=Km2aXQ@mail.gmail.com>

On 23-Dez 15:54, Andrii Nakryiko wrote:
> On Fri, Dec 20, 2019 at 7:42 AM KP Singh <kpsingh@chromium.org> wrote:
> >
> > From: KP Singh <kpsingh@google.com>
> >
> > Refactor and re-use most of the logic for BPF_PROG_TYPE_TRACING with a few
> > changes.
> >
> > - The LSM hook BTF types are prefixed with "lsm_btf_"

Got rid of this for v2 as we are using trampoline. Will keep this in
mind if we ever need to generate type information. Thanks!

> 
> btf_trace_ and btf_struct_ops all have btf_ first, let's keep this consistent.
> 
> > - These types do not need the first (void *) pointer argument. The verifier
> >   only looks for this argument if prod->aux->attach_btf_trace is set.
> >
> > Signed-off-by: KP Singh <kpsingh@google.com>
> > ---
> >  kernel/bpf/syscall.c  |  1 +
> >  kernel/bpf/verifier.c | 83 ++++++++++++++++++++++++++++++++++++++++---
> >  2 files changed, 80 insertions(+), 4 deletions(-)
> >
> 
> [...]
> 
> > +
> > +       t = btf_type_by_id(btf_vmlinux, btf_id);
> > +       if (!t) {
> > +               verbose(env, "attach_btf_id %u is invalid\n", btf_id);
> > +               return -EINVAL;
> > +       }
> > +
> > +       tname = btf_name_by_offset(btf_vmlinux, t->name_off);
> > +       if (!tname) {
> 
> it can be empty, so better: !tname || !tname[0]

Will fix the usages in v2. 

- KP
> 
> > +               verbose(env, "attach_btf_id %u doesn't have a name\n", btf_id);
> > +               return -EINVAL;
> > +       }
> > +
> 
> [...]

^ permalink raw reply

* Re: [PATCH bpf-next v1 00/13] MAC and Audit policy using eBPF (KRSI)
From: Kees Cook @ 2019-12-30 19:30 UTC (permalink / raw)
  To: Mickaël Salaün
  Cc: KP Singh, linux-kernel, bpf, linux-security-module,
	Alexei Starovoitov, Daniel Borkmann, James Morris, Thomas Garnier,
	Michael Halcrow, Paul Turner, Brendan Gregg, Jann Horn,
	Matthew Garrett, Christian Brauner, Florent Revest,
	Brendan Jackman, Martin KaFai Lau, Song Liu, Yonghong Song,
	Serge E. Hallyn, Mauro Carvalho Chehab, David S. Miller,
	Greg Kroah-Hartman, Nicolas Ferre, Stanislav Fomichev,
	Quentin Monnet, Andrey Ignatov, Joe Stringer,
	Mickaël Salaün
In-Reply-To: <a6b61f33-82dc-0c1c-7a6c-1926343ef63e@digikod.net>

On Fri, Dec 20, 2019 at 11:46:47PM +0100, Mickaël Salaün wrote:
> I'm working on a version of Landlock without eBPF, but still with the
> initial sought properties: safe unprivileged composability, modularity, and
> dynamic update. I'll send this version soon.
> 
> I hope that the work and experience from Landlock to bring eBPF to LSM will
> continue to be used through KRSI. Landlock will now focus on the
> unprivileged sandboxing part, without eBPF. Stay tuned!

Will it end up looking at all like pledge? I'm still struggling to come
up with a sensible pledge-like design on top of seccomp, especially
given the need to have it very closely tied to the running libc...

-- 
Kees Cook

^ permalink raw reply

* Re: [GIT PULL] tomoyo fixes for 5.5
From: Linus Torvalds @ 2019-12-30 20:14 UTC (permalink / raw)
  To: Tetsuo Handa; +Cc: linux-security-module
In-Reply-To: <8483f2c2-626d-382f-3994-ee29daebff75@i-love.sakura.ne.jp>

On Mon, Dec 30, 2019 at 3:32 AM Tetsuo Handa
<penguin-kernel@i-love.sakura.ne.jp> wrote:
>
> This is my first time for sending pull requests. It seems that most people
> create a tag signed with GPG key but a few people send pull requests on
> master branch without signing with GPG key. Did I follow necessary steps?

I do require the gpg signed tag for non-kernel.org pull requests like this.

I trust the security at kernel.org - it requires 2FA and a gpg key
just to even push to a git repo there at all - but even there I
_prefer_ tags. But outside of kernel.org I absolutely do want to see a
signed tag for a pull request, not just a master branch.

Side note: I don't actually require the pgp key to be something I have
a direct path to, and if you can't get big set of signatures on yours,
that's fine for initial pull requests. The key ends up still being a
kind of identity, and we can work on getting the proper web of trust
built up over time.

           Linus

^ permalink raw reply

* Re: [PATCH v6 07/10] proc: flush task dcache entries from all procfs instances
From: J Freyensee @ 2019-12-30 22:03 UTC (permalink / raw)
  To: Alexey Gladkov, LKML, Kernel Hardening, Linux API, Linux FS Devel,
	Linux Security Module
  Cc: Akinobu Mita, Alexander Viro, Alexey Dobriyan, Andrew Morton,
	Andy Lutomirski, Daniel Micay, Djalal Harouni, Dmitry V . Levin,
	Eric W . Biederman, Greg Kroah-Hartman, Ingo Molnar,
	J . Bruce Fields, Jeff Layton, Jonathan Corbet, Kees Cook,
	Linus Torvalds, Oleg Nesterov, Solar Designer, Stephen Rothwell
In-Reply-To: <20191225125151.1950142-8-gladkov.alexey@gmail.com>

snip

.

.

.

>   
> +#ifdef CONFIG_PROC_FS
> +static inline void pidns_proc_lock(struct pid_namespace *pid_ns)
> +{
> +	down_write(&pid_ns->rw_proc_mounts);
> +}
> +
> +static inline void pidns_proc_unlock(struct pid_namespace *pid_ns)
> +{
> +	up_write(&pid_ns->rw_proc_mounts);
> +}
> +
> +static inline void pidns_proc_lock_shared(struct pid_namespace *pid_ns)
> +{
> +	down_read(&pid_ns->rw_proc_mounts);
> +}
> +
> +static inline void pidns_proc_unlock_shared(struct pid_namespace *pid_ns)
> +{
> +	up_read(&pid_ns->rw_proc_mounts);
> +}
> +#else /* !CONFIG_PROC_FS */
> +
Apologies for my newbie question. I couldn't help but notice all these 
function calls are assuming that the parameter struct pid_namespace 
*pid_ns will never be NULL.  Is that a good assumption?

I don't have the background in this code to answer on my own, but I 
thought I'd raise the question.

Thanks,
Jay


^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox