* [kernel-hardening] [RFC v2 1/3] procfs: parse mount options @ 2011-11-19 11:01 ` Vasiliy Kulikov 0 siblings, 0 replies; 14+ messages in thread From: Vasiliy Kulikov @ 2011-11-19 11:01 UTC (permalink / raw) To: kernel-hardening, Andrew Morton, linux-kernel, Alexey Dobriyan, Al Viro Cc: H. Peter Anvin, Greg KH, Theodore Tso, Alan Cox, Linus Torvalds This patch adds support of procfs mount options. Actual mount options are coming in the next patches. Signed-off-by: Vasiliy Kulikov <segoon@openwall.com> --- fs/proc/inode.c | 10 +++++++++ fs/proc/internal.h | 1 + fs/proc/root.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 64 insertions(+), 2 deletions(-) diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 7737c54..9b9f92a 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -7,6 +7,7 @@ #include <linux/time.h> #include <linux/proc_fs.h> #include <linux/kernel.h> +#include <linux/pid_namespace.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/stat.h> @@ -17,7 +18,9 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/sysctl.h> +#include <linux/seq_file.h> #include <linux/slab.h> +#include <linux/mount.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -102,12 +105,19 @@ void __init proc_init_inodecache(void) init_once); } +static int proc_show_options(struct seq_file *seq, struct vfsmount *vfs) +{ + return 0; +} + static const struct super_operations proc_sops = { .alloc_inode = proc_alloc_inode, .destroy_inode = proc_destroy_inode, .drop_inode = generic_delete_inode, .evict_inode = proc_evict_inode, .statfs = simple_statfs, + .remount_fs = proc_remount, + .show_options = proc_show_options, }; static void __pde_users_dec(struct proc_dir_entry *pde) diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 7838e5c..2925775 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -117,6 +117,7 @@ void pde_put(struct proc_dir_entry *pde); int proc_fill_super(struct super_block *); struct inode 
*proc_get_inode(struct super_block *, struct proc_dir_entry *); +int proc_remount(struct super_block *sb, int *flags, char *data); /* * These are generic /proc routines that use the internal diff --git a/fs/proc/root.c b/fs/proc/root.c index 9a8a2b7..165a0d1 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -18,6 +18,7 @@ #include <linux/bitops.h> #include <linux/mount.h> #include <linux/pid_namespace.h> +#include <linux/parser.h> #include "internal.h" @@ -36,6 +37,48 @@ static int proc_set_super(struct super_block *sb, void *data) return err; } +enum { + Opt_err, +}; + +static const match_table_t tokens = { + {Opt_err, NULL}, +}; + +static int proc_parse_options(char *options, struct pid_namespace *pid) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + + pr_debug("proc: options = %s\n", options); + + if (!options) + return 1; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + if (!*p) + continue; + + args[0].to = args[0].from = 0; + token = match_token(p, tokens, args); + switch (token) { + default: + pr_err("proc: unrecognized mount option \"%s\" " + "or missing value\n", p); + return 0; + } + } + + return 1; +} + +int proc_remount(struct super_block *sb, int *flags, char *data) +{ + struct pid_namespace *pid = sb->s_fs_info; + return !proc_parse_options(data, pid); +} + static struct dentry *proc_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { @@ -43,11 +86,15 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, struct super_block *sb; struct pid_namespace *ns; struct proc_inode *ei; + char *options; - if (flags & MS_KERNMOUNT) + if (flags & MS_KERNMOUNT) { ns = (struct pid_namespace *)data; - else + options = NULL; + } else { ns = current->nsproxy->pid_ns; + options = data; + } sb = sget(fs_type, proc_test_super, proc_set_super, ns); if (IS_ERR(sb)) @@ -55,6 +102,10 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, if (!sb->s_root) { sb->s_flags = flags; + if 
(!proc_parse_options(options, ns)) { + deactivate_locked_super(sb); + return ERR_PTR(-EINVAL); + } err = proc_fill_super(sb); if (err) { deactivate_locked_super(sb); -- 1.7.0.4 ^ permalink raw reply related [flat|nested] 14+ messages in thread
* [RFC v2 1/3] procfs: parse mount options @ 2011-11-19 11:01 ` Vasiliy Kulikov 0 siblings, 0 replies; 14+ messages in thread From: Vasiliy Kulikov @ 2011-11-19 11:01 UTC (permalink / raw) To: kernel-hardening, Andrew Morton, linux-kernel, Alexey Dobriyan, Al Viro Cc: H. Peter Anvin, Greg KH, Theodore Tso, Alan Cox, Linus Torvalds This patch adds support of procfs mount options. Actual mount options are coming in the next patches. Signed-off-by: Vasiliy Kulikov <segoon@openwall.com> --- fs/proc/inode.c | 10 +++++++++ fs/proc/internal.h | 1 + fs/proc/root.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 64 insertions(+), 2 deletions(-) diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 7737c54..9b9f92a 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -7,6 +7,7 @@ #include <linux/time.h> #include <linux/proc_fs.h> #include <linux/kernel.h> +#include <linux/pid_namespace.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/stat.h> @@ -17,7 +18,9 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/sysctl.h> +#include <linux/seq_file.h> #include <linux/slab.h> +#include <linux/mount.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -102,12 +105,19 @@ void __init proc_init_inodecache(void) init_once); } +static int proc_show_options(struct seq_file *seq, struct vfsmount *vfs) +{ + return 0; +} + static const struct super_operations proc_sops = { .alloc_inode = proc_alloc_inode, .destroy_inode = proc_destroy_inode, .drop_inode = generic_delete_inode, .evict_inode = proc_evict_inode, .statfs = simple_statfs, + .remount_fs = proc_remount, + .show_options = proc_show_options, }; static void __pde_users_dec(struct proc_dir_entry *pde) diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 7838e5c..2925775 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -117,6 +117,7 @@ void pde_put(struct proc_dir_entry *pde); int proc_fill_super(struct super_block *); struct inode *proc_get_inode(struct 
super_block *, struct proc_dir_entry *); +int proc_remount(struct super_block *sb, int *flags, char *data); /* * These are generic /proc routines that use the internal diff --git a/fs/proc/root.c b/fs/proc/root.c index 9a8a2b7..165a0d1 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -18,6 +18,7 @@ #include <linux/bitops.h> #include <linux/mount.h> #include <linux/pid_namespace.h> +#include <linux/parser.h> #include "internal.h" @@ -36,6 +37,48 @@ static int proc_set_super(struct super_block *sb, void *data) return err; } +enum { + Opt_err, +}; + +static const match_table_t tokens = { + {Opt_err, NULL}, +}; + +static int proc_parse_options(char *options, struct pid_namespace *pid) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + + pr_debug("proc: options = %s\n", options); + + if (!options) + return 1; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + if (!*p) + continue; + + args[0].to = args[0].from = 0; + token = match_token(p, tokens, args); + switch (token) { + default: + pr_err("proc: unrecognized mount option \"%s\" " + "or missing value\n", p); + return 0; + } + } + + return 1; +} + +int proc_remount(struct super_block *sb, int *flags, char *data) +{ + struct pid_namespace *pid = sb->s_fs_info; + return !proc_parse_options(data, pid); +} + static struct dentry *proc_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { @@ -43,11 +86,15 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, struct super_block *sb; struct pid_namespace *ns; struct proc_inode *ei; + char *options; - if (flags & MS_KERNMOUNT) + if (flags & MS_KERNMOUNT) { ns = (struct pid_namespace *)data; - else + options = NULL; + } else { ns = current->nsproxy->pid_ns; + options = data; + } sb = sget(fs_type, proc_test_super, proc_set_super, ns); if (IS_ERR(sb)) @@ -55,6 +102,10 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, if (!sb->s_root) { sb->s_flags = flags; + if (!proc_parse_options(options, ns)) 
{ + deactivate_locked_super(sb); + return ERR_PTR(-EINVAL); + } err = proc_fill_super(sb); if (err) { deactivate_locked_super(sb); -- 1.7.0.4 ^ permalink raw reply related [flat|nested] 14+ messages in thread
* [kernel-hardening] [RFC v2 2/3] procfs: add hidepid= and gid= mount options 2011-11-19 11:01 ` Vasiliy Kulikov @ 2011-11-19 11:02 ` Vasiliy Kulikov -1 siblings, 0 replies; 14+ messages in thread From: Vasiliy Kulikov @ 2011-11-19 11:02 UTC (permalink / raw) To: kernel-hardening, Andrew Morton, linux-kernel, Alexey Dobriyan, Al Viro Cc: H. Peter Anvin, Greg KH, Theodore Tso, Alan Cox, Linus Torvalds This patch adds support of mount options to restrict access to /proc/PID/ directories. The default backward-compatible "relaxed" behaviour is left untouched. The first mount option is called "hidepid" and its value defines how much info about processes we want to be available for non-owners: hidepid=0 (default) means the old behavior - anybody may read all world-readable /proc/PID/* files. hidepid=1 means users may not access any /proc/<pid>/ directories, but their own. Sensitive files like cmdline, sched*, status are now protected against other users. As permission checking is done in proc_pid_permission() and files' permissions are left untouched, programs expecting specific files' modes are not confused. hidepid=2 means hidepid=1 plus all /proc/PID/ will be invisible to other users. It doesn't mean that it hides whether a process exists (it can be learned by other means, e.g. by kill -0 $PID), but it hides process' euid and egid. It complicates intruder's task of gathering info about running processes, whether some daemon runs with elevated privileges, whether another user runs some sensitive program, whether other users run any program at all, etc. gid=XXX defines a group that will be able to gather all processes' info (as in hidepid=0 mode). This group should be used instead of putting nonroot user in sudoers file or something. However, untrusted users (like daemons, etc.) which are not supposed to monitor the tasks in the whole system should not be added to the group.
hidepid=1 or higher is designed to restrict access to procfs files, which might reveal some sensitive private information like precise keystrokes timings: http://www.openwall.com/lists/oss-security/2011/11/05/3 hidepid=1/2 doesn't break monitoring userspace tools. ps, top, pgrep, and conky gracefully handle EPERM/ENOENT and behave as if the current user is the only user running processes. pstree shows the process subtree which contains "pstree" process. Note: the patch doesn't deal with setuid/setgid issues of keeping preopened descriptors of procfs files (like https://lkml.org/lkml/2011/2/7/368). We rely on that the leaked information like the scheduling counters of setuid apps doesn't threaten anybody's privacy - only the user started the setuid program may read the counters. Signed-off-by: Vasiliy Kulikov <segoon@openwall.com> --- fs/proc/base.c | 64 ++++++++++++++++++++++++++++++++++++++++- fs/proc/inode.c | 8 +++++ fs/proc/root.c | 19 +++++++++++- include/linux/pid_namespace.h | 2 + 4 files changed, 91 insertions(+), 2 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 2db1bd3..8caf5cb 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -627,6 +627,45 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr) return 0; } +/* + * May current process learn task's sched/cmdline info (for hide_pid_min=1) + * or euid/egid (for hide_pid_min=2)? + */ +static bool has_pid_permissions(struct pid_namespace *pid, + struct task_struct *task, + int hide_pid_min) +{ + if (pid->hide_pid < hide_pid_min) + return true; + if (in_group_p(pid->pid_gid)) + return true; + return ptrace_may_access(task, PTRACE_MODE_READ); +} + + +static int proc_pid_permission(struct inode *inode, int mask) +{ + struct pid_namespace *pid = inode->i_sb->s_fs_info; + struct task_struct *task = get_proc_task(inode); + + if (!has_pid_permissions(pid, task, 1)) { + if (pid->hide_pid == 2) { + /* + * Let's make getdents(), stat(), and open() + * consistent with each other. 
If a process + * may not stat() a file, it shouldn't be seen + * in procfs at all. + */ + return -ENOENT; + } + + return -EPERM; + } + return generic_permission(inode, mask); +} + + + static const struct inode_operations proc_def_inode_operations = { .setattr = proc_setattr, }; @@ -1757,6 +1796,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) struct inode *inode = dentry->d_inode; struct task_struct *task; const struct cred *cred; + struct pid_namespace *pid = dentry->d_sb->s_fs_info; generic_fillattr(inode, stat); @@ -1765,6 +1805,14 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) stat->gid = 0; task = pid_task(proc_pid(inode), PIDTYPE_PID); if (task) { + if (!has_pid_permissions(pid, task, 2)) { + rcu_read_unlock(); + /* + * This doesn't prevent learning whether PID exists, + * it only makes getattr() consistent with readdir(). + */ + return -ENOENT; + } if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { cred = __task_cred(task); @@ -2935,6 +2983,7 @@ static const struct inode_operations proc_tgid_base_inode_operations = { .lookup = proc_tgid_base_lookup, .getattr = pid_getattr, .setattr = proc_setattr, + .permission = proc_pid_permission, }; static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) @@ -3138,6 +3187,12 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi proc_pid_instantiate, iter.task, NULL); } +static int fake_filldir(void *buf, const char *name, int namelen, + loff_t offset, u64 ino, unsigned d_type) +{ + return 0; +} + /* for the /proc/ directory itself, after non-process stuff has been done */ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) { @@ -3145,6 +3200,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) struct task_struct *reaper; struct tgid_iter iter; struct pid_namespace *ns; + filldir_t __filldir; if (filp->f_pos >= PID_MAX_LIMIT + 
TGID_OFFSET) goto out_no_task; @@ -3166,8 +3222,13 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) for (iter = next_tgid(ns, iter); iter.task; iter.tgid += 1, iter = next_tgid(ns, iter)) { + if (has_pid_permissions(ns, iter.task, 2)) + __filldir = filldir; + else + __filldir = fake_filldir; + filp->f_pos = iter.tgid + TGID_OFFSET; - if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) { + if (proc_pid_fill_cache(filp, dirent, __filldir, iter) < 0) { put_task_struct(iter.task); goto out; } @@ -3502,6 +3563,7 @@ static const struct inode_operations proc_task_inode_operations = { .lookup = proc_task_lookup, .getattr = proc_task_getattr, .setattr = proc_setattr, + .permission = proc_pid_permission, }; static const struct file_operations proc_task_operations = { diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 9b9f92a..6b5d927 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -107,6 +107,14 @@ void __init proc_init_inodecache(void) static int proc_show_options(struct seq_file *seq, struct vfsmount *vfs) { + struct super_block *sb = vfs->mnt_sb; + struct pid_namespace *pid = sb->s_fs_info; + + if (pid->pid_gid) + seq_printf(seq, ",gid=%lu", (unsigned long)pid->pid_gid); + if (pid->hide_pid != 0) + seq_printf(seq, ",hidepid=%u", pid->hide_pid); + return 0; } diff --git a/fs/proc/root.c b/fs/proc/root.c index 165a0d1..73af7b2 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -38,10 +38,12 @@ static int proc_set_super(struct super_block *sb, void *data) } enum { - Opt_err, + Opt_gid, Opt_hidepid, Opt_err, }; static const match_table_t tokens = { + {Opt_hidepid, "hidepid=%u"}, + {Opt_gid, "gid=%u"}, {Opt_err, NULL}, }; @@ -49,6 +51,7 @@ static int proc_parse_options(char *options, struct pid_namespace *pid) { char *p; substring_t args[MAX_OPT_ARGS]; + int option; pr_debug("proc: options = %s\n", options); @@ -63,6 +66,20 @@ static int proc_parse_options(char *options, struct pid_namespace *pid) args[0].to = args[0].from = 0; token 
= match_token(p, tokens, args); switch (token) { + case Opt_gid: + if (match_int(&args[0], &option)) + return 0; + pid->pid_gid = option; + break; + case Opt_hidepid: + if (match_int(&args[0], &option)) + return 0; + if (option < 0 || option > 2) { + pr_err("proc: hidepid value must be between 0 and 2.\n"); + return 0; + } + pid->hide_pid = option; + break; default: pr_err("proc: unrecognized mount option \"%s\" " "or missing value\n", p); diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 38d1032..e7cf666 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -30,6 +30,8 @@ struct pid_namespace { #ifdef CONFIG_BSD_PROCESS_ACCT struct bsd_acct_struct *bacct; #endif + gid_t pid_gid; + int hide_pid; }; extern struct pid_namespace init_pid_ns; -- 1.7.0.4 ^ permalink raw reply related [flat|nested] 14+ messages in thread
* [RFC v2 2/3] procfs: add hidepid= and gid= mount options @ 2011-11-19 11:02 ` Vasiliy Kulikov 0 siblings, 0 replies; 14+ messages in thread From: Vasiliy Kulikov @ 2011-11-19 11:02 UTC (permalink / raw) To: kernel-hardening, Andrew Morton, linux-kernel, Alexey Dobriyan, Al Viro Cc: H. Peter Anvin, Greg KH, Theodore Tso, Alan Cox, Linus Torvalds This patch adds support of mount options to restrict access to /proc/PID/ directories. The default backward-compatible "relaxed" behaviour is left untouched. The first mount option is called "hidepid" and its value defines how much info about processes we want to be available for non-owners: hidepid=0 (default) means the old behavior - anybody may read all world-readable /proc/PID/* files. hidepid=1 means users may not access any /proc/<pid>/ directories, but their own. Sensitive files like cmdline, sched*, status are now protected against other users. As permission checking is done in proc_pid_permission() and files' permissions are left untouched, programs expecting specific files' modes are not confused. hidepid=2 means hidepid=1 plus all /proc/PID/ will be invisible to other users. It doesn't mean that it hides whether a process exists (it can be learned by other means, e.g. by kill -0 $PID), but it hides process' euid and egid. It complicates intruder's task of gathering info about running processes, whether some daemon runs with elevated privileges, whether another user runs some sensitive program, whether other users run any program at all, etc. gid=XXX defines a group that will be able to gather all processes' info (as in hidepid=0 mode). This group should be used instead of putting nonroot user in sudoers file or something. However, untrusted users (like daemons, etc.) which are not supposed to monitor the tasks in the whole system should not be added to the group.
hidepid=1 or higher is designed to restrict access to procfs files, which might reveal some sensitive private information like precise keystrokes timings: http://www.openwall.com/lists/oss-security/2011/11/05/3 hidepid=1/2 doesn't break monitoring userspace tools. ps, top, pgrep, and conky gracefully handle EPERM/ENOENT and behave as if the current user is the only user running processes. pstree shows the process subtree which contains "pstree" process. Note: the patch doesn't deal with setuid/setgid issues of keeping preopened descriptors of procfs files (like https://lkml.org/lkml/2011/2/7/368). We rely on that the leaked information like the scheduling counters of setuid apps doesn't threaten anybody's privacy - only the user started the setuid program may read the counters. Signed-off-by: Vasiliy Kulikov <segoon@openwall.com> --- fs/proc/base.c | 64 ++++++++++++++++++++++++++++++++++++++++- fs/proc/inode.c | 8 +++++ fs/proc/root.c | 19 +++++++++++- include/linux/pid_namespace.h | 2 + 4 files changed, 91 insertions(+), 2 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 2db1bd3..8caf5cb 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -627,6 +627,45 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr) return 0; } +/* + * May current process learn task's sched/cmdline info (for hide_pid_min=1) + * or euid/egid (for hide_pid_min=2)? + */ +static bool has_pid_permissions(struct pid_namespace *pid, + struct task_struct *task, + int hide_pid_min) +{ + if (pid->hide_pid < hide_pid_min) + return true; + if (in_group_p(pid->pid_gid)) + return true; + return ptrace_may_access(task, PTRACE_MODE_READ); +} + + +static int proc_pid_permission(struct inode *inode, int mask) +{ + struct pid_namespace *pid = inode->i_sb->s_fs_info; + struct task_struct *task = get_proc_task(inode); + + if (!has_pid_permissions(pid, task, 1)) { + if (pid->hide_pid == 2) { + /* + * Let's make getdents(), stat(), and open() + * consistent with each other. 
If a process + * may not stat() a file, it shouldn't be seen + * in procfs at all. + */ + return -ENOENT; + } + + return -EPERM; + } + return generic_permission(inode, mask); +} + + + static const struct inode_operations proc_def_inode_operations = { .setattr = proc_setattr, }; @@ -1757,6 +1796,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) struct inode *inode = dentry->d_inode; struct task_struct *task; const struct cred *cred; + struct pid_namespace *pid = dentry->d_sb->s_fs_info; generic_fillattr(inode, stat); @@ -1765,6 +1805,14 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) stat->gid = 0; task = pid_task(proc_pid(inode), PIDTYPE_PID); if (task) { + if (!has_pid_permissions(pid, task, 2)) { + rcu_read_unlock(); + /* + * This doesn't prevent learning whether PID exists, + * it only makes getattr() consistent with readdir(). + */ + return -ENOENT; + } if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { cred = __task_cred(task); @@ -2935,6 +2983,7 @@ static const struct inode_operations proc_tgid_base_inode_operations = { .lookup = proc_tgid_base_lookup, .getattr = pid_getattr, .setattr = proc_setattr, + .permission = proc_pid_permission, }; static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) @@ -3138,6 +3187,12 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi proc_pid_instantiate, iter.task, NULL); } +static int fake_filldir(void *buf, const char *name, int namelen, + loff_t offset, u64 ino, unsigned d_type) +{ + return 0; +} + /* for the /proc/ directory itself, after non-process stuff has been done */ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) { @@ -3145,6 +3200,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) struct task_struct *reaper; struct tgid_iter iter; struct pid_namespace *ns; + filldir_t __filldir; if (filp->f_pos >= PID_MAX_LIMIT + 
TGID_OFFSET) goto out_no_task; @@ -3166,8 +3222,13 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) for (iter = next_tgid(ns, iter); iter.task; iter.tgid += 1, iter = next_tgid(ns, iter)) { + if (has_pid_permissions(ns, iter.task, 2)) + __filldir = filldir; + else + __filldir = fake_filldir; + filp->f_pos = iter.tgid + TGID_OFFSET; - if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) { + if (proc_pid_fill_cache(filp, dirent, __filldir, iter) < 0) { put_task_struct(iter.task); goto out; } @@ -3502,6 +3563,7 @@ static const struct inode_operations proc_task_inode_operations = { .lookup = proc_task_lookup, .getattr = proc_task_getattr, .setattr = proc_setattr, + .permission = proc_pid_permission, }; static const struct file_operations proc_task_operations = { diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 9b9f92a..6b5d927 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -107,6 +107,14 @@ void __init proc_init_inodecache(void) static int proc_show_options(struct seq_file *seq, struct vfsmount *vfs) { + struct super_block *sb = vfs->mnt_sb; + struct pid_namespace *pid = sb->s_fs_info; + + if (pid->pid_gid) + seq_printf(seq, ",gid=%lu", (unsigned long)pid->pid_gid); + if (pid->hide_pid != 0) + seq_printf(seq, ",hidepid=%u", pid->hide_pid); + return 0; } diff --git a/fs/proc/root.c b/fs/proc/root.c index 165a0d1..73af7b2 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -38,10 +38,12 @@ static int proc_set_super(struct super_block *sb, void *data) } enum { - Opt_err, + Opt_gid, Opt_hidepid, Opt_err, }; static const match_table_t tokens = { + {Opt_hidepid, "hidepid=%u"}, + {Opt_gid, "gid=%u"}, {Opt_err, NULL}, }; @@ -49,6 +51,7 @@ static int proc_parse_options(char *options, struct pid_namespace *pid) { char *p; substring_t args[MAX_OPT_ARGS]; + int option; pr_debug("proc: options = %s\n", options); @@ -63,6 +66,20 @@ static int proc_parse_options(char *options, struct pid_namespace *pid) args[0].to = args[0].from = 0; token 
= match_token(p, tokens, args); switch (token) { + case Opt_gid: + if (match_int(&args[0], &option)) + return 0; + pid->pid_gid = option; + break; + case Opt_hidepid: + if (match_int(&args[0], &option)) + return 0; + if (option < 0 || option > 2) { + pr_err("proc: hidepid value must be between 0 and 2.\n"); + return 0; + } + pid->hide_pid = option; + break; default: pr_err("proc: unrecognized mount option \"%s\" " "or missing value\n", p); diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 38d1032..e7cf666 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -30,6 +30,8 @@ struct pid_namespace { #ifdef CONFIG_BSD_PROCESS_ACCT struct bsd_acct_struct *bacct; #endif + gid_t pid_gid; + int hide_pid; }; extern struct pid_namespace init_pid_ns; -- 1.7.0.4 ^ permalink raw reply related [flat|nested] 14+ messages in thread
* [kernel-hardening] Re: [RFC v2 2/3] procfs: add hidepid= and gid= mount options 2011-11-19 11:02 ` Vasiliy Kulikov @ 2011-12-05 22:55 ` Hugh Dickins -1 siblings, 0 replies; 14+ messages in thread From: Hugh Dickins @ 2011-12-05 22:55 UTC (permalink / raw) To: Vasiliy Kulikov Cc: kernel-hardening, Andrew Morton, linux-kernel, Alexey Dobriyan, Al Viro, H. Peter Anvin, Greg KH, Theodore Tso, Alan Cox, James Morris, Oleg Nesterov On Sat, 19 Nov 2011, Vasiliy Kulikov wrote: > This patch adds support of mount options to restrict access to > /proc/PID/ directories. The default backward-compatible "relaxed" > behaviour is left untouched. This patch, in 3.2.0-rc3-next-20111202, leaks tasks: watch while :; do ps; grep KernelStack /proc/meminfo; sleep 1; done Hugh ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [RFC v2 2/3] procfs: add hidepid= and gid= mount options @ 2011-12-05 22:55 ` Hugh Dickins 0 siblings, 0 replies; 14+ messages in thread From: Hugh Dickins @ 2011-12-05 22:55 UTC (permalink / raw) To: Vasiliy Kulikov Cc: kernel-hardening, Andrew Morton, linux-kernel, Alexey Dobriyan, Al Viro, H. Peter Anvin, Greg KH, Theodore Tso, Alan Cox, James Morris, Oleg Nesterov On Sat, 19 Nov 2011, Vasiliy Kulikov wrote: > This patch adds support of mount options to restrict access to > /proc/PID/ directories. The default backward-compatible "relaxed" > behaviour is left untouched. This patch, in 3.2.0-rc3-next-20111202, leaks tasks: watch while :; do ps; grep KernelStack /proc/meminfo; sleep 1; done Hugh ^ permalink raw reply [flat|nested] 14+ messages in thread
* [kernel-hardening] Re: [RFC v2 2/3] procfs: add hidepid= and gid= mount options 2011-12-05 22:55 ` Hugh Dickins @ 2011-12-08 19:21 ` Vasiliy Kulikov -1 siblings, 0 replies; 14+ messages in thread From: Vasiliy Kulikov @ 2011-12-08 19:21 UTC (permalink / raw) To: Hugh Dickins Cc: kernel-hardening, Andrew Morton, linux-kernel, Alexey Dobriyan, Al Viro, H. Peter Anvin, Greg KH, Theodore Tso, Alan Cox, James Morris, Oleg Nesterov Hi Hugh, On Mon, Dec 05, 2011 at 14:55 -0800, Hugh Dickins wrote: > On Sat, 19 Nov 2011, Vasiliy Kulikov wrote: > > This patch adds support of mount options to restrict access to > > /proc/PID/ directories. The default backward-compatible "relaxed" > > behaviour is left untouched. > > This patch, in 3.2.0-rc3-next-20111202, leaks tasks: watch > while :; do ps; grep KernelStack /proc/meminfo; sleep 1; done Thank you very much for the report! Unfortunately, I have no time to look at it now, will try to debug the issue these weekends. Thanks, -- Vasiliy Kulikov http://www.openwall.com - bringing security into open computing environments ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [RFC v2 2/3] procfs: add hidepid= and gid= mount options @ 2011-12-08 19:21 ` Vasiliy Kulikov 0 siblings, 0 replies; 14+ messages in thread From: Vasiliy Kulikov @ 2011-12-08 19:21 UTC (permalink / raw) To: Hugh Dickins Cc: kernel-hardening, Andrew Morton, linux-kernel, Alexey Dobriyan, Al Viro, H. Peter Anvin, Greg KH, Theodore Tso, Alan Cox, James Morris, Oleg Nesterov Hi Hugh, On Mon, Dec 05, 2011 at 14:55 -0800, Hugh Dickins wrote: > On Sat, 19 Nov 2011, Vasiliy Kulikov wrote: > > This patch adds support of mount options to restrict access to > > /proc/PID/ directories. The default backward-compatible "relaxed" > > behaviour is left untouched. > > This patch, in 3.2.0-rc3-next-20111202, leaks tasks: watch > while :; do ps; grep KernelStack /proc/meminfo; sleep 1; done Thank you very much for the report! Unfortunately, I have no time to look at it now, will try to debug the issue these weekends. Thanks, -- Vasiliy Kulikov http://www.openwall.com - bringing security into open computing environments ^ permalink raw reply [flat|nested] 14+ messages in thread
* [kernel-hardening] [RFC v2 3/3] procfs: add documentation for procfs mount options 2011-11-19 11:01 ` Vasiliy Kulikov @ 2011-11-19 11:02 ` Vasiliy Kulikov -1 siblings, 0 replies; 14+ messages in thread From: Vasiliy Kulikov @ 2011-11-19 11:02 UTC (permalink / raw) To: kernel-hardening, Andrew Morton, linux-kernel, Alexey Dobriyan, Al Viro, Randy Dunlap Cc: H. Peter Anvin, Greg KH, Theodore Tso, Alan Cox, Linus Torvalds Signed-off-by: Vasiliy Kulikov <segoon@openwall.com> --- Documentation/filesystems/proc.txt | 39 ++++++++++++++++++++++++++++++++++++ 1 files changed, 39 insertions(+), 0 deletions(-) diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 0ec91f0..12fee13 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -41,6 +41,8 @@ Table of Contents 3.5 /proc/<pid>/mountinfo - Information about mounts 3.6 /proc/<pid>/comm & /proc/<pid>/task/<tid>/comm + 4 Configuring procfs + 4.1 Mount options ------------------------------------------------------------------------------ Preface @@ -1542,3 +1544,40 @@ a task to set its own or one of its thread siblings comm value. The comm value is limited in size compared to the cmdline value, so writing anything longer then the kernel's TASK_COMM_LEN (currently 16 chars) will result in a truncated comm value. + + +------------------------------------------------------------------------------ +Configuring procfs +------------------------------------------------------------------------------ + +4.1 Mount options +--------------------- + +The following mount options are supported: + + hidepid= Set /proc/<pid>/ access mode. + gid= Set the group authorized to learn processes information. + +hidepid=0 means classic mode - everybody may access all /proc/<pid>/ directories +(default). + +hidepid=1 means users may not access any /proc/<pid>/ directories but their +own. Sensitive files like cmdline, sched*, status are now protected against +other users.
This makes it impossible to learn whether any user runs +specific program (given the program doesn't reveal itself by its behaviour). +As an additional bonus, as /proc/<pid>/cmdline is inaccessible to other users, +poorly written programs passing sensitive information via program arguments are +now protected against local eavesdroppers. + +hidepid=2 means hidepid=1 plus all /proc/<pid>/ will be fully invisible to other +users. It doesn't mean that it hides a fact whether a process with a specific +pid value exists (it can be learned by other means, e.g. by "kill -0 $PID"), +but it hides process' uid and gid, which may be learned by stat()'ing +/proc/<pid>/ otherwise. It greatly complicates an intruder's task of gathering +information about running processes, whether some daemon runs with elevated +privileges, whether another user runs some sensitive program, whether other users +run any program at all, etc. + +gid= defines a group authorized to learn processes information otherwise +prohibited by hidepid=. If you use some daemon like identd which needs to learn +information about processes, just add identd to this group. -- 1.7.0.4 ^ permalink raw reply related [flat|nested] 14+ messages in thread
* [RFC v2 3/3] procfs: add documentation for procfs mount options @ 2011-11-19 11:02 ` Vasiliy Kulikov 0 siblings, 0 replies; 14+ messages in thread From: Vasiliy Kulikov @ 2011-11-19 11:02 UTC (permalink / raw) To: kernel-hardening, Andrew Morton, linux-kernel, Alexey Dobriyan, Al Viro, Randy Dunlap Cc: H. Peter Anvin, Greg KH, Theodore Tso, Alan Cox, Linus Torvalds Signed-off-by: Vasiliy Kulikov <segoon@openwall.com> --- Documentation/filesystems/proc.txt | 39 ++++++++++++++++++++++++++++++++++++ 1 files changed, 39 insertions(+), 0 deletions(-) diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 0ec91f0..12fee13 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -41,6 +41,8 @@ Table of Contents 3.5 /proc/<pid>/mountinfo - Information about mounts 3.6 /proc/<pid>/comm & /proc/<pid>/task/<tid>/comm + 4 Configuring procfs + 4.1 Mount options ------------------------------------------------------------------------------ Preface @@ -1542,3 +1544,40 @@ a task to set its own or one of its thread siblings comm value. The comm value is limited in size compared to the cmdline value, so writing anything longer then the kernel's TASK_COMM_LEN (currently 16 chars) will result in a truncated comm value. + + +------------------------------------------------------------------------------ +Configuring procfs +------------------------------------------------------------------------------ + +4.1 Mount options +--------------------- + +The following mount options are supported: + + hidepid= Set /proc/<pid>/ access mode. + gid= Set the group authorized to learn processes information. + +hidepid=0 means classic mode - everybody may access all /proc/<pid>/ directories +(default). + +hidepid=1 means users may not access any /proc/<pid>/ directories but their +own. Sensitive files like cmdline, sched*, status are now protected against +other users.
This makes it impossible to learn whether any user runs a +specific program (given the program doesn't reveal itself by its behaviour). +As an additional bonus, as /proc/<pid>/cmdline is inaccessible to other users, +poorly written programs passing sensitive information via program arguments are +now protected against local eavesdroppers. + +hidepid=2 means hidepid=1 plus all /proc/<pid>/ will be fully invisible to other +users. It doesn't mean that it hides whether a process with a specific +pid value exists (it can be learned by other means, e.g. by "kill -0 $PID"), +but it hides process' uid and gid, which may be learned by stat()'ing +/proc/<pid>/ otherwise. It greatly complicates an intruder's task of gathering +information about running processes, whether some daemon runs with elevated +privileges, whether another user runs some sensitive program, whether other users +run any program at all, etc. + +gid= defines a group authorized to learn processes information otherwise +prohibited by hidepid=. If you use some daemon like identd which needs to learn +information about processes, just add identd to this group. -- 1.7.0.4 ^ permalink raw reply related [flat|nested] 14+ messages in thread
* [kernel-hardening] Re: [RFC v2 1/3] procfs: parse mount options 2011-11-19 11:01 ` Vasiliy Kulikov @ 2011-11-22 0:34 ` Andrew Morton -1 siblings, 0 replies; 14+ messages in thread From: Andrew Morton @ 2011-11-22 0:34 UTC (permalink / raw) To: Vasiliy Kulikov Cc: kernel-hardening, linux-kernel, Alexey Dobriyan, Al Viro, H. Peter Anvin, Greg KH, Theodore Tso, Alan Cox, Linus Torvalds On Sat, 19 Nov 2011 15:01:26 +0400 Vasiliy Kulikov <segooon@gmail.com> wrote: > This patch adds support of procfs mount options. > Actual mount options are coming in the next patches. The patches look sane to me. I assume that `mount -o remount' has been tested and works as expected? I also assume that any file which was opened prior to the remount will remain accessible to any process which has the fd. Is this acceptable from a security/operational POV? ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [RFC v2 1/3] procfs: parse mount options @ 2011-11-22 0:34 ` Andrew Morton 0 siblings, 0 replies; 14+ messages in thread From: Andrew Morton @ 2011-11-22 0:34 UTC (permalink / raw) To: Vasiliy Kulikov Cc: kernel-hardening, linux-kernel, Alexey Dobriyan, Al Viro, H. Peter Anvin, Greg KH, Theodore Tso, Alan Cox, Linus Torvalds On Sat, 19 Nov 2011 15:01:26 +0400 Vasiliy Kulikov <segooon@gmail.com> wrote: > This patch adds support of procfs mount options. > Actual mount options are coming in the next patches. The patches look sane to me. I assume that `mount -o remount' has been tested and works as expected? I also assume that any file which was opened prior to the remount will remain accessible to any process which has the fd. Is this acceptable from a security/operational POV? ^ permalink raw reply [flat|nested] 14+ messages in thread
* [kernel-hardening] Re: [RFC v2 1/3] procfs: parse mount options 2011-11-22 0:34 ` Andrew Morton @ 2011-11-22 10:07 ` Vasiliy Kulikov -1 siblings, 0 replies; 14+ messages in thread From: Vasiliy Kulikov @ 2011-11-22 10:07 UTC (permalink / raw) To: Andrew Morton Cc: kernel-hardening, linux-kernel, Alexey Dobriyan, Al Viro, H. Peter Anvin, Greg KH, Theodore Tso, Alan Cox, Linus Torvalds Hi Andrew, On Mon, Nov 21, 2011 at 16:34 -0800, Andrew Morton wrote: > On Sat, 19 Nov 2011 15:01:26 +0400 > Vasiliy Kulikov <segooon@gmail.com> wrote: > > > This patch adds support of procfs mount options. > > Actual mount options are coming in the next patches. > > The patches look sane to me. Thank you for the review! > I assume that `mount -o remount' has been tested and works as expected? Yes. > I also assume that any file which was opened prior to the remount will > remain accessible to any process which has the fd. Is this acceptable > from a security/operational POV? No, currently this "check permissions on open()" is violated at least in getattr(). On each access the full permission checking is done. It is trivial to fix (by moving hide_pid to inode), but is it worth it? I see the scheme is totally constant during the system lifetime, i.e. procfs policy is defined during the boot in boot mount scripts and is not changed during the system lifetime at all. I see it as an ability to define different policies on a per-container basis, but not a "change it from time to time" thing. If anybody sees a case where it makes sense, I'll move hide_pid to inode. FWIW, in grsecurity it is a CONFIG_ option and there is no such problem at all. :-) Thanks, -- Vasiliy Kulikov http://www.openwall.com - bringing security into open computing environments ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [RFC v2 1/3] procfs: parse mount options @ 2011-11-22 10:07 ` Vasiliy Kulikov 0 siblings, 0 replies; 14+ messages in thread From: Vasiliy Kulikov @ 2011-11-22 10:07 UTC (permalink / raw) To: Andrew Morton Cc: kernel-hardening, linux-kernel, Alexey Dobriyan, Al Viro, H. Peter Anvin, Greg KH, Theodore Tso, Alan Cox, Linus Torvalds Hi Andrew, On Mon, Nov 21, 2011 at 16:34 -0800, Andrew Morton wrote: > On Sat, 19 Nov 2011 15:01:26 +0400 > Vasiliy Kulikov <segooon@gmail.com> wrote: > > > This patch adds support of procfs mount options. > > Actual mount options are coming in the next patches. > > The patches look sane to me. Thank you for the review! > I assume that `mount -o remount' has been tested and works as expected? Yes. > I also assume that any file which was opened prior to the remount will > remain accessible to any process which has the fd. Is this acceptable > from a security/operational POV? No, currently this "check permissions on open()" is violated at least in getattr(). On each access the full permission checking is done. It is trivial to fix (by moving hide_pid to inode), but is it worth it? I see the scheme is totally constant during the system lifetime, i.e. procfs policy is defined during the boot in boot mount scripts and is not changed during the system lifetime at all. I see it as an ability to define different policies on a per-container basis, but not a "change it from time to time" thing. If anybody sees a case where it makes sense, I'll move hide_pid to inode. FWIW, in grsecurity it is a CONFIG_ option and there is no such problem at all. :-) Thanks, -- Vasiliy Kulikov http://www.openwall.com - bringing security into open computing environments ^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2011-12-08 19:24 UTC | newest] Thread overview: 14+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2011-11-19 11:01 [kernel-hardening] [RFC v2 1/3] procfs: parse mount options Vasiliy Kulikov 2011-11-19 11:01 ` Vasiliy Kulikov 2011-11-19 11:02 ` [kernel-hardening] [RFC v2 2/3] procfs: add hidepid= and gid= " Vasiliy Kulikov 2011-11-19 11:02 ` Vasiliy Kulikov 2011-12-05 22:55 ` [kernel-hardening] " Hugh Dickins 2011-12-05 22:55 ` Hugh Dickins 2011-12-08 19:21 ` [kernel-hardening] " Vasiliy Kulikov 2011-12-08 19:21 ` Vasiliy Kulikov 2011-11-19 11:02 ` [kernel-hardening] [RFC v2 3/3] procfs: add documentation for procfs " Vasiliy Kulikov 2011-11-19 11:02 ` Vasiliy Kulikov 2011-11-22 0:34 ` [kernel-hardening] Re: [RFC v2 1/3] procfs: parse " Andrew Morton 2011-11-22 0:34 ` Andrew Morton 2011-11-22 10:07 ` [kernel-hardening] " Vasiliy Kulikov 2011-11-22 10:07 ` Vasiliy Kulikov
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.