From: ebiederm@xmission.com (Eric W. Biederman)
To: Hari Bathini <hbathini@linux.vnet.ibm.com>
Cc: daniel@iogearbox.net, peterz@infradead.org,
linux-kernel@vger.kernel.org, acme@kernel.org,
alexander.shishkin@linux.intel.com, mingo@redhat.com,
paulus@samba.org, kernel@kyup.com, rostedt@goodmis.org,
viro@zeniv.linux.org.uk, aravinda@linux.vnet.ibm.com,
ananth@in.ibm.com
Subject: Re: [RFC PATCH v2 3/3] tracefs: add 'newinstance' mount option
Date: Wed, 03 Aug 2016 21:54:57 -0500 [thread overview]
Message-ID: <8760rhdz0e.fsf@x220.int.ebiederm.org> (raw)
In-Reply-To: <146965486994.23765.17493394560604547789.stgit@hbathini.in.ibm.com> (Hari Bathini's message of "Thu, 28 Jul 2016 02:57:49 +0530")
Hari Bathini <hbathini@linux.vnet.ibm.com> writes:
> When tracefs is mounted inside a container, its files are visible to
> all containers. This implies that a user from within a container can
> list/delete uprobes registered elsewhere, leading to security issues
> and/or denial of service (Eg. deleting a probe that is registered from
> elsewhere). This patch addresses this problem by adding mount option
> 'newinstance', allowing containers to have their own instance mounted
> separately. Something like the below from within a container:
newinstance is an anti-pattern in devpts and should not be copied.
To fix some severe defects of devpts we had to always create new
istances and the code and the testing to make that all work was
not pleasant. Please don't add another option that we will just have to
make redundant later.
Eric
> $ mount -o newinstance -t tracefs tracefs /sys/kernel/tracing
> $
> $
> $ perf probe /lib/x86_64-linux-gnu/libc.so.6 malloc
> Added new event:
> probe_libc:malloc (on malloc in /lib/x86_64-linux-gnu/libc.so.6)
>
> You can now use it in all perf tools, such as:
>
> perf record -e probe_libc:malloc -aR sleep 1
>
> $
> $
> $ perf probe --list
> probe_libc:malloc (on __libc_malloc in /lib64/libc.so.6)
> $
>
> while another container/host has a completely different view:
>
>
> $ perf probe --list
> probe_libc:memset (on __libc_memset in /lib64/libc.so.6)
> $
>
> This patch reuses the code that provides support to create new instances
> under tracefs instances directory.
>
> Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
> ---
> fs/tracefs/inode.c | 171 ++++++++++++++++++++++++++++++++++++++---------
> include/linux/tracefs.h | 11 ++-
> kernel/trace/trace.c | 52 ++++++++++----
> 3 files changed, 181 insertions(+), 53 deletions(-)
>
> diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
> index 4a0e48f..2d6acda 100644
> --- a/fs/tracefs/inode.c
> +++ b/fs/tracefs/inode.c
> @@ -51,9 +51,9 @@ static const struct file_operations tracefs_file_operations = {
> };
>
> static struct tracefs_dir_ops {
> - int (*mkdir)(const char *name);
> - int (*rmdir)(const char *name);
> -} tracefs_ops;
> + int (*mkdir)(int instance_type, void *data);
> + int (*rmdir)(int instance_type, void *data);
> +} tracefs_instance_ops;
>
> static char *get_dname(struct dentry *dentry)
> {
> @@ -85,7 +85,7 @@ static int tracefs_syscall_mkdir(struct inode *inode, struct dentry *dentry, umo
> * mkdir routine to handle races.
> */
> inode_unlock(inode);
> - ret = tracefs_ops.mkdir(name);
> + ret = tracefs_instance_ops.mkdir(INSTANCE_DIR, name);
> inode_lock(inode);
>
> kfree(name);
> @@ -112,7 +112,7 @@ static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry)
> inode_unlock(inode);
> inode_unlock(dentry->d_inode);
>
> - ret = tracefs_ops.rmdir(name);
> + ret = tracefs_instance_ops.rmdir(INSTANCE_DIR, name);
>
> inode_lock_nested(inode, I_MUTEX_PARENT);
> inode_lock(dentry->d_inode);
> @@ -142,12 +142,14 @@ struct tracefs_mount_opts {
> kuid_t uid;
> kgid_t gid;
> umode_t mode;
> + int newinstance;
> };
>
> enum {
> Opt_uid,
> Opt_gid,
> Opt_mode,
> + Opt_newinstance,
> Opt_err
> };
>
> @@ -155,14 +157,26 @@ static const match_table_t tokens = {
> {Opt_uid, "uid=%u"},
> {Opt_gid, "gid=%u"},
> {Opt_mode, "mode=%o"},
> + {Opt_newinstance, "newinstance"},
> {Opt_err, NULL}
> };
>
> struct tracefs_fs_info {
> struct tracefs_mount_opts mount_opts;
> + struct super_block *sb;
> };
>
> -static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
> +static inline struct tracefs_fs_info *TRACEFS_SB(struct super_block *sb)
> +{
> + return sb->s_fs_info;
> +}
> +
> +#define PARSE_MOUNT 0
> +#define PARSE_REMOUNT 1
> +
> +static int tracefs_parse_options(char *data,
> + int op,
> + struct tracefs_mount_opts *opts)
> {
> substring_t args[MAX_OPT_ARGS];
> int option;
> @@ -173,6 +187,10 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
>
> opts->mode = TRACEFS_DEFAULT_MODE;
>
> + /* newinstance makes sense only on initial mount */
> + if (op == PARSE_MOUNT)
> + opts->newinstance = 0;
> +
> while ((p = strsep(&data, ",")) != NULL) {
> if (!*p)
> continue;
> @@ -200,6 +218,11 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
> return -EINVAL;
> opts->mode = option & S_IALLUGO;
> break;
> + case Opt_newinstance:
> + /* newinstance makes sense only on initial mount */
> + if (op == PARSE_MOUNT)
> + opts->newinstance = 1;
> + break;
> /*
> * We might like to report bad mount options here;
> * but traditionally tracefs has ignored all mount options
> @@ -231,7 +254,7 @@ static int tracefs_remount(struct super_block *sb, int *flags, char *data)
> struct tracefs_fs_info *fsi = sb->s_fs_info;
>
> sync_filesystem(sb);
> - err = tracefs_parse_options(data, &fsi->mount_opts);
> + err = tracefs_parse_options(data, PARSE_REMOUNT, &fsi->mount_opts);
> if (err)
> goto fail;
>
> @@ -254,6 +277,8 @@ static int tracefs_show_options(struct seq_file *m, struct dentry *root)
> from_kgid_munged(&init_user_ns, opts->gid));
> if (opts->mode != TRACEFS_DEFAULT_MODE)
> seq_printf(m, ",mode=%o", opts->mode);
> + if (opts->newinstance)
> + seq_puts(m, ",newinstance");
>
> return 0;
> }
> @@ -264,53 +289,130 @@ static const struct super_operations tracefs_super_operations = {
> .show_options = tracefs_show_options,
> };
>
> -static int trace_fill_super(struct super_block *sb, void *data, int silent)
> +static void *new_tracefs_fs_info(struct super_block *sb)
> {
> - static struct tree_descr trace_files[] = {{""}};
> struct tracefs_fs_info *fsi;
> - int err;
> -
> - save_mount_options(sb, data);
>
> fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL);
> - sb->s_fs_info = fsi;
> - if (!fsi) {
> - err = -ENOMEM;
> - goto fail;
> - }
> + if (!fsi)
> + return NULL;
>
> - err = tracefs_parse_options(data, &fsi->mount_opts);
> - if (err)
> + fsi->mount_opts.mode = TRACEFS_DEFAULT_MODE;
> + fsi->sb = sb;
> +
> + return fsi;
> +}
> +
> +static int trace_fill_super(struct super_block *sb, void *data, int silent)
> +{
> + struct inode *inode;
> +
> + sb->s_blocksize = PAGE_SIZE;
> + sb->s_blocksize_bits = PAGE_SHIFT;
> + sb->s_magic = TRACEFS_MAGIC;
> + sb->s_op = &tracefs_super_operations;
> + sb->s_time_gran = 1;
> +
> + sb->s_fs_info = new_tracefs_fs_info(sb);
> + if (!sb->s_fs_info)
> goto fail;
>
> - err = simple_fill_super(sb, TRACEFS_MAGIC, trace_files);
> - if (err)
> + inode = new_inode(sb);
> + if (!inode)
> goto fail;
> + inode->i_ino = 1;
> + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
> + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
> + inode->i_op = &simple_dir_inode_operations;
> + inode->i_fop = &simple_dir_operations;
> + set_nlink(inode, 2);
>
> - sb->s_op = &tracefs_super_operations;
> + sb->s_root = d_make_root(inode);
> + if (sb->s_root)
> + return 0;
>
> - tracefs_apply_options(sb);
> + pr_err("get root dentry failed\n");
>
> return 0;
>
> fail:
> - kfree(fsi);
> - sb->s_fs_info = NULL;
> - return err;
> + return -ENOMEM;
> +}
> +
> +static int compare_init_tracefs_sb(struct super_block *s, void *p)
> +{
> + if (tracefs_mount)
> + return tracefs_mount->mnt_sb == s;
> + return 0;
> }
>
> static struct dentry *trace_mount(struct file_system_type *fs_type,
> int flags, const char *dev_name,
> void *data)
> {
> - return mount_single(fs_type, flags, data, trace_fill_super);
> + int err;
> + struct tracefs_mount_opts opts;
> + struct super_block *s;
> +
> + err = tracefs_parse_options(data, PARSE_MOUNT, &opts);
> + if (err)
> + return ERR_PTR(err);
> +
> + /* Require newinstance for all user namespace mounts to ensure
> + * the mount options are not changed.
> + */
> + if ((current_user_ns() != &init_user_ns) && !opts.newinstance)
> + return ERR_PTR(-EINVAL);
> +
> + if (opts.newinstance)
> + s = sget(fs_type, NULL, set_anon_super, flags, NULL);
> + else
> + s = sget(fs_type, compare_init_tracefs_sb, set_anon_super,
> + flags, NULL);
> +
> + if (IS_ERR(s))
> + return ERR_CAST(s);
> +
> + if (!s->s_root) {
> + err = trace_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
> + if (err)
> + goto out_undo_sget;
> + s->s_flags |= MS_ACTIVE;
> + }
> +
> + if (opts.newinstance) {
> + err = tracefs_instance_ops.mkdir(INSTANCE_MNT, s->s_root);
> + if (err)
> + goto out_undo_sget;
> + }
> +
> + memcpy(&(TRACEFS_SB(s))->mount_opts, &opts, sizeof(opts));
> +
> + tracefs_apply_options(s);
> +
> + return dget(s->s_root);
> +
> +out_undo_sget:
> + deactivate_locked_super(s);
> + return ERR_PTR(err);
> +}
> +
> +static void trace_kill_sb(struct super_block *sb)
> +{
> + struct tracefs_fs_info *fsi = TRACEFS_SB(sb);
> +
> + if (fsi->mount_opts.newinstance)
> + tracefs_instance_ops.rmdir(INSTANCE_MNT, sb->s_root);
> +
> + kfree(fsi);
> + kill_litter_super(sb);
> }
>
> static struct file_system_type trace_fs_type = {
> .owner = THIS_MODULE,
> .name = "tracefs",
> .mount = trace_mount,
> - .kill_sb = kill_litter_super,
> + .kill_sb = trace_kill_sb,
> };
> MODULE_ALIAS_FS("tracefs");
>
> @@ -480,22 +582,23 @@ struct dentry *tracefs_create_dir(const char *name, struct dentry *parent)
> *
> * Returns the dentry of the instances directory.
> */
> -struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent,
> - int (*mkdir)(const char *name),
> - int (*rmdir)(const char *name))
> +struct dentry *
> +tracefs_create_instance_dir(const char *name, struct dentry *parent,
> + int (*mkdir)(int instance_type, void *data),
> + int (*rmdir)(int instance_type, void *data))
> {
> struct dentry *dentry;
>
> /* Only allow one instance of the instances directory. */
> - if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir))
> + if (WARN_ON(tracefs_instance_ops.mkdir || tracefs_instance_ops.rmdir))
> return NULL;
>
> dentry = __create_dir(name, parent, &tracefs_dir_inode_operations);
> if (!dentry)
> return NULL;
>
> - tracefs_ops.mkdir = mkdir;
> - tracefs_ops.rmdir = rmdir;
> + tracefs_instance_ops.mkdir = mkdir;
> + tracefs_instance_ops.rmdir = rmdir;
>
> return dentry;
> }
> diff --git a/include/linux/tracefs.h b/include/linux/tracefs.h
> index 5b727a1..30d4e55 100644
> --- a/include/linux/tracefs.h
> +++ b/include/linux/tracefs.h
> @@ -25,6 +25,10 @@ struct file_operations;
>
> #ifdef CONFIG_TRACING
>
> +/* instance types */
> +#define INSTANCE_DIR 0 /* created inside instances dir */
> +#define INSTANCE_MNT 1 /* created with newinstance mount option */
> +
> struct dentry *tracefs_create_file(const char *name, umode_t mode,
> struct dentry *parent, void *data,
> const struct file_operations *fops);
> @@ -34,9 +38,10 @@ struct dentry *tracefs_create_dir(const char *name, struct dentry *parent);
> void tracefs_remove(struct dentry *dentry);
> void tracefs_remove_recursive(struct dentry *dentry);
>
> -struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent,
> - int (*mkdir)(const char *name),
> - int (*rmdir)(const char *name));
> +struct dentry *
> +tracefs_create_instance_dir(const char *name, struct dentry *parent,
> + int (*mkdir)(int instance_type, void *data),
> + int (*rmdir)(int instance_type, void *data));
>
> bool tracefs_initialized(void);
>
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 23a8111..a991e9d 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -6782,17 +6782,24 @@ static void update_tracer_options(struct trace_array *tr)
> mutex_unlock(&trace_types_lock);
> }
>
> -static int instance_mkdir(const char *name)
> +static int instance_mkdir(int instance_type, void *data)
> {
> + const char *name = "tracing";
> + struct dentry *mnt_root = NULL;
> struct trace_array *tr;
> int ret;
>
> mutex_lock(&trace_types_lock);
>
> - ret = -EEXIST;
> - list_for_each_entry(tr, &ftrace_trace_arrays, list) {
> - if (tr->name && strcmp(tr->name, name) == 0)
> - goto out_unlock;
> + if (instance_type == INSTANCE_MNT)
> + mnt_root = data;
> + else {
> + name = data;
> + ret = -EEXIST;
> + list_for_each_entry(tr, &ftrace_trace_arrays, list) {
> + if (tr->name && strcmp(tr->name, name) == 0)
> + goto out_unlock;
> + }
> }
>
> ret = -ENOMEM;
> @@ -6823,9 +6830,14 @@ static int instance_mkdir(const char *name)
> if (allocate_trace_buffers(tr, trace_buf_size) < 0)
> goto out_free_tr;
>
> - tr->dir = tracefs_create_dir(name, trace_instance_dir);
> - if (!tr->dir)
> - goto out_free_tr;
> + if (instance_type == INSTANCE_MNT) {
> + mnt_root->d_inode->i_private = tr;
> + tr->dir = mnt_root;
> + } else {
> + tr->dir = tracefs_create_dir(name, trace_instance_dir);
> + if (!tr->dir)
> + goto out_free_tr;
> + }
>
> ret = event_trace_add_tracer(tr->dir, tr);
> if (ret) {
> @@ -6856,8 +6868,10 @@ static int instance_mkdir(const char *name)
>
> }
>
> -static int instance_rmdir(const char *name)
> +static int instance_rmdir(int instance_type, void *data)
> {
> + const char *name = "tracing";
> + struct dentry *mnt_root = NULL;
> struct trace_array *tr;
> int found = 0;
> int ret;
> @@ -6865,15 +6879,21 @@ static int instance_rmdir(const char *name)
>
> mutex_lock(&trace_types_lock);
>
> - ret = -ENODEV;
> - list_for_each_entry(tr, &ftrace_trace_arrays, list) {
> - if (tr->name && strcmp(tr->name, name) == 0) {
> - found = 1;
> - break;
> + if (instance_type == INSTANCE_MNT) {
> + mnt_root = data;
> + tr = mnt_root->d_inode->i_private;
> + } else {
> + name = data;
> + ret = -ENODEV;
> + list_for_each_entry(tr, &ftrace_trace_arrays, list) {
> + if (tr->name && strcmp(tr->name, name) == 0) {
> + found = 1;
> + break;
> + }
> }
> + if (!found)
> + goto out_unlock;
> }
> - if (!found)
> - goto out_unlock;
>
> ret = -EBUSY;
> if (tr->ref || (tr->current_trace && tr->current_trace->ref))
next prev parent reply other threads:[~2016-08-04 3:08 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-07-27 21:27 [RFC PATCH v2 0/3] perf/tracefs: Container-aware tracing support Hari Bathini
2016-07-27 21:27 ` [RFC PATCH v2 1/3] perf: filter container events based on cgroup namespace Hari Bathini
2016-07-27 21:27 ` [RFC PATCH v2 2/3] tracefs: add instances support for uprobe events Hari Bathini
2016-08-01 21:45 ` Steven Rostedt
2016-08-02 17:27 ` Hari Bathini
2016-08-02 17:32 ` Hari Bathini
2016-08-02 17:49 ` Steven Rostedt
2016-08-03 19:30 ` Aravinda Prasad
2016-08-03 20:10 ` Steven Rostedt
2016-08-03 20:16 ` Aravinda Prasad
2016-08-04 1:04 ` Steven Rostedt
2016-08-04 13:46 ` Aravinda Prasad
2016-08-04 14:08 ` Steven Rostedt
2016-08-04 14:34 ` Aravinda Prasad
2016-07-27 21:27 ` [RFC PATCH v2 3/3] tracefs: add 'newinstance' mount option Hari Bathini
2016-08-04 2:54 ` Eric W. Biederman [this message]
2016-08-04 12:26 ` Hari Bathini
2016-08-04 14:12 ` Eric W. Biederman
2016-08-04 2:59 ` [RFC PATCH v2 0/3] perf/tracefs: Container-aware tracing support Eric W. Biederman
2016-08-04 14:48 ` Aravinda Prasad
2016-08-04 18:27 ` Eric W. Biederman
2016-08-04 19:11 ` Aravinda Prasad
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=8760rhdz0e.fsf@x220.int.ebiederm.org \
--to=ebiederm@xmission.com \
--cc=acme@kernel.org \
--cc=alexander.shishkin@linux.intel.com \
--cc=ananth@in.ibm.com \
--cc=aravinda@linux.vnet.ibm.com \
--cc=daniel@iogearbox.net \
--cc=hbathini@linux.vnet.ibm.com \
--cc=kernel@kyup.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=paulus@samba.org \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox