public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: ebiederm@xmission.com (Eric W. Biederman)
To: Andrey Vagin <avagin@openvz.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	"Serge E. Hallyn" <serge@hallyn.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Ingo Molnar <mingo@kernel.org>, Kees Cook <keescook@chromium.org>,
	Mel Gorman <mgorman@suse.de>, Rik van Riel <riel@redhat.com>
Subject: Re: [PATCH] [RFC] mnt: restrict a number of "struct mnt"
Date: Mon, 17 Jun 2013 12:58:00 -0700	[thread overview]
Message-ID: <878v284iif.fsf@xmission.com> (raw)
In-Reply-To: <1371457498-27241-1-git-send-email-avagin@openvz.org> (Andrey Vagin's message of "Mon, 17 Jun 2013 12:24:58 +0400")

Andrey Vagin <avagin@openvz.org> writes:

> I found that a few processes can eat all host memory and nobody can kill them.
> $ mount -t tmpfs xxx /mnt
> $ mount --make-shared /mnt
> $ for i in `seq 30`; do mount --bind /mnt `mktemp -d /mnt/test.XXXXXX` & done
>
> All this processes are unkillable, because they took i_mutex and waits
> namespace_lock.
>
> ...
> 21715 pts/0    D      0:00          \_ mount --bind /mnt /mnt/test.ht6jzO
> 21716 pts/0    D      0:00          \_ mount --bind /mnt /mnt/test.97K4mI
> 21717 pts/0    R      0:01          \_ mount --bind /mnt /mnt/test.gO2CD9
> ...
>
> Each of this process doubles a number of mounts, so at the end we will
> have about 2^32 mounts and the size of struct mnt is 256 bytes, so we
> need about 1TB of RAM.
>
> Another problem is that “umount” of a big tree is very hard operation
> and it requires a lot of time.
> E.g.:
> 16411
> umount("/tmp/xxx", MNT_DETACH)          = 0 <7.852066> (7.8 sec)
> 32795
> umount("/tmp/xxx", MNT_DETACH)          = 0 <34.485501> ( 34 sec)
>
> For all this time sys_umoun takes namespace_sem and vfsmount_lock...
>
> Due to all this reasons I suggest to restrict a number of mounts.
> Probably we can optimize this code in a future, but now this restriction
> can help.

So for anyone seriously worried about this kind of thing in general we
already have the memory control group, which is quite capable of
limiting this kind of thing, and it limits all memory allocations not
just mount.

Is there some reason we want to go down the path of adding and tuning
static limits all over the kernel?  As opposed to streamlining the memory
control group so it is low overhead and everyone that cares can use it?

Should we reach the point where we automatically enable the memory
control group to prevent abuse of the kernel?

Eric

> Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> Cc: "Eric W. Biederman" <ebiederm@xmission.com>
> Cc: "Serge E. Hallyn" <serge@hallyn.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Ingo Molnar <mingo@kernel.org>
> Cc: Kees Cook <keescook@chromium.org>
> Cc: Mel Gorman <mgorman@suse.de>
> Cc: Rik van Riel <riel@redhat.com>
> Signed-off-by: Andrey Vagin <avagin@openvz.org>
> ---
>  fs/namespace.c                | 66 +++++++++++++++++++++++++------------------
>  include/linux/mnt_namespace.h |  2 ++
>  kernel/sysctl.c               |  8 ++++++
>  3 files changed, 49 insertions(+), 27 deletions(-)
>
> diff --git a/fs/namespace.c b/fs/namespace.c
> index 7b1ca9b..d22e54c 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -41,6 +41,9 @@ static struct list_head *mountpoint_hashtable __read_mostly;
>  static struct kmem_cache *mnt_cache __read_mostly;
>  static struct rw_semaphore namespace_sem;
>  
> +unsigned int sysctl_mount_nr __read_mostly = 16384;
> +static atomic_t mount_nr = ATOMIC_INIT(0);
> +
>  /* /sys/fs */
>  struct kobject *fs_kobj;
>  EXPORT_SYMBOL_GPL(fs_kobj);
> @@ -164,43 +167,49 @@ unsigned int mnt_get_count(struct mount *mnt)
>  
>  static struct mount *alloc_vfsmnt(const char *name)
>  {
> -	struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
> -	if (mnt) {
> -		int err;
> +	struct mount *mnt;
> +	int err;
>  
> -		err = mnt_alloc_id(mnt);
> -		if (err)
> -			goto out_free_cache;
> +	if (atomic_inc_return(&mount_nr) > sysctl_mount_nr)
> +		goto out_dec_mount_nr;
>  
> -		if (name) {
> -			mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
> -			if (!mnt->mnt_devname)
> -				goto out_free_id;
> -		}
> +	mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
> +	if (!mnt)
> +		goto out_dec_mount_nr;
> +
> +	err = mnt_alloc_id(mnt);
> +	if (err)
> +		goto out_free_cache;
> +
> +	if (name) {
> +		mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
> +		if (!mnt->mnt_devname)
> +			goto out_free_id;
> +	}
>  
>  #ifdef CONFIG_SMP
> -		mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
> -		if (!mnt->mnt_pcp)
> -			goto out_free_devname;
> +	mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
> +	if (!mnt->mnt_pcp)
> +		goto out_free_devname;
>  
> -		this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
> +	this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
>  #else
> -		mnt->mnt_count = 1;
> -		mnt->mnt_writers = 0;
> +	mnt->mnt_count = 1;
> +	mnt->mnt_writers = 0;
>  #endif
>  
> -		INIT_LIST_HEAD(&mnt->mnt_hash);
> -		INIT_LIST_HEAD(&mnt->mnt_child);
> -		INIT_LIST_HEAD(&mnt->mnt_mounts);
> -		INIT_LIST_HEAD(&mnt->mnt_list);
> -		INIT_LIST_HEAD(&mnt->mnt_expire);
> -		INIT_LIST_HEAD(&mnt->mnt_share);
> -		INIT_LIST_HEAD(&mnt->mnt_slave_list);
> -		INIT_LIST_HEAD(&mnt->mnt_slave);
> +	INIT_LIST_HEAD(&mnt->mnt_hash);
> +	INIT_LIST_HEAD(&mnt->mnt_child);
> +	INIT_LIST_HEAD(&mnt->mnt_mounts);
> +	INIT_LIST_HEAD(&mnt->mnt_list);
> +	INIT_LIST_HEAD(&mnt->mnt_expire);
> +	INIT_LIST_HEAD(&mnt->mnt_share);
> +	INIT_LIST_HEAD(&mnt->mnt_slave_list);
> +	INIT_LIST_HEAD(&mnt->mnt_slave);
>  #ifdef CONFIG_FSNOTIFY
> -		INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
> +	INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
>  #endif
> -	}
> +
>  	return mnt;
>  
>  #ifdef CONFIG_SMP
> @@ -211,6 +220,8 @@ out_free_id:
>  	mnt_free_id(mnt);
>  out_free_cache:
>  	kmem_cache_free(mnt_cache, mnt);
> +out_dec_mount_nr:
> +	atomic_dec(&mount_nr);
>  	return NULL;
>  }
>  
> @@ -546,6 +557,7 @@ static void free_vfsmnt(struct mount *mnt)
>  #ifdef CONFIG_SMP
>  	free_percpu(mnt->mnt_pcp);
>  #endif
> +	atomic_dec(&mount_nr);
>  	kmem_cache_free(mnt_cache, mnt);
>  }
>  
> diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
> index 12b2ab5..d8e5ec9 100644
> --- a/include/linux/mnt_namespace.h
> +++ b/include/linux/mnt_namespace.h
> @@ -2,6 +2,8 @@
>  #define _NAMESPACE_H_
>  #ifdef __KERNEL__
>  
> +extern unsigned int sysctl_mount_nr;
> +
>  struct mnt_namespace;
>  struct fs_struct;
>  struct user_namespace;
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 9edcf45..bebfdd7 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -61,6 +61,7 @@
>  #include <linux/kmod.h>
>  #include <linux/capability.h>
>  #include <linux/binfmts.h>
> +#include <linux/mnt_namespace.h>
>  #include <linux/sched/sysctl.h>
>  
>  #include <asm/uaccess.h>
> @@ -1616,6 +1617,13 @@ static struct ctl_table fs_table[] = {
>  		.proc_handler	= &pipe_proc_fn,
>  		.extra1		= &pipe_min_size,
>  	},
> +	{
> +		.procname	= "mount-nr",
> +		.data		= &sysctl_mount_nr,
> +		.maxlen		= sizeof(sysctl_mount_nr),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
>  	{ }
>  };

  reply	other threads:[~2013-06-17 19:58 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-06-17  8:24 [PATCH] [RFC] mnt: restrict a number of "struct mnt" Andrey Vagin
2013-06-17 19:58 ` Eric W. Biederman [this message]
2013-06-17 22:56   ` Andrew Morton
2013-06-18  6:09     ` Andrew Vagin
2013-06-17 22:56   ` Andrey Wagin
2013-06-19 21:35     ` Andrey Wagin
2013-06-21  1:04       ` Eric W. Biederman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=878v284iif.fsf@xmission.com \
    --to=ebiederm@xmission.com \
    --cc=akpm@linux-foundation.org \
    --cc=avagin@openvz.org \
    --cc=keescook@chromium.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mgorman@suse.de \
    --cc=mingo@kernel.org \
    --cc=riel@redhat.com \
    --cc=serge@hallyn.com \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox