From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andrey Vagin Subject: [PATCH] [RFC] mnt: restrict a number of "struct mnt" Date: Mon, 17 Jun 2013 12:24:58 +0400 Message-ID: <1371457498-27241-1-git-send-email-avagin@openvz.org> Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: QUOTED-PRINTABLE Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, Andrey Vagin , "Eric W. Biederman" , "Serge E. Hallyn" , Andrew Morton , Ingo Molnar , Kees Cook , Mel Gorman , Rik van Riel To: Alexander Viro Return-path: Sender: linux-kernel-owner@vger.kernel.org List-Id: linux-fsdevel.vger.kernel.org I found that a few processes can eat all host memory and nobody can kil= l them. $ mount -t tmpfs xxx /mnt $ mount --make-shared /mnt $ for i in `seq 30`; do mount --bind /mnt `mktemp -d /mnt/test.XXXXXX` = & done All these processes are unkillable, because they took i_mutex and are waiting for namespace_lock. =2E.. 21715 pts/0 =C2=A0=C2=A0=C2=A0D =C2=A0=C2=A0=C2=A0=C2=A0=C2=A00:00 =C2=A0= =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0\_ mount --bind /mnt /m= nt/test.ht6jzO 21716 pts/0 =C2=A0=C2=A0=C2=A0D =C2=A0=C2=A0=C2=A0=C2=A0=C2=A00:00 =C2=A0= =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0\_ mount --bind /mnt /m= nt/test.97K4mI 21717 pts/0 =C2=A0=C2=A0=C2=A0R =C2=A0=C2=A0=C2=A0=C2=A0=C2=A00:01 =C2=A0= =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0\_ mount --bind /mnt /m= nt/test.gO2CD9 =2E.. Each of these processes doubles the number of mounts, so at the end we will have about 2^32 mounts and the size of struct mnt is 256 bytes, so we need about 1TB of RAM. Another problem is that =E2=80=9Cumount=E2=80=9D of a big tree is a very = hard operation and it requires a lot of time. 
E.g.: 16411 umount("/tmp/xxx", MNT_DETACH) =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0= =C2=A0=C2=A0=3D 0 <7.852066> (7.8 sec) 32795 umount("/tmp/xxx", MNT_DETACH) =C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0= =C2=A0=C2=A0=3D 0 <34.485501> ( 34 sec) =46or all this time sys_umount holds namespace_sem and vfsmount_lock... Due to all these reasons I suggest restricting the number of mounts. Probably we can optimize this code in the future, but for now this restrictio= n can help. Cc: Alexander Viro Cc: "Eric W. Biederman" Cc: "Serge E. Hallyn" Cc: Andrew Morton Cc: Ingo Molnar Cc: Kees Cook Cc: Mel Gorman Cc: Rik van Riel Signed-off-by: Andrey Vagin --- fs/namespace.c | 66 +++++++++++++++++++++++++----------= -------- include/linux/mnt_namespace.h | 2 ++ kernel/sysctl.c | 8 ++++++ 3 files changed, 49 insertions(+), 27 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 7b1ca9b..d22e54c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -41,6 +41,9 @@ static struct list_head *mountpoint_hashtable __read_= mostly; static struct kmem_cache *mnt_cache __read_mostly; static struct rw_semaphore namespace_sem; =20 +unsigned int sysctl_mount_nr __read_mostly =3D 16384; +static atomic_t mount_nr =3D ATOMIC_INIT(0); + /* /sys/fs */ struct kobject *fs_kobj; EXPORT_SYMBOL_GPL(fs_kobj); @@ -164,43 +167,49 @@ unsigned int mnt_get_count(struct mount *mnt) =20 static struct mount *alloc_vfsmnt(const char *name) { - struct mount *mnt =3D kmem_cache_zalloc(mnt_cache, GFP_KERNEL); - if (mnt) { - int err; + struct mount *mnt; + int err; =20 - err =3D mnt_alloc_id(mnt); - if (err) - goto out_free_cache; + if (atomic_inc_return(&mount_nr) > sysctl_mount_nr) + goto out_dec_mount_nr; =20 - if (name) { - mnt->mnt_devname =3D kstrdup(name, GFP_KERNEL); - if (!mnt->mnt_devname) - goto out_free_id; - } + mnt =3D kmem_cache_zalloc(mnt_cache, GFP_KERNEL); + if (!mnt) + goto out_dec_mount_nr; + + err =3D mnt_alloc_id(mnt); + if (err) + goto out_free_cache; + + if (name) { + mnt->mnt_devname =3D 
kstrdup(name, GFP_KERNEL); + if (!mnt->mnt_devname) + goto out_free_id; + } =20 #ifdef CONFIG_SMP - mnt->mnt_pcp =3D alloc_percpu(struct mnt_pcp); - if (!mnt->mnt_pcp) - goto out_free_devname; + mnt->mnt_pcp =3D alloc_percpu(struct mnt_pcp); + if (!mnt->mnt_pcp) + goto out_free_devname; =20 - this_cpu_add(mnt->mnt_pcp->mnt_count, 1); + this_cpu_add(mnt->mnt_pcp->mnt_count, 1); #else - mnt->mnt_count =3D 1; - mnt->mnt_writers =3D 0; + mnt->mnt_count =3D 1; + mnt->mnt_writers =3D 0; #endif =20 - INIT_LIST_HEAD(&mnt->mnt_hash); - INIT_LIST_HEAD(&mnt->mnt_child); - INIT_LIST_HEAD(&mnt->mnt_mounts); - INIT_LIST_HEAD(&mnt->mnt_list); - INIT_LIST_HEAD(&mnt->mnt_expire); - INIT_LIST_HEAD(&mnt->mnt_share); - INIT_LIST_HEAD(&mnt->mnt_slave_list); - INIT_LIST_HEAD(&mnt->mnt_slave); + INIT_LIST_HEAD(&mnt->mnt_hash); + INIT_LIST_HEAD(&mnt->mnt_child); + INIT_LIST_HEAD(&mnt->mnt_mounts); + INIT_LIST_HEAD(&mnt->mnt_list); + INIT_LIST_HEAD(&mnt->mnt_expire); + INIT_LIST_HEAD(&mnt->mnt_share); + INIT_LIST_HEAD(&mnt->mnt_slave_list); + INIT_LIST_HEAD(&mnt->mnt_slave); #ifdef CONFIG_FSNOTIFY - INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); + INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); #endif - } + return mnt; =20 #ifdef CONFIG_SMP @@ -211,6 +220,8 @@ out_free_id: mnt_free_id(mnt); out_free_cache: kmem_cache_free(mnt_cache, mnt); +out_dec_mount_nr: + atomic_dec(&mount_nr); return NULL; } =20 @@ -546,6 +557,7 @@ static void free_vfsmnt(struct mount *mnt) #ifdef CONFIG_SMP free_percpu(mnt->mnt_pcp); #endif + atomic_dec(&mount_nr); kmem_cache_free(mnt_cache, mnt); } =20 diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespac= e.h index 12b2ab5..d8e5ec9 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -2,6 +2,8 @@ #define _NAMESPACE_H_ #ifdef __KERNEL__ =20 +extern unsigned int sysctl_mount_nr; + struct mnt_namespace; struct fs_struct; struct user_namespace; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9edcf45..bebfdd7 100644 --- 
a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -61,6 +61,7 @@ #include #include #include +#include #include =20 #include @@ -1616,6 +1617,13 @@ static struct ctl_table fs_table[] =3D { .proc_handler =3D &pipe_proc_fn, .extra1 =3D &pipe_min_size, }, + { + .procname =3D "mount-nr", + .data =3D &sysctl_mount_nr, + .maxlen =3D sizeof(sysctl_mount_nr), + .mode =3D 0644, + .proc_handler =3D proc_dointvec, + }, { } }; =20 --=20 1.8.1.4