public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH resubmit] do_mount: reduce stack consumption
@ 2005-11-04 10:50 Heiko Carstens
  2005-11-04 11:48 ` Al Viro
  2005-11-04 16:48 ` Andrew Morton
  0 siblings, 2 replies; 16+ messages in thread
From: Heiko Carstens @ 2005-11-04 10:50 UTC (permalink / raw)
  To: Andrew Morton, linux-kernel; +Cc: Andreas Herrmann

From: Andreas Herrmann <aherrman@de.ibm.com>

This is a resubmit of Andreas' patch that reduces stackframe usage in
do_mount. Problem is that without this patch we get a kernel stack
overflow if we run with 4k stacks (s390 31 bit mode).
See original stack back trace below and Andreas' patch and analysis
here:
http://www.ussg.iu.edu/hypermail/linux/kernel/0410.3/1844.html

    <4>Call Trace:
    <4>([<000000000014ade6>] buffered_rmqueue+0x36a/0x3b4)
    <4> [<000000000014aed6>] __alloc_pages+0xa6/0x350
    <4> [<000000000014b258>] __get_free_pages+0x38/0x74
    <4> [<000000000014f9fe>] kmem_getpages+0x32/0x140
    <4> [<0000000000150396>] cache_alloc_refill+0x3ae/0x58c
    <4> [<000000000014feb0>] __kmalloc+0xc0/0xe8
    <4> [<00000000002acc64>] zfcp_mempool_alloc+0x24/0x34
    <4> [<00000000001494ea>] mempool_alloc+0x56/0x1e8
    <4> [<00000000002bfb46>] zfcp_fsf_req_create+0x56/0x5d8
    <4> [<00000000002c0cf0>] zfcp_fsf_send_fcp_command_task+0x44/0x5c4
    <4> [<00000000002afbda>] zfcp_scsi_command_async+0x7a/0x1f8
    <4> [<00000000002afe68>] zfcp_scsi_queuecommand+0x44/0x54
    <4> [<000000000024f458>] scsi_dispatch_cmd+0x234/0x3c0
    <4> [<0000000000255fb2>] scsi_request_fn+0x2c6/0x67c
    <4> [<000000000023f3ea>] __generic_unplug_device+0x52/0x60
    <4> [<000000000023d392>] __elv_add_request+0x8a/0x98
    <4> [<0000000000240c5e>] __make_request+0x306/0x62c
    <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
    <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
    <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
    <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
    <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
    <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
    <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
    <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
    <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
    <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
    <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
    <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
    <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
    <4> [<000000000024121a>] submit_bio+0x7a/0x154
    <4> [<00000000001736e6>] submit_bh+0x106/0x184
    <4> [<000000000017514e>] __block_write_full_page+0x23a/0x58c
    <4> [<000000000017558e>] block_write_full_page+0xee/0x108
    <4> [<000000000017a908>] blkdev_writepage+0x24/0x38
    <4> [<00000000001a0f76>] mpage_writepages+0x1da/0x9d0
    <4> [<000000000017a4fa>] generic_writepages+0x22/0x30
    <4> [<000000000014cc1e>] do_writepages+0x36/0x54
    <4> [<0000000000144fe2>] __filemap_fdatawrite+0x5a/0x6c
    <4> [<0000000000145016>] filemap_fdatawrite+0x22/0x30
    <4> [<0000000000171244>] sync_blockdev+0x30/0x5c
    <4> [<00000000001df074>] journal_recover+0x88/0xbc
    <4> [<00000000001e2876>] journal_load+0x52/0xcc
    <4> [<00000000001d37b4>] ext3_fill_super+0x1a48/0x1d38
    <4> [<0000000000179faa>] get_sb_bdev+0x13a/0x208
    <4> [<00000000001d3bd6>] ext3_get_sb+0x22/0x34
    <4> [<000000000017a32e>] do_kern_mount+0x86/0x1f4
    <4> [<000000000019aaf4>] do_mount+0x1c8/0x8e4
    <4> [<000000000019b7ba>] sys_mount+0xce/0x1dc
    <4> [<000000000010f9a6>] sysc_do_restart+0xe/0x12

Signed-off-by: Andreas Herrmann <aherrman@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---

 fs/namespace.c |   80 ++++++++++++++++++++++++++++++++++++---------------------
 1 files changed, 51 insertions(+), 29 deletions(-)

diff -urpN linux-2.6/fs/namespace.c linux-2.6-patched/fs/namespace.c
--- linux-2.6/fs/namespace.c	2005-10-28 02:02:08.000000000 +0200
+++ linux-2.6-patched/fs/namespace.c	2005-11-03 13:39:30.000000000 +0100
@@ -619,25 +619,31 @@ out_unlock:
  */
 static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
 {
-	struct nameidata old_nd;
+	struct nameidata *old_nd;
 	struct vfsmount *mnt = NULL;
 	int err = mount_is_safe(nd);
 	if (err)
 		return err;
 	if (!old_name || !*old_name)
 		return -EINVAL;
-	err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
-	if (err)
+
+	old_nd = kmalloc(sizeof(*old_nd), GFP_KERNEL);
+	if (!old_nd)
+		return -ENOMEM;
+	err = path_lookup(old_name, LOOKUP_FOLLOW, old_nd);
+	if (err) {
+		kfree(old_nd);
 		return err;
+	}
 
 	down_write(&current->namespace->sem);
 	err = -EINVAL;
-	if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd.mnt))) {
+	if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd->mnt))) {
 		err = -ENOMEM;
 		if (recurse)
-			mnt = copy_tree(old_nd.mnt, old_nd.dentry);
+			mnt = copy_tree(old_nd->mnt, old_nd->dentry);
 		else
-			mnt = clone_mnt(old_nd.mnt, old_nd.dentry);
+			mnt = clone_mnt(old_nd->mnt, old_nd->dentry);
 	}
 
 	if (mnt) {
@@ -656,7 +662,8 @@ static int do_loopback(struct nameidata 
 	}
 
 	up_write(&current->namespace->sem);
-	path_release(&old_nd);
+	path_release(old_nd);
+	kfree(old_nd);
 	return err;
 }
 
@@ -693,22 +700,29 @@ static int do_remount(struct nameidata *
 
 static int do_move_mount(struct nameidata *nd, char *old_name)
 {
-	struct nameidata old_nd, parent_nd;
+	struct {
+		struct nameidata old_nd, parent_nd;
+	} *loc;
 	struct vfsmount *p;
 	int err = 0;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 	if (!old_name || !*old_name)
 		return -EINVAL;
-	err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
-	if (err)
+	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
+	if (!loc)
+		return -ENOMEM;
+	err = path_lookup(old_name, LOOKUP_FOLLOW, &loc->old_nd);
+	if (err) {
+		kfree (loc);
 		return err;
+	}
 
 	down_write(&current->namespace->sem);
 	while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
 		;
 	err = -EINVAL;
-	if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt))
+	if (!check_mnt(nd->mnt) || !check_mnt(loc->old_nd.mnt))
 		goto out;
 
 	err = -ENOENT;
@@ -721,28 +735,28 @@ static int do_move_mount(struct nameidat
 		goto out2;
 
 	err = -EINVAL;
-	if (old_nd.dentry != old_nd.mnt->mnt_root)
+	if (loc->old_nd.dentry != loc->old_nd.mnt->mnt_root)
 		goto out2;
 
-	if (old_nd.mnt == old_nd.mnt->mnt_parent)
+	if (loc->old_nd.mnt == loc->old_nd.mnt->mnt_parent)
 		goto out2;
 
 	if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
-	      S_ISDIR(old_nd.dentry->d_inode->i_mode))
+	      S_ISDIR(loc->old_nd.dentry->d_inode->i_mode))
 		goto out2;
 
 	err = -ELOOP;
 	for (p = nd->mnt; p->mnt_parent!=p; p = p->mnt_parent)
-		if (p == old_nd.mnt)
+		if (p == loc->old_nd.mnt)
 			goto out2;
 	err = 0;
 
-	detach_mnt(old_nd.mnt, &parent_nd);
-	attach_mnt(old_nd.mnt, nd);
+	detach_mnt(loc->old_nd.mnt, &loc->parent_nd);
+	attach_mnt(loc->old_nd.mnt, nd);
 
 	/* if the mount is moved, it should no longer be expire
 	 * automatically */
-	list_del_init(&old_nd.mnt->mnt_expire);
+	list_del_init(&loc->old_nd.mnt->mnt_expire);
 out2:
 	spin_unlock(&vfsmount_lock);
 out1:
@@ -750,8 +764,9 @@ out1:
 out:
 	up_write(&current->namespace->sem);
 	if (!err)
-		path_release(&parent_nd);
-	path_release(&old_nd);
+		path_release(&loc->parent_nd);
+	path_release(&loc->old_nd);
+	kfree(loc);
 	return err;
 }
 
@@ -1014,7 +1029,7 @@ int copy_mount_options(const void __user
 long do_mount(char * dev_name, char * dir_name, char *type_page,
 		  unsigned long flags, void *data_page)
 {
-	struct nameidata nd;
+	struct nameidata *nd;
 	int retval = 0;
 	int mnt_flags = 0;
 
@@ -1041,27 +1056,34 @@ long do_mount(char * dev_name, char * di
 		mnt_flags |= MNT_NOEXEC;
 	flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_ACTIVE);
 
+	nd = kmalloc(sizeof(*nd), GFP_KERNEL);
+	if (!nd)
+		return -ENOMEM;
+
 	/* ... and get the mountpoint */
-	retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
-	if (retval)
+	retval = path_lookup(dir_name, LOOKUP_FOLLOW, nd);
+	if (retval) {
+		kfree(nd);
 		return retval;
+	}
 
-	retval = security_sb_mount(dev_name, &nd, type_page, flags, data_page);
+	retval = security_sb_mount(dev_name, nd, type_page, flags, data_page);
 	if (retval)
 		goto dput_out;
 
 	if (flags & MS_REMOUNT)
-		retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
+		retval = do_remount(nd, flags & ~MS_REMOUNT, mnt_flags,
 				    data_page);
 	else if (flags & MS_BIND)
-		retval = do_loopback(&nd, dev_name, flags & MS_REC);
+		retval = do_loopback(nd, dev_name, flags & MS_REC);
 	else if (flags & MS_MOVE)
-		retval = do_move_mount(&nd, dev_name);
+		retval = do_move_mount(nd, dev_name);
 	else
-		retval = do_new_mount(&nd, type_page, flags, mnt_flags,
+		retval = do_new_mount(nd, type_page, flags, mnt_flags,
 				      dev_name, data_page);
 dput_out:
-	path_release(&nd);
+	path_release(nd);
+	kfree(nd);
 	return retval;
 }
 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH resubmit] do_mount: reduce stack consumption
  2005-11-04 10:50 Heiko Carstens
@ 2005-11-04 11:48 ` Al Viro
  2005-11-04 12:57   ` Heiko Carstens
  2005-11-04 16:48 ` Andrew Morton
  1 sibling, 1 reply; 16+ messages in thread
From: Al Viro @ 2005-11-04 11:48 UTC (permalink / raw)
  To: Heiko Carstens; +Cc: Andrew Morton, linux-kernel, Andreas Herrmann

On Fri, Nov 04, 2005 at 11:50:26AM +0100, Heiko Carstens wrote:
> From: Andreas Herrmann <aherrman@de.ibm.com>
> 
> This is a resubmit of Andreas' patch that reduces stackframe usage in
> do_mount. Problem is that without this patch we get a kernel stack
> overflow if we run with 4k stacks (s390 31 bit mode).
> See original stack back trace below and Andreas' patch and analysis
> here:
> http://www.ussg.iu.edu/hypermail/linux/kernel/0410.3/1844.html

NAK.  Rationale: too ugly.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH resubmit] do_mount: reduce stack consumption
  2005-11-04 11:48 ` Al Viro
@ 2005-11-04 12:57   ` Heiko Carstens
  2005-11-04 14:06     ` Al Viro
  0 siblings, 1 reply; 16+ messages in thread
From: Heiko Carstens @ 2005-11-04 12:57 UTC (permalink / raw)
  To: Al Viro; +Cc: Andrew Morton, linux-kernel, Andreas Herrmann

> > This is a resubmit of Andreas' patch that reduces stackframe usage in
> > do_mount. Problem is that without this patch we get a kernel stack
> > overflow if we run with 4k stacks (s390 31 bit mode).
> > See original stack back trace below and Andreas' patch and analysis
> > here:
> > http://www.ussg.iu.edu/hypermail/linux/kernel/0410.3/1844.html
> 
> NAK.  Rationale: too ugly.

Ok, since I can only guess what you don't like: here is an updated patch
that probably addresses a few things.
If you don't like this one too, could you please explain what should be
changed?

Thanks,
Heiko
---

 fs/namespace.c |   79 ++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 50 insertions(+), 29 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 2fa9fdf..c14fbfc 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -619,25 +619,30 @@ out_unlock:
  */
 static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
 {
-	struct nameidata old_nd;
+	struct nameidata *old_nd;
 	struct vfsmount *mnt = NULL;
 	int err = mount_is_safe(nd);
 	if (err)
 		return err;
 	if (!old_name || !*old_name)
 		return -EINVAL;
-	err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
+
+	old_nd = kmalloc(sizeof(*old_nd), GFP_KERNEL);
+	if (!old_nd)
+		return -ENOMEM;
+
+	err = path_lookup(old_name, LOOKUP_FOLLOW, old_nd);
 	if (err)
-		return err;
+		goto out;
 
 	down_write(&current->namespace->sem);
 	err = -EINVAL;
-	if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd.mnt))) {
+	if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd->mnt))) {
 		err = -ENOMEM;
 		if (recurse)
-			mnt = copy_tree(old_nd.mnt, old_nd.dentry);
+			mnt = copy_tree(old_nd->mnt, old_nd->dentry);
 		else
-			mnt = clone_mnt(old_nd.mnt, old_nd.dentry);
+			mnt = clone_mnt(old_nd->mnt, old_nd->dentry);
 	}
 
 	if (mnt) {
@@ -656,7 +661,9 @@ static int do_loopback(struct nameidata 
 	}
 
 	up_write(&current->namespace->sem);
-	path_release(&old_nd);
+	path_release(old_nd);
+out:
+	kfree(old_nd);
 	return err;
 }
 
@@ -693,22 +700,28 @@ static int do_remount(struct nameidata *
 
 static int do_move_mount(struct nameidata *nd, char *old_name)
 {
-	struct nameidata old_nd, parent_nd;
+	struct {
+		struct nameidata old_nd, parent_nd;
+	} *loc;
 	struct vfsmount *p;
 	int err = 0;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 	if (!old_name || !*old_name)
 		return -EINVAL;
-	err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
+	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
+	if (!loc)
+		return -ENOMEM;
+
+	err = path_lookup(old_name, LOOKUP_FOLLOW, &loc->old_nd);
 	if (err)
-		return err;
+		goto path_out;
 
 	down_write(&current->namespace->sem);
 	while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
 		;
 	err = -EINVAL;
-	if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt))
+	if (!check_mnt(nd->mnt) || !check_mnt(loc->old_nd.mnt))
 		goto out;
 
 	err = -ENOENT;
@@ -721,28 +734,28 @@ static int do_move_mount(struct nameidat
 		goto out2;
 
 	err = -EINVAL;
-	if (old_nd.dentry != old_nd.mnt->mnt_root)
+	if (loc->old_nd.dentry != loc->old_nd.mnt->mnt_root)
 		goto out2;
 
-	if (old_nd.mnt == old_nd.mnt->mnt_parent)
+	if (loc->old_nd.mnt == loc->old_nd.mnt->mnt_parent)
 		goto out2;
 
 	if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
-	      S_ISDIR(old_nd.dentry->d_inode->i_mode))
+	      S_ISDIR(loc->old_nd.dentry->d_inode->i_mode))
 		goto out2;
 
 	err = -ELOOP;
 	for (p = nd->mnt; p->mnt_parent!=p; p = p->mnt_parent)
-		if (p == old_nd.mnt)
+		if (p == loc->old_nd.mnt)
 			goto out2;
 	err = 0;
 
-	detach_mnt(old_nd.mnt, &parent_nd);
-	attach_mnt(old_nd.mnt, nd);
+	detach_mnt(loc->old_nd.mnt, &loc->parent_nd);
+	attach_mnt(loc->old_nd.mnt, nd);
 
 	/* if the mount is moved, it should no longer be expire
 	 * automatically */
-	list_del_init(&old_nd.mnt->mnt_expire);
+	list_del_init(&loc->old_nd.mnt->mnt_expire);
 out2:
 	spin_unlock(&vfsmount_lock);
 out1:
@@ -750,8 +763,10 @@ out1:
 out:
 	up_write(&current->namespace->sem);
 	if (!err)
-		path_release(&parent_nd);
-	path_release(&old_nd);
+		path_release(&loc->parent_nd);
+	path_release(&loc->old_nd);
+path_out:
+	kfree(loc);
 	return err;
 }
 
@@ -1014,7 +1029,7 @@ int copy_mount_options(const void __user
 long do_mount(char * dev_name, char * dir_name, char *type_page,
 		  unsigned long flags, void *data_page)
 {
-	struct nameidata nd;
+	struct nameidata *nd;
 	int retval = 0;
 	int mnt_flags = 0;
 
@@ -1041,27 +1056,33 @@ long do_mount(char * dev_name, char * di
 		mnt_flags |= MNT_NOEXEC;
 	flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_ACTIVE);
 
+	nd = kmalloc(sizeof(*nd), GFP_KERNEL);
+	if (!nd)
+		return -ENOMEM;
+
 	/* ... and get the mountpoint */
-	retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
+	retval = path_lookup(dir_name, LOOKUP_FOLLOW, nd);
 	if (retval)
-		return retval;
+		goto path_out;
 
-	retval = security_sb_mount(dev_name, &nd, type_page, flags, data_page);
+	retval = security_sb_mount(dev_name, nd, type_page, flags, data_page);
 	if (retval)
 		goto dput_out;
 
 	if (flags & MS_REMOUNT)
-		retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
+		retval = do_remount(nd, flags & ~MS_REMOUNT, mnt_flags,
 				    data_page);
 	else if (flags & MS_BIND)
-		retval = do_loopback(&nd, dev_name, flags & MS_REC);
+		retval = do_loopback(nd, dev_name, flags & MS_REC);
 	else if (flags & MS_MOVE)
-		retval = do_move_mount(&nd, dev_name);
+		retval = do_move_mount(nd, dev_name);
 	else
-		retval = do_new_mount(&nd, type_page, flags, mnt_flags,
+		retval = do_new_mount(nd, type_page, flags, mnt_flags,
 				      dev_name, data_page);
 dput_out:
-	path_release(&nd);
+	path_release(nd);
+path_out:
+	kfree(nd);
 	return retval;
 }
 

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH resubmit] do_mount: reduce stack consumption
  2005-11-04 12:57   ` Heiko Carstens
@ 2005-11-04 14:06     ` Al Viro
  2005-11-04 22:03       ` Heiko Carstens
  0 siblings, 1 reply; 16+ messages in thread
From: Al Viro @ 2005-11-04 14:06 UTC (permalink / raw)
  To: Heiko Carstens; +Cc: Andrew Morton, linux-kernel, Andreas Herrmann

On Fri, Nov 04, 2005 at 01:57:05PM +0100, Heiko Carstens wrote:
> > > This is a resubmit of Andreas' patch that reduces stackframe usage in
> > > do_mount. Problem is that without this patch we get a kernel stack
> > > overflow if we run with 4k stacks (s390 31 bit mode).
> > > See original stack back trace below and Andreas' patch and analysis
> > > here:
> > > http://www.ussg.iu.edu/hypermail/linux/kernel/0410.3/1844.html
> > 
> > NAK.  Rationale: too ugly.
> 
> Ok, since I can only guess what you don't like: here is an updated patch
> that probably addresses a few things.
> If you don't like this one too, could you please explain what should be
> changed?

Depth analysis.  E.g. do_move_mount() change is simply nonsense - _this_
is not going to overflow, no matter what.  And do_add_mount() change
is also very suspicious - looks like you are attacking the wrong place
in call chain.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH resubmit] do_mount: reduce stack consumption
@ 2005-11-04 15:53 Andreas Herrmann
  2005-11-04 17:42 ` Andrew Morton
  0 siblings, 1 reply; 16+ messages in thread
From: Andreas Herrmann @ 2005-11-04 15:53 UTC (permalink / raw)
  To: Al Viro; +Cc: Andrew Morton, heicars2, linux-kernel

On 04.11.2005 15:06 Al Viro wrote:
> On Fri, Nov 04, 2005 at 01:57:05PM +0100, Heiko Carstens wrote:
> > Ok, since I can only guess what you don't like: here is an updated 
patch
> > that probably addresses a few things.
> > If you don't like this one too, could you please explain what should 
be
> > changed?

> Depth analysis.  E.g. do_move_mount() change is simply nonsense - _this_
> is not going to overflow, no matter what.  And do_add_mount() change
> is also very suspicious - looks like you are attacking the wrong place
> in call chain.

Obviously you missed the point that (depending on the compiler version,
options etc.) do_move_mount() and do_add_mount() can be inlined.
Hence stack frame size of do_mount is directly influenced by those two
functions. At least when I performed my "skin-deep" analysis of this
kernel stack overflow both functions were inlined in do_mount().

Stack consumptions of all other functions occurring in the backtrace
were moderate or due to the amount of inlined functions.

Just do_mount and its inlined friends use to put larger structures on
the stack that better should be allocated. There would not be any
performance impact for the user to allocate the structs.


Regards,

Andreas

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH resubmit] do_mount: reduce stack consumption
  2005-11-04 10:50 Heiko Carstens
  2005-11-04 11:48 ` Al Viro
@ 2005-11-04 16:48 ` Andrew Morton
  2005-11-04 21:27   ` Heiko Carstens
  1 sibling, 1 reply; 16+ messages in thread
From: Andrew Morton @ 2005-11-04 16:48 UTC (permalink / raw)
  To: Heiko Carstens; +Cc: linux-kernel, aherrman

Heiko Carstens <heiko.carstens@de.ibm.com> wrote:
>
> From: Andreas Herrmann <aherrman@de.ibm.com>
> 
> This is a resubmit of Andreas' patch that reduces stackframe usage in
> do_mount. Problem is that without this patch we get a kernel stack
> overflow if we run with 4k stacks (s390 31 bit mode).
> See original stack back trace below and Andreas' patch and analysis
> here:
> http://www.ussg.iu.edu/hypermail/linux/kernel/0410.3/1844.html
> 
>     <4>Call Trace:
>     <4>([<000000000014ade6>] buffered_rmqueue+0x36a/0x3b4)
>     <4> [<000000000014aed6>] __alloc_pages+0xa6/0x350
>     <4> [<000000000014b258>] __get_free_pages+0x38/0x74
>     <4> [<000000000014f9fe>] kmem_getpages+0x32/0x140
>     <4> [<0000000000150396>] cache_alloc_refill+0x3ae/0x58c
>     <4> [<000000000014feb0>] __kmalloc+0xc0/0xe8
>     <4> [<00000000002acc64>] zfcp_mempool_alloc+0x24/0x34
>     <4> [<00000000001494ea>] mempool_alloc+0x56/0x1e8
>     <4> [<00000000002bfb46>] zfcp_fsf_req_create+0x56/0x5d8
>     <4> [<00000000002c0cf0>] zfcp_fsf_send_fcp_command_task+0x44/0x5c4
>     <4> [<00000000002afbda>] zfcp_scsi_command_async+0x7a/0x1f8
>     <4> [<00000000002afe68>] zfcp_scsi_queuecommand+0x44/0x54
>     <4> [<000000000024f458>] scsi_dispatch_cmd+0x234/0x3c0
>     <4> [<0000000000255fb2>] scsi_request_fn+0x2c6/0x67c
>     <4> [<000000000023f3ea>] __generic_unplug_device+0x52/0x60
>     <4> [<000000000023d392>] __elv_add_request+0x8a/0x98
>     <4> [<0000000000240c5e>] __make_request+0x306/0x62c
>     <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
>     <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
>     <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
>     <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
>     <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
>     <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
>     <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
>     <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
>     <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
>     <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
>     <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
>     <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
>     <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
>     <4> [<000000000024121a>] submit_bio+0x7a/0x154
>     <4> [<00000000001736e6>] submit_bh+0x106/0x184
>     <4> [<000000000017514e>] __block_write_full_page+0x23a/0x58c
>     <4> [<000000000017558e>] block_write_full_page+0xee/0x108
>     <4> [<000000000017a908>] blkdev_writepage+0x24/0x38
>     <4> [<00000000001a0f76>] mpage_writepages+0x1da/0x9d0
>     <4> [<000000000017a4fa>] generic_writepages+0x22/0x30
>     <4> [<000000000014cc1e>] do_writepages+0x36/0x54
>     <4> [<0000000000144fe2>] __filemap_fdatawrite+0x5a/0x6c
>     <4> [<0000000000145016>] filemap_fdatawrite+0x22/0x30
>     <4> [<0000000000171244>] sync_blockdev+0x30/0x5c
>     <4> [<00000000001df074>] journal_recover+0x88/0xbc
>     <4> [<00000000001e2876>] journal_load+0x52/0xcc
>     <4> [<00000000001d37b4>] ext3_fill_super+0x1a48/0x1d38
>     <4> [<0000000000179faa>] get_sb_bdev+0x13a/0x208
>     <4> [<00000000001d3bd6>] ext3_get_sb+0x22/0x34
>     <4> [<000000000017a32e>] do_kern_mount+0x86/0x1f4
>     <4> [<000000000019aaf4>] do_mount+0x1c8/0x8e4
>     <4> [<000000000019b7ba>] sys_mount+0xce/0x1dc
>     <4> [<000000000010f9a6>] sysc_do_restart+0xe/0x12

I'd call that a device mapper bug.  If you were to increase the stacking
from 4-deep to 5-deep, it will crash the kernel, patched or not.

I'm (very) surprised that DM doesn't perform its stacking iteratively.

Eliminating the 36-byte `struct clone_info' in __split_bio() would be most
effective here.



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH resubmit] do_mount: reduce stack consumption
  2005-11-04 15:53 [PATCH resubmit] do_mount: reduce stack consumption Andreas Herrmann
@ 2005-11-04 17:42 ` Andrew Morton
  2005-11-04 17:45   ` Arjan van de Ven
  0 siblings, 1 reply; 16+ messages in thread
From: Andrew Morton @ 2005-11-04 17:42 UTC (permalink / raw)
  To: Andreas Herrmann; +Cc: viro, heicars2, linux-kernel, Arjan van de Ven

Andreas Herrmann <AHERRMAN@de.ibm.com> wrote:
>
> Obviously you missed the point that (depending on the compiler version,
>  options etc.) do_move_mount() and do_add_mount() can be inlined.

I think we found a way of preventing the 3.x compiler from doing that.  Arjan,
do you recall where we ended up with that problem?

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH resubmit] do_mount: reduce stack consumption
  2005-11-04 17:42 ` Andrew Morton
@ 2005-11-04 17:45   ` Arjan van de Ven
  2005-11-04 18:37     ` Andrew Morton
  0 siblings, 1 reply; 16+ messages in thread
From: Arjan van de Ven @ 2005-11-04 17:45 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Andreas Herrmann, viro, heicars2, linux-kernel

On Fri, Nov 04, 2005 at 09:42:12AM -0800, Andrew Morton wrote:
> Andreas Herrmann <AHERRMAN@de.ibm.com> wrote:
> >
> > Obviously you missed the point that (depending on the compiler version,
> >  options etc.) do_move_mount() and do_add_mount() can be inlined.
> 
> I think we found a way of preventing the 3.x compiler from doing that.  Arjan,
> do you recall where we ended up with that problem?

it was mostly caused by -funit-at-a-time that caused 3.x to go haywire. You
need to turn that off in general.

the real fix is in gcc 4.x (4.1 at least) where gcc got a lot smarter about
stack slot lifetimes

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH resubmit] do_mount: reduce stack consumption
  2005-11-04 17:45   ` Arjan van de Ven
@ 2005-11-04 18:37     ` Andrew Morton
  0 siblings, 0 replies; 16+ messages in thread
From: Andrew Morton @ 2005-11-04 18:37 UTC (permalink / raw)
  To: Arjan van de Ven; +Cc: AHERRMAN, viro, heicars2, linux-kernel

Arjan van de Ven <arjanv@redhat.com> wrote:
>
> On Fri, Nov 04, 2005 at 09:42:12AM -0800, Andrew Morton wrote:
>  > Andreas Herrmann <AHERRMAN@de.ibm.com> wrote:
>  > >
>  > > Obviously you missed the point that (depending on the compiler version,
>  > >  options etc.) do_move_mount() and do_add_mount() can be inlined.
>  > 
>  > I think we found a way of preventing the 3.x compiler from doing that.  Arjan,
>  > do you recall where we ended up with that problem?
> 
>  it was mostly caused by -funit-at-a-time that caused 3.x to go haywire. You
>  need to turn that off in general.

In that case, does s390 still have a problem that we need to be solving here?

(s390 doesn't implement CONFIG_DEBUG_STACK_USAGE.  Suggest you do so, then
stick a dump_stack() into do_exit()).

(dm is still doing very risky things though)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH resubmit] do_mount: reduce stack consumption
  2005-11-04 16:48 ` Andrew Morton
@ 2005-11-04 21:27   ` Heiko Carstens
  2005-11-04 23:55     ` Adrian Bunk
  0 siblings, 1 reply; 16+ messages in thread
From: Heiko Carstens @ 2005-11-04 21:27 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, aherrman

> > See original stack back trace below and Andreas' patch and analysis
> > here:
> > http://www.ussg.iu.edu/hypermail/linux/kernel/0410.3/1844.html

I probably should add that with "original" stack back trace a trace of
a 2.6.10 kernel was meant, if that wasn't clear, but the DM code is
still the same in 2.6.14.

> >     <4>Call Trace:
...
> >     <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
> >     <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
> >     <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
> >     <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
> >     <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
> >     <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
> >     <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
> >     <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
> >     <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
> >     <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
> >     <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
> >     <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
...

This part of the call trace is actually good for >1500 bytes of stack
usage and is what kills us and should be fixed.
I'm surprised that there are no other bug reports regarding DM and
stack overflow with 4k stacks.
 
> I'd call that a device mapper bug.  If you were to increase the stacking
> from 4-deep to 5-deep, it will crash the kernel, patched or not.

Yes, you're right. Just forget the do_mount() patch. It's the wrong approach.

Heiko

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH resubmit] do_mount: reduce stack consumption
  2005-11-04 14:06     ` Al Viro
@ 2005-11-04 22:03       ` Heiko Carstens
  0 siblings, 0 replies; 16+ messages in thread
From: Heiko Carstens @ 2005-11-04 22:03 UTC (permalink / raw)
  To: Al Viro; +Cc: Andrew Morton, linux-kernel, Andreas Herrmann

> > If you don't like this one too, could you please explain what should be
> > changed?
> 
> Depth analysis.  E.g. do_move_mount() change is simply nonsense - _this_
> is not going to overflow, no matter what.  And do_add_mount() change
> is also very suspicious - looks like you are attacking the wrong place
> in call chain.

You're right (see also my other reply to Andrew).

Thanks,
Heiko


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH resubmit] do_mount: reduce stack consumption
  2005-11-04 21:27   ` Heiko Carstens
@ 2005-11-04 23:55     ` Adrian Bunk
  2005-11-05  0:08       ` Andrew Morton
  0 siblings, 1 reply; 16+ messages in thread
From: Adrian Bunk @ 2005-11-04 23:55 UTC (permalink / raw)
  To: Heiko Carstens; +Cc: Andrew Morton, linux-kernel, aherrman

On Fri, Nov 04, 2005 at 10:27:42PM +0100, Heiko Carstens wrote:
> > > See original stack back trace below and Andreas' patch and analysis
> > > here:
> > > http://www.ussg.iu.edu/hypermail/linux/kernel/0410.3/1844.html
> 
> I probably should add that with "original" stack back trace a trace of
> a 2.6.10 kernel was meant, if that wasn't clear, but the DM code is
> still the same in 2.6.14.
> 
> > >     <4>Call Trace:
> ...
> > >     <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
> > >     <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
> > >     <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
> > >     <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
> > >     <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
> > >     <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
> > >     <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
> > >     <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
> > >     <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
> > >     <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
> > >     <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
> > >     <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
> ...
> 
> This part of the call trace is actually good for >1500 bytes of stack
> usage and is what kills us and should be fixed.
> I'm surprised that there are no other bug reports regarding DM and
> stack overflow with 4k stacks.
>...

There were some reports of dm+xfs overflows with 4k stacks on i386.

The xfs side was sorted out, but I son't know the state of the dm part.

> Heiko

cu
Adrian

-- 

       "Is there not promise of rain?" Ling Tan asked suddenly out
        of the darkness. There had been need of rain for many days.
       "Only a promise," Lao Er said.
                                       Pearl S. Buck - Dragon Seed


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH resubmit] do_mount: reduce stack consumption
  2005-11-04 23:55     ` Adrian Bunk
@ 2005-11-05  0:08       ` Andrew Morton
  2005-11-05  1:15         ` cplk
  0 siblings, 1 reply; 16+ messages in thread
From: Andrew Morton @ 2005-11-05  0:08 UTC (permalink / raw)
  To: Adrian Bunk; +Cc: heiko.carstens, linux-kernel, aherrman, dm-devel

Adrian Bunk <bunk@stusta.de> wrote:
>
> On Fri, Nov 04, 2005 at 10:27:42PM +0100, Heiko Carstens wrote:
> > > > See original stack back trace below and Andreas' patch and analysis
> > > > here:
> > > > http://www.ussg.iu.edu/hypermail/linux/kernel/0410.3/1844.html
> > 
> > I probably should add that with "original" stack back trace a trace of
> > a 2.6.10 kernel was meant, if that wasn't clear, but the DM code is
> > still the same in 2.6.14.
> > 
> > > >     <4>Call Trace:
> > ...
> > > >     <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
> > > >     <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
> > > >     <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
> > > >     <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
> > > >     <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
> > > >     <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
> > > >     <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
> > > >     <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
> > > >     <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
> > > >     <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
> > > >     <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
> > > >     <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
> > ...
> > 
> > This part of the call trace is actually good for >1500 bytes of stack
> > usage and is what kills us and should be fixed.
> > I'm surprised that there are no other bug reports regarding DM and
> > stack overflow with 4k stacks.
> >...
> 
> There were some reports of dm+xfs overflows with 4k stacks on i386.
> 
> The xfs side was sorted out, but I son't know the state of the dm part.
> 

The state is Very Bad, IMO.  At the very least, DM should struggle to the
utmost to reduce the stack utilisation around that recursion point.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH resubmit] do_mount: reduce stack consumption
  2005-11-05  0:08       ` Andrew Morton
@ 2005-11-05  1:15         ` cplk
  2005-11-05  1:37           ` Andrew Morton
  0 siblings, 1 reply; 16+ messages in thread
From: cplk @ 2005-11-05  1:15 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Adrian Bunk, heiko.carstens, linux-kernel, aherrman, dm-devel

> > > This part of the call trace is actually good for >1500 bytes of stack
> > > usage and is what kills us and should be fixed.
> > > I'm surprised that there are no other bug reports regarding DM and
> > > stack overflow with 4k stacks.
> > >...
> > 
> > There were some reports of dm+xfs overflows with 4k stacks on i386.
> > 
> > The xfs side was sorted out, but I son't know the state of the dm part.
> > 
> 
> The state is Very Bad, IMO.  At the very least, DM should struggle to the
> utmost to reduce the stack utilisation around that recursion point.

Neil Brown suggested a change to generic_make_request to convert recursion 
through it into iteration (see http://lkml.org/lkml/2005/9/2/34 ), but 
there was no discussion at the time.  Would this help with this case?

Chris

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH resubmit] do_mount: reduce stack consumption
  2005-11-05  1:15         ` cplk
@ 2005-11-05  1:37           ` Andrew Morton
  2005-11-05  5:37             ` Neil Brown
  0 siblings, 1 reply; 16+ messages in thread
From: Andrew Morton @ 2005-11-05  1:37 UTC (permalink / raw)
  To: cplk, Neil Brown; +Cc: bunk, heiko.carstens, linux-kernel, aherrman, dm-devel

cplk@itee.uq.edu.au wrote:
>
> > > > This part of the call trace is actually good for >1500 bytes of stack
> > > > usage and is what kills us and should be fixed.
> > > > I'm surprised that there are no other bug reports regarding DM and
> > > > stack overflow with 4k stacks.
> > > >...
> > > 
> > > There were some reports of dm+xfs overflows with 4k stacks on i386.
> > > 
> > > The xfs side was sorted out, but I son't know the state of the dm part.
> > > 
> > 
> > The state is Very Bad, IMO.  At the very least, DM should struggle to the
> > utmost to reduce the stack utilisation around that recursion point.
> 
> Neil Brown suggested a change to generic_make_request to convert recursion 
> through it into iteration (see http://lkml.org/lkml/2005/9/2/34 ), but 
> there was no discussion at the time.  Would this help with this case?

It certainly would.  That looks like a good thing to do some more work on.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH resubmit] do_mount: reduce stack consumption
  2005-11-05  1:37           ` Andrew Morton
@ 2005-11-05  5:37             ` Neil Brown
  0 siblings, 0 replies; 16+ messages in thread
From: Neil Brown @ 2005-11-05  5:37 UTC (permalink / raw)
  To: Andrew Morton
  Cc: cplk, bunk, heiko.carstens, linux-kernel, aherrman, dm-devel

On Friday November 4, akpm@osdl.org wrote:
> cplk@itee.uq.edu.au wrote:
> >
> > > > > This part of the call trace is actually good for >1500 bytes of stack
> > > > > usage and is what kills us and should be fixed.
> > > > > I'm surprised that there are no other bug reports regarding DM and
> > > > > stack overflow with 4k stacks.
> > > > >...
> > > > 
> > > > There were some reports of dm+xfs overflows with 4k stacks on i386.
> > > > 
> > > > The xfs side was sorted out, but I son't know the state of the dm part.
> > > > 
> > > 
> > > The state is Very Bad, IMO.  At the very least, DM should struggle to the
> > > utmost to reduce the stack utilisation around that recursion point.
> > 
> > Neil Brown suggested a change to generic_make_request to convert recursion 
> > through it into iteration (see http://lkml.org/lkml/2005/9/2/34 ), but 
> > there was no discussion at the time.  Would this help with this case?
> 
> It certainly would.  That looks like a good thing to do some more work on.

Ok, I'll dust it off, make sure it seems to work (at the time I first
wrote it, I think it caused 'md' to deadlock) and submit it.

NeilBrown

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2005-11-05  5:57 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-11-04 15:53 [PATCH resubmit] do_mount: reduce stack consumption Andreas Herrmann
2005-11-04 17:42 ` Andrew Morton
2005-11-04 17:45   ` Arjan van de Ven
2005-11-04 18:37     ` Andrew Morton
  -- strict thread matches above, loose matches on Subject: below --
2005-11-04 10:50 Heiko Carstens
2005-11-04 11:48 ` Al Viro
2005-11-04 12:57   ` Heiko Carstens
2005-11-04 14:06     ` Al Viro
2005-11-04 22:03       ` Heiko Carstens
2005-11-04 16:48 ` Andrew Morton
2005-11-04 21:27   ` Heiko Carstens
2005-11-04 23:55     ` Adrian Bunk
2005-11-05  0:08       ` Andrew Morton
2005-11-05  1:15         ` cplk
2005-11-05  1:37           ` Andrew Morton
2005-11-05  5:37             ` Neil Brown

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox