* [PATCH resubmit] do_mount: reduce stack consumption
@ 2005-11-04 10:50 Heiko Carstens
2005-11-04 11:48 ` Al Viro
2005-11-04 16:48 ` Andrew Morton
0 siblings, 2 replies; 16+ messages in thread
From: Heiko Carstens @ 2005-11-04 10:50 UTC (permalink / raw)
To: Andrew Morton, linux-kernel; +Cc: Andreas Herrmann
From: Andreas Herrmann <aherrman@de.ibm.com>
This is a resubmit of Andreas' patch that reduces stackframe usage in
do_mount. Problem is that without this patch we get a kernel stack
overflow if we run with 4k stacks (s390 31 bit mode).
See original stack back trace below and Andreas' patch and analysis
here:
http://www.ussg.iu.edu/hypermail/linux/kernel/0410.3/1844.html
<4>Call Trace:
<4>([<000000000014ade6>] buffered_rmqueue+0x36a/0x3b4)
<4> [<000000000014aed6>] __alloc_pages+0xa6/0x350
<4> [<000000000014b258>] __get_free_pages+0x38/0x74
<4> [<000000000014f9fe>] kmem_getpages+0x32/0x140
<4> [<0000000000150396>] cache_alloc_refill+0x3ae/0x58c
<4> [<000000000014feb0>] __kmalloc+0xc0/0xe8
<4> [<00000000002acc64>] zfcp_mempool_alloc+0x24/0x34
<4> [<00000000001494ea>] mempool_alloc+0x56/0x1e8
<4> [<00000000002bfb46>] zfcp_fsf_req_create+0x56/0x5d8
<4> [<00000000002c0cf0>] zfcp_fsf_send_fcp_command_task+0x44/0x5c4
<4> [<00000000002afbda>] zfcp_scsi_command_async+0x7a/0x1f8
<4> [<00000000002afe68>] zfcp_scsi_queuecommand+0x44/0x54
<4> [<000000000024f458>] scsi_dispatch_cmd+0x234/0x3c0
<4> [<0000000000255fb2>] scsi_request_fn+0x2c6/0x67c
<4> [<000000000023f3ea>] __generic_unplug_device+0x52/0x60
<4> [<000000000023d392>] __elv_add_request+0x8a/0x98
<4> [<0000000000240c5e>] __make_request+0x306/0x62c
<4> [<0000000000241074>] generic_make_request+0xf0/0x21c
<4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
<4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
<4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
<4> [<0000000000241074>] generic_make_request+0xf0/0x21c
<4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
<4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
<4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
<4> [<0000000000241074>] generic_make_request+0xf0/0x21c
<4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
<4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
<4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
<4> [<0000000000241074>] generic_make_request+0xf0/0x21c
<4> [<000000000024121a>] submit_bio+0x7a/0x154
<4> [<00000000001736e6>] submit_bh+0x106/0x184
<4> [<000000000017514e>] __block_write_full_page+0x23a/0x58c
<4> [<000000000017558e>] block_write_full_page+0xee/0x108
<4> [<000000000017a908>] blkdev_writepage+0x24/0x38
<4> [<00000000001a0f76>] mpage_writepages+0x1da/0x9d0
<4> [<000000000017a4fa>] generic_writepages+0x22/0x30
<4> [<000000000014cc1e>] do_writepages+0x36/0x54
<4> [<0000000000144fe2>] __filemap_fdatawrite+0x5a/0x6c
<4> [<0000000000145016>] filemap_fdatawrite+0x22/0x30
<4> [<0000000000171244>] sync_blockdev+0x30/0x5c
<4> [<00000000001df074>] journal_recover+0x88/0xbc
<4> [<00000000001e2876>] journal_load+0x52/0xcc
<4> [<00000000001d37b4>] ext3_fill_super+0x1a48/0x1d38
<4> [<0000000000179faa>] get_sb_bdev+0x13a/0x208
<4> [<00000000001d3bd6>] ext3_get_sb+0x22/0x34
<4> [<000000000017a32e>] do_kern_mount+0x86/0x1f4
<4> [<000000000019aaf4>] do_mount+0x1c8/0x8e4
<4> [<000000000019b7ba>] sys_mount+0xce/0x1dc
<4> [<000000000010f9a6>] sysc_do_restart+0xe/0x12
Signed-off-by: Andreas Herrmann <aherrman@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
fs/namespace.c | 80 ++++++++++++++++++++++++++++++++++++---------------------
1 files changed, 51 insertions(+), 29 deletions(-)
diff -urpN linux-2.6/fs/namespace.c linux-2.6-patched/fs/namespace.c
--- linux-2.6/fs/namespace.c 2005-10-28 02:02:08.000000000 +0200
+++ linux-2.6-patched/fs/namespace.c 2005-11-03 13:39:30.000000000 +0100
@@ -619,25 +619,31 @@ out_unlock:
*/
static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
{
- struct nameidata old_nd;
+ struct nameidata *old_nd;
struct vfsmount *mnt = NULL;
int err = mount_is_safe(nd);
if (err)
return err;
if (!old_name || !*old_name)
return -EINVAL;
- err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
- if (err)
+
+ old_nd = kmalloc(sizeof(*old_nd), GFP_KERNEL);
+ if (!old_nd)
+ return -ENOMEM;
+ err = path_lookup(old_name, LOOKUP_FOLLOW, old_nd);
+ if (err) {
+ kfree(old_nd);
return err;
+ }
down_write(¤t->namespace->sem);
err = -EINVAL;
- if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd.mnt))) {
+ if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd->mnt))) {
err = -ENOMEM;
if (recurse)
- mnt = copy_tree(old_nd.mnt, old_nd.dentry);
+ mnt = copy_tree(old_nd->mnt, old_nd->dentry);
else
- mnt = clone_mnt(old_nd.mnt, old_nd.dentry);
+ mnt = clone_mnt(old_nd->mnt, old_nd->dentry);
}
if (mnt) {
@@ -656,7 +662,8 @@ static int do_loopback(struct nameidata
}
up_write(¤t->namespace->sem);
- path_release(&old_nd);
+ path_release(old_nd);
+ kfree(old_nd);
return err;
}
@@ -693,22 +700,29 @@ static int do_remount(struct nameidata *
static int do_move_mount(struct nameidata *nd, char *old_name)
{
- struct nameidata old_nd, parent_nd;
+ struct {
+ struct nameidata old_nd, parent_nd;
+ } *loc;
struct vfsmount *p;
int err = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (!old_name || !*old_name)
return -EINVAL;
- err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
- if (err)
+ loc = kmalloc(sizeof(*loc), GFP_KERNEL);
+ if (!loc)
+ return -ENOMEM;
+ err = path_lookup(old_name, LOOKUP_FOLLOW, &loc->old_nd);
+ if (err) {
+ kfree (loc);
return err;
+ }
down_write(¤t->namespace->sem);
while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
;
err = -EINVAL;
- if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt))
+ if (!check_mnt(nd->mnt) || !check_mnt(loc->old_nd.mnt))
goto out;
err = -ENOENT;
@@ -721,28 +735,28 @@ static int do_move_mount(struct nameidat
goto out2;
err = -EINVAL;
- if (old_nd.dentry != old_nd.mnt->mnt_root)
+ if (loc->old_nd.dentry != loc->old_nd.mnt->mnt_root)
goto out2;
- if (old_nd.mnt == old_nd.mnt->mnt_parent)
+ if (loc->old_nd.mnt == loc->old_nd.mnt->mnt_parent)
goto out2;
if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
- S_ISDIR(old_nd.dentry->d_inode->i_mode))
+ S_ISDIR(loc->old_nd.dentry->d_inode->i_mode))
goto out2;
err = -ELOOP;
for (p = nd->mnt; p->mnt_parent!=p; p = p->mnt_parent)
- if (p == old_nd.mnt)
+ if (p == loc->old_nd.mnt)
goto out2;
err = 0;
- detach_mnt(old_nd.mnt, &parent_nd);
- attach_mnt(old_nd.mnt, nd);
+ detach_mnt(loc->old_nd.mnt, &loc->parent_nd);
+ attach_mnt(loc->old_nd.mnt, nd);
/* if the mount is moved, it should no longer be expire
* automatically */
- list_del_init(&old_nd.mnt->mnt_expire);
+ list_del_init(&loc->old_nd.mnt->mnt_expire);
out2:
spin_unlock(&vfsmount_lock);
out1:
@@ -750,8 +764,9 @@ out1:
out:
up_write(¤t->namespace->sem);
if (!err)
- path_release(&parent_nd);
- path_release(&old_nd);
+ path_release(&loc->parent_nd);
+ path_release(&loc->old_nd);
+ kfree(loc);
return err;
}
@@ -1014,7 +1029,7 @@ int copy_mount_options(const void __user
long do_mount(char * dev_name, char * dir_name, char *type_page,
unsigned long flags, void *data_page)
{
- struct nameidata nd;
+ struct nameidata *nd;
int retval = 0;
int mnt_flags = 0;
@@ -1041,27 +1056,34 @@ long do_mount(char * dev_name, char * di
mnt_flags |= MNT_NOEXEC;
flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_ACTIVE);
+ nd = kmalloc(sizeof(*nd), GFP_KERNEL);
+ if (!nd)
+ return -ENOMEM;
+
/* ... and get the mountpoint */
- retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
- if (retval)
+ retval = path_lookup(dir_name, LOOKUP_FOLLOW, nd);
+ if (retval) {
+ kfree(nd);
return retval;
+ }
- retval = security_sb_mount(dev_name, &nd, type_page, flags, data_page);
+ retval = security_sb_mount(dev_name, nd, type_page, flags, data_page);
if (retval)
goto dput_out;
if (flags & MS_REMOUNT)
- retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
+ retval = do_remount(nd, flags & ~MS_REMOUNT, mnt_flags,
data_page);
else if (flags & MS_BIND)
- retval = do_loopback(&nd, dev_name, flags & MS_REC);
+ retval = do_loopback(nd, dev_name, flags & MS_REC);
else if (flags & MS_MOVE)
- retval = do_move_mount(&nd, dev_name);
+ retval = do_move_mount(nd, dev_name);
else
- retval = do_new_mount(&nd, type_page, flags, mnt_flags,
+ retval = do_new_mount(nd, type_page, flags, mnt_flags,
dev_name, data_page);
dput_out:
- path_release(&nd);
+ path_release(nd);
+ kfree(nd);
return retval;
}
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH resubmit] do_mount: reduce stack consumption
2005-11-04 10:50 Heiko Carstens
@ 2005-11-04 11:48 ` Al Viro
2005-11-04 12:57 ` Heiko Carstens
2005-11-04 16:48 ` Andrew Morton
1 sibling, 1 reply; 16+ messages in thread
From: Al Viro @ 2005-11-04 11:48 UTC (permalink / raw)
To: Heiko Carstens; +Cc: Andrew Morton, linux-kernel, Andreas Herrmann
On Fri, Nov 04, 2005 at 11:50:26AM +0100, Heiko Carstens wrote:
> From: Andreas Herrmann <aherrman@de.ibm.com>
>
> This is a resubmit of Andreas' patch that reduces stackframe usage in
> do_mount. Problem is that without this patch we get a kernel stack
> overflow if we run with 4k stacks (s390 31 bit mode).
> See original stack back trace below and Andreas' patch and analysis
> here:
> http://www.ussg.iu.edu/hypermail/linux/kernel/0410.3/1844.html
NAK. Rationale: too ugly.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH resubmit] do_mount: reduce stack consumption
2005-11-04 11:48 ` Al Viro
@ 2005-11-04 12:57 ` Heiko Carstens
2005-11-04 14:06 ` Al Viro
0 siblings, 1 reply; 16+ messages in thread
From: Heiko Carstens @ 2005-11-04 12:57 UTC (permalink / raw)
To: Al Viro; +Cc: Andrew Morton, linux-kernel, Andreas Herrmann
> > This is a resubmit of Andreas' patch that reduces stackframe usage in
> > do_mount. Problem is that without this patch we get a kernel stack
> > overflow if we run with 4k stacks (s390 31 bit mode).
> > See original stack back trace below and Andreas' patch and analysis
> > here:
> > http://www.ussg.iu.edu/hypermail/linux/kernel/0410.3/1844.html
>
> NAK. Rationale: too ugly.
Ok, since I can only guess what you don't like: here is an updated patch
that probably addresses a few things.
If you don't like this one too, could you please explain what should be
changed?
Thanks,
Heiko
---
fs/namespace.c | 79 ++++++++++++++++++++++++++++++++++++---------------------
1 file changed, 50 insertions(+), 29 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index 2fa9fdf..c14fbfc 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -619,25 +619,30 @@ out_unlock:
*/
static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
{
- struct nameidata old_nd;
+ struct nameidata *old_nd;
struct vfsmount *mnt = NULL;
int err = mount_is_safe(nd);
if (err)
return err;
if (!old_name || !*old_name)
return -EINVAL;
- err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
+
+ old_nd = kmalloc(sizeof(*old_nd), GFP_KERNEL);
+ if (!old_nd)
+ return -ENOMEM;
+
+ err = path_lookup(old_name, LOOKUP_FOLLOW, old_nd);
if (err)
- return err;
+ goto out;
down_write(¤t->namespace->sem);
err = -EINVAL;
- if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd.mnt))) {
+ if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd->mnt))) {
err = -ENOMEM;
if (recurse)
- mnt = copy_tree(old_nd.mnt, old_nd.dentry);
+ mnt = copy_tree(old_nd->mnt, old_nd->dentry);
else
- mnt = clone_mnt(old_nd.mnt, old_nd.dentry);
+ mnt = clone_mnt(old_nd->mnt, old_nd->dentry);
}
if (mnt) {
@@ -656,7 +661,9 @@ static int do_loopback(struct nameidata
}
up_write(¤t->namespace->sem);
- path_release(&old_nd);
+ path_release(old_nd);
+out:
+ kfree(old_nd);
return err;
}
@@ -693,22 +700,28 @@ static int do_remount(struct nameidata *
static int do_move_mount(struct nameidata *nd, char *old_name)
{
- struct nameidata old_nd, parent_nd;
+ struct {
+ struct nameidata old_nd, parent_nd;
+ } *loc;
struct vfsmount *p;
int err = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (!old_name || !*old_name)
return -EINVAL;
- err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
+ loc = kmalloc(sizeof(*loc), GFP_KERNEL);
+ if (!loc)
+ return -ENOMEM;
+
+ err = path_lookup(old_name, LOOKUP_FOLLOW, &loc->old_nd);
if (err)
- return err;
+ goto path_out;
down_write(¤t->namespace->sem);
while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
;
err = -EINVAL;
- if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt))
+ if (!check_mnt(nd->mnt) || !check_mnt(loc->old_nd.mnt))
goto out;
err = -ENOENT;
@@ -721,28 +734,28 @@ static int do_move_mount(struct nameidat
goto out2;
err = -EINVAL;
- if (old_nd.dentry != old_nd.mnt->mnt_root)
+ if (loc->old_nd.dentry != loc->old_nd.mnt->mnt_root)
goto out2;
- if (old_nd.mnt == old_nd.mnt->mnt_parent)
+ if (loc->old_nd.mnt == loc->old_nd.mnt->mnt_parent)
goto out2;
if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
- S_ISDIR(old_nd.dentry->d_inode->i_mode))
+ S_ISDIR(loc->old_nd.dentry->d_inode->i_mode))
goto out2;
err = -ELOOP;
for (p = nd->mnt; p->mnt_parent!=p; p = p->mnt_parent)
- if (p == old_nd.mnt)
+ if (p == loc->old_nd.mnt)
goto out2;
err = 0;
- detach_mnt(old_nd.mnt, &parent_nd);
- attach_mnt(old_nd.mnt, nd);
+ detach_mnt(loc->old_nd.mnt, &loc->parent_nd);
+ attach_mnt(loc->old_nd.mnt, nd);
/* if the mount is moved, it should no longer be expire
* automatically */
- list_del_init(&old_nd.mnt->mnt_expire);
+ list_del_init(&loc->old_nd.mnt->mnt_expire);
out2:
spin_unlock(&vfsmount_lock);
out1:
@@ -750,8 +763,10 @@ out1:
out:
up_write(¤t->namespace->sem);
if (!err)
- path_release(&parent_nd);
- path_release(&old_nd);
+ path_release(&loc->parent_nd);
+ path_release(&loc->old_nd);
+path_out:
+ kfree(loc);
return err;
}
@@ -1014,7 +1029,7 @@ int copy_mount_options(const void __user
long do_mount(char * dev_name, char * dir_name, char *type_page,
unsigned long flags, void *data_page)
{
- struct nameidata nd;
+ struct nameidata *nd;
int retval = 0;
int mnt_flags = 0;
@@ -1041,27 +1056,33 @@ long do_mount(char * dev_name, char * di
mnt_flags |= MNT_NOEXEC;
flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_ACTIVE);
+ nd = kmalloc(sizeof(*nd), GFP_KERNEL);
+ if (!nd)
+ return -ENOMEM;
+
/* ... and get the mountpoint */
- retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
+ retval = path_lookup(dir_name, LOOKUP_FOLLOW, nd);
if (retval)
- return retval;
+ goto path_out;
- retval = security_sb_mount(dev_name, &nd, type_page, flags, data_page);
+ retval = security_sb_mount(dev_name, nd, type_page, flags, data_page);
if (retval)
goto dput_out;
if (flags & MS_REMOUNT)
- retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
+ retval = do_remount(nd, flags & ~MS_REMOUNT, mnt_flags,
data_page);
else if (flags & MS_BIND)
- retval = do_loopback(&nd, dev_name, flags & MS_REC);
+ retval = do_loopback(nd, dev_name, flags & MS_REC);
else if (flags & MS_MOVE)
- retval = do_move_mount(&nd, dev_name);
+ retval = do_move_mount(nd, dev_name);
else
- retval = do_new_mount(&nd, type_page, flags, mnt_flags,
+ retval = do_new_mount(nd, type_page, flags, mnt_flags,
dev_name, data_page);
dput_out:
- path_release(&nd);
+ path_release(nd);
+path_out:
+ kfree(nd);
return retval;
}
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH resubmit] do_mount: reduce stack consumption
2005-11-04 12:57 ` Heiko Carstens
@ 2005-11-04 14:06 ` Al Viro
2005-11-04 22:03 ` Heiko Carstens
0 siblings, 1 reply; 16+ messages in thread
From: Al Viro @ 2005-11-04 14:06 UTC (permalink / raw)
To: Heiko Carstens; +Cc: Andrew Morton, linux-kernel, Andreas Herrmann
On Fri, Nov 04, 2005 at 01:57:05PM +0100, Heiko Carstens wrote:
> > > This is a resubmit of Andreas' patch that reduces stackframe usage in
> > > do_mount. Problem is that without this patch we get a kernel stack
> > > overflow if we run with 4k stacks (s390 31 bit mode).
> > > See original stack back trace below and Andreas' patch and analysis
> > > here:
> > > http://www.ussg.iu.edu/hypermail/linux/kernel/0410.3/1844.html
> >
> > NAK. Rationale: too ugly.
>
> Ok, since I can only guess what you don't like: here is an updated patch
> that probably addresses a few things.
> If you don't like this one too, could you please explain what should be
> changed?
Depth analysis. E.g. do_move_mount() change is simply nonsense - _this_
is not going to overflow, no matter what. And do_add_mount() change
is also very suspicious - looks like you are attacking the wrong place
in call chain.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH resubmit] do_mount: reduce stack consumption
@ 2005-11-04 15:53 Andreas Herrmann
2005-11-04 17:42 ` Andrew Morton
0 siblings, 1 reply; 16+ messages in thread
From: Andreas Herrmann @ 2005-11-04 15:53 UTC (permalink / raw)
To: Al Viro; +Cc: Andrew Morton, heicars2, linux-kernel
On 04.11.2005 15:06 Al Viro wrote:
> On Fri, Nov 04, 2005 at 01:57:05PM +0100, Heiko Carstens wrote:
> > Ok, since I can only guess what you don't like: here is an updated
patch
> > that probably addresses a few things.
> > If you don't like this one too, could you please explain what should
be
> > changed?
> Depth analysis. E.g. do_move_mount() change is simply nonsense - _this_
> is not going to overflow, no matter what. And do_add_mount() change
> is also very suspicious - looks like you are attacking the wrong place
> in call chain.
Obviously you missed the point that (depending on the compiler version,
options etc.) do_move_mount() and do_add_mount() can be inlined.
Hence stack frame size of do_mount is directly influenced by those two
functions. At least when I performed my "skin-deep" analysis of this
kernel stack overflow both functions were inlined in do_mount().
Stack consumptions of all other functions occurring in the backtrace
were moderate or due to the amount of inlined functions.
Just do_mount and its inlined friends use to put larger structures on
the stack that better should be allocated. There would not be any
performance impact for the user to allocate the structs.
Regards,
Andreas
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH resubmit] do_mount: reduce stack consumption
2005-11-04 10:50 Heiko Carstens
2005-11-04 11:48 ` Al Viro
@ 2005-11-04 16:48 ` Andrew Morton
2005-11-04 21:27 ` Heiko Carstens
1 sibling, 1 reply; 16+ messages in thread
From: Andrew Morton @ 2005-11-04 16:48 UTC (permalink / raw)
To: Heiko Carstens; +Cc: linux-kernel, aherrman
Heiko Carstens <heiko.carstens@de.ibm.com> wrote:
>
> From: Andreas Herrmann <aherrman@de.ibm.com>
>
> This is a resubmit of Andreas' patch that reduces stackframe usage in
> do_mount. Problem is that without this patch we get a kernel stack
> overflow if we run with 4k stacks (s390 31 bit mode).
> See original stack back trace below and Andreas' patch and analysis
> here:
> http://www.ussg.iu.edu/hypermail/linux/kernel/0410.3/1844.html
>
> <4>Call Trace:
> <4>([<000000000014ade6>] buffered_rmqueue+0x36a/0x3b4)
> <4> [<000000000014aed6>] __alloc_pages+0xa6/0x350
> <4> [<000000000014b258>] __get_free_pages+0x38/0x74
> <4> [<000000000014f9fe>] kmem_getpages+0x32/0x140
> <4> [<0000000000150396>] cache_alloc_refill+0x3ae/0x58c
> <4> [<000000000014feb0>] __kmalloc+0xc0/0xe8
> <4> [<00000000002acc64>] zfcp_mempool_alloc+0x24/0x34
> <4> [<00000000001494ea>] mempool_alloc+0x56/0x1e8
> <4> [<00000000002bfb46>] zfcp_fsf_req_create+0x56/0x5d8
> <4> [<00000000002c0cf0>] zfcp_fsf_send_fcp_command_task+0x44/0x5c4
> <4> [<00000000002afbda>] zfcp_scsi_command_async+0x7a/0x1f8
> <4> [<00000000002afe68>] zfcp_scsi_queuecommand+0x44/0x54
> <4> [<000000000024f458>] scsi_dispatch_cmd+0x234/0x3c0
> <4> [<0000000000255fb2>] scsi_request_fn+0x2c6/0x67c
> <4> [<000000000023f3ea>] __generic_unplug_device+0x52/0x60
> <4> [<000000000023d392>] __elv_add_request+0x8a/0x98
> <4> [<0000000000240c5e>] __make_request+0x306/0x62c
> <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
> <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
> <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
> <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
> <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
> <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
> <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
> <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
> <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
> <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
> <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
> <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
> <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
> <4> [<000000000024121a>] submit_bio+0x7a/0x154
> <4> [<00000000001736e6>] submit_bh+0x106/0x184
> <4> [<000000000017514e>] __block_write_full_page+0x23a/0x58c
> <4> [<000000000017558e>] block_write_full_page+0xee/0x108
> <4> [<000000000017a908>] blkdev_writepage+0x24/0x38
> <4> [<00000000001a0f76>] mpage_writepages+0x1da/0x9d0
> <4> [<000000000017a4fa>] generic_writepages+0x22/0x30
> <4> [<000000000014cc1e>] do_writepages+0x36/0x54
> <4> [<0000000000144fe2>] __filemap_fdatawrite+0x5a/0x6c
> <4> [<0000000000145016>] filemap_fdatawrite+0x22/0x30
> <4> [<0000000000171244>] sync_blockdev+0x30/0x5c
> <4> [<00000000001df074>] journal_recover+0x88/0xbc
> <4> [<00000000001e2876>] journal_load+0x52/0xcc
> <4> [<00000000001d37b4>] ext3_fill_super+0x1a48/0x1d38
> <4> [<0000000000179faa>] get_sb_bdev+0x13a/0x208
> <4> [<00000000001d3bd6>] ext3_get_sb+0x22/0x34
> <4> [<000000000017a32e>] do_kern_mount+0x86/0x1f4
> <4> [<000000000019aaf4>] do_mount+0x1c8/0x8e4
> <4> [<000000000019b7ba>] sys_mount+0xce/0x1dc
> <4> [<000000000010f9a6>] sysc_do_restart+0xe/0x12
I'd call that a device mapper bug. If you were to increase the stacking
from 4-deep to 5-deep, it will crash the kernel, patched or not.
I'm (very) surprised that DM doesn't perform its stacking iteratively.
Eliminating the 36-byte `struct clone_info' in __split_bio() would be most
effective here.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH resubmit] do_mount: reduce stack consumption
2005-11-04 15:53 [PATCH resubmit] do_mount: reduce stack consumption Andreas Herrmann
@ 2005-11-04 17:42 ` Andrew Morton
2005-11-04 17:45 ` Arjan van de Ven
0 siblings, 1 reply; 16+ messages in thread
From: Andrew Morton @ 2005-11-04 17:42 UTC (permalink / raw)
To: Andreas Herrmann; +Cc: viro, heicars2, linux-kernel, Arjan van de Ven
Andreas Herrmann <AHERRMAN@de.ibm.com> wrote:
>
> Obviously you missed the point that (depending on the compiler version,
> options etc.) do_move_mount() and do_add_mount() can be inlined.
I think we found a way of preventing the 3.x compiler from doing that. Arjan,
do you recall where we ended up with that problem?
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH resubmit] do_mount: reduce stack consumption
2005-11-04 17:42 ` Andrew Morton
@ 2005-11-04 17:45 ` Arjan van de Ven
2005-11-04 18:37 ` Andrew Morton
0 siblings, 1 reply; 16+ messages in thread
From: Arjan van de Ven @ 2005-11-04 17:45 UTC (permalink / raw)
To: Andrew Morton; +Cc: Andreas Herrmann, viro, heicars2, linux-kernel
On Fri, Nov 04, 2005 at 09:42:12AM -0800, Andrew Morton wrote:
> Andreas Herrmann <AHERRMAN@de.ibm.com> wrote:
> >
> > Obviously you missed the point that (depending on the compiler version,
> > options etc.) do_move_mount() and do_add_mount() can be inlined.
>
> I think we found a way of preventing the 3.x compiler from doing that. Arjan,
> do you recall where we ended up with that problem?
it was mostly caused by -funit-at-a-time that caused 3.x to go haywire. You
need to turn that off in general.
the real fix is in gcc 4.x (4.1 at least) where gcc got a lot smarter about
stack slot lifetimes
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH resubmit] do_mount: reduce stack consumption
2005-11-04 17:45 ` Arjan van de Ven
@ 2005-11-04 18:37 ` Andrew Morton
0 siblings, 0 replies; 16+ messages in thread
From: Andrew Morton @ 2005-11-04 18:37 UTC (permalink / raw)
To: Arjan van de Ven; +Cc: AHERRMAN, viro, heicars2, linux-kernel
Arjan van de Ven <arjanv@redhat.com> wrote:
>
> On Fri, Nov 04, 2005 at 09:42:12AM -0800, Andrew Morton wrote:
> > Andreas Herrmann <AHERRMAN@de.ibm.com> wrote:
> > >
> > > Obviously you missed the point that (depending on the compiler version,
> > > options etc.) do_move_mount() and do_add_mount() can be inlined.
> >
> > I think we found a way of preventing the 3.x compiler from doing that. Arjan,
> > do you recall where we ended up with that problem?
>
> it was mostly caused by -funit-at-a-time that caused 3.x to go haywire. You
> need to turn that off in general.
In that case, does s390 still have a problem that we need to be solving here?
(s390 doesn't implement CONFIG_DEBUG_STACK_USAGE. Suggest you do so, then
stick a dump_stack() into do_exit()).
(dm is still doing very risky things though)
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH resubmit] do_mount: reduce stack consumption
2005-11-04 16:48 ` Andrew Morton
@ 2005-11-04 21:27 ` Heiko Carstens
2005-11-04 23:55 ` Adrian Bunk
0 siblings, 1 reply; 16+ messages in thread
From: Heiko Carstens @ 2005-11-04 21:27 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel, aherrman
> > See original stack back trace below and Andreas' patch and analysis
> > here:
> > http://www.ussg.iu.edu/hypermail/linux/kernel/0410.3/1844.html
I probably should add that with "original" stack back trace a trace of
a 2.6.10 kernel was meant, if that wasn't clear, but the DM code is
still the same in 2.6.14.
> > <4>Call Trace:
...
> > <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
> > <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
> > <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
> > <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
> > <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
> > <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
> > <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
> > <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
> > <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
> > <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
> > <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
> > <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
...
This part of the call trace is actually good for >1500 bytes of stack
usage and is what kills us and should be fixed.
I'm surprised that there are no other bug reports regarding DM and
stack overflow with 4k stacks.
> I'd call that a device mapper bug. If you were to increase the stacking
> from 4-deep to 5-deep, it will crash the kernel, patched or not.
Yes, you're right. Just forget the do_mount() patch. It's the wrong approach.
Heiko
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH resubmit] do_mount: reduce stack consumption
2005-11-04 14:06 ` Al Viro
@ 2005-11-04 22:03 ` Heiko Carstens
0 siblings, 0 replies; 16+ messages in thread
From: Heiko Carstens @ 2005-11-04 22:03 UTC (permalink / raw)
To: Al Viro; +Cc: Andrew Morton, linux-kernel, Andreas Herrmann
> > If you don't like this one too, could you please explain what should be
> > changed?
>
> Depth analysis. E.g. do_move_mount() change is simply nonsense - _this_
> is not going to overflow, no matter what. And do_add_mount() change
> is also very suspicious - looks like you are attacking the wrong place
> in call chain.
You're right (see also my other reply to Andrew).
Thanks,
Heiko
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH resubmit] do_mount: reduce stack consumption
2005-11-04 21:27 ` Heiko Carstens
@ 2005-11-04 23:55 ` Adrian Bunk
2005-11-05 0:08 ` Andrew Morton
0 siblings, 1 reply; 16+ messages in thread
From: Adrian Bunk @ 2005-11-04 23:55 UTC (permalink / raw)
To: Heiko Carstens; +Cc: Andrew Morton, linux-kernel, aherrman
On Fri, Nov 04, 2005 at 10:27:42PM +0100, Heiko Carstens wrote:
> > > See original stack back trace below and Andreas' patch and analysis
> > > here:
> > > http://www.ussg.iu.edu/hypermail/linux/kernel/0410.3/1844.html
>
> I probably should add that with "original" stack back trace a trace of
> a 2.6.10 kernel was meant, if that wasn't clear, but the DM code is
> still the same in 2.6.14.
>
> > > <4>Call Trace:
> ...
> > > <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
> > > <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
> > > <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
> > > <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
> > > <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
> > > <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
> > > <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
> > > <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
> > > <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
> > > <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
> > > <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
> > > <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
> ...
>
> This part of the call trace is actually good for >1500 bytes of stack
> usage and is what kills us and should be fixed.
> I'm surprised that there are no other bug reports regarding DM and
> stack overflow with 4k stacks.
>...
There were some reports of dm+xfs overflows with 4k stacks on i386.
The xfs side was sorted out, but I son't know the state of the dm part.
> Heiko
cu
Adrian
--
"Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
"Only a promise," Lao Er said.
Pearl S. Buck - Dragon Seed
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH resubmit] do_mount: reduce stack consumption
2005-11-04 23:55 ` Adrian Bunk
@ 2005-11-05 0:08 ` Andrew Morton
2005-11-05 1:15 ` cplk
0 siblings, 1 reply; 16+ messages in thread
From: Andrew Morton @ 2005-11-05 0:08 UTC (permalink / raw)
To: Adrian Bunk; +Cc: heiko.carstens, linux-kernel, aherrman, dm-devel
Adrian Bunk <bunk@stusta.de> wrote:
>
> On Fri, Nov 04, 2005 at 10:27:42PM +0100, Heiko Carstens wrote:
> > > > See original stack back trace below and Andreas' patch and analysis
> > > > here:
> > > > http://www.ussg.iu.edu/hypermail/linux/kernel/0410.3/1844.html
> >
> > I probably should add that with "original" stack back trace a trace of
> > a 2.6.10 kernel was meant, if that wasn't clear, but the DM code is
> > still the same in 2.6.14.
> >
> > > > <4>Call Trace:
> > ...
> > > > <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
> > > > <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
> > > > <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
> > > > <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
> > > > <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
> > > > <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
> > > > <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
> > > > <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
> > > > <4> [<0000000010831380>] __map_bio+0x70/0x160 [dm_mod]
> > > > <4> [<000000001083173e>] __split_bio+0x1e6/0x538 [dm_mod]
> > > > <4> [<0000000010831ba8>] dm_request+0x118/0x25c [dm_mod]
> > > > <4> [<0000000000241074>] generic_make_request+0xf0/0x21c
> > ...
> >
> > This part of the call trace is actually good for >1500 bytes of stack
> > usage and is what kills us and should be fixed.
> > I'm surprised that there are no other bug reports regarding DM and
> > stack overflow with 4k stacks.
> >...
>
> There were some reports of dm+xfs overflows with 4k stacks on i386.
>
> The xfs side was sorted out, but I son't know the state of the dm part.
>
The state is Very Bad, IMO. At the very least, DM should struggle to the
utmost to reduce the stack utilisation around that recursion point.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH resubmit] do_mount: reduce stack consumption
2005-11-05 0:08 ` Andrew Morton
@ 2005-11-05 1:15 ` cplk
2005-11-05 1:37 ` Andrew Morton
0 siblings, 1 reply; 16+ messages in thread
From: cplk @ 2005-11-05 1:15 UTC (permalink / raw)
To: Andrew Morton
Cc: Adrian Bunk, heiko.carstens, linux-kernel, aherrman, dm-devel
> > > This part of the call trace is actually good for >1500 bytes of stack
> > > usage and is what kills us and should be fixed.
> > > I'm surprised that there are no other bug reports regarding DM and
> > > stack overflow with 4k stacks.
> > >...
> >
> > There were some reports of dm+xfs overflows with 4k stacks on i386.
> >
> > The xfs side was sorted out, but I son't know the state of the dm part.
> >
>
> The state is Very Bad, IMO. At the very least, DM should struggle to the
> utmost to reduce the stack utilisation around that recursion point.
Neil Brown suggested a change to generic_make_request to convert recursion
through it into iteration (see http://lkml.org/lkml/2005/9/2/34 ), but
there was no discussion at the time. Would this help with this case?
Chris
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH resubmit] do_mount: reduce stack consumption
2005-11-05 1:15 ` cplk
@ 2005-11-05 1:37 ` Andrew Morton
2005-11-05 5:37 ` Neil Brown
0 siblings, 1 reply; 16+ messages in thread
From: Andrew Morton @ 2005-11-05 1:37 UTC (permalink / raw)
To: cplk, Neil Brown; +Cc: bunk, heiko.carstens, linux-kernel, aherrman, dm-devel
cplk@itee.uq.edu.au wrote:
>
> > > > This part of the call trace is actually good for >1500 bytes of stack
> > > > usage and is what kills us and should be fixed.
> > > > I'm surprised that there are no other bug reports regarding DM and
> > > > stack overflow with 4k stacks.
> > > >...
> > >
> > > There were some reports of dm+xfs overflows with 4k stacks on i386.
> > >
> > > The xfs side was sorted out, but I son't know the state of the dm part.
> > >
> >
> > The state is Very Bad, IMO. At the very least, DM should struggle to the
> > utmost to reduce the stack utilisation around that recursion point.
>
> Neil Brown suggested a change to generic_make_request to convert recursion
> through it into iteration (see http://lkml.org/lkml/2005/9/2/34 ), but
> there was no discussion at the time. Would this help with this case?
It certainly would. That looks like a good thing to do some more work on.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH resubmit] do_mount: reduce stack consumption
2005-11-05 1:37 ` Andrew Morton
@ 2005-11-05 5:37 ` Neil Brown
0 siblings, 0 replies; 16+ messages in thread
From: Neil Brown @ 2005-11-05 5:37 UTC (permalink / raw)
To: Andrew Morton
Cc: cplk, bunk, heiko.carstens, linux-kernel, aherrman, dm-devel
On Friday November 4, akpm@osdl.org wrote:
> cplk@itee.uq.edu.au wrote:
> >
> > > > > This part of the call trace is actually good for >1500 bytes of stack
> > > > > usage and is what kills us and should be fixed.
> > > > > I'm surprised that there are no other bug reports regarding DM and
> > > > > stack overflow with 4k stacks.
> > > > >...
> > > >
> > > > There were some reports of dm+xfs overflows with 4k stacks on i386.
> > > >
> > > > The xfs side was sorted out, but I son't know the state of the dm part.
> > > >
> > >
> > > The state is Very Bad, IMO. At the very least, DM should struggle to the
> > > utmost to reduce the stack utilisation around that recursion point.
> >
> > Neil Brown suggested a change to generic_make_request to convert recursion
> > through it into iteration (see http://lkml.org/lkml/2005/9/2/34 ), but
> > there was no discussion at the time. Would this help with this case?
>
> It certainly would. That looks like a good thing to do some more work on.
Ok, I'll dust it off, make sure it seems to work (at the time I first
wrote it, I think it caused 'md' to deadlock) and submit it.
NeilBrown
^ permalink raw reply [flat|nested] 16+ messages in thread
end of thread, other threads:[~2005-11-05 5:57 UTC | newest]
Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-11-04 15:53 [PATCH resubmit] do_mount: reduce stack consumption Andreas Herrmann
2005-11-04 17:42 ` Andrew Morton
2005-11-04 17:45 ` Arjan van de Ven
2005-11-04 18:37 ` Andrew Morton
-- strict thread matches above, loose matches on Subject: below --
2005-11-04 10:50 Heiko Carstens
2005-11-04 11:48 ` Al Viro
2005-11-04 12:57 ` Heiko Carstens
2005-11-04 14:06 ` Al Viro
2005-11-04 22:03 ` Heiko Carstens
2005-11-04 16:48 ` Andrew Morton
2005-11-04 21:27 ` Heiko Carstens
2005-11-04 23:55 ` Adrian Bunk
2005-11-05 0:08 ` Andrew Morton
2005-11-05 1:15 ` cplk
2005-11-05 1:37 ` Andrew Morton
2005-11-05 5:37 ` Neil Brown
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox