linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] reduce stack consumption in do_mount
@ 2004-10-28 17:14 Andreas Herrmann
  2004-10-28 20:31 ` Denis Vlasenko
  2004-10-30 16:14 ` Marcelo Tosatti
  0 siblings, 2 replies; 4+ messages in thread
From: Andreas Herrmann @ 2004-10-28 17:14 UTC (permalink / raw)
  To: linux-kernel





I have seen a kernel stack overflow during mount of a SCSI disk on
s390, 31bit, with 4K stack size.

The backtrace showed that there were 3 functions with stack
consumption of above 200 bytes.

These are do_mount (328 bytes stack size), ext3_fill_super (288 bytes)
and mpage_writepages (352 bytes).

For the latter 2 functions the large stack consumption seems to be
just due to extensive inlining of GCC 3.4.

But do_mount and friends store one or more struct nameidata on the
stack.  The size of this structure is 64 bytes on s390.

I suggest to kmalloc this structure in do_mount, do_loopback and
do_move_mount.

Applying the patch, stack size consumption of do_mount is 80 bytes
instead of 328 bytes. This would have avoided the kernel stack
overflow that I have encountered.

The patch is against linux-2.6.10-rc1.


Regards,

Andreas

--

diff -bBrauN linux-2.6.10-rc1/fs/namespace.c linux-2.6.x/fs/namespace.c
--- linux-2.6.10-rc1/fs/namespace.c 2004-10-28 17:29:26.000000000 +0200
+++ linux-2.6.x/fs/namespace.c      2004-10-28 17:29:56.000000000 +0200
@@ -621,25 +621,31 @@
  */
 static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
 {
-     struct nameidata old_nd;
+     struct nameidata *old_nd;
      struct vfsmount *mnt = NULL;
      int err = mount_is_safe(nd);
      if (err)
            return err;
      if (!old_name || !*old_name)
            return -EINVAL;
-     err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
-     if (err)
+
+     old_nd = kmalloc(sizeof(*old_nd), GFP_KERNEL);
+     if (!old_nd)
+           return -ENOMEM;
+     err = path_lookup(old_name, LOOKUP_FOLLOW, old_nd);
+     if (err) {
+           kfree(old_nd);
            return err;
+     }

      down_write(&current->namespace->sem);
      err = -EINVAL;
-     if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd.mnt))) {
+     if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd->mnt))) {
            err = -ENOMEM;
            if (recurse)
-                 mnt = copy_tree(old_nd.mnt, old_nd.dentry);
+                 mnt = copy_tree(old_nd->mnt, old_nd->dentry);
            else
-                 mnt = clone_mnt(old_nd.mnt, old_nd.dentry);
+                 mnt = clone_mnt(old_nd->mnt, old_nd->dentry);
      }

      if (mnt) {
@@ -658,7 +664,8 @@
      }

      up_write(&current->namespace->sem);
-     path_release(&old_nd);
+     path_release(old_nd);
+     kfree(old_nd);
      return err;
 }

@@ -695,22 +702,27 @@

 static int do_move_mount(struct nameidata *nd, char *old_name)
 {
-     struct nameidata old_nd, parent_nd;
+     struct nameidata *old_nd, *parent_nd = NULL;
      struct vfsmount *p;
      int err = 0;
      if (!capable(CAP_SYS_ADMIN))
            return -EPERM;
      if (!old_name || !*old_name)
            return -EINVAL;
-     err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
-     if (err)
+     old_nd = kmalloc(sizeof(*old_nd), GFP_KERNEL);
+     if (!old_nd)
+           return -ENOMEM;
+     err = path_lookup(old_name, LOOKUP_FOLLOW, old_nd);
+     if (err) {
+           kfree(old_nd);
            return err;
+     }

      down_write(&current->namespace->sem);
      while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
            ;
      err = -EINVAL;
-     if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt))
+     if (!check_mnt(nd->mnt) || !check_mnt(old_nd->mnt))
            goto out;

      err = -ENOENT;
@@ -723,37 +735,45 @@
            goto out2;

      err = -EINVAL;
-     if (old_nd.dentry != old_nd.mnt->mnt_root)
+     if (old_nd->dentry != old_nd->mnt->mnt_root)
            goto out2;

-     if (old_nd.mnt == old_nd.mnt->mnt_parent)
+     if (old_nd->mnt == old_nd->mnt->mnt_parent)
            goto out2;

      if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
-           S_ISDIR(old_nd.dentry->d_inode->i_mode))
+           S_ISDIR(old_nd->dentry->d_inode->i_mode))
            goto out2;

      err = -ELOOP;
      for (p = nd->mnt; p->mnt_parent!=p; p = p->mnt_parent)
-           if (p == old_nd.mnt)
+           if (p == old_nd->mnt)
                  goto out2;
      err = 0;

-     detach_mnt(old_nd.mnt, &parent_nd);
-     attach_mnt(old_nd.mnt, nd);
+     parent_nd = kmalloc(sizeof(*parent_nd), GFP_ATOMIC);
+     if (!parent_nd) {
+           err = -ENOMEM;
+           goto out2;
+     }
+     detach_mnt(old_nd->mnt, parent_nd);
+     attach_mnt(old_nd->mnt, nd);

      /* if the mount is moved, it should no longer be expire
       * automatically */
-     list_del_init(&old_nd.mnt->mnt_fslink);
+     list_del_init(&old_nd->mnt->mnt_fslink);
 out2:
      spin_unlock(&vfsmount_lock);
 out1:
      up(&nd->dentry->d_inode->i_sem);
 out:
      up_write(&current->namespace->sem);
-     if (!err)
-           path_release(&parent_nd);
-     path_release(&old_nd);
+     if (!err) {
+           path_release(parent_nd);
+           kfree(parent_nd);
+     }
+     path_release(old_nd);
+     kfree(old_nd);
      return err;
 }

@@ -1009,7 +1029,7 @@
 long do_mount(char * dev_name, char * dir_name, char *type_page,
              unsigned long flags, void *data_page)
 {
-     struct nameidata nd;
+     struct nameidata *nd;
      int retval = 0;
      int mnt_flags = 0;

@@ -1036,27 +1056,34 @@
            mnt_flags |= MNT_NOEXEC;
      flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_ACTIVE);

+     nd = kmalloc(sizeof(*nd), GFP_KERNEL);
+     if (!nd)
+           return -ENOMEM;
+
      /* ... and get the mountpoint */
-     retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
-     if (retval)
+     retval = path_lookup(dir_name, LOOKUP_FOLLOW, nd);
+     if (retval) {
+           kfree(nd);
            return retval;
+     }

-     retval = security_sb_mount(dev_name, &nd, type_page, flags, data_page);
+     retval = security_sb_mount(dev_name, nd, type_page, flags, data_page);
      if (retval)
            goto dput_out;

      if (flags & MS_REMOUNT)
-           retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
+           retval = do_remount(nd, flags & ~MS_REMOUNT, mnt_flags,
                            data_page);
      else if (flags & MS_BIND)
-           retval = do_loopback(&nd, dev_name, flags & MS_REC);
+           retval = do_loopback(nd, dev_name, flags & MS_REC);
      else if (flags & MS_MOVE)
-           retval = do_move_mount(&nd, dev_name);
+           retval = do_move_mount(nd, dev_name);
      else
-           retval = do_new_mount(&nd, type_page, flags, mnt_flags,
+           retval = do_new_mount(nd, type_page, flags, mnt_flags,
                              dev_name, data_page);
 dput_out:
-     path_release(&nd);
+     path_release(nd);
+     kfree(nd);
      return retval;
 }


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] reduce stack consumption in do_mount
  2004-10-28 17:14 [PATCH] reduce stack consumption in do_mount Andreas Herrmann
@ 2004-10-28 20:31 ` Denis Vlasenko
  2004-10-30 16:14 ` Marcelo Tosatti
  1 sibling, 0 replies; 4+ messages in thread
From: Denis Vlasenko @ 2004-10-28 20:31 UTC (permalink / raw)
  To: Andreas Herrmann, linux-kernel

On Thursday 28 October 2004 20:14, Andreas Herrmann wrote:
> 
> I have seen a kernel stack overflow during mount of a SCSI disk on
> s390, 31bit, with 4K stack size.
> 
> The backtrace showed that there were 3 functions with stack
> consumption of above 200 bytes.
> 
> These are do_mount (328 bytes stack size), ext3_fill_super (288 bytes)
> and mpage_writepages (352 bytes).
> 
> For the latter 2 functions the large stack consumption seems to be
> just due to extensive inlining of GCC 3.4.
> 
> But do_mount and friends store one or more struct nameidata on the
> stack.  The size of this structure is 64 bytes on s390.
> 
> I suggest to kmalloc this structure in do_mount, do_loopback and
> do_move_mount.
> 
> Applying the patch, stack size consumption of do_mount is 80 bytes
> instead of 328 bytes. This would have avoided the kernel stack
> overflow that I have encountered.

[snip]

>  static int do_move_mount(struct nameidata *nd, char *old_name)
>  {
> -     struct nameidata old_nd, parent_nd;
> +     struct nameidata *old_nd, *parent_nd = NULL;
>       struct vfsmount *p;
>       int err = 0;
>       if (!capable(CAP_SYS_ADMIN))
>             return -EPERM;
>       if (!old_name || !*old_name)
>             return -EINVAL;
> -     err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
> -     if (err)
> +     old_nd = kmalloc(sizeof(*old_nd), GFP_KERNEL);
> +     if (!old_nd)
> +           return -ENOMEM;

[snip]

> +     parent_nd = kmalloc(sizeof(*parent_nd), GFP_ATOMIC);
> +     if (!parent_nd) {
> +           err = -ENOMEM;
> +           goto out2;
> +     }

Do it in one go:

	struct {
		struct nameidata old_nd, parent_nd;
	} *loc;
	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
	if (loc)
		return -ENOMEM;

and add loc-> to every occurrence of old_nd and parent_nd.

Reduces time, space, fragmentation overhead and code size.
--
vda


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] reduce stack consumption in do_mount
@ 2004-10-29 14:29 Andreas Herrmann
  0 siblings, 0 replies; 4+ messages in thread
From: Andreas Herrmann @ 2004-10-29 14:29 UTC (permalink / raw)
  To: Denis Vlasenko; +Cc: linux-kernel





      Denis Vlasenko wrote:

[snip]

> Do it in one go:

>    struct {
>       struct nameidata old_nd, parent_nd;
>    } *loc;
>    loc = kmalloc(sizeof(*loc), GFP_KERNEL);
>    if (loc)
>       return -ENOMEM;

> and add loc-> to every occurrence of old_nd and parent_nd.

> Reduces time, space, fragmentation overhead and code size.

Done. Thanks for the advice.
Below is a new patch.


Regards,

Andreas

--
diff -u linux-2.6.10-rc1/fs/namespace.c linux-2.6.x/fs/namespace.c
--- linux-2.6.10-rc1/fs/namespace.c 2004-10-28 17:29:26.000000000 +0200
+++ linux-2.6.x/fs/namespace.c      2004-10-29 16:04:42.000000000 +0200
@@ -621,25 +621,31 @@
  */
 static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
 {
-     struct nameidata old_nd;
+     struct nameidata *old_nd;
      struct vfsmount *mnt = NULL;
      int err = mount_is_safe(nd);
      if (err)
            return err;
      if (!old_name || !*old_name)
            return -EINVAL;
-     err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
-     if (err)
+
+     old_nd = kmalloc(sizeof(*old_nd), GFP_KERNEL);
+     if (!old_nd)
+           return -ENOMEM;
+     err = path_lookup(old_name, LOOKUP_FOLLOW, old_nd);
+     if (err) {
+           kfree(old_nd);
            return err;
+     }

      down_write(&current->namespace->sem);
      err = -EINVAL;
-     if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd.mnt))) {
+     if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd->mnt))) {
            err = -ENOMEM;
            if (recurse)
-                 mnt = copy_tree(old_nd.mnt, old_nd.dentry);
+                 mnt = copy_tree(old_nd->mnt, old_nd->dentry);
            else
-                 mnt = clone_mnt(old_nd.mnt, old_nd.dentry);
+                 mnt = clone_mnt(old_nd->mnt, old_nd->dentry);
      }

      if (mnt) {
@@ -658,7 +664,8 @@
      }

      up_write(&current->namespace->sem);
-     path_release(&old_nd);
+     path_release(old_nd);
+     kfree(old_nd);
      return err;
 }

@@ -695,22 +702,29 @@

 static int do_move_mount(struct nameidata *nd, char *old_name)
 {
-     struct nameidata old_nd, parent_nd;
+     struct {
+           struct nameidata old_nd, parent_nd;
+     } *loc;
      struct vfsmount *p;
      int err = 0;
      if (!capable(CAP_SYS_ADMIN))
            return -EPERM;
      if (!old_name || !*old_name)
            return -EINVAL;
-     err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
-     if (err)
+     loc = kmalloc(sizeof(*loc), GFP_KERNEL);
+     if (!loc)
+           return -ENOMEM;
+     err = path_lookup(old_name, LOOKUP_FOLLOW, &loc->old_nd);
+     if (err) {
+           kfree (loc);
            return err;
+     }

      down_write(&current->namespace->sem);
      while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
            ;
      err = -EINVAL;
-     if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt))
+     if (!check_mnt(nd->mnt) || !check_mnt(loc->old_nd.mnt))
            goto out;

      err = -ENOENT;
@@ -723,28 +737,28 @@
            goto out2;

      err = -EINVAL;
-     if (old_nd.dentry != old_nd.mnt->mnt_root)
+     if (loc->old_nd.dentry != loc->old_nd.mnt->mnt_root)
            goto out2;

-     if (old_nd.mnt == old_nd.mnt->mnt_parent)
+     if (loc->old_nd.mnt == loc->old_nd.mnt->mnt_parent)
            goto out2;

      if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
-           S_ISDIR(old_nd.dentry->d_inode->i_mode))
+           S_ISDIR(loc->old_nd.dentry->d_inode->i_mode))
            goto out2;

      err = -ELOOP;
      for (p = nd->mnt; p->mnt_parent!=p; p = p->mnt_parent)
-           if (p == old_nd.mnt)
+           if (p == loc->old_nd.mnt)
                  goto out2;
      err = 0;

-     detach_mnt(old_nd.mnt, &parent_nd);
-     attach_mnt(old_nd.mnt, nd);
+     detach_mnt(loc->old_nd.mnt, &loc->parent_nd);
+     attach_mnt(loc->old_nd.mnt, nd);

      /* if the mount is moved, it should no longer be expire
       * automatically */
-     list_del_init(&old_nd.mnt->mnt_fslink);
+     list_del_init(&loc->old_nd.mnt->mnt_fslink);
 out2:
      spin_unlock(&vfsmount_lock);
 out1:
@@ -752,8 +766,9 @@
 out:
      up_write(&current->namespace->sem);
      if (!err)
-           path_release(&parent_nd);
-     path_release(&old_nd);
+           path_release(&loc->parent_nd);
+     path_release(&loc->old_nd);
+     kfree(loc);
      return err;
 }

@@ -1009,7 +1024,7 @@
 long do_mount(char * dev_name, char * dir_name, char *type_page,
              unsigned long flags, void *data_page)
 {
-     struct nameidata nd;
+     struct nameidata *nd;
      int retval = 0;
      int mnt_flags = 0;

@@ -1036,27 +1051,34 @@
            mnt_flags |= MNT_NOEXEC;
      flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_ACTIVE);

+     nd = kmalloc(sizeof(*nd), GFP_KERNEL);
+     if (!nd)
+           return -ENOMEM;
+
      /* ... and get the mountpoint */
-     retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
-     if (retval)
+     retval = path_lookup(dir_name, LOOKUP_FOLLOW, nd);
+     if (retval) {
+           kfree(nd);
            return retval;
+     }

-     retval = security_sb_mount(dev_name, &nd, type_page, flags, data_page);
+     retval = security_sb_mount(dev_name, nd, type_page, flags, data_page);
      if (retval)
            goto dput_out;

      if (flags & MS_REMOUNT)
-           retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
+           retval = do_remount(nd, flags & ~MS_REMOUNT, mnt_flags,
                            data_page);
      else if (flags & MS_BIND)
-           retval = do_loopback(&nd, dev_name, flags & MS_REC);
+           retval = do_loopback(nd, dev_name, flags & MS_REC);
      else if (flags & MS_MOVE)
-           retval = do_move_mount(&nd, dev_name);
+           retval = do_move_mount(nd, dev_name);
      else
-           retval = do_new_mount(&nd, type_page, flags, mnt_flags,
+           retval = do_new_mount(nd, type_page, flags, mnt_flags,
                              dev_name, data_page);
 dput_out:
-     path_release(&nd);
+     path_release(nd);
+     kfree(nd);
      return retval;
 }


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] reduce stack consumption in do_mount
  2004-10-28 17:14 [PATCH] reduce stack consumption in do_mount Andreas Herrmann
  2004-10-28 20:31 ` Denis Vlasenko
@ 2004-10-30 16:14 ` Marcelo Tosatti
  1 sibling, 0 replies; 4+ messages in thread
From: Marcelo Tosatti @ 2004-10-30 16:14 UTC (permalink / raw)
  To: Andreas Herrmann; +Cc: linux-kernel, Denis Vlasenko

On Thu, Oct 28, 2004 at 07:14:52PM +0200, Andreas Herrmann wrote:

> I have seen a kernel stack overflow during mount of a SCSI disk on
> s390, 31bit, with 4K stack size.
> 
> The backtrace showed that there were 3 functions with stack
> consumption of above 200 bytes.
> 
> These are do_mount (328 bytes stack size), ext3_fill_super (288 bytes)
> and mpage_writepages (352 bytes).

One possibly interesting thing for you guys who are trying to reduce 
stack usage is using the SLAB allocator for pagevec structures in the VM
code. mpage_readpages/writepages use those, and pretty much all VM code.

Allocating those structures from SLAB can also increase performance
due to cache colouring, but requires the additional instructions into
kmalloc() for allocation - needs benchmarking.

I want to try so if no one does it before.


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2004-10-30 19:05 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-10-28 17:14 [PATCH] reduce stack consumption in do_mount Andreas Herrmann
2004-10-28 20:31 ` Denis Vlasenko
2004-10-30 16:14 ` Marcelo Tosatti
  -- strict thread matches above, loose matches on Subject: below --
2004-10-29 14:29 Andreas Herrmann

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).