All of lore.kernel.org
 help / color / mirror / Atom feed
* simultaneous creates in a directory
@ 2002-09-27 14:02 Matthew Wilcox
  2002-09-27 19:44 ` Andrew Morton
  0 siblings, 1 reply; 4+ messages in thread
From: Matthew Wilcox @ 2002-09-27 14:02 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: dwmw2, wli


At startup, tiobench does a large number of open(O_CREAT) calls, all
in the same directory.  This leads to a large number of processes all
trying to do down() on the parent directory's i_sem.  That wouldn't be
so bad, but each one is sleeping with an entire page (of ZONE_NORMAL)
allocated.  800MB of ZONE_NORMAL / 4k pages.. ok, it'd take 200,000
tasks to completely OOM your box, but a user can certainly make life
difficult for us with fewer tasks.

The most important question is: Do we care?  tiobench is only a benchmark.
If this behaviour (creating tens of thousands of new files in a single
directory) is something no real application does, then let's modify the
benchmark to use subdirectories or something.

Unless I misread the code, we don't need `filename' again after calling
path_lookup().  So we _could_ putname() it immediately afterwards, except
that not all callers of filp_open() store their filename in a getname()
block.  The most obvious way to fix this is to have a nameidata flag that
says "putname() once you're done with it".

Impact of that seems fairly limited.  filp_open() is the only caller of
open_namei().  About a dozen places call filp_open (one with the arguments 
swapped).  Most specify their flags themselves, so there's no problem.
sys_open and 32-bit emulation variants in sparc64 / ppc64 take flags from
the user, but these are places where we want to force this flag on anyway.

I'm a little wary of changing the rules so you have to sanitise the
flags from userspace before passing them to filp_open.  But is there
any reason not to do this?

diff -urpNX dontdiff linux-2.5.38/arch/ppc64/kernel/sys_ppc32.c linux-2.5.38-willy/arch/ppc64/kernel/sys_ppc32.c
--- linux-2.5.38/arch/ppc64/kernel/sys_ppc32.c	2002-09-23 10:20:27.000000000 -0400
+++ linux-2.5.38-willy/arch/ppc64/kernel/sys_ppc32.c	2002-09-27 09:48:18.000000000 -0400
@@ -3889,21 +3889,18 @@ long sys32_open(const char * filename, i
 	if (!IS_ERR(tmp)) {
 		fd = get_unused_fd();
 		if (fd >= 0) {
-			struct file * f = filp_open(tmp, flags, mode);
+			struct file * f = filp_open(tmp, flags | O_PUTNAME, mode);
 			error = PTR_ERR(f);
 			if (IS_ERR(f))
 				goto out_error;
 			fd_install(fd, f);
 		}
-out:
-		putname(tmp);
 	}
 	return fd;
 
 out_error:
 	put_unused_fd(fd);
-	fd = error;
-	goto out;
+	return error;
 }
 
 extern asmlinkage long sys_readlink(const char * path, char * buf, int bufsiz);
diff -urpNX dontdiff linux-2.5.38/arch/sparc64/kernel/sys_sparc32.c linux-2.5.38-willy/arch/sparc64/kernel/sys_sparc32.c
--- linux-2.5.38/arch/sparc64/kernel/sys_sparc32.c	2002-09-16 14:46:59.000000000 -0400
+++ linux-2.5.38-willy/arch/sparc64/kernel/sys_sparc32.c	2002-09-27 09:47:23.000000000 -0400
@@ -4061,21 +4061,18 @@ asmlinkage long sparc32_open(const char 
 	if (!IS_ERR(tmp)) {
 		fd = get_unused_fd();
 		if (fd >= 0) {
-			struct file * f = filp_open(tmp, flags, mode);
+			struct file * f = filp_open(tmp, flags | O_PUTNAME, mode);
 			error = PTR_ERR(f);
 			if (IS_ERR(f))
 				goto out_error;
 			fd_install(fd, f);
 		}
-out:
-		putname(tmp);
 	}
 	return fd;
 
 out_error:
 	put_unused_fd(fd);
-	fd = error;
-	goto out;
+	return error;
 }
 
 extern unsigned long do_mremap(unsigned long addr,
diff -urpNX dontdiff linux-2.5.38/fs/namei.c linux-2.5.38-willy/fs/namei.c
--- linux-2.5.38/fs/namei.c	2002-09-23 10:20:32.000000000 -0400
+++ linux-2.5.38-willy/fs/namei.c	2002-09-27 09:45:15.000000000 -0400
@@ -1244,6 +1244,8 @@ int open_namei(const char * pathname, in
 	 */
 	if (!(flag & O_CREAT)) {
 		error = path_lookup(pathname, lookup_flags(flag), nd);
+		if (flag & O_PUTNAME)
+			putname(pathname);
 		if (error)
 			return error;
 		dentry = nd->dentry;
@@ -1254,6 +1256,8 @@ int open_namei(const char * pathname, in
 	 * Create - we need to know the parent.
 	 */
 	error = path_lookup(pathname, LOOKUP_PARENT, nd);
+	if (flag & O_PUTNAME)
+		putname(pathname);
 	if (error)
 		return error;
 
diff -urpNX dontdiff linux-2.5.38/fs/open.c linux-2.5.38-willy/fs/open.c
--- linux-2.5.38/fs/open.c	2002-07-27 14:09:31.000000000 -0400
+++ linux-2.5.38-willy/fs/open.c	2002-09-27 09:42:48.000000000 -0400
@@ -797,21 +797,18 @@ asmlinkage long sys_open(const char * fi
 	if (!IS_ERR(tmp)) {
 		fd = get_unused_fd();
 		if (fd >= 0) {
-			struct file *f = filp_open(tmp, flags, mode);
+			struct file *f = filp_open(tmp, flags | O_PUTNAME, mode);
 			error = PTR_ERR(f);
 			if (IS_ERR(f))
 				goto out_error;
 			fd_install(fd, f);
 		}
-out:
-		putname(tmp);
 	}
 	return fd;
 
 out_error:
 	put_unused_fd(fd);
-	fd = error;
-	goto out;
+	return error;
 }
 
 #ifndef __alpha__
diff -urpNX dontdiff linux-2.5.38/include/linux/fcntl.h linux-2.5.38-willy/include/linux/fcntl.h
--- linux-2.5.38/include/linux/fcntl.h	2002-06-20 18:53:48.000000000 -0400
+++ linux-2.5.38-willy/include/linux/fcntl.h	2002-09-27 09:50:29.000000000 -0400
@@ -7,7 +7,7 @@
 #define F_GETLEASE	(F_LINUX_SPECIFIC_BASE+1)
 
 /*
- * Request nofications on a directory.
+ * Request notifications on a directory.
  * See below for events that may be notified.
  */
 #define F_NOTIFY	(F_LINUX_SPECIFIC_BASE+2)
@@ -45,6 +45,11 @@
 #define IS_SETLK(cmd)	(IS_SETLK32(cmd)  || IS_SETLK64(cmd))
 #define IS_SETLKW(cmd)	(IS_SETLKW32(cmd) || IS_SETLKW64(cmd))
 
+/* Instructs open_namei() to get rid of the name ASAP */
+#ifndef O_PUTNAME
+#define O_PUTNAME 0x80000000
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif

-- 
Revolutions do not require corporate support.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2002-09-27 21:08 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2002-09-27 14:02 simultaneous creates in a directory Matthew Wilcox
2002-09-27 19:44 ` Andrew Morton
2002-09-27 20:44   ` William Lee Irwin III
2002-09-27 21:08     ` Andrew Morton

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.