public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Hanna Linder <hannal@us.ibm.com>
To: Anton Blanchard <anton@samba.org>,
	"Martin J. Bligh" <Martin.Bligh@us.ibm.com>
Cc: lse-tech@lists.sourceforge.net, linux-kernel@vger.kernel.org,
	hannal@us.ibm.com
Subject: Re: [Lse-tech] Re: 10.31 second kernel compile
Date: Thu, 14 Mar 2002 10:21:49 -0800	[thread overview]
Message-ID: <13360000.1016130109@w-hlinder.des> (raw)
In-Reply-To: <20020314112725.GA2008@krispykreme>
In-Reply-To: <20020313085217.GA11658@krispykreme> <460695164.1016001894@[10.10.2.3]> <20020314112725.GA2008@krispykreme>


--On Thursday, March 14, 2002 22:27:26 +1100 Anton Blanchard <anton@samba.org> wrote:

>> >    554 .d_lookup                               
>> 
>> Did you try the dcache patches?
> 
> Not for this, I did do some benchmarking of the RCU dcache patches a
> while ago which I should post.
> 

	There are two dcache patches. The one I wrote based on Al Viro's
	suggestion for fast path walking is especially good for NUMA
	systems hit by cache bouncing (d_lookup is the main culprit in 
	the dcache). Martin had some initial results that looked very
	good. 

	Following is the clean 2.5.6 version which is also available at:
	http://sf.net/projects/lse Under the Read Copy Update Section 
	
	Hanna Linder (hannal@us.ibm.com)
	IBM Linux Technology Center

-----------
diff -Nru -X dontdiff linux-2.5.6/fs/dcache.c linux-2.5.6-fw/fs/dcache.c
--- linux-2.5.6/fs/dcache.c	Thu Mar  7 18:18:13 2002
+++ linux-2.5.6-fw/fs/dcache.c	Fri Mar  8 13:50:43 2002
@@ -704,13 +704,22 @@
  
 struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
 {
+	struct dentry * dentry;
+	spin_lock(&dcache_lock);
+	dentry = __d_lookup(parent,name);
+	spin_unlock(&dcache_lock);
+	return dentry;
+}
+
+struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)  
+{
+
 	unsigned int len = name->len;
 	unsigned int hash = name->hash;
 	const unsigned char *str = name->name;
 	struct list_head *head = d_hash(parent,hash);
 	struct list_head *tmp;
 
-	spin_lock(&dcache_lock);
 	tmp = head->next;
 	for (;;) {
 		struct dentry * dentry = list_entry(tmp, struct dentry, d_hash);
@@ -732,10 +741,8 @@
 		}
 		__dget_locked(dentry);
 		dentry->d_vfs_flags |= DCACHE_REFERENCED;
-		spin_unlock(&dcache_lock);
 		return dentry;
 	}
-	spin_unlock(&dcache_lock);
 	return NULL;
 }
 
diff -Nru -X dontdiff linux-2.5.6/fs/namei.c linux-2.5.6-fw/fs/namei.c
--- linux-2.5.6/fs/namei.c	Thu Mar  7 18:18:24 2002
+++ linux-2.5.6-fw/fs/namei.c	Fri Mar  8 13:56:25 2002
@@ -268,8 +268,41 @@
 static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
 {
 	struct dentry * dentry = d_lookup(parent, name);
+	
+	if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
+		if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
+			dput(dentry);
+			dentry = NULL;
+		}
+	}
+	return dentry;
+}
 
+/*for fastwalking*/
+static inline void undo_locked(struct nameidata *nd)
+{
+	if(nd->flags & LOOKUP_LOCKED){
+		dget(nd->dentry);
+		mntget(nd->mnt);
+		spin_unlock(&dcache_lock);
+		nd->flags &= ~LOOKUP_LOCKED;
+	}
+}
+
+/*
+ * For fast path lookup while holding the dcache_lock. 
+ * SMP-safe
+ */
+static struct dentry * cached_lookup_nd(struct nameidata * nd, struct qstr * name, int flags)
+{
+	struct dentry * dentry = NULL;
+	if(!(nd->flags & LOOKUP_LOCKED))
+		return cached_lookup(nd->dentry, name, flags);
+	
+	dentry = __d_lookup(nd->dentry, name);
+	
 	if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
+		undo_locked(nd);
 		if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
 			dput(dentry);
 			dentry = NULL;
@@ -279,6 +312,34 @@
 }
 
 /*
+ * Short-cut version of permission(), for calling by
+ * path_walk(), when dcache lock is held.  Combines parts
+ * of permission() and vfs_permission(), and tests ONLY for
+ * MAY_EXEC permission.
+ *
+ * If appropriate, check DAC only.  If not appropriate, or
+ * short-cut DAC fails, then call permission() to do more
+ * complete permission check.
+ */
+static inline int exec_permission_lite(struct inode *inode)
+{
+	umode_t	mode = inode->i_mode;
+
+	if ((inode->i_op && inode->i_op->permission))
+		return -EACCES;
+
+	if (current->fsuid == inode->i_uid)
+		mode >>= 6;
+	else if (in_group_p(inode->i_gid))
+		mode >>= 3;
+
+	if (mode & MAY_EXEC)
+		return 0;
+
+	return -EACCES;
+}
+
+/*
  * This is called when everything else fails, and we actually have
  * to go to the low-level filesystem to find out what we should do..
  *
@@ -472,7 +533,9 @@
 		struct qstr this;
 		unsigned int c;
 
-		err = permission(inode, MAY_EXEC);
+		err = exec_permission_lite(inode);
+		if(err)
+			err = permission(inode, MAY_EXEC);
 		dentry = ERR_PTR(err);
  		if (err)
 			break;
@@ -507,6 +570,7 @@
 			case 2:	
 				if (this.name[1] != '.')
 					break;
+				undo_locked(nd);
 				follow_dotdot(nd);
 				inode = nd->dentry->d_inode;
 				/* fallthrough */
@@ -523,16 +587,20 @@
 				break;
 		}
 		/* This does the actual lookups.. */
-		dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
+		dentry = cached_lookup_nd(nd, &this, LOOKUP_CONTINUE);
 		if (!dentry) {
+			undo_locked(nd);
 			dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
 			err = PTR_ERR(dentry);
 			if (IS_ERR(dentry))
 				break;
 		}
 		/* Check mountpoints.. */
-		while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
-			;
+		if(d_mountpoint(dentry)){
+			undo_locked(nd);
+			while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
+				;
+		}
 
 		err = -ENOENT;
 		inode = dentry->d_inode;
@@ -543,6 +611,7 @@
 			goto out_dput;
 
 		if (inode->i_op->follow_link) {
+			undo_locked(nd);
 			err = do_follow_link(dentry, nd);
 			dput(dentry);
 			if (err)
@@ -555,7 +624,8 @@
 			if (!inode->i_op)
 				break;
 		} else {
-			dput(nd->dentry);
+			if (!(nd->flags & LOOKUP_LOCKED))
+				dput(nd->dentry);
 			nd->dentry = dentry;
 		}
 		err = -ENOTDIR; 
@@ -575,6 +645,7 @@
 			case 2:	
 				if (this.name[1] != '.')
 					break;
+				undo_locked(nd);
 				follow_dotdot(nd);
 				inode = nd->dentry->d_inode;
 				/* fallthrough */
@@ -586,7 +657,8 @@
 			if (err < 0)
 				break;
 		}
-		dentry = cached_lookup(nd->dentry, &this, 0);
+		dentry = cached_lookup_nd(nd, &this, 0);
+		undo_locked(nd); 
 		if (!dentry) {
 			dentry = real_lookup(nd->dentry, &this, 0);
 			err = PTR_ERR(dentry);
@@ -626,11 +698,14 @@
 		else if (this.len == 2 && this.name[1] == '.')
 			nd->last_type = LAST_DOTDOT;
 return_base:
+		undo_locked(nd);
 		return 0;
 out_dput:
+		undo_locked(nd);
 		dput(dentry);
 		break;
 	}
+	undo_locked(nd);
 	path_release(nd);
 return_err:
 	return err;
@@ -734,6 +809,36 @@
 	nd->dentry = dget(current->fs->pwd);
 	read_unlock(&current->fs->lock);
 	return 1;
+}
+
+int path_lookup(const char *name, unsigned int flags, struct nameidata *nd)
+{
+	nd->last_type = LAST_ROOT; /* if there are only slashes... */
+	nd->flags = flags;
+	if (*name=='/'){
+		read_lock(&current->fs->lock);
+		if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
+			nd->mnt = mntget(current->fs->altrootmnt);
+			nd->dentry = dget(current->fs->altroot);
+			read_unlock(&current->fs->lock);
+			if (__emul_lookup_dentry(name,nd))
+				return 0;
+			read_lock(&current->fs->lock);
+		}
+		spin_lock(&dcache_lock); /*to avoid cacheline bouncing with d_count*/
+		nd->mnt = current->fs->rootmnt;
+		nd->dentry = current->fs->root;
+		read_unlock(&current->fs->lock);
+	}
+	else{
+		read_lock(&current->fs->lock);
+		spin_lock(&dcache_lock);
+		nd->mnt = current->fs->pwdmnt;
+		nd->dentry = current->fs->pwd;
+		read_unlock(&current->fs->lock);
+	}
+	nd->flags |= LOOKUP_LOCKED;
+	return (path_walk(name, nd));
 }
 
 /*
diff -Nru -X dontdiff linux-2.5.6/include/linux/dcache.h linux-2.5.6-fw/include/linux/dcache.h
--- linux-2.5.6/include/linux/dcache.h	Thu Mar  7 18:18:30 2002
+++ linux-2.5.6-fw/include/linux/dcache.h	Fri Mar  8 13:50:43 2002
@@ -220,6 +220,7 @@
 
 /* appendix may either be NULL or be used for transname suffixes */
 extern struct dentry * d_lookup(struct dentry *, struct qstr *);
+extern struct dentry * __d_lookup(struct dentry *, struct qstr *);
 
 /* validate "insecure" dentry pointer */
 extern int d_validate(struct dentry *, struct dentry *);
diff -Nru -X dontdiff linux-2.5.6/include/linux/fs.h linux-2.5.6-fw/include/linux/fs.h
--- linux-2.5.6/include/linux/fs.h	Thu Mar  7 18:18:19 2002
+++ linux-2.5.6-fw/include/linux/fs.h	Fri Mar  8 13:50:43 2002
@@ -1273,12 +1273,15 @@
  *  - require a directory
  *  - ending slashes ok even for nonexistent files
  *  - internal "there are more path compnents" flag
+ *  - locked when lookup done with dcache_lock held
  */
 #define LOOKUP_FOLLOW		(1)
 #define LOOKUP_DIRECTORY	(2)
 #define LOOKUP_CONTINUE		(4)
 #define LOOKUP_PARENT		(16)
 #define LOOKUP_NOALT		(32)
+#define LOOKUP_LOCKED		(64)
+
 /*
  * Type of the last component on LOOKUP_PARENT
  */
@@ -1309,13 +1312,7 @@
 extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
 extern int FASTCALL(path_walk(const char *, struct nameidata *));
 extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
-static inline int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
-{
-	int error = 0;
-	if (path_init(path, flags, nd))
-		error = path_walk(path, nd);
-	return error;
-}
+extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
 extern void path_release(struct nameidata *);
 extern int follow_down(struct vfsmount **, struct dentry **);
 extern int follow_up(struct vfsmount **, struct dentry **);


  parent reply	other threads:[~2002-03-14 18:21 UTC|newest]

Thread overview: 137+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-03-13  8:52 10.31 second kernel compile Anton Blanchard
2002-03-13 14:44 ` Martin J. Bligh
2002-03-13 21:44   ` [Lse-tech] " Dave Hansen
2002-03-14  1:07     ` Keith Owens
2002-03-14 11:27   ` Anton Blanchard
2002-03-14 13:16     ` [Lse-tech] " Dipankar Sarma
2002-03-17 13:12       ` some RCU dcache and ratcache results Anton Blanchard
2002-03-14 13:21     ` [Lse-tech] Re: 10.31 second kernel compile Momchil Velikov
2002-03-14 18:33       ` Daniel Phillips
2002-03-15 12:16         ` Chris Wedgwood
2002-03-16  5:12           ` Anton Blanchard
2002-03-15 18:20         ` Linus Torvalds
2002-03-16 11:55           ` Paul Mackerras
2002-03-16 17:25             ` Rik van Riel
2002-03-16 17:57             ` yodaiken
2002-03-16 18:06             ` Linus Torvalds
2002-03-16 18:35               ` yodaiken
2002-03-16 18:45                 ` Linus Torvalds
2002-03-16 18:57                   ` yodaiken
2002-03-16 19:16                     ` Linus Torvalds
2002-03-16 19:43                       ` David Mosberger
2002-03-16 19:58                         ` Linus Torvalds
2002-03-16 20:08                           ` yodaiken
2002-03-16 20:23                             ` Linus Torvalds
2002-03-16 20:36                           ` David Mosberger
2002-03-16 20:46                             ` Linus Torvalds
2002-03-17  1:09                               ` Paul Mackerras
2002-03-17  2:08                                 ` Linus Torvalds
2002-03-16 19:53                       ` yodaiken
2002-03-16 20:02                         ` Linus Torvalds
2002-03-16 20:25                           ` yodaiken
2002-03-27  1:07                       ` Richard Henderson
2002-03-16 20:53               ` Alan Cox
2002-03-18  3:07             ` David S. Miller
2002-03-16 15:24           ` Daniel Phillips
2002-03-16 19:01             ` Linus Torvalds
2002-03-16 22:25               ` Daniel Phillips
2002-03-19 16:35                 ` Bill Davidsen
2002-03-14 19:05       ` Linus Torvalds
2002-03-19 16:40         ` Bill Davidsen
2002-03-14 18:21     ` Hanna Linder [this message]
2002-03-16  5:27       ` Anton Blanchard
2002-03-15  7:12   ` Chris Wedgwood
2002-03-16  6:15 ` 7.52 " Anton Blanchard
2002-03-16  6:42   ` [Lse-tech] " Gerrit Huizenga
2002-03-17 12:34     ` Anton Blanchard
2002-03-17 22:09       ` Theodore Tso
2002-03-18  7:04         ` Jeff Garzik
2002-03-19 18:28           ` Theodore Tso
2002-03-16  8:05   ` Linus Torvalds
2002-03-16 11:04     ` Paul Mackerras
2002-03-16 18:32       ` Linus Torvalds
2002-03-17  2:00         ` Paul Mackerras
2002-03-17  2:40           ` Linus Torvalds
2002-03-17  2:50             ` M. Edward Borasky
2002-03-18 15:08               ` 0.73 " snpe
2002-03-18 19:42           ` 7.52 " Cort Dougan
2002-03-18 20:04             ` Linus Torvalds
2002-03-18 20:23               ` Linus Torvalds
2002-03-18 21:50                 ` Rene Herman
2002-03-18 22:36                 ` Cort Dougan
2002-03-18 22:47                   ` Linus Torvalds
2002-03-18 22:56                     ` Cort Dougan
2002-03-18 23:52                     ` Paul Mackerras
2002-03-19  0:57                       ` Dave Jones
2002-03-19  3:35                         ` Jeff Garzik
2002-03-19  0:22                     ` David S. Miller
2002-03-19  0:27                       ` Cort Dougan
2002-03-19  0:27                         ` David S. Miller
2002-03-19  0:36                           ` Cort Dougan
2002-03-19  0:38                             ` David S. Miller
2002-03-19  1:28                               ` Davide Libenzi
2002-03-19  2:42                 ` Paul Mackerras
2002-03-27  2:53                 ` Richard Henderson
2002-04-02  4:32                   ` Linus Torvalds
2002-04-02 10:50                 ` Pablo Alcaraz
2002-03-18 21:34               ` Cort Dougan
2002-03-18 22:00                 ` Linus Torvalds
2002-03-18 19:37       ` Cort Dougan
2002-03-16 11:54     ` yodaiken
2002-03-16 17:37   ` [Lse-tech] " Martin J. Bligh
2002-03-16 18:57     ` Daniel Egger
2002-03-17  8:18       ` Mike Galbraith
2002-03-17 15:29         ` Martin J. Bligh
2002-03-17  1:45     ` Keith Owens
2002-03-17 13:54     ` David Woodhouse
2002-03-19 16:49     ` Bill Davidsen
     [not found] <20020316113536.A19495@hq.fsmlabs.com.suse.lists.linux.kernel>
     [not found] ` <Pine.LNX.4.33.0203161037160.31913-100000@penguin.transmeta.com.suse.lists.linux.kernel>
     [not found]   ` <20020316115726.B19495@hq.fsmlabs.com.suse.lists.linux.kernel>
2002-03-16 19:32     ` [Lse-tech] Re: 10.31 " Andi Kleen
2002-03-16 19:57       ` yodaiken
2002-03-16 20:05         ` Andi Kleen
2002-03-16 20:12           ` yodaiken
2002-03-16 20:27             ` Richard Gooch
2002-03-16 20:47               ` yodaiken
2002-03-16 21:05                 ` Richard Gooch
2002-03-16 23:34                   ` yodaiken
2002-03-17 13:48                   ` Rik van Riel
2002-03-16 20:34             ` Linus Torvalds
2002-03-16 21:39               ` yodaiken
2002-03-16 21:49                 ` Linus Torvalds
2002-03-17 14:38                   ` Kai Henningsen
2002-03-17 18:20                     ` Alan Cox
2002-03-16 22:00                 ` Alan Cox
2002-03-16 21:49                   ` Linus Torvalds
2002-03-16 23:10                   ` yodaiken
2002-03-17  1:17                     ` rddunlap
2002-03-17  3:34                     ` Alan Cox
2002-03-17 14:52                     ` Kai Henningsen
2002-03-17 21:00                       ` yodaiken
2002-03-19 12:06                 ` Pavel Machek
2002-03-19 21:12                   ` yodaiken
2002-03-19 22:09                     ` Chris Friesen
2002-03-19 22:15                       ` yodaiken
2002-03-20  4:25                     ` Bill Davidsen
2002-03-17  2:50           ` Chris Wedgwood
2002-03-17  3:43             ` Alan Cox
2002-03-17  4:12               ` Chris Wedgwood
2002-03-17  4:31                 ` Alan Cox
2002-03-16 20:14         ` Linus Torvalds
2002-03-16 20:22           ` Andi Kleen
2002-03-19  4:34             ` Rusty Russell
2002-03-16 20:36           ` Richard Gooch
2002-03-16 20:38             ` Linus Torvalds
2002-03-16 20:51               ` Richard Gooch
2002-03-17 13:23           ` Rik van Riel
2002-03-17 18:16             ` Linus Torvalds
2002-03-17 23:01               ` Davide Libenzi
2002-03-18  0:53                 ` Rik van Riel
2002-03-18  1:13                   ` Davide Libenzi
2002-03-18  1:31                     ` Linus Torvalds
2002-03-18  1:56                       ` Davide Libenzi
2002-03-18  1:40                     ` Mike Fedyk
2002-03-18  1:48                       ` Davide Libenzi
2002-03-24 21:12           ` Rogier Wolff
2002-03-24 21:35             ` Andrew Morton
2002-03-24 22:54               ` Nick Craig-Wood
2002-03-24 23:41                 ` Andi Kleen
2002-03-25  6:40               ` Martin J. Bligh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=13360000.1016130109@w-hlinder.des \
    --to=hannal@us.ibm.com \
    --cc=Martin.Bligh@us.ibm.com \
    --cc=anton@samba.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lse-tech@lists.sourceforge.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox