All of lore.kernel.org
 help / color / mirror / Atom feed
From: Hanna Linder <hannal@us.ibm.com>
To: Anton Blanchard <anton@samba.org>,
	"Martin J. Bligh" <Martin.Bligh@us.ibm.com>
Cc: lse-tech@lists.sourceforge.net, linux-kernel@vger.kernel.org,
	hannal@us.ibm.com
Subject: Re: [Lse-tech] Re: 10.31 second kernel compile
Date: Thu, 14 Mar 2002 10:21:49 -0800	[thread overview]
Message-ID: <13360000.1016130109@w-hlinder.des> (raw)
In-Reply-To: <20020314112725.GA2008@krispykreme>
In-Reply-To: <20020313085217.GA11658@krispykreme> <460695164.1016001894@[10.10.2.3]> <20020314112725.GA2008@krispykreme>


--On Thursday, March 14, 2002 22:27:26 +1100 Anton Blanchard <anton@samba.org> wrote:

>> >    554 .d_lookup                               
>> 
>> Did you try the dcache patches?
> 
> Not for this, I did do some benchmarking of the RCU dcache patches a
> while ago which I should post.
> 

	There are two dcache patches. The one I wrote based on Al Viro's
	suggestion for fast path walking is especially good for NUMA
	systems hit by cache bouncing (d_lookup is the main culprit in 
	the dcache). Martin had some initial results that looked very
	good. 

	Following is the clean 2.5.6 version which is also available at:
	http://sf.net/projects/lse Under the Read Copy Update Section 
	
	Hanna Linder (hannal@us.ibm.com)
	IBM Linux Technology Center

-----------
diff -Nru -X dontdiff linux-2.5.6/fs/dcache.c linux-2.5.6-fw/fs/dcache.c
--- linux-2.5.6/fs/dcache.c	Thu Mar  7 18:18:13 2002
+++ linux-2.5.6-fw/fs/dcache.c	Fri Mar  8 13:50:43 2002
@@ -704,13 +704,22 @@
  
 struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
 {
+	struct dentry * dentry;
+	spin_lock(&dcache_lock);
+	dentry = __d_lookup(parent,name);
+	spin_unlock(&dcache_lock);
+	return dentry;
+}
+
+struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)  
+{
+
 	unsigned int len = name->len;
 	unsigned int hash = name->hash;
 	const unsigned char *str = name->name;
 	struct list_head *head = d_hash(parent,hash);
 	struct list_head *tmp;
 
-	spin_lock(&dcache_lock);
 	tmp = head->next;
 	for (;;) {
 		struct dentry * dentry = list_entry(tmp, struct dentry, d_hash);
@@ -732,10 +741,8 @@
 		}
 		__dget_locked(dentry);
 		dentry->d_vfs_flags |= DCACHE_REFERENCED;
-		spin_unlock(&dcache_lock);
 		return dentry;
 	}
-	spin_unlock(&dcache_lock);
 	return NULL;
 }
 
diff -Nru -X dontdiff linux-2.5.6/fs/namei.c linux-2.5.6-fw/fs/namei.c
--- linux-2.5.6/fs/namei.c	Thu Mar  7 18:18:24 2002
+++ linux-2.5.6-fw/fs/namei.c	Fri Mar  8 13:56:25 2002
@@ -268,8 +268,41 @@
 static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
 {
 	struct dentry * dentry = d_lookup(parent, name);
+	
+	if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
+		if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
+			dput(dentry);
+			dentry = NULL;
+		}
+	}
+	return dentry;
+}
 
+/*for fastwalking*/
+static inline void undo_locked(struct nameidata *nd)
+{
+	if(nd->flags & LOOKUP_LOCKED){
+		dget(nd->dentry);
+		mntget(nd->mnt);
+		spin_unlock(&dcache_lock);
+		nd->flags &= ~LOOKUP_LOCKED;
+	}
+}
+
+/*
+ * For fast path lookup while holding the dcache_lock. 
+ * SMP-safe
+ */
+static struct dentry * cached_lookup_nd(struct nameidata * nd, struct qstr * name, int flags)
+{
+	struct dentry * dentry = NULL;
+	if(!(nd->flags & LOOKUP_LOCKED))
+		return cached_lookup(nd->dentry, name, flags);
+	
+	dentry = __d_lookup(nd->dentry, name);
+	
 	if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
+		undo_locked(nd);
 		if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
 			dput(dentry);
 			dentry = NULL;
@@ -279,6 +312,34 @@
 }
 
 /*
+ * Short-cut version of permission(), for calling by
+ * path_walk(), when dcache lock is held.  Combines parts
+ * of permission() and vfs_permission(), and tests ONLY for
+ * MAY_EXEC permission.
+ *
+ * If appropriate, check DAC only.  If not appropriate, or
+ * short-cut DAC fails, then call permission() to do more
+ * complete permission check.
+ */
+static inline int exec_permission_lite(struct inode *inode)
+{
+	umode_t	mode = inode->i_mode;
+
+	if ((inode->i_op && inode->i_op->permission))
+		return -EACCES;
+
+	if (current->fsuid == inode->i_uid)
+		mode >>= 6;
+	else if (in_group_p(inode->i_gid))
+		mode >>= 3;
+
+	if (mode & MAY_EXEC)
+		return 0;
+
+	return -EACCES;
+}
+
+/*
  * This is called when everything else fails, and we actually have
  * to go to the low-level filesystem to find out what we should do..
  *
@@ -472,7 +533,9 @@
 		struct qstr this;
 		unsigned int c;
 
-		err = permission(inode, MAY_EXEC);
+		err = exec_permission_lite(inode);
+		if(err)
+			err = permission(inode, MAY_EXEC);
 		dentry = ERR_PTR(err);
  		if (err)
 			break;
@@ -507,6 +570,7 @@
 			case 2:	
 				if (this.name[1] != '.')
 					break;
+				undo_locked(nd);
 				follow_dotdot(nd);
 				inode = nd->dentry->d_inode;
 				/* fallthrough */
@@ -523,16 +587,20 @@
 				break;
 		}
 		/* This does the actual lookups.. */
-		dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
+		dentry = cached_lookup_nd(nd, &this, LOOKUP_CONTINUE);
 		if (!dentry) {
+			undo_locked(nd);
 			dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
 			err = PTR_ERR(dentry);
 			if (IS_ERR(dentry))
 				break;
 		}
 		/* Check mountpoints.. */
-		while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
-			;
+		if(d_mountpoint(dentry)){
+			undo_locked(nd);
+			while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
+				;
+		}
 
 		err = -ENOENT;
 		inode = dentry->d_inode;
@@ -543,6 +611,7 @@
 			goto out_dput;
 
 		if (inode->i_op->follow_link) {
+			undo_locked(nd);
 			err = do_follow_link(dentry, nd);
 			dput(dentry);
 			if (err)
@@ -555,7 +624,8 @@
 			if (!inode->i_op)
 				break;
 		} else {
-			dput(nd->dentry);
+			if (!(nd->flags & LOOKUP_LOCKED))
+				dput(nd->dentry);
 			nd->dentry = dentry;
 		}
 		err = -ENOTDIR; 
@@ -575,6 +645,7 @@
 			case 2:	
 				if (this.name[1] != '.')
 					break;
+				undo_locked(nd);
 				follow_dotdot(nd);
 				inode = nd->dentry->d_inode;
 				/* fallthrough */
@@ -586,7 +657,8 @@
 			if (err < 0)
 				break;
 		}
-		dentry = cached_lookup(nd->dentry, &this, 0);
+		dentry = cached_lookup_nd(nd, &this, 0);
+		undo_locked(nd); 
 		if (!dentry) {
 			dentry = real_lookup(nd->dentry, &this, 0);
 			err = PTR_ERR(dentry);
@@ -626,11 +698,14 @@
 		else if (this.len == 2 && this.name[1] == '.')
 			nd->last_type = LAST_DOTDOT;
 return_base:
+		undo_locked(nd);
 		return 0;
 out_dput:
+		undo_locked(nd);
 		dput(dentry);
 		break;
 	}
+	undo_locked(nd);
 	path_release(nd);
 return_err:
 	return err;
@@ -734,6 +809,36 @@
 	nd->dentry = dget(current->fs->pwd);
 	read_unlock(&current->fs->lock);
 	return 1;
+}
+
+int path_lookup(const char *name, unsigned int flags, struct nameidata *nd)
+{
+	nd->last_type = LAST_ROOT; /* if there are only slashes... */
+	nd->flags = flags;
+	if (*name=='/'){
+		read_lock(&current->fs->lock);
+		if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
+			nd->mnt = mntget(current->fs->altrootmnt);
+			nd->dentry = dget(current->fs->altroot);
+			read_unlock(&current->fs->lock);
+			if (__emul_lookup_dentry(name,nd))
+				return 0;
+			read_lock(&current->fs->lock);
+		}
+		spin_lock(&dcache_lock); /*to avoid cacheline bouncing with d_count*/
+		nd->mnt = current->fs->rootmnt;
+		nd->dentry = current->fs->root;
+		read_unlock(&current->fs->lock);
+	}
+	else{
+		read_lock(&current->fs->lock);
+		spin_lock(&dcache_lock);
+		nd->mnt = current->fs->pwdmnt;
+		nd->dentry = current->fs->pwd;
+		read_unlock(&current->fs->lock);
+	}
+	nd->flags |= LOOKUP_LOCKED;
+	return (path_walk(name, nd));
 }
 
 /*
diff -Nru -X dontdiff linux-2.5.6/include/linux/dcache.h linux-2.5.6-fw/include/linux/dcache.h
--- linux-2.5.6/include/linux/dcache.h	Thu Mar  7 18:18:30 2002
+++ linux-2.5.6-fw/include/linux/dcache.h	Fri Mar  8 13:50:43 2002
@@ -220,6 +220,7 @@
 
 /* appendix may either be NULL or be used for transname suffixes */
 extern struct dentry * d_lookup(struct dentry *, struct qstr *);
+extern struct dentry * __d_lookup(struct dentry *, struct qstr *);
 
 /* validate "insecure" dentry pointer */
 extern int d_validate(struct dentry *, struct dentry *);
diff -Nru -X dontdiff linux-2.5.6/include/linux/fs.h linux-2.5.6-fw/include/linux/fs.h
--- linux-2.5.6/include/linux/fs.h	Thu Mar  7 18:18:19 2002
+++ linux-2.5.6-fw/include/linux/fs.h	Fri Mar  8 13:50:43 2002
@@ -1273,12 +1273,15 @@
  *  - require a directory
  *  - ending slashes ok even for nonexistent files
  *  - internal "there are more path compnents" flag
+ *  - locked when lookup done with dcache_lock held
  */
 #define LOOKUP_FOLLOW		(1)
 #define LOOKUP_DIRECTORY	(2)
 #define LOOKUP_CONTINUE		(4)
 #define LOOKUP_PARENT		(16)
 #define LOOKUP_NOALT		(32)
+#define LOOKUP_LOCKED		(64)
+
 /*
  * Type of the last component on LOOKUP_PARENT
  */
@@ -1309,13 +1312,7 @@
 extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
 extern int FASTCALL(path_walk(const char *, struct nameidata *));
 extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
-static inline int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
-{
-	int error = 0;
-	if (path_init(path, flags, nd))
-		error = path_walk(path, nd);
-	return error;
-}
+extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
 extern void path_release(struct nameidata *);
 extern int follow_down(struct vfsmount **, struct dentry **);
 extern int follow_up(struct vfsmount **, struct dentry **);


  parent reply	other threads:[~2002-03-14 18:21 UTC|newest]

Thread overview: 137+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-03-13  8:52 10.31 second kernel compile Anton Blanchard
2002-03-13 14:44 ` Martin J. Bligh
2002-03-13 21:44   ` [Lse-tech] " Dave Hansen
2002-03-14  1:07     ` Keith Owens
2002-03-14 11:27   ` Anton Blanchard
2002-03-14 13:16     ` [Lse-tech] " Dipankar Sarma
2002-03-17 13:12       ` some RCU dcache and ratcache results Anton Blanchard
2002-03-14 13:21     ` [Lse-tech] Re: 10.31 second kernel compile Momchil Velikov
2002-03-14 18:33       ` Daniel Phillips
2002-03-15 12:16         ` Chris Wedgwood
2002-03-16  5:12           ` Anton Blanchard
2002-03-15 18:20         ` Linus Torvalds
2002-03-16 11:55           ` Paul Mackerras
2002-03-16 17:25             ` Rik van Riel
2002-03-16 17:57             ` yodaiken
2002-03-16 18:06             ` Linus Torvalds
2002-03-16 18:35               ` yodaiken
2002-03-16 18:45                 ` Linus Torvalds
2002-03-16 18:57                   ` yodaiken
2002-03-16 19:16                     ` Linus Torvalds
2002-03-16 19:43                       ` David Mosberger
2002-03-16 19:58                         ` Linus Torvalds
2002-03-16 20:08                           ` yodaiken
2002-03-16 20:23                             ` Linus Torvalds
2002-03-16 20:36                           ` David Mosberger
2002-03-16 20:46                             ` Linus Torvalds
2002-03-17  1:09                               ` Paul Mackerras
2002-03-17  2:08                                 ` Linus Torvalds
2002-03-16 19:53                       ` yodaiken
2002-03-16 20:02                         ` Linus Torvalds
2002-03-16 20:25                           ` yodaiken
2002-03-27  1:07                       ` Richard Henderson
2002-03-16 20:53               ` Alan Cox
2002-03-18  3:07             ` David S. Miller
2002-03-16 15:24           ` Daniel Phillips
2002-03-16 19:01             ` Linus Torvalds
2002-03-16 22:25               ` Daniel Phillips
2002-03-19 16:35                 ` Bill Davidsen
2002-03-14 19:05       ` Linus Torvalds
2002-03-19 16:40         ` Bill Davidsen
2002-03-14 18:21     ` Hanna Linder [this message]
2002-03-16  5:27       ` Anton Blanchard
2002-03-15  7:12   ` Chris Wedgwood
2002-03-16  6:15 ` 7.52 " Anton Blanchard
2002-03-16  6:42   ` [Lse-tech] " Gerrit Huizenga
2002-03-17 12:34     ` Anton Blanchard
2002-03-17 22:09       ` Theodore Tso
2002-03-18  7:04         ` Jeff Garzik
2002-03-19 18:28           ` Theodore Tso
2002-03-16  8:05   ` Linus Torvalds
2002-03-16 11:04     ` Paul Mackerras
2002-03-16 18:32       ` Linus Torvalds
2002-03-17  2:00         ` Paul Mackerras
2002-03-17  2:40           ` Linus Torvalds
2002-03-17  2:50             ` M. Edward Borasky
2002-03-18 15:08               ` 0.73 " snpe
2002-03-18 19:42           ` 7.52 " Cort Dougan
2002-03-18 20:04             ` Linus Torvalds
2002-03-18 20:23               ` Linus Torvalds
2002-03-18 21:50                 ` Rene Herman
2002-03-18 22:36                 ` Cort Dougan
2002-03-18 22:47                   ` Linus Torvalds
2002-03-18 22:56                     ` Cort Dougan
2002-03-18 23:52                     ` Paul Mackerras
2002-03-19  0:57                       ` Dave Jones
2002-03-19  3:35                         ` Jeff Garzik
2002-03-19  0:22                     ` David S. Miller
2002-03-19  0:27                       ` Cort Dougan
2002-03-19  0:27                         ` David S. Miller
2002-03-19  0:36                           ` Cort Dougan
2002-03-19  0:38                             ` David S. Miller
2002-03-19  1:28                               ` Davide Libenzi
2002-03-19  2:42                 ` Paul Mackerras
2002-03-27  2:53                 ` Richard Henderson
2002-04-02  4:32                   ` Linus Torvalds
2002-04-02 10:50                 ` Pablo Alcaraz
2002-03-18 21:34               ` Cort Dougan
2002-03-18 22:00                 ` Linus Torvalds
2002-03-18 19:37       ` Cort Dougan
2002-03-16 11:54     ` yodaiken
2002-03-16 17:37   ` [Lse-tech] " Martin J. Bligh
2002-03-16 18:57     ` Daniel Egger
2002-03-17  8:18       ` Mike Galbraith
2002-03-17 15:29         ` Martin J. Bligh
2002-03-17  1:45     ` Keith Owens
2002-03-17 13:54     ` David Woodhouse
2002-03-19 16:49     ` Bill Davidsen
     [not found] <20020316113536.A19495@hq.fsmlabs.com.suse.lists.linux.kernel>
     [not found] ` <Pine.LNX.4.33.0203161037160.31913-100000@penguin.transmeta.com.suse.lists.linux.kernel>
     [not found]   ` <20020316115726.B19495@hq.fsmlabs.com.suse.lists.linux.kernel>
2002-03-16 19:32     ` [Lse-tech] Re: 10.31 " Andi Kleen
2002-03-16 19:57       ` yodaiken
2002-03-16 20:05         ` Andi Kleen
2002-03-16 20:12           ` yodaiken
2002-03-16 20:27             ` Richard Gooch
2002-03-16 20:47               ` yodaiken
2002-03-16 21:05                 ` Richard Gooch
2002-03-16 23:34                   ` yodaiken
2002-03-17 13:48                   ` Rik van Riel
2002-03-16 20:34             ` Linus Torvalds
2002-03-16 21:39               ` yodaiken
2002-03-16 21:49                 ` Linus Torvalds
2002-03-17 14:38                   ` Kai Henningsen
2002-03-17 18:20                     ` Alan Cox
2002-03-16 22:00                 ` Alan Cox
2002-03-16 21:49                   ` Linus Torvalds
2002-03-16 23:10                   ` yodaiken
2002-03-17  1:17                     ` rddunlap
2002-03-17  3:34                     ` Alan Cox
2002-03-17 14:52                     ` Kai Henningsen
2002-03-17 21:00                       ` yodaiken
2002-03-19 12:06                 ` Pavel Machek
2002-03-19 21:12                   ` yodaiken
2002-03-19 22:09                     ` Chris Friesen
2002-03-19 22:15                       ` yodaiken
2002-03-20  4:25                     ` Bill Davidsen
2002-03-17  2:50           ` Chris Wedgwood
2002-03-17  3:43             ` Alan Cox
2002-03-17  4:12               ` Chris Wedgwood
2002-03-17  4:31                 ` Alan Cox
2002-03-16 20:14         ` Linus Torvalds
2002-03-16 20:22           ` Andi Kleen
2002-03-19  4:34             ` Rusty Russell
2002-03-16 20:36           ` Richard Gooch
2002-03-16 20:38             ` Linus Torvalds
2002-03-16 20:51               ` Richard Gooch
2002-03-17 13:23           ` Rik van Riel
2002-03-17 18:16             ` Linus Torvalds
2002-03-17 23:01               ` Davide Libenzi
2002-03-18  0:53                 ` Rik van Riel
2002-03-18  1:13                   ` Davide Libenzi
2002-03-18  1:31                     ` Linus Torvalds
2002-03-18  1:56                       ` Davide Libenzi
2002-03-18  1:40                     ` Mike Fedyk
2002-03-18  1:48                       ` Davide Libenzi
2002-03-24 21:12           ` Rogier Wolff
2002-03-24 21:35             ` Andrew Morton
2002-03-24 22:54               ` Nick Craig-Wood
2002-03-24 23:41                 ` Andi Kleen
2002-03-25  6:40               ` Martin J. Bligh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=13360000.1016130109@w-hlinder.des \
    --to=hannal@us.ibm.com \
    --cc=Martin.Bligh@us.ibm.com \
    --cc=anton@samba.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lse-tech@lists.sourceforge.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.