public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [RFC] dcache scalability patch (2.4.17)
@ 2002-07-12 14:09 Maneesh Soni
  2002-07-12 14:13 ` [Lse-tech] " Christoph Hellwig
  2002-07-12 14:29 ` Alexander Viro
  0 siblings, 2 replies; 18+ messages in thread
From: Maneesh Soni @ 2002-07-12 14:09 UTC (permalink / raw)
  To: LKML; +Cc: lse-tech, Al Viro

Here is the dcache scalability patch (cleaned up) as disscussed in 
the previous post to lkml by Dipankar. The patch uses RCU for doing fast
dcache lookup. It also does lazy updates to lru list of dentries to
avoid doing write operations while doing lookup.

Following changes were done in this version

o Removed the d_next_hash hack. Dentries are unhashed using list_del 
  instead of list_del_init, and that too using d_drop interface  
o Used d_drop_locked and d_unhashed instead of directly manipulating hash list 
  using list macros 
o Changed DCACHE_UNLINKED to DCACHE_UNHASHED 

Regards,
Maneesh

diff -urN linux-2.4.17-base/fs/autofs4/root.c linux-2.4.17-dc8/fs/autofs4/root.c
--- linux-2.4.17-base/fs/autofs4/root.c	Tue Oct 24 10:27:38 2000
+++ linux-2.4.17-dc8/fs/autofs4/root.c	Fri Jul 12 10:59:38 2002
@@ -403,7 +403,7 @@
 		spin_unlock(&dcache_lock);
 		return -ENOTEMPTY;
 	}
-	list_del_init(&dentry->d_hash);
+	d_drop_locked(dentry);
 	spin_unlock(&dcache_lock);
 
 	dput(ino->dentry);
diff -urN linux-2.4.17-base/fs/dcache.c linux-2.4.17-dc8/fs/dcache.c
--- linux-2.4.17-base/fs/dcache.c	Fri Dec 21 23:11:55 2001
+++ linux-2.4.17-dc8/fs/dcache.c	Fri Jul 12 16:18:39 2002
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 
 #include <asm/uaccess.h>
+#include <linux/rcupdate.h>
 
 #define DCACHE_PARANOIA 1
 /* #define DCACHE_DEBUG 1 */
@@ -55,14 +56,21 @@
 /* Statistics gathering. */
 struct dentry_stat_t dentry_stat = {0, 0, 45, 0,};
 
+static void d_callback(void *arg)
+{
+	struct dentry * dentry = (struct dentry *)arg;
+
+	if (dname_external(dentry)) 
+		kfree((void *) dentry->d_name.name);
+	kmem_cache_free(dentry_cache, dentry); 
+}
+
 /* no dcache_lock, please */
 static inline void d_free(struct dentry *dentry)
 {
 	if (dentry->d_op && dentry->d_op->d_release)
 		dentry->d_op->d_release(dentry);
-	if (dname_external(dentry)) 
-		kfree(dentry->d_name.name);
-	kmem_cache_free(dentry_cache, dentry); 
+	call_rcu(&dentry->d_rcu, d_callback, dentry);
 	dentry_stat.nr_dentry--;
 }
 
@@ -124,9 +132,13 @@
 	if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
 		return;
 
-	/* dput on a free dentry? */
-	if (!list_empty(&dentry->d_lru))
-		BUG();
+	spin_lock(&dentry->d_lock);
+        if (atomic_read(&dentry->d_count)) {
+                spin_unlock(&dentry->d_lock);
+                spin_unlock(&dcache_lock);
+                return;
+        }
+
 	/*
 	 * AV: ->d_delete() is _NOT_ allowed to block now.
 	 */
@@ -135,18 +147,28 @@
 			goto unhash_it;
 	}
 	/* Unreachable? Get rid of it */
-	if (list_empty(&dentry->d_hash))
+	if (d_unhashed(dentry))
 		goto kill_it;
-	list_add(&dentry->d_lru, &dentry_unused);
-	dentry_stat.nr_unused++;
+
+	if (list_empty(&dentry->d_lru)) {
+		dentry->d_vfs_flags &= ~DCACHE_REFERENCED;
+		list_add(&dentry->d_lru, &dentry_unused);
+		dentry_stat.nr_unused++;
+	}
+	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
 	return;
 
 unhash_it:
-	list_del_init(&dentry->d_hash);
+	__d_drop(dentry);
 
 kill_it: {
 		struct dentry *parent;
+		spin_unlock(&dentry->d_lock);
+		if (!list_empty(&dentry->d_lru)) {
+			list_del(&dentry->d_lru);
+			dentry_stat.nr_unused--;
+		}
 		list_del(&dentry->d_child);
 		/* drops the lock, at that point nobody can reach this dentry */
 		dentry_iput(dentry);
@@ -177,7 +199,7 @@
 	 * If it's already been dropped, return OK.
 	 */
 	spin_lock(&dcache_lock);
-	if (list_empty(&dentry->d_hash)) {
+	if (d_unhashed(dentry)) {
 		spin_unlock(&dcache_lock);
 		return 0;
 	}
@@ -201,15 +223,18 @@
 	 * we might still populate it if it was a
 	 * working directory or similar).
 	 */
+	spin_lock(&dentry->d_lock);
 	if (atomic_read(&dentry->d_count) > 1) {
 		if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
+			spin_unlock(&dentry->d_lock);
 			spin_unlock(&dcache_lock);
 			return -EBUSY;
 		}
 	}
-
-	list_del_init(&dentry->d_hash);
+	__d_drop(dentry);
+	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
+
 	return 0;
 }
 
@@ -217,11 +242,14 @@
 
 static inline struct dentry * __dget_locked(struct dentry *dentry)
 {
+	spin_lock(&dentry->d_lock);
 	atomic_inc(&dentry->d_count);
+	dentry->d_vfs_flags |= DCACHE_REFERENCED;
 	if (atomic_read(&dentry->d_count) == 1) {
 		dentry_stat.nr_unused--;
 		list_del_init(&dentry->d_lru);
 	}
+	spin_unlock(&dentry->d_lock);
 	return dentry;
 }
 
@@ -252,8 +280,8 @@
 		tmp = next;
 		next = tmp->next;
 		alias = list_entry(tmp, struct dentry, d_alias);
-		if (!list_empty(&alias->d_hash)) {
-			__dget_locked(alias);
+		if (!d_unhashed(alias)) {
+			dget(alias);
 			spin_unlock(&dcache_lock);
 			return alias;
 		}
@@ -263,7 +291,7 @@
 }
 
 /*
- *	Try to kill dentries associated with this inode.
+ *     Try to kill dentries associated with this inode.
  * WARNING: you must own a reference to inode.
  */
 void d_prune_aliases(struct inode *inode)
@@ -274,13 +302,16 @@
 	tmp = head;
 	while ((tmp = tmp->next) != head) {
 		struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
+		spin_lock(&dentry->d_lock);
 		if (!atomic_read(&dentry->d_count)) {
-			__dget_locked(dentry);
+			__dget(dentry);
+			__d_drop(dentry);
+			spin_unlock(&dentry->d_lock);
 			spin_unlock(&dcache_lock);
-			d_drop(dentry);
 			dput(dentry);
 			goto restart;
 		}
+		spin_unlock(&dentry->d_lock);
 	}
 	spin_unlock(&dcache_lock);
 }
@@ -295,7 +326,8 @@
 {
 	struct dentry * parent;
 
-	list_del_init(&dentry->d_hash);
+	__d_drop(dentry);
+	spin_unlock(&dentry->d_lock);
 	list_del(&dentry->d_child);
 	dentry_iput(dentry);
 	parent = dentry->d_parent;
@@ -330,19 +362,20 @@
 		if (tmp == &dentry_unused)
 			break;
 		list_del_init(tmp);
+		dentry_stat.nr_unused--;
 		dentry = list_entry(tmp, struct dentry, d_lru);
 
+		spin_lock(&dentry->d_lock);
 		/* If the dentry was recently referenced, don't free it. */
 		if (dentry->d_vfs_flags & DCACHE_REFERENCED) {
 			dentry->d_vfs_flags &= ~DCACHE_REFERENCED;
-			list_add(&dentry->d_lru, &dentry_unused);
+			if (!atomic_read(&dentry->d_count)) {
+				list_add(&dentry->d_lru, &dentry_unused);
+				dentry_stat.nr_unused++;
+			}
+			spin_unlock(&dentry->d_lock);
 			continue;
 		}
-		dentry_stat.nr_unused--;
-
-		/* Unused dentry with a count? */
-		if (atomic_read(&dentry->d_count))
-			BUG();
 
 		prune_one_dentry(dentry);
 		if (!--count)
@@ -405,10 +438,13 @@
 		dentry = list_entry(tmp, struct dentry, d_lru);
 		if (dentry->d_sb != sb)
 			continue;
-		if (atomic_read(&dentry->d_count))
-			continue;
+		spin_lock(&dentry->d_lock);
 		dentry_stat.nr_unused--;
 		list_del_init(tmp);
+		if (atomic_read(&dentry->d_count)) {
+			spin_unlock(&dentry->d_lock);
+			continue;
+		}
 		prune_one_dentry(dentry);
 		goto repeat;
 	}
@@ -488,11 +524,13 @@
 		struct list_head *tmp = next;
 		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
 		next = tmp->next;
+		list_del_init(&dentry->d_lru);
+		spin_lock(&dentry->d_lock);
 		if (!atomic_read(&dentry->d_count)) {
-			list_del(&dentry->d_lru);
 			list_add(&dentry->d_lru, dentry_unused.prev);
 			found++;
 		}
+		spin_unlock(&dentry->d_lock);
 		/*
 		 * Descend a level if the d_subdirs list is non-empty.
 		 */
@@ -606,8 +644,9 @@
 	str[name->len] = 0;
 
 	atomic_set(&dentry->d_count, 1);
-	dentry->d_vfs_flags = 0;
+	dentry->d_vfs_flags = DCACHE_UNHASHED;
 	dentry->d_flags = 0;
+	dentry->d_lock = SPIN_LOCK_UNLOCKED;
 	dentry->d_inode = NULL;
 	dentry->d_parent = NULL;
 	dentry->d_sb = NULL;
@@ -708,8 +747,9 @@
 	const unsigned char *str = name->name;
 	struct list_head *head = d_hash(parent,hash);
 	struct list_head *tmp;
+	struct dentry * found = NULL;
 
-	spin_lock(&dcache_lock);
+	/* rcu_read_lock(); for pre-emptible kernel */
 	tmp = head->next;
 	for (;;) {
 		struct dentry * dentry = list_entry(tmp, struct dentry, d_hash);
@@ -729,13 +769,16 @@
 			if (memcmp(dentry->d_name.name, str, len))
 				continue;
 		}
-		__dget_locked(dentry);
-		dentry->d_vfs_flags |= DCACHE_REFERENCED;
-		spin_unlock(&dcache_lock);
-		return dentry;
+		spin_lock(&dentry->d_lock);
+		if (!(dentry->d_vfs_flags & DCACHE_UNHASHED)) {
+			found = __dget(dentry);
+		}
+		spin_unlock(&dentry->d_lock);
+		/* rcu_read_unlock(); for pre-emptible kernel */
+		return found;
 	}
-	spin_unlock(&dcache_lock);
-	return NULL;
+	/* rcu_read_unlock(); for pre-emptible kernel */
+	return found;
 }
 
 /**
@@ -774,7 +817,7 @@
 	lhp = base = d_hash(dparent, dentry->d_name.hash);
 	while ((lhp = lhp->next) != base) {
 		if (dentry == list_entry(lhp, struct dentry, d_hash)) {
-			__dget_locked(dentry);
+			dget(dentry);
 			spin_unlock(&dcache_lock);
 			return 1;
 		}
@@ -834,9 +877,12 @@
 void d_rehash(struct dentry * entry)
 {
 	struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
-	if (!list_empty(&entry->d_hash)) BUG();
 	spin_lock(&dcache_lock);
+	spin_lock(&entry->d_lock);
+	if (!list_empty(&entry->d_hash) && !d_unhashed(entry)) BUG();
 	list_add(&entry->d_hash, list);
+	entry->d_vfs_flags &= ~DCACHE_UNHASHED;
+	spin_unlock(&entry->d_lock);
 	spin_unlock(&dcache_lock);
 }
 
@@ -909,7 +955,7 @@
 	list_add(&dentry->d_hash, &target->d_hash);
 
 	/* Unhash the target: dput() will then get rid of it */
-	list_del_init(&target->d_hash);
+	d_drop_locked(target);
 
 	list_del(&dentry->d_child);
 	list_del(&target->d_child);
@@ -951,7 +997,7 @@
 
 	*--end = '\0';
 	buflen--;
-	if (!IS_ROOT(dentry) && list_empty(&dentry->d_hash)) {
+	if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
 		buflen -= 10;
 		end -= 10;
 		memcpy(end, " (deleted)", 10);
@@ -1034,7 +1080,7 @@
 	error = -ENOENT;
 	/* Has the current directory has been unlinked? */
 	spin_lock(&dcache_lock);
-	if (pwd->d_parent == pwd || !list_empty(&pwd->d_hash)) {
+	if (pwd->d_parent == pwd || !d_unhashed(pwd)) {
 		unsigned long len;
 		char * cwd;
 
diff -urN linux-2.4.17-base/fs/intermezzo/journal.c linux-2.4.17-dc8/fs/intermezzo/journal.c
--- linux-2.4.17-base/fs/intermezzo/journal.c	Fri Dec 21 23:11:55 2001
+++ linux-2.4.17-dc8/fs/intermezzo/journal.c	Mon Jul  8 16:18:43 2002
@@ -186,7 +186,7 @@
 
         *--end = '\0';
         buflen--;
-        if (dentry->d_parent != dentry && list_empty(&dentry->d_hash)) {
+        if (dentry->d_parent != dentry && d_unhashed(dentry)) {
                 buflen -= 10;
                 end -= 10;
                 memcpy(end, " (deleted)", 10);
diff -urN linux-2.4.17-base/fs/nfsd/nfsfh.c linux-2.4.17-dc8/fs/nfsd/nfsfh.c
--- linux-2.4.17-base/fs/nfsd/nfsfh.c	Thu Oct  4 11:29:22 2001
+++ linux-2.4.17-dc8/fs/nfsd/nfsfh.c	Mon Jul  8 16:18:43 2002
@@ -348,7 +348,7 @@
 	spin_lock(&dcache_lock);
 	list_for_each(lp, &child->d_inode->i_dentry) {
 		struct dentry *tmp = list_entry(lp,struct dentry, d_alias);
-		if (!list_empty(&tmp->d_hash) &&
+		if (!d_unhashed(tmp) &&
 		    tmp->d_parent == parent) {
 			child = dget_locked(tmp);
 			spin_unlock(&dcache_lock);
diff -urN linux-2.4.17-base/fs/readdir.c linux-2.4.17-dc8/fs/readdir.c
--- linux-2.4.17-base/fs/readdir.c	Mon Aug 13 03:29:08 2001
+++ linux-2.4.17-dc8/fs/readdir.c	Mon Jul  8 16:18:43 2002
@@ -79,7 +79,7 @@
 			while(1) {
 				struct dentry *de = list_entry(list, struct dentry, d_child);
 
-				if (!list_empty(&de->d_hash) && de->d_inode) {
+  				if (!d_unhashed(de) && de->d_inode) {
 					spin_unlock(&dcache_lock);
 					if (filldir(dirent, de->d_name.name, de->d_name.len, filp->f_pos, de->d_inode->i_ino, DT_UNKNOWN) < 0)
 						break;
diff -urN linux-2.4.17-base/include/linux/dcache.h linux-2.4.17-dc8/include/linux/dcache.h
--- linux-2.4.17-base/include/linux/dcache.h	Fri Nov 23 01:16:18 2001
+++ linux-2.4.17-dc8/include/linux/dcache.h	Fri Jul 12 10:59:09 2002
@@ -5,6 +5,8 @@
 
 #include <asm/atomic.h>
 #include <linux/mount.h>
+#include <linux/rcupdate.h>
+#include <asm/system.h>
 
 /*
  * linux/include/linux/dcache.h
@@ -65,11 +67,13 @@
 
 struct dentry {
 	atomic_t d_count;
+	spinlock_t d_lock;		/* lock for d_count & d_vfs_flags */
+	unsigned long d_vfs_flags;	/* moved here to be on same cacheline */
 	unsigned int d_flags;
 	struct inode  * d_inode;	/* Where the name belongs to - NULL is negative */
 	struct dentry * d_parent;	/* parent directory */
 	struct list_head d_hash;	/* lookup hash list */
-	struct list_head d_lru;		/* d_count = 0 LRU list */
+	struct list_head d_lru;		/* LRU list */
 	struct list_head d_child;	/* child of parent list */
 	struct list_head d_subdirs;	/* our children */
 	struct list_head d_alias;	/* inode alias list */
@@ -78,8 +82,8 @@
 	unsigned long d_time;		/* used by d_revalidate */
 	struct dentry_operations  *d_op;
 	struct super_block * d_sb;	/* The root of the dentry tree */
-	unsigned long d_vfs_flags;
 	void * d_fsdata;		/* fs-specific data */
+	struct rcu_head d_rcu;
 	unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
 };
 
@@ -123,10 +127,23 @@
 					 * s_nfsd_free_path semaphore will be down
 					 */
 #define DCACHE_REFERENCED	0x0008  /* Recently used, don't discard. */
+#define DCACHE_UNHASHED		0x0010	
 
 extern spinlock_t dcache_lock;
 
 /**
+ *	d_unhashed -	is dentry hashed
+ *	@dentry: entry to check
+ *
+ *	Returns true if the dentry passed is not currently hashed.
+ */
+ 
+static __inline__ int d_unhashed(struct dentry *dentry)
+{
+	return (dentry->d_vfs_flags & DCACHE_UNHASHED);
+}
+
+/**
  * d_drop - drop a dentry
  * @dentry: dentry to drop
  *
@@ -143,14 +160,32 @@
  * timeouts or autofs deletes).
  */
 
+static __inline__ void __d_drop(struct dentry * dentry)
+{
+	if (!d_unhashed(dentry)) {
+		dentry->d_vfs_flags |= DCACHE_UNHASHED;
+		wmb();
+		list_del(&dentry->d_hash);
+		wmb();
+	}
+}
+
 static __inline__ void d_drop(struct dentry * dentry)
 {
 	spin_lock(&dcache_lock);
-	list_del(&dentry->d_hash);
-	INIT_LIST_HEAD(&dentry->d_hash);
+	spin_lock(&dentry->d_lock);
+	__d_drop(dentry);
+	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
 }
 
+static __inline__ void d_drop_locked(struct dentry * dentry)
+{
+	spin_lock(&dentry->d_lock);
+	__d_drop(dentry);
+	spin_unlock(&dentry->d_lock);
+}
+
 static __inline__ int dname_external(struct dentry *d)
 {
 	return d->d_name.name != d->d_iname; 
@@ -243,27 +278,25 @@
 static __inline__ struct dentry * dget(struct dentry *dentry)
 {
 	if (dentry) {
-		if (!atomic_read(&dentry->d_count))
-			BUG();
+		spin_lock(&dentry->d_lock);
 		atomic_inc(&dentry->d_count);
+		dentry->d_vfs_flags |= DCACHE_REFERENCED;
+		spin_unlock(&dentry->d_lock);
 	}
 	return dentry;
 }
 
-extern struct dentry * dget_locked(struct dentry *);
-
-/**
- *	d_unhashed -	is dentry hashed
- *	@dentry: entry to check
- *
- *	Returns true if the dentry passed is not currently hashed.
- */
- 
-static __inline__ int d_unhashed(struct dentry *dentry)
+static __inline__ struct dentry * __dget(struct dentry *dentry)
 {
-	return list_empty(&dentry->d_hash);
+	if (dentry) {
+		atomic_inc(&dentry->d_count);
+		dentry->d_vfs_flags |= DCACHE_REFERENCED;
+	}
+	return dentry;
 }
 
+extern struct dentry * dget_locked(struct dentry *);
+
 extern void dput(struct dentry *);
 
 static __inline__ int d_mountpoint(struct dentry *dentry)

-- 
Maneesh Soni
IBM Linux Technology Center, 
IBM India Software Lab, Bangalore.
Phone: +91-80-5044999 email: maneesh@in.ibm.com
http://lse.sourceforge.net/locking/rcupdate.html

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2002-07-15  8:17 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2002-07-12 14:09 [RFC] dcache scalability patch (2.4.17) Maneesh Soni
2002-07-12 14:13 ` [Lse-tech] " Christoph Hellwig
2002-07-12 16:20   ` Dipankar Sarma
2002-07-15  8:10   ` Maneesh Soni
2002-07-15  8:25     ` Maneesh Soni
2002-07-12 14:29 ` Alexander Viro
2002-07-12 16:10   ` [Lse-tech] " Dipankar Sarma
2002-07-12 18:02     ` Dipankar Sarma
2002-07-12 18:10       ` Hanna Linder
2002-07-12 17:35   ` Paul Menage
2002-07-13  8:52     ` Alexander Viro
2002-07-13 17:25       ` Paul Menage
2002-07-13 17:33         ` Alexander Viro
2002-07-13 17:51           ` Paul Menage
2002-07-13 17:54           ` Alexander Viro
2002-07-15  0:10       ` Linus Torvalds
2002-07-12 22:50   ` Hanna Linder
2002-07-15  7:49   ` Maneesh Soni

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox