linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Pavel Emelyanov <xemul@parallels.com>
To: Hugh Dickins <hughd@google.com>, Nick Piggin <npiggin@kernel.dk>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Rik van Riel <riel@redhat.com>,
	Dave Hansen <dave@linux.vnet.ibm.com>,
	Alexa
Cc: linux-fsdevel <linux-fsdevel@vger.kernel.org>
Subject: [PATCH 3/13] vfs: Make the rename_lock per-sb
Date: Tue, 03 May 2011 16:16:21 +0400	[thread overview]
Message-ID: <4DBFF215.6070009@parallels.com> (raw)
In-Reply-To: <4DBFF1AD.90303@parallels.com>

There are two things I try to get with this patch.

The first one -- renames on different sb-s will not depend on each other.
The second one -- I plan to factor out the dcache shrinking code (see
further patches). In order to make shrink_dcache_for_umount work OK
on a dying SB it's better to have this lock non-global.

There's some trickery in prepend_path -- it has to relock the rename
lock as it ascends the vfsmount tree. This relock seems OK, as the
dentry path within each mount remains solid, and even if we "race"
with a rename on some of the super blocks, the overall path we get
will be either the one we had before the rename or the one we had
after it, i.e. -- some existing one.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>

---
 drivers/staging/pohmelfs/path_entry.c |    4 +-
 fs/autofs4/waitq.c                    |    6 ++--
 fs/dcache.c                           |   64 ++++++++++++++++----------------
 fs/nfs/namespace.c                    |    6 ++--
 fs/super.c                            |    1 +
 include/linux/dcache.h                |    2 -
 include/linux/fs.h                    |    1 +
 kernel/auditsc.c                      |    4 +-
 8 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/drivers/staging/pohmelfs/path_entry.c b/drivers/staging/pohmelfs/path_entry.c
index 400a9fc..d3de52e 100644
--- a/drivers/staging/pohmelfs/path_entry.c
+++ b/drivers/staging/pohmelfs/path_entry.c
@@ -99,7 +99,7 @@ int pohmelfs_path_length(struct pohmelfs_inode *pi)
 rename_retry:
 	len = 1; /* Root slash */
 	d = first;
-	seq = read_seqbegin(&rename_lock);
+	seq = read_seqbegin(&root->d_sb->s_rename_lock);
 	rcu_read_lock();
 
 	if (!IS_ROOT(d) && d_unhashed(d))
@@ -110,7 +110,7 @@ rename_retry:
 		d = d->d_parent;
 	}
 	rcu_read_unlock();
-	if (read_seqretry(&rename_lock, seq))
+	if (read_seqretry(&root->d_sb->s_rename_lock, seq))
 		goto rename_retry;
 
 	dput(root);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 5601005..920db0b 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -195,7 +195,7 @@ rename_retry:
 	buf = *name;
 	len = 0;
 
-	seq = read_seqbegin(&rename_lock);
+	seq = read_seqbegin(&sbi->sb->s_rename_lock);
 	rcu_read_lock();
 	spin_lock(&autofs4_lock);
 	for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
@@ -204,7 +204,7 @@ rename_retry:
 	if (!len || --len > NAME_MAX) {
 		spin_unlock(&autofs4_lock);
 		rcu_read_unlock();
-		if (read_seqretry(&rename_lock, seq))
+		if (read_seqretry(&sbi->sb->s_rename_lock, seq))
 			goto rename_retry;
 		return 0;
 	}
@@ -220,7 +220,7 @@ rename_retry:
 	}
 	spin_unlock(&autofs4_lock);
 	rcu_read_unlock();
-	if (read_seqretry(&rename_lock, seq))
+	if (read_seqretry(&sbi->sb->s_rename_lock, seq))
 		goto rename_retry;
 
 	return len;
diff --git a/fs/dcache.c b/fs/dcache.c
index 485fc4a..976a917 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -79,10 +79,6 @@ int sysctl_vfs_cache_pressure __read_mostly = 100;
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
 
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
-__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
-
-EXPORT_SYMBOL(rename_lock);
-
 static struct kmem_cache *dentry_cache __read_mostly;
 
 /*
@@ -1031,7 +1027,7 @@ int have_submounts(struct dentry *parent)
 	unsigned seq;
 	int locked = 0;
 
-	seq = read_seqbegin(&rename_lock);
+	seq = read_seqbegin(&parent->d_sb->s_rename_lock);
 again:
 	this_parent = parent;
 
@@ -1078,7 +1074,7 @@ resume:
 		/* might go back up the wrong parent if we have had a rename
 		 * or deletion */
 		if (this_parent != child->d_parent ||
-			 (!locked && read_seqretry(&rename_lock, seq))) {
+			 (!locked && read_seqretry(&parent->d_sb->s_rename_lock, seq))) {
 			spin_unlock(&this_parent->d_lock);
 			rcu_read_unlock();
 			goto rename_retry;
@@ -1088,21 +1084,21 @@ resume:
 		goto resume;
 	}
 	spin_unlock(&this_parent->d_lock);
-	if (!locked && read_seqretry(&rename_lock, seq))
+	if (!locked && read_seqretry(&parent->d_sb->s_rename_lock, seq))
 		goto rename_retry;
 	if (locked)
-		read_sequnlock(&rename_lock);
+		read_sequnlock(&parent->d_sb->s_rename_lock);
 	return 0; /* No mount points found in tree */
 positive:
-	if (!locked && read_seqretry(&rename_lock, seq))
+	if (!locked && read_seqretry(&parent->d_sb->s_rename_lock, seq))
 		goto rename_retry;
 	if (locked)
-		read_sequnlock(&rename_lock);
+		read_sequnlock(&parent->d_sb->s_rename_lock);
 	return 1;
 
 rename_retry:
 	locked = 1;
-	read_seqlock(&rename_lock);
+	read_seqlock(&parent->d_sb->s_rename_lock);
 	goto again;
 }
 EXPORT_SYMBOL(have_submounts);
@@ -1129,7 +1125,7 @@ static int select_parent(struct dentry * parent)
 	int found = 0;
 	int locked = 0;
 
-	seq = read_seqbegin(&rename_lock);
+	seq = read_seqbegin(&parent->d_sb->s_rename_lock);
 again:
 	this_parent = parent;
 	spin_lock(&this_parent->d_lock);
@@ -1193,7 +1189,7 @@ resume:
 		/* might go back up the wrong parent if we have had a rename
 		 * or deletion */
 		if (this_parent != child->d_parent ||
-			(!locked && read_seqretry(&rename_lock, seq))) {
+			(!locked && read_seqretry(&parent->d_sb->s_rename_lock, seq))) {
 			spin_unlock(&this_parent->d_lock);
 			rcu_read_unlock();
 			goto rename_retry;
@@ -1204,17 +1200,17 @@ resume:
 	}
 out:
 	spin_unlock(&this_parent->d_lock);
-	if (!locked && read_seqretry(&rename_lock, seq))
+	if (!locked && read_seqretry(&parent->d_sb->s_rename_lock, seq))
 		goto rename_retry;
 	if (locked)
-		read_sequnlock(&rename_lock);
+		read_sequnlock(&parent->d_sb->s_rename_lock);
 	return found;
 
 rename_retry:
 	if (found)
 		return found;
 	locked = 1;
-	read_seqlock(&rename_lock);
+	read_seqlock(&parent->d_sb->s_rename_lock);
 	goto again;
 }
 
@@ -1871,11 +1867,11 @@ struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
 	unsigned seq;
 
         do {
-                seq = read_seqbegin(&rename_lock);
+                seq = read_seqbegin(&parent->d_sb->s_rename_lock);
                 dentry = __d_lookup(parent, name);
                 if (dentry)
 			break;
-	} while (read_seqretry(&rename_lock, seq));
+	} while (read_seqretry(&parent->d_sb->s_rename_lock, seq));
 	return dentry;
 }
 EXPORT_SYMBOL(d_lookup);
@@ -2237,8 +2233,9 @@ void d_move(struct dentry * dentry, struct dentry * target)
 
 	BUG_ON(d_ancestor(dentry, target));
 	BUG_ON(d_ancestor(target, dentry));
+	BUG_ON(dentry->d_sb != target->d_sb);
 
-	write_seqlock(&rename_lock);
+	write_seqlock(&dentry->d_sb->s_rename_lock);
 
 	dentry_lock_for_move(dentry, target);
 
@@ -2285,7 +2282,7 @@ void d_move(struct dentry * dentry, struct dentry * target)
 	spin_unlock(&target->d_lock);
 	fsnotify_d_move(dentry);
 	spin_unlock(&dentry->d_lock);
-	write_sequnlock(&rename_lock);
+	write_sequnlock(&dentry->d_sb->s_rename_lock);
 }
 EXPORT_SYMBOL(d_move);
 
@@ -2499,8 +2496,8 @@ static int prepend_path(const struct path *path, struct path *root,
 	bool slash = false;
 	int error = 0;
 
-	read_seqlock(&rename_lock);
 	br_read_lock(vfsmount_lock);
+	read_seqlock(&vfsmnt->mnt_sb->s_rename_lock);
 	while (dentry != root->dentry || vfsmnt != root->mnt) {
 		struct dentry * parent;
 
@@ -2510,7 +2507,9 @@ static int prepend_path(const struct path *path, struct path *root,
 				goto global_root;
 			}
 			dentry = vfsmnt->mnt_mountpoint;
+			read_sequnlock(&vfsmnt->mnt_sb->s_rename_lock);
 			vfsmnt = vfsmnt->mnt_parent;
+			read_seqlock(&vfsmnt->mnt_sb->s_rename_lock);
 			continue;
 		}
 		parent = dentry->d_parent;
@@ -2531,8 +2530,8 @@ out:
 	if (!error && !slash)
 		error = prepend(buffer, buflen, "/", 1);
 
+	read_sequnlock(&vfsmnt->mnt_sb->s_rename_lock);
 	br_read_unlock(vfsmount_lock);
-	read_sequnlock(&rename_lock);
 	return error;
 
 global_root:
@@ -2703,6 +2702,7 @@ static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
 {
 	char *end = buf + buflen;
 	char *retval;
+	struct super_block *sb = dentry->d_sb;
 
 	prepend(&end, &buflen, "\0", 1);
 	if (buflen < 1)
@@ -2711,7 +2711,7 @@ static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
 	retval = end-1;
 	*retval = '/';
 
-	read_seqlock(&rename_lock);
+	read_seqlock(&sb->s_rename_lock);
 	while (!IS_ROOT(dentry)) {
 		struct dentry *parent = dentry->d_parent;
 		int error;
@@ -2726,10 +2726,10 @@ static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
 		retval = end;
 		dentry = parent;
 	}
-	read_sequnlock(&rename_lock);
+	read_sequnlock(&sb->s_rename_lock);
 	return retval;
 Elong:
-	read_sequnlock(&rename_lock);
+	read_sequnlock(&sb->s_rename_lock);
 	return ERR_PTR(-ENAMETOOLONG);
 }
 
@@ -2848,7 +2848,7 @@ int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
 
 	do {
 		/* for restarting inner loop in case of seq retry */
-		seq = read_seqbegin(&rename_lock);
+		seq = read_seqbegin(&old_dentry->d_sb->s_rename_lock);
 		/*
 		 * Need rcu_readlock to protect against the d_parent trashing
 		 * due to d_move
@@ -2859,7 +2859,7 @@ int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
 		else
 			result = 0;
 		rcu_read_unlock();
-	} while (read_seqretry(&rename_lock, seq));
+	} while (read_seqretry(&old_dentry->d_sb->s_rename_lock, seq));
 
 	return result;
 }
@@ -2896,7 +2896,7 @@ void d_genocide(struct dentry *root)
 	unsigned seq;
 	int locked = 0;
 
-	seq = read_seqbegin(&rename_lock);
+	seq = read_seqbegin(&root->d_sb->s_rename_lock);
 again:
 	this_parent = root;
 	spin_lock(&this_parent->d_lock);
@@ -2943,7 +2943,7 @@ resume:
 		/* might go back up the wrong parent if we have had a rename
 		 * or deletion */
 		if (this_parent != child->d_parent ||
-			 (!locked && read_seqretry(&rename_lock, seq))) {
+			 (!locked && read_seqretry(&root->d_sb->s_rename_lock, seq))) {
 			spin_unlock(&this_parent->d_lock);
 			rcu_read_unlock();
 			goto rename_retry;
@@ -2953,15 +2953,15 @@ resume:
 		goto resume;
 	}
 	spin_unlock(&this_parent->d_lock);
-	if (!locked && read_seqretry(&rename_lock, seq))
+	if (!locked && read_seqretry(&root->d_sb->s_rename_lock, seq))
 		goto rename_retry;
 	if (locked)
-		read_sequnlock(&rename_lock);
+		read_sequnlock(&root->d_sb->s_rename_lock);
 	return;
 
 rename_retry:
 	locked = 1;
-	read_seqlock(&rename_lock);
+	read_seqlock(&root->d_sb->s_rename_lock);
 	goto again;
 }
 
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index f32b860..bca6732 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -58,7 +58,7 @@ rename_retry:
 	*--end = '\0';
 	buflen--;
 
-	seq = read_seqbegin(&rename_lock);
+	seq = read_seqbegin(&droot->d_sb->s_rename_lock);
 	rcu_read_lock();
 	while (!IS_ROOT(dentry) && dentry != droot) {
 		namelen = dentry->d_name.len;
@@ -71,7 +71,7 @@ rename_retry:
 		dentry = dentry->d_parent;
 	}
 	rcu_read_unlock();
-	if (read_seqretry(&rename_lock, seq))
+	if (read_seqretry(&droot->d_sb->s_rename_lock, seq))
 		goto rename_retry;
 	if (*end != '/') {
 		if (--buflen < 0)
@@ -90,7 +90,7 @@ rename_retry:
 	return end;
 Elong_unlock:
 	rcu_read_unlock();
-	if (read_seqretry(&rename_lock, seq))
+	if (read_seqretry(&droot->d_sb->s_rename_lock, seq))
 		goto rename_retry;
 Elong:
 	return ERR_PTR(-ENAMETOOLONG);
diff --git a/fs/super.c b/fs/super.c
index 7e9dd4c..0293e6e 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -103,6 +103,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
 		s->s_count = 1;
 		atomic_set(&s->s_active, 1);
 		mutex_init(&s->s_vfs_rename_mutex);
+		seqlock_init(&s->s_rename_lock);
 		lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
 		mutex_init(&s->s_dquot.dqio_mutex);
 		mutex_init(&s->s_dquot.dqonoff_mutex);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index f958c19..8834f58 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -219,8 +219,6 @@ struct dentry_operations {
 #define DCACHE_MANAGED_DENTRY \
 	(DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT)
 
-extern seqlock_t rename_lock;
-
 static inline int dname_external(struct dentry *dentry)
 {
 	return dentry->d_name.name != dentry->d_iname;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e38b50a..63b4b7a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1414,6 +1414,7 @@ struct super_block {
 	 * even looking at it. You had been warned.
 	 */
 	struct mutex s_vfs_rename_mutex;	/* Kludge */
+	seqlock_t s_rename_lock;
 
 	/*
 	 * Filesystem subtype.  If non-empty the filesystem type field
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index f49a031..a5e26bf 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1772,7 +1772,7 @@ retry:
 	drop = NULL;
 	d = dentry;
 	rcu_read_lock();
-	seq = read_seqbegin(&rename_lock);
+	seq = read_seqbegin(&dentry->d_sb->s_rename_lock);
 	for(;;) {
 		struct inode *inode = d->d_inode;
 		if (inode && unlikely(!hlist_empty(&inode->i_fsnotify_marks))) {
@@ -1790,7 +1790,7 @@ retry:
 			break;
 		d = parent;
 	}
-	if (unlikely(read_seqretry(&rename_lock, seq) || drop)) {  /* in this order */
+	if (unlikely(read_seqretry(&dentry->d_sb->s_rename_lock, seq) || drop)) {  /* in this order */
 		rcu_read_unlock();
 		if (!drop) {
 			/* just a race with rename */
-- 
1.5.5.6

  parent reply	other threads:[~2011-05-03 12:16 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-05-03 12:14 [RFC][PATCH 0/13] Per-container dcache management (and a bit more) Pavel Emelyanov
2011-05-03 12:15 ` [PATCH 1/13] vfs: Lighten r/o rename_lock lockers Pavel Emelyanov
2011-05-03 12:15 ` [PATCH 2/13] vfs: Factor out rename_lock locking Pavel Emelyanov
2011-05-03 12:16 ` Pavel Emelyanov [this message]
2011-05-03 12:16 ` [PATCH 4/13] vfs: Factor out tree (of four) shrinkers code Pavel Emelyanov
2011-05-03 12:17 ` [PATCH 5/13] vfs: Make dentry LRU list global Pavel Emelyanov
2011-05-03 12:17 ` [PATCH 6/13] vfs: Turn the nr_dentry into percpu_counter Pavel Emelyanov
2011-05-03 12:18 ` [PATCH 7/13] vfs: Limit the number of dentries globally Pavel Emelyanov
2011-05-03 12:18 ` [PATCH 8/13] vfs: Introduce the dentry mobs Pavel Emelyanov
2011-06-18 13:40   ` Andrea Arcangeli
2011-05-03 12:18 ` [PATCH 9/13] vfs: More than one mob management Pavel Emelyanov
2011-05-03 12:19 ` [PATCH 10/13] vfs: Routnes for setting mob size and getting stats Pavel Emelyanov
2011-05-03 12:19 ` [PATCH 11/13] vfs: Make shrink_dcache_memory prune dcache from all mobs Pavel Emelyanov
2011-05-03 12:20 ` [PATCH 12/13] vfs: Mobs creation and mgmt API Pavel Emelyanov
2011-05-03 12:20 ` [PATCH 13/13] vfs: Dentry mobs listing in proc Pavel Emelyanov
2011-05-06  1:05 ` [RFC][PATCH 0/13] Per-container dcache management (and a bit more) Dave Chinner
2011-05-06 12:15   ` Pavel Emelyanov
2011-05-07  0:01     ` Dave Chinner
2011-05-10 11:18       ` Pavel Emelyanov
2011-06-18 13:30       ` Andrea Arcangeli
2011-06-20  0:49         ` Dave Chinner
2011-07-04  5:32           ` Pavel Emelyanov
2011-05-23  6:43 ` Pavel Emelyanov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4DBFF215.6070009@parallels.com \
    --to=xemul@parallels.com \
    --cc=aarcange@redhat.com \
    --cc=dave@linux.vnet.ibm.com \
    --cc=hughd@google.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=npiggin@kernel.dk \
    --cc=riel@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).