All of lore.kernel.org
 help / color / mirror / Atom feed
From: Pavel Emelyanov <xemul@parallels.com>
To: Hugh Dickins <hughd@google.com>, Nick Piggin <npiggin@kernel.dk>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Rik van Riel <riel@redhat.com>,
	Dave Hansen <dave@linux.vnet.ibm.com>,
	Alexa
Cc: linux-fsdevel <linux-fsdevel@vger.kernel.org>
Subject: [PATCH 4/13] vfs: Factor out tree (of four) shrinkers code
Date: Tue, 03 May 2011 16:16:55 +0400	[thread overview]
Message-ID: <4DBFF237.8070408@parallels.com> (raw)
In-Reply-To: <4DBFF1AD.90303@parallels.com>

There are 4 dcache shrinkers in dcache.c now.

* prune_dcache one, which shrinks the dentries from the LRU list;
* shrink_dcache_parent, which tries to remove unused dentries from a
subtree starting from a given point;
* shrink_dcache_sb, which wants to acheive the same result as the
previous one, but starting from the given sb root;
* shink_dcache_for_umount, which also kills the dcache for a super
block, but it believes that all the subtree is unreachable and unused
and thus uses lighter locking and more strict checks.

That said, 3 last shrinkers do the same and deserve being factored out.

The srhink_dcache_parent uses the same tree walk as I propose to
shrink the dentry subtree from leaves to the roots, so things do not
get worse after this change.

The shrink_dcache_for_umount_subtree does the same walk as I propose,
but it will take the rename_lock. This is OK, since the rename_lock
is now per-sb and is taken on a dead super block and thus isn't
contented.

The shink_dcache_sb is O(n) before and after the change and seems OK
as well.

The walker code I use was copy-n-paste-d from have_submounts routine.
Yes, it's worth merging them (and the dcache_genocide ;) ).

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>

---
 fs/dcache.c |  365 +++++++++++++++++++++--------------------------------------
 1 files changed, 130 insertions(+), 235 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 976a917..67d6590 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -262,19 +262,6 @@ static void dentry_lru_del(struct dentry *dentry)
 	}
 }
 
-static void dentry_lru_move_tail(struct dentry *dentry)
-{
-	spin_lock(&dcache_lru_lock);
-	if (list_empty(&dentry->d_lru)) {
-		list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
-		dentry->d_sb->s_nr_dentry_unused++;
-		dentry_stat.nr_unused++;
-	} else {
-		list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
-	}
-	spin_unlock(&dcache_lru_lock);
-}
-
 /**
  * d_kill - kill dentry and return parent
  * @dentry: dentry to kill
@@ -649,6 +636,132 @@ restart:
 }
 EXPORT_SYMBOL(d_prune_aliases);
 
+static void isolate_shrinkable(struct dentry *dentry, struct list_head *list)
+{
+	if (!dentry->d_count) {
+		if (!IS_ROOT(dentry)) {
+			__d_drop(dentry);
+			list_del_init(&dentry->d_u.d_child);
+			dentry->d_parent->d_count--;
+			dentry->d_parent = dentry;
+		}
+
+		spin_lock(&dcache_lru_lock);
+		list_move_tail(&dentry->d_lru, list);
+		spin_unlock(&dcache_lru_lock);
+	} else
+		dentry_lru_del(dentry);
+}
+
+static void collect_shrinkable(struct dentry *root, struct list_head *list)
+{
+	struct dentry *this_parent;
+	struct list_head *next;
+	unsigned seq;
+	int locked = 0;
+
+	seq = read_seqbegin(&root->d_sb->s_rename_lock);
+again:
+	this_parent = root;
+	spin_lock(&this_parent->d_lock);
+repeat:
+	next = this_parent->d_subdirs.next;
+resume:
+	while (next != &this_parent->d_subdirs) {
+		struct list_head *tmp = next;
+		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
+		next = tmp->next;
+
+		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+		/*
+		 * Descend a level if the d_subdirs list is non-empty.
+		 */
+		if (!list_empty(&dentry->d_subdirs)) {
+			spin_unlock(&this_parent->d_lock);
+			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
+			this_parent = dentry;
+			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
+			goto repeat;
+		}
+
+		isolate_shrinkable(dentry, list);
+
+		spin_unlock(&dentry->d_lock);
+	}
+	/*
+	 * All done at this level ... ascend and resume the search.
+	 */
+	if (this_parent != root) {
+		struct dentry *tmp;
+		struct dentry *child;
+
+		tmp = this_parent->d_parent;
+		rcu_read_lock();
+		spin_unlock(&this_parent->d_lock);
+		child = this_parent;
+		this_parent = tmp;
+		spin_lock(&this_parent->d_lock);
+		/* might go back up the wrong parent if we have had a rename
+		 * or deletion */
+		if (this_parent != child->d_parent ||
+			(!locked && read_seqretry(&root->d_sb->s_rename_lock, seq))) {
+			spin_unlock(&this_parent->d_lock);
+			rcu_read_unlock();
+			goto rename_retry;
+		}
+		rcu_read_unlock();
+		next = child->d_u.d_child.next;
+
+		spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
+		isolate_shrinkable(child, list);
+		spin_unlock(&child->d_lock);
+
+		goto resume;
+	}
+
+	spin_unlock(&this_parent->d_lock);
+	if (!locked && read_seqretry(&root->d_sb->s_rename_lock, seq))
+		goto rename_retry;
+	if (locked)
+		read_sequnlock(&root->d_sb->s_rename_lock);
+	return;
+
+rename_retry:
+	locked = 1;
+	read_seqlock(&root->d_sb->s_rename_lock);
+	goto again;
+}
+
+static void shrink_dcache_subtree(struct dentry *root, int with_root)
+{
+	LIST_HEAD(tmp);
+	struct dentry *de;
+
+	collect_shrinkable(root, &tmp);
+
+	if (with_root) {
+		if (root->d_count) {
+			printk("Root count is %d, kids:\n", root->d_count);
+			list_for_each_entry(de, &root->d_subdirs, d_u.d_child)
+				printk("  %s [%p, %d, %p, %s]\n",
+						de->d_name.name, de, de->d_count,
+						de->d_parent, de->d_parent->d_name.name);
+			BUG();
+		}
+
+		isolate_shrinkable(root, &tmp);
+	}
+
+	while (!list_empty(&tmp)) {
+		de = list_first_entry(&tmp, struct dentry, d_lru);
+		do {
+			spin_lock(&de->d_lock);
+			BUG_ON(de->d_count);
+			de = dentry_kill(de, 0);
+		} while (de);
+	}
+}
+
 /*
  * Try to throw away a dentry - free the inode, dput the parent.
  * Requires dentry->d_lock is held, and dentry->d_count == 0.
@@ -865,16 +978,7 @@ static void prune_dcache(int count)
  */
 void shrink_dcache_sb(struct super_block *sb)
 {
-	LIST_HEAD(tmp);
-
-	spin_lock(&dcache_lru_lock);
-	while (!list_empty(&sb->s_dentry_lru)) {
-		list_splice_init(&sb->s_dentry_lru, &tmp);
-		spin_unlock(&dcache_lru_lock);
-		shrink_dentry_list(&tmp);
-		spin_lock(&dcache_lru_lock);
-	}
-	spin_unlock(&dcache_lru_lock);
+	shrink_dcache_subtree(sb->s_root, 0);
 }
 EXPORT_SYMBOL(shrink_dcache_sb);
 
@@ -883,100 +987,6 @@ EXPORT_SYMBOL(shrink_dcache_sb);
  * - see the comments on shrink_dcache_for_umount() for a description of the
  *   locking
  */
-static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
-{
-	struct dentry *parent;
-	unsigned detached = 0;
-
-	BUG_ON(!IS_ROOT(dentry));
-
-	/* detach this root from the system */
-	spin_lock(&dentry->d_lock);
-	dentry_lru_del(dentry);
-	__d_drop(dentry);
-	spin_unlock(&dentry->d_lock);
-
-	for (;;) {
-		/* descend to the first leaf in the current subtree */
-		while (!list_empty(&dentry->d_subdirs)) {
-			struct dentry *loop;
-
-			/* this is a branch with children - detach all of them
-			 * from the system in one go */
-			spin_lock(&dentry->d_lock);
-			list_for_each_entry(loop, &dentry->d_subdirs,
-					    d_u.d_child) {
-				spin_lock_nested(&loop->d_lock,
-						DENTRY_D_LOCK_NESTED);
-				dentry_lru_del(loop);
-				__d_drop(loop);
-				spin_unlock(&loop->d_lock);
-			}
-			spin_unlock(&dentry->d_lock);
-
-			/* move to the first child */
-			dentry = list_entry(dentry->d_subdirs.next,
-					    struct dentry, d_u.d_child);
-		}
-
-		/* consume the dentries from this leaf up through its parents
-		 * until we find one with children or run out altogether */
-		do {
-			struct inode *inode;
-
-			if (dentry->d_count != 0) {
-				printk(KERN_ERR
-				       "BUG: Dentry %p{i=%lx,n=%s}"
-				       " still in use (%d)"
-				       " [unmount of %s %s]\n",
-				       dentry,
-				       dentry->d_inode ?
-				       dentry->d_inode->i_ino : 0UL,
-				       dentry->d_name.name,
-				       dentry->d_count,
-				       dentry->d_sb->s_type->name,
-				       dentry->d_sb->s_id);
-				BUG();
-			}
-
-			if (IS_ROOT(dentry)) {
-				parent = NULL;
-				list_del(&dentry->d_u.d_child);
-			} else {
-				parent = dentry->d_parent;
-				spin_lock(&parent->d_lock);
-				parent->d_count--;
-				list_del(&dentry->d_u.d_child);
-				spin_unlock(&parent->d_lock);
-			}
-
-			detached++;
-
-			inode = dentry->d_inode;
-			if (inode) {
-				dentry->d_inode = NULL;
-				list_del_init(&dentry->d_alias);
-				if (dentry->d_op && dentry->d_op->d_iput)
-					dentry->d_op->d_iput(dentry, inode);
-				else
-					iput(inode);
-			}
-
-			d_free(dentry);
-
-			/* finished when we fall off the top of the tree,
-			 * otherwise we ascend to the parent and move to the
-			 * next sibling if there is one */
-			if (!parent)
-				return;
-			dentry = parent;
-		} while (list_empty(&dentry->d_subdirs));
-
-		dentry = list_entry(dentry->d_subdirs.next,
-				    struct dentry, d_u.d_child);
-	}
-}
-
 /*
  * destroy the dentries attached to a superblock on unmounting
  * - we don't need to use dentry->d_lock because:
@@ -999,11 +1009,11 @@ void shrink_dcache_for_umount(struct super_block *sb)
 	spin_lock(&dentry->d_lock);
 	dentry->d_count--;
 	spin_unlock(&dentry->d_lock);
-	shrink_dcache_for_umount_subtree(dentry);
+	shrink_dcache_subtree(dentry, 1);
 
 	while (!hlist_bl_empty(&sb->s_anon)) {
 		dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
-		shrink_dcache_for_umount_subtree(dentry);
+		shrink_dcache_subtree(dentry, 1);
 	}
 }
 
@@ -1103,117 +1113,6 @@ rename_retry:
 }
 EXPORT_SYMBOL(have_submounts);
 
-/*
- * Search the dentry child list for the specified parent,
- * and move any unused dentries to the end of the unused
- * list for prune_dcache(). We descend to the next level
- * whenever the d_subdirs list is non-empty and continue
- * searching.
- *
- * It returns zero iff there are no unused children,
- * otherwise  it returns the number of children moved to
- * the end of the unused list. This may not be the total
- * number of unused children, because select_parent can
- * drop the lock and return early due to latency
- * constraints.
- */
-static int select_parent(struct dentry * parent)
-{
-	struct dentry *this_parent;
-	struct list_head *next;
-	unsigned seq;
-	int found = 0;
-	int locked = 0;
-
-	seq = read_seqbegin(&parent->d_sb->s_rename_lock);
-again:
-	this_parent = parent;
-	spin_lock(&this_parent->d_lock);
-repeat:
-	next = this_parent->d_subdirs.next;
-resume:
-	while (next != &this_parent->d_subdirs) {
-		struct list_head *tmp = next;
-		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
-		next = tmp->next;
-
-		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-
-		/* 
-		 * move only zero ref count dentries to the end 
-		 * of the unused list for prune_dcache
-		 */
-		if (!dentry->d_count) {
-			dentry_lru_move_tail(dentry);
-			found++;
-		} else {
-			dentry_lru_del(dentry);
-		}
-
-		/*
-		 * We can return to the caller if we have found some (this
-		 * ensures forward progress). We'll be coming back to find
-		 * the rest.
-		 */
-		if (found && need_resched()) {
-			spin_unlock(&dentry->d_lock);
-			goto out;
-		}
-
-		/*
-		 * Descend a level if the d_subdirs list is non-empty.
-		 */
-		if (!list_empty(&dentry->d_subdirs)) {
-			spin_unlock(&this_parent->d_lock);
-			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
-			this_parent = dentry;
-			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
-			goto repeat;
-		}
-
-		spin_unlock(&dentry->d_lock);
-	}
-	/*
-	 * All done at this level ... ascend and resume the search.
-	 */
-	if (this_parent != parent) {
-		struct dentry *tmp;
-		struct dentry *child;
-
-		tmp = this_parent->d_parent;
-		rcu_read_lock();
-		spin_unlock(&this_parent->d_lock);
-		child = this_parent;
-		this_parent = tmp;
-		spin_lock(&this_parent->d_lock);
-		/* might go back up the wrong parent if we have had a rename
-		 * or deletion */
-		if (this_parent != child->d_parent ||
-			(!locked && read_seqretry(&parent->d_sb->s_rename_lock, seq))) {
-			spin_unlock(&this_parent->d_lock);
-			rcu_read_unlock();
-			goto rename_retry;
-		}
-		rcu_read_unlock();
-		next = child->d_u.d_child.next;
-		goto resume;
-	}
-out:
-	spin_unlock(&this_parent->d_lock);
-	if (!locked && read_seqretry(&parent->d_sb->s_rename_lock, seq))
-		goto rename_retry;
-	if (locked)
-		read_sequnlock(&parent->d_sb->s_rename_lock);
-	return found;
-
-rename_retry:
-	if (found)
-		return found;
-	locked = 1;
-	read_seqlock(&parent->d_sb->s_rename_lock);
-	goto again;
-}
-
 /**
  * shrink_dcache_parent - prune dcache
  * @parent: parent of entries to prune
@@ -1223,11 +1122,7 @@ rename_retry:
  
 void shrink_dcache_parent(struct dentry * parent)
 {
-	struct super_block *sb = parent->d_sb;
-	int found;
-
-	while ((found = select_parent(parent)) != 0)
-		__shrink_dcache_sb(sb, &found, 0);
+	shrink_dcache_subtree(parent, 0);
 }
 EXPORT_SYMBOL(shrink_dcache_parent);
 
-- 
1.5.5.6

  parent reply	other threads:[~2011-05-03 12:16 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-05-03 12:14 [RFC][PATCH 0/13] Per-container dcache management (and a bit more) Pavel Emelyanov
2011-05-03 12:15 ` [PATCH 1/13] vfs: Lighten r/o rename_lock lockers Pavel Emelyanov
2011-05-03 12:15 ` [PATCH 2/13] vfs: Factor out rename_lock locking Pavel Emelyanov
2011-05-03 12:16 ` [PATCH 3/13] vfs: Make the rename_lock per-sb Pavel Emelyanov
2011-05-03 12:16 ` Pavel Emelyanov [this message]
2011-05-03 12:17 ` [PATCH 5/13] vfs: Make dentry LRU list global Pavel Emelyanov
2011-05-03 12:17 ` [PATCH 6/13] vfs: Turn the nr_dentry into percpu_counter Pavel Emelyanov
2011-05-03 12:18 ` [PATCH 7/13] vfs: Limit the number of dentries globally Pavel Emelyanov
2011-05-03 12:18 ` [PATCH 8/13] vfs: Introduce the dentry mobs Pavel Emelyanov
2011-06-18 13:40   ` Andrea Arcangeli
2011-05-03 12:18 ` [PATCH 9/13] vfs: More than one mob management Pavel Emelyanov
2011-05-03 12:19 ` [PATCH 10/13] vfs: Routnes for setting mob size and getting stats Pavel Emelyanov
2011-05-03 12:19 ` [PATCH 11/13] vfs: Make shrink_dcache_memory prune dcache from all mobs Pavel Emelyanov
2011-05-03 12:20 ` [PATCH 12/13] vfs: Mobs creation and mgmt API Pavel Emelyanov
2011-05-03 12:20 ` [PATCH 13/13] vfs: Dentry mobs listing in proc Pavel Emelyanov
2011-05-06  1:05 ` [RFC][PATCH 0/13] Per-container dcache management (and a bit more) Dave Chinner
2011-05-06 12:15   ` Pavel Emelyanov
2011-05-07  0:01     ` Dave Chinner
2011-05-10 11:18       ` Pavel Emelyanov
2011-06-18 13:30       ` Andrea Arcangeli
2011-06-20  0:49         ` Dave Chinner
2011-07-04  5:32           ` Pavel Emelyanov
2011-05-23  6:43 ` Pavel Emelyanov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4DBFF237.8070408@parallels.com \
    --to=xemul@parallels.com \
    --cc=aarcange@redhat.com \
    --cc=dave@linux.vnet.ibm.com \
    --cc=hughd@google.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=npiggin@kernel.dk \
    --cc=riel@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.