All of lore.kernel.org
 help / color / mirror / Atom feed
From: Vasily Averin <vvs@sw.ru>
To: David Howells <dhowells@redhat.com>
Cc: Neil Brown <neilb@suse.de>, Jan Blunck <jblunck@suse.de>,
	Olaf Hering <olh@suse.de>, Balbir Singh <balbir@in.ibm.com>,
	Kirill Korotaev <dev@openvz.org>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	devel@openvz.org, Andrew Morton <akpm@osdl.org>
Subject: [PATCH 2.6.19-rc3] VFS: per-sb dentry lru list
Date: Fri, 27 Oct 2006 18:05:50 +0400	[thread overview]
Message-ID: <4542123E.4030309@sw.ru> (raw)
In-Reply-To: <24249.1161951081@redhat.com>

From:	Vasily Averin <vvs@sw.ru>

The Virtuozzo/OpenVZ Linux kernel team has discovered that umount/remount can
last for hours, looping in shrink_dcache_sb() without much success. Since the
s_umount semaphore is held during shrinking, lots of other unrelated operations,
such as sync, can stall until the shrink has finished.

This happens because the unused dentry list is very long (>1 million dentries),
so it takes shrink_dcache_sb() longer than 1/HZ seconds to get to the
required dentry, and after freeing this single dentry it reschedules and restarts.

The proposed fix prevents this issue by using a per-sb dentry LRU list. It
provides a very quick search for the unused dentries of a given super block,
thus ensuring that shrinking always makes good progress.

It was well tested on 2.6.9-based Virtuozzo/OpenVZ kernels, but the port to the
latest mainstream kernel has not been tested.

Signed-off-by:	Kirill Korotaev <dev@openvz.org>
Signed-off-by:	Vasily Averin <vvs@sw.ru>
	
--- linux-2.6.19-rc3/fs/dcache.c.shrsb	2006-10-27 10:45:11.000000000 +0400
+++ linux-2.6.19-rc3/fs/dcache.c	2006-10-27 15:49:54.000000000 +0400
@@ -173,6 +173,7 @@ repeat:
   	if (list_empty(&dentry->d_lru)) {
   		dentry->d_flags |= DCACHE_REFERENCED;
   		list_add(&dentry->d_lru, &dentry_unused);
+		list_add(&dentry->d_sb_lru, &dentry->d_sb->s_dentry_unused);
   		dentry_stat.nr_unused++;
   	}
  	spin_unlock(&dentry->d_lock);
@@ -190,6 +191,7 @@ kill_it: {
 		 */
   		if (!list_empty(&dentry->d_lru)) {
   			list_del(&dentry->d_lru);
+			list_del(&dentry->d_sb_lru);
   			dentry_stat.nr_unused--;
   		}
   		list_del(&dentry->d_u.d_child);
@@ -270,6 +272,7 @@ static inline struct dentry * __dget_loc
 	if (!list_empty(&dentry->d_lru)) {
 		dentry_stat.nr_unused--;
 		list_del_init(&dentry->d_lru);
+		list_del_init(&dentry->d_sb_lru);
 	}
 	return dentry;
 }
@@ -398,6 +401,10 @@ static void prune_one_dentry(struct dent

 static void prune_dcache(int count, struct super_block *sb)
 {
+	struct list_head *lru_head;
+
+	lru_head = sb ? &sb->s_dentry_unused : &dentry_unused;
+
 	spin_lock(&dcache_lock);
 	for (; count ; count--) {
 		struct dentry *dentry;
@@ -406,25 +413,16 @@ static void prune_dcache(int count, stru

 		cond_resched_lock(&dcache_lock);

-		tmp = dentry_unused.prev;
-		if (sb) {
-			/* Try to find a dentry for this sb, but don't try
-			 * too hard, if they aren't near the tail they will
-			 * be moved down again soon
-			 */
-			int skip = count;
-			while (skip && tmp != &dentry_unused &&
-			    list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
-				skip--;
-				tmp = tmp->prev;
-			}
-		}
-		if (tmp == &dentry_unused)
+		tmp = lru_head->prev;
+		if (tmp == lru_head)
 			break;
-		list_del_init(tmp);
-		prefetch(dentry_unused.prev);
+
+		prefetch(lru_head->prev);
  		dentry_stat.nr_unused--;
-		dentry = list_entry(tmp, struct dentry, d_lru);
+		dentry = sb ? list_entry(tmp, struct dentry, d_sb_lru) :
+				list_entry(tmp, struct dentry, d_lru);
+		list_del_init(&dentry->d_lru);
+		list_del_init(&dentry->d_sb_lru);

  		spin_lock(&dentry->d_lock);
 		/*
@@ -440,6 +438,8 @@ static void prune_dcache(int count, stru
 		if (dentry->d_flags & DCACHE_REFERENCED) {
 			dentry->d_flags &= ~DCACHE_REFERENCED;
  			list_add(&dentry->d_lru, &dentry_unused);
+			list_add(&dentry->d_sb_lru,
+					&dentry->d_sb->s_dentry_unused);
  			dentry_stat.nr_unused++;
  			spin_unlock(&dentry->d_lock);
 			continue;
@@ -455,7 +455,8 @@ static void prune_dcache(int count, stru
 		 * If this dentry is for "my" filesystem, then I can prune it
 		 * without taking the s_umount lock (I already hold it).
 		 */
-		if (sb && dentry->d_sb == sb) {
+		if (sb) {
+			BUG_ON(dentry->d_sb != sb);
 			prune_one_dentry(dentry);
 			continue;
 		}
@@ -480,6 +481,8 @@ static void prune_dcache(int count, stru
 		spin_unlock(&dentry->d_lock);
 		/* Inserting dentry to tail of the list leads to cycle */
  		list_add(&dentry->d_lru, &dentry_unused);
+		list_add(&dentry->d_sb_lru,
+				&dentry->d_sb->s_dentry_unused);
 		dentry_stat.nr_unused++;
 	}
 	spin_unlock(&dcache_lock);
@@ -509,31 +512,15 @@ static void prune_dcache(int count, stru

 void shrink_dcache_sb(struct super_block * sb)
 {
-	struct list_head *tmp, *next;
-	struct dentry *dentry;
-
-	/*
-	 * Pass one ... move the dentries for the specified
-	 * superblock to the most recent end of the unused list.
-	 */
 	spin_lock(&dcache_lock);
-	list_for_each_safe(tmp, next, &dentry_unused) {
-		dentry = list_entry(tmp, struct dentry, d_lru);
-		if (dentry->d_sb != sb)
-			continue;
-		list_move(tmp, &dentry_unused);
-	}
+	while (!list_empty(&sb->s_dentry_unused)) {
+		struct dentry *dentry;

-	/*
-	 * Pass two ... free the dentries for this superblock.
-	 */
-repeat:
-	list_for_each_safe(tmp, next, &dentry_unused) {
-		dentry = list_entry(tmp, struct dentry, d_lru);
-		if (dentry->d_sb != sb)
-			continue;
+		dentry = list_entry((&sb->s_dentry_unused)->next,
+					struct dentry, d_sb_lru);
 		dentry_stat.nr_unused--;
-		list_del_init(tmp);
+		list_del_init(&dentry->d_lru);
+		list_del_init(&dentry->d_sb_lru);
 		spin_lock(&dentry->d_lock);
 		if (atomic_read(&dentry->d_count)) {
 			spin_unlock(&dentry->d_lock);
@@ -541,7 +528,6 @@ repeat:
 		}
 		prune_one_dentry(dentry);
 		cond_resched_lock(&dcache_lock);
-		goto repeat;
 	}
 	spin_unlock(&dcache_lock);
 }
@@ -563,6 +549,7 @@ static void shrink_dcache_for_umount_sub
 	if (!list_empty(&dentry->d_lru)) {
 		dentry_stat.nr_unused--;
 		list_del_init(&dentry->d_lru);
+		list_del_init(&dentry->d_sb_lru);
 	}
 	__d_drop(dentry);
 	spin_unlock(&dcache_lock);
@@ -580,6 +567,7 @@ static void shrink_dcache_for_umount_sub
 				if (!list_empty(&loop->d_lru)) {
 					dentry_stat.nr_unused--;
 					list_del_init(&loop->d_lru);
+					list_del_init(&loop->d_sb_lru);
 				}

 				__d_drop(loop);
@@ -766,6 +754,7 @@ resume:
 		if (!list_empty(&dentry->d_lru)) {
 			dentry_stat.nr_unused--;
 			list_del_init(&dentry->d_lru);
+			list_del_init(&dentry->d_sb_lru);
 		}
 		/*
 		 * move only zero ref count dentries to the end
@@ -773,6 +762,8 @@ resume:
 		 */
 		if (!atomic_read(&dentry->d_count)) {
 			list_add_tail(&dentry->d_lru, &dentry_unused);
+			list_add_tail(&dentry->d_sb_lru,
+					&dentry->d_sb->s_dentry_unused);
 			dentry_stat.nr_unused++;
 			found++;
 		}
@@ -892,6 +883,7 @@ struct dentry *d_alloc(struct dentry * p
 #endif
 	INIT_HLIST_NODE(&dentry->d_hash);
 	INIT_LIST_HEAD(&dentry->d_lru);
+	INIT_LIST_HEAD(&dentry->d_sb_lru);
 	INIT_LIST_HEAD(&dentry->d_subdirs);
 	INIT_LIST_HEAD(&dentry->d_alias);

--- linux-2.6.19-rc3/fs/super.c.shrsb	2006-10-24 10:29:13.000000000 +0400
+++ linux-2.6.19-rc3/fs/super.c	2006-10-27 15:36:33.000000000 +0400
@@ -69,6 +69,7 @@ static struct super_block *alloc_super(s
 		INIT_LIST_HEAD(&s->s_io);
 		INIT_LIST_HEAD(&s->s_files);
 		INIT_LIST_HEAD(&s->s_instances);
+		INIT_LIST_HEAD(&s->s_dentry_unused);
 		INIT_HLIST_HEAD(&s->s_anon);
 		INIT_LIST_HEAD(&s->s_inodes);
 		init_rwsem(&s->s_umount);
--- linux-2.6.19-rc3/include/linux/dcache.h.shrsb	2006-10-24 10:29:14.000000000 +0400
+++ linux-2.6.19-rc3/include/linux/dcache.h	2006-10-27 15:36:33.000000000 +0400
@@ -94,6 +94,7 @@ struct dentry {
 	struct qstr d_name;

 	struct list_head d_lru;		/* LRU list */
+	struct list_head d_sb_lru;	/* per-sb LRU list */
 	/*
 	 * d_child and d_rcu can share memory
 	 */
--- linux-2.6.19-rc3/include/linux/fs.h.shrsb	2006-10-24 10:29:14.000000000 +0400
+++ linux-2.6.19-rc3/include/linux/fs.h	2006-10-27 15:36:33.000000000 +0400
@@ -939,6 +939,7 @@ struct super_block {
 	struct list_head	s_dirty;	/* dirty inodes */
 	struct list_head	s_io;		/* parked for writeback */
 	struct hlist_head	s_anon;		/* anonymous dentries for (nfs) exporting */
+	struct list_head	s_dentry_unused;
 	struct list_head	s_files;

 	struct block_device	*s_bdev;


  parent reply	other threads:[~2006-10-27 14:05 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-10-25 12:30 [Q] missing unused dentry in prune_dcache()? Vasily Averin
2006-10-25 13:51 ` David Howells
2006-10-25 13:58   ` Vasily Averin
2006-10-25 14:23     ` David Howells
2006-10-26 11:36       ` Vasily Averin
2006-10-26 13:25         ` David Howells
2006-10-27  8:05           ` Vasily Averin
2006-10-27 10:42             ` David Howells
2006-10-27 11:50               ` Vasily Averin
2006-10-27 12:11                 ` David Howells
2006-10-27 13:47                   ` Vasily Averin
2006-10-27 14:29                     ` David Howells
2006-10-27 14:39                       ` Kirill Korotaev
2006-10-27 14:05                   ` Vasily Averin [this message]
2006-10-27 18:06                     ` [PATCH 2.6.19-rc3] VFS: per-sb dentry lru list Andrew Morton
2006-10-30 14:24                       ` Vasily Averin
2006-10-30 15:08                         ` Eric Dumazet
2006-10-30 15:34                           ` Kirill Korotaev
2006-10-30 16:09                             ` Eric Dumazet
2006-10-30 15:14                       ` Kirill Korotaev
2006-10-30  4:24                     ` David Chinner
2006-10-30  6:28                       ` [Devel] " Kirill Korotaev
2006-10-31  4:38                         ` Neil Brown
2006-10-31 10:40                           ` David Howells
2006-11-01  6:32                             ` Neil Brown
2006-11-01 13:32                               ` Vasily Averin
2006-11-14  5:44                                 ` Neil Brown
2006-11-14  6:12                                   ` Vasily Averin
2006-10-31 13:08                           ` Vasily Averin
2006-11-01 10:55                           ` [Devel] " Kirill Korotaev
2006-11-14  4:51                             ` Neil Brown
2006-11-14  9:29                               ` David Howells
2006-10-27 13:42       ` [PATCH 2.6.19-rc3] VFS: missing unused dentry in prune_dcache() Vasily Averin
2006-10-27 14:24         ` David Howells
2006-10-26 11:49 ` [Q] missing unused dentry in prune_dcache()? Vasily Averin
2006-10-26 12:33   ` David Howells
2006-10-26 13:23   ` David Howells
2006-10-26 13:58     ` Vasily Averin
2006-10-26 14:07       ` David Howells
2006-10-27  6:32     ` Vasily Averin
2006-10-27  6:50       ` Vasily Averin
2006-10-27  9:36         ` David Howells
2006-10-31 13:24 ` [Q] missing ->d_delete() in shrink_dcache_for_umount()? Vasily Averin
2006-10-31 15:06   ` David Howells

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4542123E.4030309@sw.ru \
    --to=vvs@sw.ru \
    --cc=akpm@osdl.org \
    --cc=balbir@in.ibm.com \
    --cc=dev@openvz.org \
    --cc=devel@openvz.org \
    --cc=dhowells@redhat.com \
    --cc=jblunck@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=neilb@suse.de \
    --cc=olh@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.