From: Vasily Averin <vvs@sw.ru>
To: David Howells <dhowells@redhat.com>
Cc: Neil Brown <neilb@suse.de>, Jan Blunck <jblunck@suse.de>,
Olaf Hering <olh@suse.de>, Balbir Singh <balbir@in.ibm.com>,
Kirill Korotaev <dev@openvz.org>,
Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
devel@openvz.org, Andrew Morton <akpm@osdl.org>
Subject: [PATCH 2.6.19-rc3] VFS: per-sb dentry lru list
Date: Fri, 27 Oct 2006 18:05:50 +0400 [thread overview]
Message-ID: <4542123E.4030309@sw.ru> (raw)
In-Reply-To: <24249.1161951081@redhat.com>
From: Vasily Averin <vvs@sw.ru>
Virtuozzo/OpenVZ linux kernel team has discovered that umount/remount can last
for hours looping in shrink_dcache_sb() without much successes. Since during
shrinking s_umount semaphore is taken lots of other unrelated operations like
sync can stop working until shrink finished.
It happens due to very long unused dentries list (>1 million of dentries),
so that it takes shrink_dcache_sb() longer then 1/HZ seconds to get to the
required dentry and after freeing this single dentry it reschedules and restarts.
The proposed fix prevents this issue by using per-sb dentry LRU list. It
provides very quickly search for the unused dentries for given super block thus
forcing shrinking always making good progress.
It was well tested on 2.6.9-based Virtuozzo/OpenVZ kernels, but the port to the
latest mainstream kernel is not tested.
Signed-off-by: Kirill Korotaev <dev@openvz.org>
Signed-off-by: Vasily Averin <vvs@sw.ru>
--- linux-2.6.19-rc3/fs/dcache.c.shrsb 2006-10-27 10:45:11.000000000 +0400
+++ linux-2.6.19-rc3/fs/dcache.c 2006-10-27 15:49:54.000000000 +0400
@@ -173,6 +173,7 @@ repeat:
if (list_empty(&dentry->d_lru)) {
dentry->d_flags |= DCACHE_REFERENCED;
list_add(&dentry->d_lru, &dentry_unused);
+ list_add(&dentry->d_sb_lru, &dentry->d_sb->s_dentry_unused);
dentry_stat.nr_unused++;
}
spin_unlock(&dentry->d_lock);
@@ -190,6 +191,7 @@ kill_it: {
*/
if (!list_empty(&dentry->d_lru)) {
list_del(&dentry->d_lru);
+ list_del(&dentry->d_sb_lru);
dentry_stat.nr_unused--;
}
list_del(&dentry->d_u.d_child);
@@ -270,6 +272,7 @@ static inline struct dentry * __dget_loc
if (!list_empty(&dentry->d_lru)) {
dentry_stat.nr_unused--;
list_del_init(&dentry->d_lru);
+ list_del_init(&dentry->d_sb_lru);
}
return dentry;
}
@@ -398,6 +401,10 @@ static void prune_one_dentry(struct dent
static void prune_dcache(int count, struct super_block *sb)
{
+ struct list_head *lru_head;
+
+ lru_head = sb ? &sb->s_dentry_unused : &dentry_unused;
+
spin_lock(&dcache_lock);
for (; count ; count--) {
struct dentry *dentry;
@@ -406,25 +413,16 @@ static void prune_dcache(int count, stru
cond_resched_lock(&dcache_lock);
- tmp = dentry_unused.prev;
- if (sb) {
- /* Try to find a dentry for this sb, but don't try
- * too hard, if they aren't near the tail they will
- * be moved down again soon
- */
- int skip = count;
- while (skip && tmp != &dentry_unused &&
- list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
- skip--;
- tmp = tmp->prev;
- }
- }
- if (tmp == &dentry_unused)
+ tmp = lru_head->prev;
+ if (tmp == lru_head)
break;
- list_del_init(tmp);
- prefetch(dentry_unused.prev);
+
+ prefetch(lru_head->prev);
dentry_stat.nr_unused--;
- dentry = list_entry(tmp, struct dentry, d_lru);
+ dentry = sb ? list_entry(tmp, struct dentry, d_sb_lru) :
+ list_entry(tmp, struct dentry, d_lru);
+ list_del_init(&dentry->d_lru);
+ list_del_init(&dentry->d_sb_lru);
spin_lock(&dentry->d_lock);
/*
@@ -440,6 +438,8 @@ static void prune_dcache(int count, stru
if (dentry->d_flags & DCACHE_REFERENCED) {
dentry->d_flags &= ~DCACHE_REFERENCED;
list_add(&dentry->d_lru, &dentry_unused);
+ list_add(&dentry->d_sb_lru,
+ &dentry->d_sb->s_dentry_unused);
dentry_stat.nr_unused++;
spin_unlock(&dentry->d_lock);
continue;
@@ -455,7 +455,8 @@ static void prune_dcache(int count, stru
* If this dentry is for "my" filesystem, then I can prune it
* without taking the s_umount lock (I already hold it).
*/
- if (sb && dentry->d_sb == sb) {
+ if (sb) {
+ BUG_ON(dentry->d_sb != sb);
prune_one_dentry(dentry);
continue;
}
@@ -480,6 +481,8 @@ static void prune_dcache(int count, stru
spin_unlock(&dentry->d_lock);
/* Inserting dentry to tail of the list leads to cycle */
list_add(&dentry->d_lru, &dentry_unused);
+ list_add(&dentry->d_sb_lru,
+ &dentry->d_sb->s_dentry_unused);
dentry_stat.nr_unused++;
}
spin_unlock(&dcache_lock);
@@ -509,31 +512,15 @@ static void prune_dcache(int count, stru
void shrink_dcache_sb(struct super_block * sb)
{
- struct list_head *tmp, *next;
- struct dentry *dentry;
-
- /*
- * Pass one ... move the dentries for the specified
- * superblock to the most recent end of the unused list.
- */
spin_lock(&dcache_lock);
- list_for_each_safe(tmp, next, &dentry_unused) {
- dentry = list_entry(tmp, struct dentry, d_lru);
- if (dentry->d_sb != sb)
- continue;
- list_move(tmp, &dentry_unused);
- }
+ while (!list_empty(&sb->s_dentry_unused)) {
+ struct dentry *dentry;
- /*
- * Pass two ... free the dentries for this superblock.
- */
-repeat:
- list_for_each_safe(tmp, next, &dentry_unused) {
- dentry = list_entry(tmp, struct dentry, d_lru);
- if (dentry->d_sb != sb)
- continue;
+ dentry = list_entry((&sb->s_dentry_unused)->next,
+ struct dentry, d_sb_lru);
dentry_stat.nr_unused--;
- list_del_init(tmp);
+ list_del_init(&dentry->d_lru);
+ list_del_init(&dentry->d_sb_lru);
spin_lock(&dentry->d_lock);
if (atomic_read(&dentry->d_count)) {
spin_unlock(&dentry->d_lock);
@@ -541,7 +528,6 @@ repeat:
}
prune_one_dentry(dentry);
cond_resched_lock(&dcache_lock);
- goto repeat;
}
spin_unlock(&dcache_lock);
}
@@ -563,6 +549,7 @@ static void shrink_dcache_for_umount_sub
if (!list_empty(&dentry->d_lru)) {
dentry_stat.nr_unused--;
list_del_init(&dentry->d_lru);
+ list_del_init(&dentry->d_sb_lru);
}
__d_drop(dentry);
spin_unlock(&dcache_lock);
@@ -580,6 +567,7 @@ static void shrink_dcache_for_umount_sub
if (!list_empty(&loop->d_lru)) {
dentry_stat.nr_unused--;
list_del_init(&loop->d_lru);
+ list_del_init(&loop->d_sb_lru);
}
__d_drop(loop);
@@ -766,6 +754,7 @@ resume:
if (!list_empty(&dentry->d_lru)) {
dentry_stat.nr_unused--;
list_del_init(&dentry->d_lru);
+ list_del_init(&dentry->d_sb_lru);
}
/*
* move only zero ref count dentries to the end
@@ -773,6 +762,8 @@ resume:
*/
if (!atomic_read(&dentry->d_count)) {
list_add_tail(&dentry->d_lru, &dentry_unused);
+ list_add_tail(&dentry->d_sb_lru,
+ &dentry->d_sb->s_dentry_unused);
dentry_stat.nr_unused++;
found++;
}
@@ -892,6 +883,7 @@ struct dentry *d_alloc(struct dentry * p
#endif
INIT_HLIST_NODE(&dentry->d_hash);
INIT_LIST_HEAD(&dentry->d_lru);
+ INIT_LIST_HEAD(&dentry->d_sb_lru);
INIT_LIST_HEAD(&dentry->d_subdirs);
INIT_LIST_HEAD(&dentry->d_alias);
--- linux-2.6.19-rc3/fs/super.c.shrsb 2006-10-24 10:29:13.000000000 +0400
+++ linux-2.6.19-rc3/fs/super.c 2006-10-27 15:36:33.000000000 +0400
@@ -69,6 +69,7 @@ static struct super_block *alloc_super(s
INIT_LIST_HEAD(&s->s_io);
INIT_LIST_HEAD(&s->s_files);
INIT_LIST_HEAD(&s->s_instances);
+ INIT_LIST_HEAD(&s->s_dentry_unused);
INIT_HLIST_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
init_rwsem(&s->s_umount);
--- linux-2.6.19-rc3/include/linux/dcache.h.shrsb 2006-10-24 10:29:14.000000000
+0400
+++ linux-2.6.19-rc3/include/linux/dcache.h 2006-10-27 15:36:33.000000000 +0400
@@ -94,6 +94,7 @@ struct dentry {
struct qstr d_name;
struct list_head d_lru; /* LRU list */
+ struct list_head d_sb_lru; /* per-sb LRU list */
/*
* d_child and d_rcu can share memory
*/
--- linux-2.6.19-rc3/include/linux/fs.h.shrsb 2006-10-24 10:29:14.000000000 +0400
+++ linux-2.6.19-rc3/include/linux/fs.h 2006-10-27 15:36:33.000000000 +0400
@@ -939,6 +939,7 @@ struct super_block {
struct list_head s_dirty; /* dirty inodes */
struct list_head s_io; /* parked for writeback */
struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
+ struct list_head s_dentry_unused;
struct list_head s_files;
struct block_device *s_bdev;
next prev parent reply other threads:[~2006-10-27 14:05 UTC|newest]
Thread overview: 44+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-10-25 12:30 [Q] missing unused dentry in prune_dcache()? Vasily Averin
2006-10-25 13:51 ` David Howells
2006-10-25 13:58 ` Vasily Averin
2006-10-25 14:23 ` David Howells
2006-10-26 11:36 ` Vasily Averin
2006-10-26 13:25 ` David Howells
2006-10-27 8:05 ` Vasily Averin
2006-10-27 10:42 ` David Howells
2006-10-27 11:50 ` Vasily Averin
2006-10-27 12:11 ` David Howells
2006-10-27 13:47 ` Vasily Averin
2006-10-27 14:29 ` David Howells
2006-10-27 14:39 ` Kirill Korotaev
2006-10-27 14:05 ` Vasily Averin [this message]
2006-10-27 18:06 ` [PATCH 2.6.19-rc3] VFS: per-sb dentry lru list Andrew Morton
2006-10-30 14:24 ` Vasily Averin
2006-10-30 15:08 ` Eric Dumazet
2006-10-30 15:34 ` Kirill Korotaev
2006-10-30 16:09 ` Eric Dumazet
2006-10-30 15:14 ` Kirill Korotaev
2006-10-30 4:24 ` David Chinner
2006-10-30 6:28 ` [Devel] " Kirill Korotaev
2006-10-31 4:38 ` Neil Brown
2006-10-31 10:40 ` David Howells
2006-11-01 6:32 ` Neil Brown
2006-11-01 13:32 ` Vasily Averin
2006-11-14 5:44 ` Neil Brown
2006-11-14 6:12 ` Vasily Averin
2006-10-31 13:08 ` Vasily Averin
2006-11-01 10:55 ` [Devel] " Kirill Korotaev
2006-11-14 4:51 ` Neil Brown
2006-11-14 9:29 ` David Howells
2006-10-27 13:42 ` [PATCH 2.6.19-rc3] VFS: missing unused dentry in prune_dcache() Vasily Averin
2006-10-27 14:24 ` David Howells
2006-10-26 11:49 ` [Q] missing unused dentry in prune_dcache()? Vasily Averin
2006-10-26 12:33 ` David Howells
2006-10-26 13:23 ` David Howells
2006-10-26 13:58 ` Vasily Averin
2006-10-26 14:07 ` David Howells
2006-10-27 6:32 ` Vasily Averin
2006-10-27 6:50 ` Vasily Averin
2006-10-27 9:36 ` David Howells
2006-10-31 13:24 ` [Q] missing ->d_delete() in shrink_dcache_for_umount()? Vasily Averin
2006-10-31 15:06 ` David Howells
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4542123E.4030309@sw.ru \
--to=vvs@sw.ru \
--cc=akpm@osdl.org \
--cc=balbir@in.ibm.com \
--cc=dev@openvz.org \
--cc=devel@openvz.org \
--cc=dhowells@redhat.com \
--cc=jblunck@suse.de \
--cc=linux-kernel@vger.kernel.org \
--cc=neilb@suse.de \
--cc=olh@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox