From: Jan Blunck <jblunck@suse.de>
To: David Howells <dhowells@redhat.com>
Cc: neilb@suse.de, balbir@in.ibm.com, akpm@osdl.org,
aviro@redhat.com, dev@openvz.org, olh@suse.de,
linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org
Subject: Re: [PATCH] Fix dcache race during umount
Date: Fri, 23 Jun 2006 15:28:04 +0200 [thread overview]
Message-ID: <20060623132804.GF11631@hasse.suse.de> (raw)
In-Reply-To: <15603.1150978967@warthog.cambridge.redhat.com>
[-- Attachment #1: Type: text/plain, Size: 545 bytes --]
On Thu, Jun 22, David Howells wrote:
> I'd like to propose an alternative to your patch to fix the dcache race
> between unmounting a filesystem and the memory shrinker.
>
> In my patch, generic_shutdown_super() is made to call shrink_dcache_sb()
> instead of shrink_dcache_anon(), and the latter function is discarded
> completely since it's no longer used.
I've updated my patches to Linus git repository from today. Additionally, I've
changed shrink_dcache_parent() to not use prune_dcache() anymore.
Comments are welcome.
Regards,
Jan
[-- Attachment #2: combined.diff --]
[-- Type: text/x-patch, Size: 10947 bytes --]
Index: linux-2.6/fs/dcache.c
===================================================================
--- linux-2.6.orig/fs/dcache.c
+++ linux-2.6/fs/dcache.c
@@ -387,101 +387,96 @@ static void prune_one_dentry(struct dent
* which are being unmounted.
*
* Shrink the dcache. This is done when we need
- * more memory, or simply when we need to unmount
- * something (at which point we need to unuse
- * all dentries).
+ * more memory. When we need to unmount something
+ * we call shrink_dcache_sb().
*
* This function may fail to free any resources if
* all the dentries are in use.
*/
-
-static void prune_dcache(int count, struct super_block *sb)
+static void prune_dcache(int count)
{
spin_lock(&dcache_lock);
for (; count ; count--) {
struct dentry *dentry;
struct list_head *tmp;
- struct rw_semaphore *s_umount;
cond_resched_lock(&dcache_lock);
tmp = dentry_unused.prev;
- if (unlikely(sb)) {
- /* Try to find a dentry for this sb, but don't try
- * too hard, if they aren't near the tail they will
- * be moved down again soon
- */
- int skip = count;
- while (skip && tmp != &dentry_unused &&
- list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
- skip--;
- tmp = tmp->prev;
- }
- }
if (tmp == &dentry_unused)
break;
- list_del_init(tmp);
- prefetch(dentry_unused.prev);
- dentry_stat.nr_unused--;
+ /*
+ * We want to prefetch the predecessor of our predecessor since
+ * our predecessor is already accessed in list_del_init().
+ */
+ prefetch(tmp->prev->prev);
dentry = list_entry(tmp, struct dentry, d_lru);
+ dentry_stat.nr_unused--;
+ list_del_init(&dentry->d_lru);
- spin_lock(&dentry->d_lock);
+ spin_lock(&dentry->d_lock);
/*
* We found an inuse dentry which was not removed from
- * dentry_unused because of laziness during lookup. Do not free
+ * dentry_unused because of laziness during lookup. Do not free
* it - just keep it off the dentry_unused list.
*/
- if (atomic_read(&dentry->d_count)) {
- spin_unlock(&dentry->d_lock);
- continue;
- }
- /* If the dentry was recently referenced, don't free it. */
- if (dentry->d_flags & DCACHE_REFERENCED) {
- dentry->d_flags &= ~DCACHE_REFERENCED;
- list_add(&dentry->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
- spin_unlock(&dentry->d_lock);
- continue;
- }
- /*
- * If the dentry is not DCACHED_REFERENCED, it is time
- * to remove it from the dcache, provided the super block is
- * NULL (which means we are trying to reclaim memory)
- * or this dentry belongs to the same super block that
- * we want to shrink.
- */
- /*
- * If this dentry is for "my" filesystem, then I can prune it
- * without taking the s_umount lock (I already hold it).
- */
- if (sb && dentry->d_sb == sb) {
- prune_one_dentry(dentry);
+ if (atomic_read(&dentry->d_count)) {
+ spin_unlock(&dentry->d_lock);
continue;
}
/*
- * ...otherwise we need to be sure this filesystem isn't being
- * unmounted, otherwise we could race with
- * generic_shutdown_super(), and end up holding a reference to
- * an inode while the filesystem is unmounted.
- * So we try to get s_umount, and make sure s_root isn't NULL.
- * (Take a local copy of s_umount to avoid a use-after-free of
- * `dentry').
+ * If the dentry was recently referenced, or the dentry's
+ * filesystem is going to be unmounted, don't free it.
*/
- s_umount = &dentry->d_sb->s_umount;
- if (down_read_trylock(s_umount)) {
- if (dentry->d_sb->s_root != NULL) {
+ if (!(dentry->d_flags & DCACHE_REFERENCED) &&
+ likely(down_read_trylock(&dentry->d_sb->s_umount))) {
+ struct super_block *sb = dentry->d_sb;
+
+ if (dentry->d_sb->s_root) {
prune_one_dentry(dentry);
- up_read(s_umount);
+ up_read(&sb->s_umount);
continue;
}
- up_read(s_umount);
+ up_read(&sb->s_umount);
}
- spin_unlock(&dentry->d_lock);
- /* Cannot remove the first dentry, and it isn't appropriate
- * to move it to the head of the list, so give up, and try
- * later
+ /*
+ * Either the dentry was recently referenced or its filesystem
+ * is going to be unmounted. Add the dentry to the beginning of
+ * the LRU list so shrink_dcache_sb() can find it faster.
*/
- break;
+ dentry->d_flags &= ~DCACHE_REFERENCED;
+ list_add(&dentry->d_lru, &dentry_unused);
+ dentry_stat.nr_unused++;
+ spin_unlock(&dentry->d_lock);
+ }
+ spin_unlock(&dcache_lock);
+}
+
+/*
+ * __shrink_dcache_sb - Shrink the dentries for a superblock
+ * @sb: superblock
+ * @list: list of dentries
+ *
+ * Prune a list of dentries of a superblock.
+ */
+static void __shrink_dcache_sb(struct super_block *sb, struct list_head *list)
+{
+ struct dentry *dentry, *pos;
+
+ spin_lock(&dcache_lock);
+repeat:
+ list_for_each_entry_safe(dentry, pos, list, d_lru) {
+ BUG_ON(dentry->d_sb != sb);
+ dentry_stat.nr_unused--;
+ list_del_init(&dentry->d_lru);
+ spin_lock(&dentry->d_lock);
+ if (atomic_read(&dentry->d_count)) {
+ spin_unlock(&dentry->d_lock);
+ continue;
+ }
+ prune_one_dentry(dentry);
+ cond_resched_lock(&dcache_lock);
+ goto repeat;
}
spin_unlock(&dcache_lock);
}
@@ -505,47 +500,31 @@ static void prune_dcache(int count, stru
*
* Shrink the dcache for the specified super block. This
* is used to free the dcache before unmounting a file
- * system
+ * system.
+ *
+ * This must be called with sb->s_umount locked.
*/
-
void shrink_dcache_sb(struct super_block * sb)
{
- struct list_head *tmp, *next;
- struct dentry *dentry;
+ struct dentry *dentry, *pos;
+ LIST_HEAD(list);
/*
* Pass one ... move the dentries for the specified
- * superblock to the most recent end of the unused list.
+ * superblock to the temporarily list.
*/
spin_lock(&dcache_lock);
- list_for_each_safe(tmp, next, &dentry_unused) {
- dentry = list_entry(tmp, struct dentry, d_lru);
+ list_for_each_entry_safe(dentry, pos, &dentry_unused, d_lru) {
if (dentry->d_sb != sb)
continue;
- list_del(tmp);
- list_add(tmp, &dentry_unused);
+ list_move(&dentry->d_lru, &list);
}
+ spin_unlock(&dcache_lock);
/*
* Pass two ... free the dentries for this superblock.
*/
-repeat:
- list_for_each_safe(tmp, next, &dentry_unused) {
- dentry = list_entry(tmp, struct dentry, d_lru);
- if (dentry->d_sb != sb)
- continue;
- dentry_stat.nr_unused--;
- list_del_init(tmp);
- spin_lock(&dentry->d_lock);
- if (atomic_read(&dentry->d_count)) {
- spin_unlock(&dentry->d_lock);
- continue;
- }
- prune_one_dentry(dentry);
- cond_resched_lock(&dcache_lock);
- goto repeat;
- }
- spin_unlock(&dcache_lock);
+ __shrink_dcache_sb(sb, &list);
}
/*
@@ -614,7 +593,7 @@ positive:
* drop the lock and return early due to latency
* constraints.
*/
-static int select_parent(struct dentry * parent)
+static int select_parent(struct dentry * parent, struct list_head *list)
{
struct dentry *this_parent = parent;
struct list_head *next;
@@ -638,7 +617,7 @@ resume:
* of the unused list for prune_dcache
*/
if (!atomic_read(&dentry->d_count)) {
- list_add(&dentry->d_lru, dentry_unused.prev);
+ list_add(&dentry->d_lru, list);
dentry_stat.nr_unused++;
found++;
}
@@ -681,50 +660,11 @@ out:
void shrink_dcache_parent(struct dentry * parent)
{
+ LIST_HEAD(list);
int found;
- while ((found = select_parent(parent)) != 0)
- prune_dcache(found, parent->d_sb);
-}
-
-/**
- * shrink_dcache_anon - further prune the cache
- * @head: head of d_hash list of dentries to prune
- *
- * Prune the dentries that are anonymous
- *
- * parsing d_hash list does not hlist_for_each_entry_rcu() as it
- * done under dcache_lock.
- *
- */
-void shrink_dcache_anon(struct super_block *sb)
-{
- struct hlist_node *lp;
- struct hlist_head *head = &sb->s_anon;
- int found;
- do {
- found = 0;
- spin_lock(&dcache_lock);
- hlist_for_each(lp, head) {
- struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
- if (!list_empty(&this->d_lru)) {
- dentry_stat.nr_unused--;
- list_del_init(&this->d_lru);
- }
-
- /*
- * move only zero ref count dentries to the end
- * of the unused list for prune_dcache
- */
- if (!atomic_read(&this->d_count)) {
- list_add_tail(&this->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
- found++;
- }
- }
- spin_unlock(&dcache_lock);
- prune_dcache(found, sb);
- } while(found);
+ while ((found = select_parent(parent, &list)) != 0)
+ __shrink_dcache_sb(parent->d_sb, &list);
}
/*
@@ -744,7 +684,7 @@ static int shrink_dcache_memory(int nr,
if (nr) {
if (!(gfp_mask & __GFP_FS))
return -1;
- prune_dcache(nr, NULL);
+ prune_dcache(nr);
}
return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
}
@@ -1659,19 +1599,38 @@ repeat:
resume:
while (next != &this_parent->d_subdirs) {
struct list_head *tmp = next;
- struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
+ struct dentry *dentry = list_entry(tmp, struct dentry,
+ d_u.d_child);
next = tmp->next;
+
if (d_unhashed(dentry)||!dentry->d_inode)
continue;
+
+ if (!list_empty(&dentry->d_lru)) {
+ dentry_stat.nr_unused--;
+ list_del_init(&dentry->d_lru);
+ }
+ /*
+ * We can lower the reference count here:
+ * - if the refcount is zero afterwards, the dentry hasn't got
+ * any children
+ * - if the recount isn't zero afterwards, we visit the
+ * chrildren next
+ * - because we always hold the dcache lock, nobody else can
+ * kill the unused dentries yet
+ */
+ if (atomic_dec_and_test(&dentry->d_count)) {
+ list_add_tail(&dentry->d_lru, &dentry_unused);
+ dentry_stat.nr_unused++;
+ }
+
if (!list_empty(&dentry->d_subdirs)) {
this_parent = dentry;
goto repeat;
}
- atomic_dec(&dentry->d_count);
}
if (this_parent != root) {
next = this_parent->d_u.d_child.next;
- atomic_dec(&this_parent->d_count);
this_parent = this_parent->d_parent;
goto resume;
}
Index: linux-2.6/fs/super.c
===================================================================
--- linux-2.6.orig/fs/super.c
+++ linux-2.6/fs/super.c
@@ -230,8 +230,7 @@ void generic_shutdown_super(struct super
if (root) {
sb->s_root = NULL;
- shrink_dcache_parent(root);
- shrink_dcache_anon(sb);
+ shrink_dcache_sb(sb);
dput(root);
fsync_super(sb);
lock_super(sb);
Index: linux-2.6/include/linux/dcache.h
===================================================================
--- linux-2.6.orig/include/linux/dcache.h
+++ linux-2.6/include/linux/dcache.h
@@ -217,7 +217,6 @@ extern struct dentry * d_alloc_anon(stru
extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
extern void shrink_dcache_sb(struct super_block *);
extern void shrink_dcache_parent(struct dentry *);
-extern void shrink_dcache_anon(struct super_block *);
extern int d_invalidate(struct dentry *);
/* only used at mount-time */
next prev parent reply other threads:[~2006-06-23 13:28 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-06-22 12:22 [PATCH] Fix dcache race during umount David Howells
2006-06-22 14:27 ` David Howells
2006-06-22 16:08 ` Jan Blunck
2006-06-22 16:44 ` Jan Blunck
2006-06-23 13:28 ` Jan Blunck [this message]
2006-06-24 5:23 ` Neil Brown
2006-06-24 9:16 ` David Howells
2006-06-24 13:33 ` [PATCH] Destroy the dentries contributed by a superblock on unmounting David Howells
2006-06-25 6:48 ` Neil Brown
2006-06-25 16:02 ` David Howells
2006-06-25 16:30 ` Nick Piggin
2006-06-26 6:05 ` Neil Brown
2006-06-26 11:21 ` David Howells
2006-06-27 0:53 ` Neil Brown
2006-06-27 10:17 ` David Howells
2006-06-27 22:55 ` Andrew Morton
2006-06-27 23:18 ` David Howells
[not found] <20060404110351.27364.patches@notabene>
2006-04-04 1:05 ` [PATCH] Fix dcache race during umount NeilBrown
2006-04-04 5:11 ` Balbir Singh
[not found] <20060403133804.27986.patches@notabene>
2006-04-03 3:40 ` NeilBrown
2006-04-03 18:12 ` Balbir Singh
2006-04-04 0:59 ` Neil Brown
2006-04-04 5:02 ` Balbir Singh
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20060623132804.GF11631@hasse.suse.de \
--to=jblunck@suse.de \
--cc=akpm@osdl.org \
--cc=aviro@redhat.com \
--cc=balbir@in.ibm.com \
--cc=dev@openvz.org \
--cc=dhowells@redhat.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=neilb@suse.de \
--cc=olh@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox