* [patch 1/5] vfs: remove whitespace noise from fs/dcache.c
2006-06-01 9:51 [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version) jblunck
@ 2006-06-01 9:51 ` jblunck
2006-06-01 9:51 ` [patch 2/5] vfs: d_genocide() doesnt add dentries to unused list jblunck
` (5 subsequent siblings)
6 siblings, 0 replies; 37+ messages in thread
From: jblunck @ 2006-06-01 9:51 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, akpm, viro, dgc, balbir
[-- Attachment #1: patches.jbl/vfs-whitespace-cleanup.diff --]
[-- Type: text/plain, Size: 11850 bytes --]
Remove some whitespace noise from fs/dcache.c.
Signed-off-by: Jan Blunck <jblunck@suse.de>
---
fs/dcache.c | 128 ++++++++++++++++++++++++++++++------------------------------
1 file changed, 64 insertions(+), 64 deletions(-)
Index: work-2.6/fs/dcache.c
===================================================================
--- work-2.6.orig/fs/dcache.c
+++ work-2.6/fs/dcache.c
@@ -74,7 +74,7 @@ static void d_callback(struct rcu_head *
if (dname_external(dentry))
kfree(dentry->d_name.name);
- kmem_cache_free(dentry_cache, dentry);
+ kmem_cache_free(dentry_cache, dentry);
}
/*
@@ -85,7 +85,7 @@ static void d_free(struct dentry *dentry
{
if (dentry->d_op && dentry->d_op->d_release)
dentry->d_op->d_release(dentry);
- call_rcu(&dentry->d_u.d_rcu, d_callback);
+ call_rcu(&dentry->d_u.d_rcu, d_callback);
}
/*
@@ -113,7 +113,7 @@ static void dentry_iput(struct dentry *
}
}
-/*
+/*
* This is dput
*
* This is complicated by the fact that we do not want to put
@@ -132,7 +132,7 @@ static void dentry_iput(struct dentry *
/*
* dput - release a dentry
- * @dentry: dentry to release
+ * @dentry: dentry to release
*
* Release a dentry. This will drop the usage count and if appropriate
* call the dentry unlink method as well as removing it from the queues and
@@ -168,14 +168,14 @@ repeat:
goto unhash_it;
}
/* Unreachable? Get rid of it */
- if (d_unhashed(dentry))
+ if (d_unhashed(dentry))
goto kill_it;
- if (list_empty(&dentry->d_lru)) {
- dentry->d_flags |= DCACHE_REFERENCED;
- list_add(&dentry->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
- }
- spin_unlock(&dentry->d_lock);
+ if (list_empty(&dentry->d_lru)) {
+ dentry->d_flags |= DCACHE_REFERENCED;
+ list_add(&dentry->d_lru, &dentry_unused);
+ dentry_stat.nr_unused++;
+ }
+ spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
return;
@@ -188,11 +188,11 @@ kill_it: {
/* If dentry was on d_lru list
* delete it from there
*/
- if (!list_empty(&dentry->d_lru)) {
- list_del(&dentry->d_lru);
- dentry_stat.nr_unused--;
- }
- list_del(&dentry->d_u.d_child);
+ if (!list_empty(&dentry->d_lru)) {
+ list_del(&dentry->d_lru);
+ dentry_stat.nr_unused--;
+ }
+ list_del(&dentry->d_u.d_child);
dentry_stat.nr_dentry--; /* For d_free, below */
/*drops the locks, at that point nobody can reach this dentry */
dentry_iput(dentry);
@@ -216,7 +216,7 @@ kill_it: {
*
* no dcache lock.
*/
-
+
int d_invalidate(struct dentry * dentry)
{
/*
@@ -308,7 +308,7 @@ static struct dentry * __d_find_alias(st
next = tmp->next;
prefetch(next);
alias = list_entry(tmp, struct dentry, d_alias);
- if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
+ if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
if (alias->d_flags & DCACHE_DISCONNECTED)
discon_alias = alias;
else if (!want_discon) {
@@ -391,7 +391,7 @@ static inline void prune_one_dentry(stru
* This function may fail to free any resources if
* all the dentries are in use.
*/
-
+
static void prune_dcache(int count)
{
spin_lock(&dcache_lock);
@@ -406,25 +406,25 @@ static void prune_dcache(int count)
break;
list_del_init(tmp);
prefetch(dentry_unused.prev);
- dentry_stat.nr_unused--;
+ dentry_stat.nr_unused--;
dentry = list_entry(tmp, struct dentry, d_lru);
- spin_lock(&dentry->d_lock);
+ spin_lock(&dentry->d_lock);
/*
* We found an inuse dentry which was not removed from
* dentry_unused because of laziness during lookup. Do not free
* it - just keep it off the dentry_unused list.
*/
- if (atomic_read(&dentry->d_count)) {
- spin_unlock(&dentry->d_lock);
+ if (atomic_read(&dentry->d_count)) {
+ spin_unlock(&dentry->d_lock);
continue;
}
/* If the dentry was recently referenced, don't free it. */
if (dentry->d_flags & DCACHE_REFERENCED) {
dentry->d_flags &= ~DCACHE_REFERENCED;
- list_add(&dentry->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
- spin_unlock(&dentry->d_lock);
+ list_add(&dentry->d_lru, &dentry_unused);
+ dentry_stat.nr_unused++;
+ spin_unlock(&dentry->d_lock);
continue;
}
prune_one_dentry(dentry);
@@ -499,7 +499,7 @@ repeat:
* We descend to the next level whenever the d_subdirs
* list is non-empty and continue searching.
*/
-
+
/**
* have_submounts - check for mounts over a dentry
* @parent: dentry to check.
@@ -507,7 +507,7 @@ repeat:
* Return true if the parent or its subdirectories contain
* a mount point
*/
-
+
int have_submounts(struct dentry *parent)
{
struct dentry *this_parent = parent;
@@ -579,8 +579,8 @@ resume:
dentry_stat.nr_unused--;
list_del_init(&dentry->d_lru);
}
- /*
- * move only zero ref count dentries to the end
+ /*
+ * move only zero ref count dentries to the end
* of the unused list for prune_dcache
*/
if (!atomic_read(&dentry->d_count)) {
@@ -624,7 +624,7 @@ out:
*
* Prune the dcache to remove unused children of the parent dentry.
*/
-
+
void shrink_dcache_parent(struct dentry * parent)
{
int found;
@@ -657,8 +657,8 @@ void shrink_dcache_anon(struct hlist_hea
list_del_init(&this->d_lru);
}
- /*
- * move only zero ref count dentries to the end
+ /*
+ * move only zero ref count dentries to the end
* of the unused list for prune_dcache
*/
if (!atomic_read(&this->d_count)) {
@@ -703,25 +703,25 @@ static int shrink_dcache_memory(int nr,
* available. On a success the dentry is returned. The name passed in is
* copied and the copy passed in may be reused after this call.
*/
-
+
struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
{
struct dentry *dentry;
char *dname;
- dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
+ dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
if (!dentry)
return NULL;
if (name->len > DNAME_INLINE_LEN-1) {
dname = kmalloc(name->len + 1, GFP_KERNEL);
if (!dname) {
- kmem_cache_free(dentry_cache, dentry);
+ kmem_cache_free(dentry_cache, dentry);
return NULL;
}
} else {
dname = dentry->d_iname;
- }
+ }
dentry->d_name.name = dname;
dentry->d_name.len = name->len;
@@ -786,7 +786,7 @@ struct dentry *d_alloc_name(struct dentr
* (or otherwise set) by the caller to indicate that it is now
* in use by the dcache.
*/
-
+
void d_instantiate(struct dentry *entry, struct inode * inode)
{
BUG_ON(!list_empty(&entry->d_alias));
@@ -861,7 +861,7 @@ EXPORT_SYMBOL(d_instantiate_unique);
* instantiated and returned. %NULL is returned if there is insufficient
* memory or the inode passed is %NULL.
*/
-
+
struct dentry * d_alloc_root(struct inode * root_inode)
{
struct dentry *res = NULL;
@@ -892,7 +892,7 @@ static inline struct hlist_head *d_hash(
* @inode: inode to allocate the dentry for
*
* This is similar to d_alloc_root. It is used by filesystems when
- * creating a dentry for a given inode, often in the process of
+ * creating a dentry for a given inode, often in the process of
* mapping a filehandle to a dentry. The returned dentry may be
* anonymous, or may have a full name (if the inode was already
* in the cache). The file system may need to make further
@@ -923,7 +923,7 @@ struct dentry * d_alloc_anon(struct inod
return NULL;
tmp->d_parent = tmp; /* make sure dput doesn't croak */
-
+
spin_lock(&dcache_lock);
res = __d_find_alias(inode, 0);
if (!res) {
@@ -1009,7 +1009,7 @@ struct dentry *d_splice_alias(struct ino
* finished using it. %NULL is returned on failure.
*
* __d_lookup is dcache_lock free. The hash list is protected using RCU.
- * Memory barriers are used while updating and doing lockless traversal.
+ * Memory barriers are used while updating and doing lockless traversal.
* To avoid races with d_move while rename is happening, d_lock is used.
*
* Overflows in memcmp(), while d_move, are avoided by keeping the length
@@ -1032,10 +1032,10 @@ struct dentry * d_lookup(struct dentry *
struct dentry * dentry = NULL;
unsigned long seq;
- do {
- seq = read_seqbegin(&rename_lock);
- dentry = __d_lookup(parent, name);
- if (dentry)
+ do {
+ seq = read_seqbegin(&rename_lock);
+ dentry = __d_lookup(parent, name);
+ if (dentry)
break;
} while (read_seqretry(&rename_lock, seq));
return dentry;
@@ -1052,7 +1052,7 @@ struct dentry * __d_lookup(struct dentry
struct dentry *dentry;
rcu_read_lock();
-
+
hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
struct qstr *qstr;
@@ -1094,10 +1094,10 @@ struct dentry * __d_lookup(struct dentry
break;
next:
spin_unlock(&dentry->d_lock);
- }
- rcu_read_unlock();
+ }
+ rcu_read_unlock();
- return found;
+ return found;
}
/**
@@ -1137,7 +1137,7 @@ out:
* This is used by ncpfs in its readdir implementation.
* Zero is returned in the dentry is invalid.
*/
-
+
int d_validate(struct dentry *dentry, struct dentry *dparent)
{
struct hlist_head *base;
@@ -1152,7 +1152,7 @@ int d_validate(struct dentry *dentry, st
spin_lock(&dcache_lock);
base = d_hash(dparent, dentry->d_name.hash);
- hlist_for_each(lhp,base) {
+ hlist_for_each(lhp,base) {
/* hlist_for_each_entry_rcu() not required for d_hash list
* as it is parsed under dcache_lock
*/
@@ -1179,7 +1179,7 @@ out:
* it from the hash queues and waiting for
* it to be deleted later when it has no users
*/
-
+
/**
* d_delete - delete a dentry
* @dentry: The dentry to delete
@@ -1187,7 +1187,7 @@ out:
* Turn the dentry into a negative dentry if possible, otherwise
* remove it from the hash queues so it can be deleted later
*/
-
+
void d_delete(struct dentry * dentry)
{
int isdir = 0;
@@ -1218,8 +1218,8 @@ void d_delete(struct dentry * dentry)
static void __d_rehash(struct dentry * entry, struct hlist_head *list)
{
- entry->d_flags &= ~DCACHE_UNHASHED;
- hlist_add_head_rcu(&entry->d_hash, list);
+ entry->d_flags &= ~DCACHE_UNHASHED;
+ hlist_add_head_rcu(&entry->d_hash, list);
}
/**
@@ -1228,7 +1228,7 @@ static void __d_rehash(struct dentry * e
*
* Adds a dentry to the hash according to its name.
*/
-
+
void d_rehash(struct dentry * entry)
{
struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash);
@@ -1302,7 +1302,7 @@ static void switch_names(struct dentry *
* up under the name it got deleted rather than the name that
* deleted it.
*/
-
+
/**
* d_move - move a dentry
* @dentry: entry to move
@@ -1560,7 +1560,7 @@ out:
* Returns 0 otherwise.
* Caller must ensure that "new_dentry" is pinned before calling is_subdir()
*/
-
+
int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry)
{
int result;
@@ -1571,7 +1571,7 @@ int is_subdir(struct dentry * new_dentry
* d_move
*/
rcu_read_lock();
- do {
+ do {
/* for restarting inner loop in case of seq retry */
new_dentry = saved;
result = 0;
@@ -1636,7 +1636,7 @@ resume:
* filesystems using synthetic inode numbers, and is necessary
* to keep getcwd() working.
*/
-
+
ino_t find_inode_number(struct dentry *dir, struct qstr *name)
{
struct dentry * dentry;
@@ -1689,10 +1689,10 @@ static void __init dcache_init(unsigned
{
int loop;
- /*
+ /*
* A constructor could be added for stable state like the lists,
* but it is probably not worth it because of the cache nature
- * of the dcache.
+ * of the dcache.
*/
dentry_cache = kmem_cache_create("dentry_cache",
sizeof(struct dentry),
@@ -1700,7 +1700,7 @@ static void __init dcache_init(unsigned
(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
SLAB_MEM_SPREAD),
NULL, NULL);
-
+
set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);
/* Hash may have been set up in dcache_init_early */
^ permalink raw reply [flat|nested] 37+ messages in thread* [patch 2/5] vfs: d_genocide() doesnt add dentries to unused list
2006-06-01 9:51 [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version) jblunck
2006-06-01 9:51 ` [patch 1/5] vfs: remove whitespace noise from fs/dcache.c jblunck
@ 2006-06-01 9:51 ` jblunck
2006-06-01 9:51 ` [patch 3/5] vfs: remove shrink_dcache_anon() jblunck
` (4 subsequent siblings)
6 siblings, 0 replies; 37+ messages in thread
From: jblunck @ 2006-06-01 9:51 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, akpm, viro, dgc, balbir
[-- Attachment #1: patches.jbl/vfs-d_genocide-fix.diff --]
[-- Type: text/plain, Size: 1189 bytes --]
Calling d_genocide() only lowers the reference count of the dentries but
doesn't add them to the unused list.
Signed-off-by: Jan Blunck <jblunck@suse.de>
---
fs/dcache.c | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
Index: work-2.6/fs/dcache.c
===================================================================
--- work-2.6.orig/fs/dcache.c
+++ work-2.6/fs/dcache.c
@@ -1612,11 +1612,25 @@ resume:
this_parent = dentry;
goto repeat;
}
- atomic_dec(&dentry->d_count);
+ if (!list_empty(&dentry->d_lru)) {
+ dentry_stat.nr_unused--;
+ list_del_init(&dentry->d_lru);
+ }
+ if (atomic_dec_and_test(&dentry->d_count)) {
+ list_add(&dentry->d_lru, dentry_unused.prev);
+ dentry_stat.nr_unused++;
+ }
}
if (this_parent != root) {
next = this_parent->d_u.d_child.next;
- atomic_dec(&this_parent->d_count);
+ if (!list_empty(&this_parent->d_lru)) {
+ dentry_stat.nr_unused--;
+ list_del_init(&this_parent->d_lru);
+ }
+ if (atomic_dec_and_test(&this_parent->d_count)) {
+ list_add(&this_parent->d_lru, dentry_unused.prev);
+ dentry_stat.nr_unused++;
+ }
this_parent = this_parent->d_parent;
goto resume;
}
^ permalink raw reply [flat|nested] 37+ messages in thread* [patch 3/5] vfs: remove shrink_dcache_anon()
2006-06-01 9:51 [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version) jblunck
2006-06-01 9:51 ` [patch 1/5] vfs: remove whitespace noise from fs/dcache.c jblunck
2006-06-01 9:51 ` [patch 2/5] vfs: d_genocide() doesnt add dentries to unused list jblunck
@ 2006-06-01 9:51 ` jblunck
2006-06-01 9:51 ` [patch 4/5] vfs: per superblock dentry stats jblunck
` (3 subsequent siblings)
6 siblings, 0 replies; 37+ messages in thread
From: jblunck @ 2006-06-01 9:51 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, akpm, viro, dgc, balbir
[-- Attachment #1: patches.jbl/vfs-remove-shrink_dcache_anon.diff --]
[-- Type: text/plain, Size: 5279 bytes --]
After shrink_dcache_anon() is unexported, there is only one caller
(generic_shutdown_super()). Instead of calling shrink_dcache_anon() and
shrink_dcache_parent(), it is better to call shrink_dcache_sb(). Therefore
shrink_dcache_sb() is extended to also reorder the anonymous dentries in
the unused list. This way we get rid of all unused dentries the first time we
touch them, instead of checking for DCACHE_REFERENCED and adding them back
to the list again.
Signed-off-by: Jan Blunck <jblunck@suse.de>
---
fs/dcache.c | 100 ++++++++++++++++++++++++-------------------------
fs/super.c | 3 -
include/linux/dcache.h | 1
3 files changed, 51 insertions(+), 53 deletions(-)
Index: work-2.6/fs/dcache.c
===================================================================
--- work-2.6.orig/fs/dcache.c
+++ work-2.6/fs/dcache.c
@@ -384,9 +384,8 @@ static inline void prune_one_dentry(stru
* @count: number of entries to try and free
*
* Shrink the dcache. This is done when we need
- * more memory, or simply when we need to unmount
- * something (at which point we need to unuse
- * all dentries).
+ * more memory. When we need to unmount something
+ * we call shrink_dcache_sb().
*
* This function may fail to free any resources if
* all the dentries are in use.
@@ -432,6 +431,51 @@ static void prune_dcache(int count)
spin_unlock(&dcache_lock);
}
+
+/*
+ * parsing d_hash list does not hlist_for_each_entry_rcu() as it
+ * done under dcache_lock.
+ */
+static void select_anon(struct super_block *sb)
+{
+ struct dentry *dentry;
+ struct hlist_node *lp;
+
+ spin_lock(&dcache_lock);
+ hlist_for_each_entry(dentry, lp, &sb->s_anon, d_hash) {
+ if (!list_empty(&dentry->d_lru)) {
+ dentry_stat.nr_unused--;
+ list_del_init(&dentry->d_lru);
+ }
+
+ /*
+ * move only zero ref count dentries to the beginning
+ * (the most recent end) of the unused list
+ */
+ spin_lock(&dentry->d_lock);
+ if (!atomic_read(&dentry->d_count)) {
+ list_add(&dentry->d_lru, &dentry_unused);
+ dentry_stat.nr_unused++;
+ }
+ spin_unlock(&dentry->d_lock);
+ }
+ spin_unlock(&dcache_lock);
+}
+
+static void select_sb(struct super_block *sb)
+{
+ struct dentry *dentry, *pos;
+
+ spin_lock(&dcache_lock);
+ list_for_each_entry_safe(dentry, pos, &dentry_unused, d_lru) {
+ if (dentry->d_sb != sb)
+ continue;
+ list_del(&dentry->d_lru);
+ list_add(&dentry->d_lru, &dentry_unused);
+ }
+ spin_unlock(&dcache_lock);
+}
+
/*
* Shrink the dcache for the specified super block.
* This allows us to unmount a device without disturbing
@@ -463,18 +507,13 @@ void shrink_dcache_sb(struct super_block
* Pass one ... move the dentries for the specified
* superblock to the most recent end of the unused list.
*/
- spin_lock(&dcache_lock);
- list_for_each_safe(tmp, next, &dentry_unused) {
- dentry = list_entry(tmp, struct dentry, d_lru);
- if (dentry->d_sb != sb)
- continue;
- list_del(tmp);
- list_add(tmp, &dentry_unused);
- }
+ select_anon(sb);
+ select_sb(sb);
/*
* Pass two ... free the dentries for this superblock.
*/
+ spin_lock(&dcache_lock);
repeat:
list_for_each_safe(tmp, next, &dentry_unused) {
dentry = list_entry(tmp, struct dentry, d_lru);
@@ -633,45 +672,6 @@ void shrink_dcache_parent(struct dentry
prune_dcache(found);
}
-/**
- * shrink_dcache_anon - further prune the cache
- * @head: head of d_hash list of dentries to prune
- *
- * Prune the dentries that are anonymous
- *
- * parsing d_hash list does not hlist_for_each_entry_rcu() as it
- * done under dcache_lock.
- *
- */
-void shrink_dcache_anon(struct hlist_head *head)
-{
- struct hlist_node *lp;
- int found;
- do {
- found = 0;
- spin_lock(&dcache_lock);
- hlist_for_each(lp, head) {
- struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
- if (!list_empty(&this->d_lru)) {
- dentry_stat.nr_unused--;
- list_del_init(&this->d_lru);
- }
-
- /*
- * move only zero ref count dentries to the end
- * of the unused list for prune_dcache
- */
- if (!atomic_read(&this->d_count)) {
- list_add_tail(&this->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
- found++;
- }
- }
- spin_unlock(&dcache_lock);
- prune_dcache(found);
- } while(found);
-}
-
/*
* Scan `nr' dentries and return the number which remain.
*
Index: work-2.6/fs/super.c
===================================================================
--- work-2.6.orig/fs/super.c
+++ work-2.6/fs/super.c
@@ -230,8 +230,7 @@ void generic_shutdown_super(struct super
if (root) {
sb->s_root = NULL;
- shrink_dcache_parent(root);
- shrink_dcache_anon(&sb->s_anon);
+ shrink_dcache_sb(sb);
dput(root);
fsync_super(sb);
lock_super(sb);
Index: work-2.6/include/linux/dcache.h
===================================================================
--- work-2.6.orig/include/linux/dcache.h
+++ work-2.6/include/linux/dcache.h
@@ -217,7 +217,6 @@ extern struct dentry * d_alloc_anon(stru
extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
extern void shrink_dcache_sb(struct super_block *);
extern void shrink_dcache_parent(struct dentry *);
-extern void shrink_dcache_anon(struct hlist_head *);
extern int d_invalidate(struct dentry *);
/* only used at mount-time */
^ permalink raw reply [flat|nested] 37+ messages in thread* [patch 4/5] vfs: per superblock dentry stats
2006-06-01 9:51 [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version) jblunck
` (2 preceding siblings ...)
2006-06-01 9:51 ` [patch 3/5] vfs: remove shrink_dcache_anon() jblunck
@ 2006-06-01 9:51 ` jblunck
2006-06-01 9:51 ` [patch 5/5] vfs: per superblock dentry unused list jblunck
` (2 subsequent siblings)
6 siblings, 0 replies; 37+ messages in thread
From: jblunck @ 2006-06-01 9:51 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, akpm, viro, dgc, balbir
[-- Attachment #1: patches.jbl/vfs-per-sb-dentry_stat.diff --]
[-- Type: text/plain, Size: 8278 bytes --]
This patch adds per-superblock dentry statistics tracking both the number of
unused dentries and the absolute number of dentries.
Signed-off-by: Jan Blunck <jblunck@suse.de>
---
fs/dcache.c | 47 ++++++++++++++++++++++++++---------------------
fs/super.c | 1 +
include/linux/dcache.h | 21 +++++++++++++++++----
include/linux/fs.h | 1 +
kernel/sysctl.c | 2 +-
5 files changed, 46 insertions(+), 26 deletions(-)
Index: work-2.6/fs/dcache.c
===================================================================
--- work-2.6.orig/fs/dcache.c
+++ work-2.6/fs/dcache.c
@@ -64,7 +64,7 @@ static struct hlist_head *dentry_hashtab
static LIST_HEAD(dentry_unused);
/* Statistics gathering. */
-struct dentry_stat_t dentry_stat = {
+struct dentry_stat global_dentry_stat = {
.age_limit = 45,
};
@@ -173,7 +173,7 @@ repeat:
if (list_empty(&dentry->d_lru)) {
dentry->d_flags |= DCACHE_REFERENCED;
list_add(&dentry->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
+ dentry_stat_inc(dentry->d_sb, nr_unused);
}
spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
@@ -190,11 +190,13 @@ kill_it: {
*/
if (!list_empty(&dentry->d_lru)) {
list_del(&dentry->d_lru);
- dentry_stat.nr_unused--;
+ dentry_stat_dec(dentry->d_sb, nr_unused);
}
list_del(&dentry->d_u.d_child);
- dentry_stat.nr_dentry--; /* For d_free, below */
- /*drops the locks, at that point nobody can reach this dentry */
+ /* For d_free, below */
+ dentry_stat_dec(dentry->d_sb, nr_dentry);
+ /* drops the locks, at that point nobody can reach this
+ * dentry anymore */
dentry_iput(dentry);
parent = dentry->d_parent;
d_free(dentry);
@@ -268,7 +270,7 @@ static inline struct dentry * __dget_loc
{
atomic_inc(&dentry->d_count);
if (!list_empty(&dentry->d_lru)) {
- dentry_stat.nr_unused--;
+ dentry_stat_dec(dentry->d_sb, nr_unused);
list_del_init(&dentry->d_lru);
}
return dentry;
@@ -370,7 +372,7 @@ static inline void prune_one_dentry(stru
__d_drop(dentry);
list_del(&dentry->d_u.d_child);
- dentry_stat.nr_dentry--; /* For d_free, below */
+ dentry_stat_dec(dentry->d_sb, nr_dentry); /* For d_free, below */
dentry_iput(dentry);
parent = dentry->d_parent;
d_free(dentry);
@@ -403,10 +405,10 @@ static void prune_dcache(int count)
tmp = dentry_unused.prev;
if (tmp == &dentry_unused)
break;
- list_del_init(tmp);
prefetch(dentry_unused.prev);
- dentry_stat.nr_unused--;
dentry = list_entry(tmp, struct dentry, d_lru);
+ dentry_stat_dec(dentry->d_sb, nr_unused);
+ list_del_init(&dentry->d_lru);
spin_lock(&dentry->d_lock);
/*
@@ -422,7 +424,7 @@ static void prune_dcache(int count)
if (dentry->d_flags & DCACHE_REFERENCED) {
dentry->d_flags &= ~DCACHE_REFERENCED;
list_add(&dentry->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
+ dentry_stat_inc(dentry->d_sb, nr_unused);
spin_unlock(&dentry->d_lock);
continue;
}
@@ -444,7 +446,7 @@ static void select_anon(struct super_blo
spin_lock(&dcache_lock);
hlist_for_each_entry(dentry, lp, &sb->s_anon, d_hash) {
if (!list_empty(&dentry->d_lru)) {
- dentry_stat.nr_unused--;
+ dentry_stat_dec(sb, nr_unused);
list_del_init(&dentry->d_lru);
}
@@ -455,7 +457,7 @@ static void select_anon(struct super_blo
spin_lock(&dentry->d_lock);
if (!atomic_read(&dentry->d_count)) {
list_add(&dentry->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
+ dentry_stat_inc(sb, nr_unused);
}
spin_unlock(&dentry->d_lock);
}
@@ -519,7 +521,7 @@ repeat:
dentry = list_entry(tmp, struct dentry, d_lru);
if (dentry->d_sb != sb)
continue;
- dentry_stat.nr_unused--;
+ dentry_stat_dec(sb, nr_unused);
list_del_init(tmp);
spin_lock(&dentry->d_lock);
if (atomic_read(&dentry->d_count)) {
@@ -615,7 +617,7 @@ resume:
next = tmp->next;
if (!list_empty(&dentry->d_lru)) {
- dentry_stat.nr_unused--;
+ dentry_stat_dec(dentry->d_sb, nr_unused);
list_del_init(&dentry->d_lru);
}
/*
@@ -624,7 +626,7 @@ resume:
*/
if (!atomic_read(&dentry->d_count)) {
list_add(&dentry->d_lru, dentry_unused.prev);
- dentry_stat.nr_unused++;
+ dentry_stat_inc(dentry->d_sb, nr_unused);
found++;
}
@@ -691,7 +693,7 @@ static int shrink_dcache_memory(int nr,
return -1;
prune_dcache(nr);
}
- return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+ return (global_dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
}
/**
@@ -756,7 +758,7 @@ struct dentry *d_alloc(struct dentry * p
spin_lock(&dcache_lock);
if (parent)
list_add(&dentry->d_u.d_child, &parent->d_subdirs);
- dentry_stat.nr_dentry++;
+ dentry_stat_inc(dentry->d_sb, nr_dentry);
spin_unlock(&dcache_lock);
return dentry;
@@ -932,6 +934,9 @@ struct dentry * d_alloc_anon(struct inod
tmp = NULL;
spin_lock(&res->d_lock);
res->d_sb = inode->i_sb;
+ /* Add to the sb dentry_stat here only,
+ * the global data is updated in d_alloc */
+ res->d_sb->s_dentry_stat.nr_dentry++;
res->d_parent = res;
res->d_inode = inode;
res->d_flags |= DCACHE_DISCONNECTED;
@@ -1613,23 +1618,23 @@ resume:
goto repeat;
}
if (!list_empty(&dentry->d_lru)) {
- dentry_stat.nr_unused--;
+ dentry_stat_dec(dentry->d_sb, nr_unused);
list_del_init(&dentry->d_lru);
}
if (atomic_dec_and_test(&dentry->d_count)) {
list_add(&dentry->d_lru, dentry_unused.prev);
- dentry_stat.nr_unused++;
+ dentry_stat_inc(dentry->d_sb, nr_unused);
}
}
if (this_parent != root) {
next = this_parent->d_u.d_child.next;
if (!list_empty(&this_parent->d_lru)) {
- dentry_stat.nr_unused--;
+ dentry_stat_dec(this_parent->d_sb, nr_unused);
list_del_init(&this_parent->d_lru);
}
if (atomic_dec_and_test(&this_parent->d_count)) {
list_add(&this_parent->d_lru, dentry_unused.prev);
- dentry_stat.nr_unused++;
+ dentry_stat_inc(this_parent->d_sb, nr_unused);
}
this_parent = this_parent->d_parent;
goto resume;
Index: work-2.6/fs/super.c
===================================================================
--- work-2.6.orig/fs/super.c
+++ work-2.6/fs/super.c
@@ -71,6 +71,7 @@ static struct super_block *alloc_super(v
INIT_LIST_HEAD(&s->s_instances);
INIT_HLIST_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
+ s->s_dentry_stat.age_limit = 45;
init_rwsem(&s->s_umount);
mutex_init(&s->s_lock);
down_write(&s->s_umount);
Index: work-2.6/include/linux/dcache.h
===================================================================
--- work-2.6.orig/include/linux/dcache.h
+++ work-2.6/include/linux/dcache.h
@@ -36,14 +36,27 @@ struct qstr {
const unsigned char *name;
};
-struct dentry_stat_t {
+struct dentry_stat {
int nr_dentry;
int nr_unused;
- int age_limit; /* age in seconds */
- int want_pages; /* pages requested by system */
+ int age_limit; /* age in seconds */
int dummy[2];
};
-extern struct dentry_stat_t dentry_stat;
+extern struct dentry_stat global_dentry_stat;
+
+#define dentry_stat_inc(sb, x) \
+do { \
+ global_dentry_stat.x++; \
+ if (likely(sb)) \
+ (sb)->s_dentry_stat.x++;\
+} while(0)
+
+#define dentry_stat_dec(sb, x) \
+do { \
+ global_dentry_stat.x--; \
+ if (likely(sb)) \
+ (sb)->s_dentry_stat.x--;\
+} while(0)
/* Name hashing routines. Initial hash value */
/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */
Index: work-2.6/include/linux/fs.h
===================================================================
--- work-2.6.orig/include/linux/fs.h
+++ work-2.6/include/linux/fs.h
@@ -847,6 +847,7 @@ struct super_block {
struct list_head s_io; /* parked for writeback */
struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
struct list_head s_files;
+ struct dentry_stat s_dentry_stat;
struct block_device *s_bdev;
struct list_head s_instances;
Index: work-2.6/kernel/sysctl.c
===================================================================
--- work-2.6.orig/kernel/sysctl.c
+++ work-2.6/kernel/sysctl.c
@@ -958,7 +958,7 @@ static ctl_table fs_table[] = {
{
.ctl_name = FS_DENTRY,
.procname = "dentry-state",
- .data = &dentry_stat,
+ .data = &global_dentry_stat,
.maxlen = 6*sizeof(int),
.mode = 0444,
.proc_handler = &proc_dointvec,
^ permalink raw reply [flat|nested] 37+ messages in thread* [patch 5/5] vfs: per superblock dentry unused list
2006-06-01 9:51 [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version) jblunck
` (3 preceding siblings ...)
2006-06-01 9:51 ` [patch 4/5] vfs: per superblock dentry stats jblunck
@ 2006-06-01 9:51 ` jblunck
2006-06-02 1:06 ` [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version) Andrew Morton
2006-06-05 1:30 ` Neil Brown
6 siblings, 0 replies; 37+ messages in thread
From: jblunck @ 2006-06-01 9:51 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, akpm, viro, dgc, balbir
[-- Attachment #1: vfs-per-sb-dentry_unused.diff --]
[-- Type: text/plain, Size: 9354 bytes --]
This patch adds per-superblock dentry unused lists. This should speed up the
unmounting/remounting of filesystems when there are a lot of dentries on the
unused lists.
Signed-off-by: Jan Blunck <jblunck@suse.de>
---
fs/dcache.c | 158 ++++++++++++++++++++++++-----------------------------
fs/super.c | 1
include/linux/fs.h | 2
3 files changed, 75 insertions(+), 86 deletions(-)
Index: work-2.6/fs/dcache.c
===================================================================
--- work-2.6.orig/fs/dcache.c
+++ work-2.6/fs/dcache.c
@@ -61,7 +61,6 @@ static kmem_cache_t *dentry_cache __read
static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;
static struct hlist_head *dentry_hashtable __read_mostly;
-static LIST_HEAD(dentry_unused);
/* Statistics gathering. */
struct dentry_stat global_dentry_stat = {
@@ -167,13 +166,18 @@ repeat:
if (dentry->d_op->d_delete(dentry))
goto unhash_it;
}
+ /* Kill dentry without superblock */
+ if (unlikely(!dentry->d_sb))
+ goto unhash_it;
/* Unreachable? Get rid of it */
if (d_unhashed(dentry))
goto kill_it;
if (list_empty(&dentry->d_lru)) {
dentry->d_flags |= DCACHE_REFERENCED;
- list_add(&dentry->d_lru, &dentry_unused);
+ list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_unused);
dentry_stat_inc(dentry->d_sb, nr_unused);
+ dentry->d_sb->s_dentry_unused_age = jiffies +
+ (dentry->d_sb->s_dentry_stat.age_limit * HZ);
}
spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
@@ -382,19 +386,22 @@ static inline void prune_one_dentry(stru
}
/**
- * prune_dcache - shrink the dcache
+ * prune_dcache_sb - prune the dcache for a superblock
+ * @sb: superblock
* @count: number of entries to try and free
*
- * Shrink the dcache. This is done when we need
- * more memory. When we need to unmount something
- * we call shrink_dcache_sb().
+ * Prune the dcache for the specified super block. This
+ * is called from prune_dcache().
*
- * This function may fail to free any resources if
- * all the dentries are in use.
+ * You need to have a reference to the super block and
+ * should have sb->s_umount locked. This function may fail
+ * to free any resources if all the dentries are in use.
*/
-
-static void prune_dcache(int count)
+static void prune_dcache_sb(struct super_block *sb, int count)
{
+ if (count <= 0)
+ return;
+
spin_lock(&dcache_lock);
for (; count ; count--) {
struct dentry *dentry;
@@ -402,10 +409,10 @@ static void prune_dcache(int count)
cond_resched_lock(&dcache_lock);
- tmp = dentry_unused.prev;
- if (tmp == &dentry_unused)
+ tmp = sb->s_dentry_unused.prev;
+ if (tmp == &sb->s_dentry_unused)
break;
- prefetch(dentry_unused.prev);
+ prefetch(sb->s_dentry_unused.prev);
dentry = list_entry(tmp, struct dentry, d_lru);
dentry_stat_dec(dentry->d_sb, nr_unused);
list_del_init(&dentry->d_lru);
@@ -423,7 +430,7 @@ static void prune_dcache(int count)
/* If the dentry was recently referenced, don't free it. */
if (dentry->d_flags & DCACHE_REFERENCED) {
dentry->d_flags &= ~DCACHE_REFERENCED;
- list_add(&dentry->d_lru, &dentry_unused);
+ list_add(&dentry->d_lru, &sb->s_dentry_unused);
dentry_stat_inc(dentry->d_sb, nr_unused);
spin_unlock(&dentry->d_lock);
continue;
@@ -433,64 +440,57 @@ static void prune_dcache(int count)
spin_unlock(&dcache_lock);
}
-
-/*
- * parsing d_hash list does not hlist_for_each_entry_rcu() as it
- * done under dcache_lock.
+/**
+ * prune_dcache - shrink the dcache
+ * @count: number of entries to try and free
+ *
+ * Prune the dcache. This is done when we need
+ * more memory.
+ *
+ * This function may fail to free any resources if
+ * all the dentries are in use.
*/
-static void select_anon(struct super_block *sb)
+static void prune_dcache(int count)
{
- struct dentry *dentry;
- struct hlist_node *lp;
+ struct super_block *sb;
+ int unused = global_dentry_stat.nr_unused;
- spin_lock(&dcache_lock);
- hlist_for_each_entry(dentry, lp, &sb->s_anon, d_hash) {
- if (!list_empty(&dentry->d_lru)) {
- dentry_stat_dec(sb, nr_unused);
- list_del_init(&dentry->d_lru);
- }
+ if (count <= 0)
+ return;
- /*
- * move only zero ref count dentries to the beginning
- * (the most recent end) of the unused list
- */
- spin_lock(&dentry->d_lock);
- if (!atomic_read(&dentry->d_count)) {
- list_add(&dentry->d_lru, &dentry_unused);
- dentry_stat_inc(sb, nr_unused);
+ spin_lock(&sb_lock);
+ restart:
+ list_for_each_entry(sb, &super_blocks, s_list) {
+ sb->s_count++;
+ spin_unlock(&sb_lock);
+ if (down_read_trylock(&sb->s_umount)) {
+ if (sb->s_root) {
+ int tmp;
+
+ /*
+ * Try to be fair to the unused lists:
+ * sb_count/sb_unused ~ count/global_unused
+ *
+ * Additionally, if the age_limit of the
+ * superblock is expired shrink at least one
+ * dentry from the superblock
+ */
+ tmp = sb->s_dentry_stat.nr_unused /
+ ((unused / count) + 1);
+ if (!tmp && time_after(jiffies,
+ sb->s_dentry_unused_age))
+ tmp = 1;
+ prune_dcache_sb(sb, tmp);
+ }
+ up_read(&sb->s_umount);
}
- spin_unlock(&dentry->d_lock);
- }
- spin_unlock(&dcache_lock);
-}
-
-static void select_sb(struct super_block *sb)
-{
- struct dentry *dentry, *pos;
-
- spin_lock(&dcache_lock);
- list_for_each_entry_safe(dentry, pos, &dentry_unused, d_lru) {
- if (dentry->d_sb != sb)
- continue;
- list_del(&dentry->d_lru);
- list_add(&dentry->d_lru, &dentry_unused);
+ spin_lock(&sb_lock);
+ if (__put_super_and_need_restart(sb))
+ goto restart;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&sb_lock);
}
-/*
- * Shrink the dcache for the specified super block.
- * This allows us to unmount a device without disturbing
- * the dcache for the other devices.
- *
- * This implementation makes just two traversals of the
- * unused list. On the first pass we move the selected
- * dentries to the most recent end, and on the second
- * pass we free them. The second pass must restart after
- * each dput(), but since the target dentries are all at
- * the end, it's really just a single traversal.
- */
-
/**
* shrink_dcache_sb - shrink dcache for a superblock
* @sb: superblock
@@ -499,30 +499,16 @@ static void select_sb(struct super_block
* is used to free the dcache before unmounting a file
* system
*/
-
void shrink_dcache_sb(struct super_block * sb)
{
- struct list_head *tmp, *next;
- struct dentry *dentry;
-
- /*
- * Pass one ... move the dentries for the specified
- * superblock to the most recent end of the unused list.
- */
- select_anon(sb);
- select_sb(sb);
+ struct dentry *dentry, *pos;
- /*
- * Pass two ... free the dentries for this superblock.
- */
spin_lock(&dcache_lock);
repeat:
- list_for_each_safe(tmp, next, &dentry_unused) {
- dentry = list_entry(tmp, struct dentry, d_lru);
- if (dentry->d_sb != sb)
- continue;
+ list_for_each_entry_safe(dentry, pos, &sb->s_dentry_unused, d_lru) {
+ BUG_ON(dentry->d_sb != sb);
dentry_stat_dec(sb, nr_unused);
- list_del_init(tmp);
+ list_del_init(&dentry->d_lru);
spin_lock(&dentry->d_lock);
if (atomic_read(&dentry->d_count)) {
spin_unlock(&dentry->d_lock);
@@ -625,7 +611,7 @@ resume:
* of the unused list for prune_dcache
*/
if (!atomic_read(&dentry->d_count)) {
- list_add(&dentry->d_lru, dentry_unused.prev);
+ list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_unused);
dentry_stat_inc(dentry->d_sb, nr_unused);
found++;
}
@@ -671,7 +657,7 @@ void shrink_dcache_parent(struct dentry
int found;
while ((found = select_parent(parent)) != 0)
- prune_dcache(found);
+ prune_dcache_sb(parent->d_sb, found);
}
/*
@@ -1622,7 +1608,7 @@ resume:
list_del_init(&dentry->d_lru);
}
if (atomic_dec_and_test(&dentry->d_count)) {
- list_add(&dentry->d_lru, dentry_unused.prev);
+ list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_unused);
dentry_stat_inc(dentry->d_sb, nr_unused);
}
}
@@ -1633,7 +1619,7 @@ resume:
list_del_init(&this_parent->d_lru);
}
if (atomic_dec_and_test(&this_parent->d_count)) {
- list_add(&this_parent->d_lru, dentry_unused.prev);
+ list_add(&this_parent->d_lru, &this_parent->d_sb->s_dentry_unused);
dentry_stat_inc(this_parent->d_sb, nr_unused);
}
this_parent = this_parent->d_parent;
Index: work-2.6/fs/super.c
===================================================================
--- work-2.6.orig/fs/super.c
+++ work-2.6/fs/super.c
@@ -71,6 +71,7 @@ static struct super_block *alloc_super(v
INIT_LIST_HEAD(&s->s_instances);
INIT_HLIST_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
+ INIT_LIST_HEAD(&s->s_dentry_unused);
s->s_dentry_stat.age_limit = 45;
init_rwsem(&s->s_umount);
mutex_init(&s->s_lock);
Index: work-2.6/include/linux/fs.h
===================================================================
--- work-2.6.orig/include/linux/fs.h
+++ work-2.6/include/linux/fs.h
@@ -847,7 +847,9 @@ struct super_block {
struct list_head s_io; /* parked for writeback */
struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
struct list_head s_files;
+ struct list_head s_dentry_unused;
struct dentry_stat s_dentry_stat;
+ unsigned long s_dentry_unused_age;
struct block_device *s_bdev;
struct list_head s_instances;
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-01 9:51 [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version) jblunck
` (4 preceding siblings ...)
2006-06-01 9:51 ` [patch 5/5] vfs: per superblock dentry unused list jblunck
@ 2006-06-02 1:06 ` Andrew Morton
2006-06-02 2:23 ` David Chinner
2006-06-05 1:30 ` Neil Brown
6 siblings, 1 reply; 37+ messages in thread
From: Andrew Morton @ 2006-06-02 1:06 UTC (permalink / raw)
To: jblunck; +Cc: linux-kernel, linux-fsdevel, viro, dgc, balbir
On Thu, 01 Jun 2006 11:51:25 +0200
jblunck@suse.de wrote:
> This is an attempt to have per-superblock unused dentry lists.
Fairly significant clashes with
ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.17-rc5/2.6.17-rc5-mm2/broken-out/fix-dcache-race-during-umount.patch
I guess Neil's patch will go into the 2.6.18 tree, so you'd be best off
working against that.
Also, you're making what appears to be a quite deep design change to a
pretty important part of the memory reclaim code and all the info we have
is this:
+ /*
+ * Try to be fair to the unused lists:
+ * sb_count/sb_unused ~ count/global_unused
+ *
+ * Additionally, if the age_limit of the
+ * superblock is expired shrink at least one
+ * dentry from the superblock
+ */
+ tmp = sb->s_dentry_stat.nr_unused /
+ ((unused / count) + 1);
+ if (!tmp && time_after(jiffies,
+ sb->s_dentry_unused_age))
+ tmp = 1;
Please, we'll need much much more description of what this is trying to
achieve, why it exists, analysis, testing results, etc, etc. Coz my
immediate reaction is "wtf is that, and what will that do to my computer?".
In particular, `jiffies' has near-to-zero correlation with the rate of
creation and reclaim of these objects, so it looks highly inappropriate
that it's in there. If anything can be used to measure "time" in this code
it is the number of scanned entries, not jiffies.
But I cannot say more, because I do not know what that code is doing, nor
what problem it is trying to solve. The patch changelog would be an
appropriate place for that info ;)
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-02 1:06 ` [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version) Andrew Morton
@ 2006-06-02 2:23 ` David Chinner
2006-06-02 2:49 ` Andrew Morton
0 siblings, 1 reply; 37+ messages in thread
From: David Chinner @ 2006-06-02 2:23 UTC (permalink / raw)
To: Andrew Morton; +Cc: jblunck, linux-kernel, linux-fsdevel, viro, dgc, balbir
On Thu, Jun 01, 2006 at 06:06:59PM -0700, Andrew Morton wrote:
> On Thu, 01 Jun 2006 11:51:25 +0200
> jblunck@suse.de wrote:
>
> > This is an attempt to have per-superblock unused dentry lists.
>
> Fairly significant clashes with
> ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.17-rc5/2.6.17-rc5-mm2/broken-out/fix-dcache-race-during-umount.patch
>
> I guess Neil's patch will go into the 2.6.18 tree, so you'd be best off
> working against that.
Though this patch series fixes the same problem in a much cleaner
way. It effectively obsoletes Neil's fix.
> Also, you're making what appears to be a quite deep design change to a
> pretty important part of the memory reclaim code and all the info we have
> is this:
>
>
> + /*
> + * Try to be fair to the unused lists:
> + * sb_count/sb_unused ~ count/global_unused
> + *
> + * Additionally, if the age_limit of the
> + * superblock is expired shrink at least one
> + * dentry from the superblock
> + */
> + tmp = sb->s_dentry_stat.nr_unused /
> + ((unused / count) + 1);
> + if (!tmp && time_after(jiffies,
> + sb->s_dentry_unused_age))
> + tmp = 1;
>
>
> Please, we'll need much much more description of what this is trying to
> achieve, why it exists, analysis, testing results, etc, etc. Coz my
> immediate reaction is "wtf is that, and what will that do to my computer?".
Discussed in this thread:
http://marc.theaimsgroup.com/?l=linux-fsdevel&m=114890371801114&w=2
Short summary of the problem: due to SHRINK_BATCH resolution, a proportional
reclaim based on "count" across all superblocks will not shrink anything on
lists 2 orders of magnitude smaller than the longest list as tmp will evaluate
as zero. Hence to prevent small unused lists from never being reclaimed and
pinning memory until >90% of the dentry cache has been reclaimed we need to
turn them over slowly. However, if we turn them over too quickly, the dentry
cache does no caching for small filesystems.
This is not a problem a single global unused list has...
> In particular, `jiffies' has near-to-zero correlation with the rate of
> creation and reclaim of these objects, so it looks highly inappropriate
> that it's in there. If anything can be used to measure "time" in this code
> it is the number of scanned entries, not jiffies.
Sure, but SHRINK_BATCH resolution basically makes it impossible to reconcile
lists of vastly different lengths. If the shrinker simply called us
with the entire count it now hands us in batches, I doubt that this would be
an issue.
In the mean time, we need some other method to ensure we do eventually free
up these items on small lists. The above implements an idle timer used to
determine when we start to turn over a small cache. Maybe if we wrap it in:
> + if (!tmp && dentry_lru_idle(sb))
> + tmp = 1;
with a more appropriate comment it would make more sense?
Suggestions on other ways to resolve the problem are welcome....
Cheers,
Dave.
--
Dave Chinner
R&D Software Engineer
SGI Australian Software Group
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-02 2:23 ` David Chinner
@ 2006-06-02 2:49 ` Andrew Morton
2006-06-02 4:17 ` David Chinner
2006-06-02 15:33 ` Jan Blunck
0 siblings, 2 replies; 37+ messages in thread
From: Andrew Morton @ 2006-06-02 2:49 UTC (permalink / raw)
To: David Chinner; +Cc: jblunck, linux-kernel, linux-fsdevel, viro, dgc, balbir
On Fri, 2 Jun 2006 12:23:39 +1000
David Chinner <dgc@sgi.com> wrote:
> On Thu, Jun 01, 2006 at 06:06:59PM -0700, Andrew Morton wrote:
> > On Thu, 01 Jun 2006 11:51:25 +0200
> > jblunck@suse.de wrote:
> >
> > > This is an attempt to have per-superblock unused dentry lists.
> >
> > Fairly significant clashes with
> > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.17-rc5/2.6.17-rc5-mm2/broken-out/fix-dcache-race-during-umount.patch
> >
> > I guess Neil's patch will go into the 2.6.18 tree, so you'd be best off
> > working against that.
>
> Though this patch series fixes the same problem in a much cleaner
> way. It effectively obsoletes Neil's fix.
OK.
> > Also, you're making what appears to be a quite deep design change to a
> > pretty important part of the memory reclaim code and all the info we have
> > is this:
> >
> >
> > + /*
> > + * Try to be fair to the unused lists:
> > + * sb_count/sb_unused ~ count/global_unused
> > + *
> > + * Additionally, if the age_limit of the
> > + * superblock is expired shrink at least one
> > + * dentry from the superblock
> > + */
> > + tmp = sb->s_dentry_stat.nr_unused /
> > + ((unused / count) + 1);
> > + if (!tmp && time_after(jiffies,
> > + sb->s_dentry_unused_age))
> > + tmp = 1;
> >
> >
> > Please, we'll need much much more description of what this is trying to
> > achieve, why it exists, analysis, testing results, etc, etc. Coz my
> > immediate reaction is "wtf is that, and what will that do to my computer?".
>
> Discussed in this thread:
>
> http://marc.theaimsgroup.com/?l=linux-fsdevel&m=114890371801114&w=2
>
> Short summary of the problem: due to SHRINK_BATCH resolution, a proportional
> reclaim based on "count" across all superblocks will not shrink anything on
> lists 2 orders of magnitude smaller than the longest list as tmp will evaluate
> as zero. Hence to prevent small unused lists from never being reclaimed and
> pinning memory until >90% of the dentry cache has been reclaimed we need to
> turn them over slowly. However, if we turn them over too quickly, the dentry
> cache does no caching for small filesystems.
>
> This is not a problem a single global unused list has...
Reasonable. Whatever we do needs to be fully communicated in the comment
text please.
> > In particular, `jiffies' has near-to-zero correlation with the rate of
> > creation and reclaim of these objects, so it looks highly inappropriate
> > that it's in there. If anything can be used to measure "time" in this code
> > it is the number of scanned entries, not jiffies.
>
> Sure, but SHRINK_BATCH resolution basically makes it impossible to reconcile
> lists of vastly different lengths. If the shrinker simply called us
> with the entire count it now hands us in batches, I doubt that this would be
> an issue.
>
> In the mean time, we need some other method to ensure we do eventually free
> up these items on small lists. The above implements an idle timer used to
> determine when we start to turn over a small cache. Maybe if we wrap it in:
>
> > + if (!tmp && dentry_lru_idle(sb))
> > + tmp = 1;
>
> with a more appropriate comment it would make more sense?
>
> Suggestions on other ways to resolve the problem are welcome....
Don't do a divide?
sb->s_scan_count += count;
...
tmp = sb->s_dentry_stat.nr_unused /
(global_dentry_stat.nr_unused / sb->s_scan_count + 1);
if (tmp) {
sb->s_scan_count -= <can't be bothered doing the arith ;)>;
prune_dcache_sb(sb, tmp);
}
That could go weird on us if there are sudden swings in
sb->s_dentry_stat.nr_unused or global_dentry_stat.nr_unused, but
appropriate boundary checking should fix that?
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-02 2:49 ` Andrew Morton
@ 2006-06-02 4:17 ` David Chinner
2006-06-02 15:33 ` Jan Blunck
1 sibling, 0 replies; 37+ messages in thread
From: David Chinner @ 2006-06-02 4:17 UTC (permalink / raw)
To: Andrew Morton; +Cc: jblunck, linux-kernel, linux-fsdevel, viro, balbir
On Thu, Jun 01, 2006 at 07:49:12PM -0700, Andrew Morton wrote:
> On Fri, 2 Jun 2006 12:23:39 +1000 David Chinner <dgc@sgi.com> wrote:
> > On Thu, Jun 01, 2006 at 06:06:59PM -0700, Andrew Morton wrote:
> > > Please, we'll need much much more description of what this is trying to
> > > achieve, why it exists, analysis, testing results, etc, etc. Coz my
> > > immediate reaction is "wtf is that, and what will that do to my computer?".
> >
> > Discussed in this thread:
> >
> > http://marc.theaimsgroup.com/?l=linux-fsdevel&m=114890371801114&w=2
> >
> > Short summary of the problem: due to SHRINK_BATCH resolution, a proportional
> > reclaim based on "count" across all superblocks will not shrink anything on
> > lists 2 orders of magnitude smaller than the longest list as tmp will evaluate
> > as zero. Hence to prevent small unused lists from never being reclaimed and
> > pinning memory until >90% of the dentry cache has been reclaimed we need to
> > turn them over slowly. However, if we turn them over too quickly, the dentry
> > cache does no caching for small filesystems.
> >
> > This is not a problem a single global unused list has...
>
> Reasonable. Whatever we do needs to be fully communicated in the comment
> text please.
*nod*
> > > In particular, `jiffies' has near-to-zero correlation with the rate of
> > > creation and reclaim of these objects, so it looks highly inappropriate
> > > that it's in there. If anything can be used to measure "time" in this code
> > > it is the number of scanned entries, not jiffies.
> >
> > Sure, but SHRINK_BATCH resolution basically makes it impossible to reconcile
> > lists of vastly different lengths. If the shrinker simply called us
> > with the entire count it now hands us in batches, I doubt that this would be
> > an issue.
> >
> > In the mean time, we need some other method to ensure we do eventually free
> > up these items on small lists. The above implements an idle timer used to
> > determine when we start to turn over a small cache. Maybe if we wrap it in:
> >
> > > + if (!tmp && dentry_lru_idle(sb))
> > > + tmp = 1;
> >
> > with a more appropriate comment it would make more sense?
> >
> > Suggestions on other ways to resolve the problem are welcome....
>
> Don't do a divide?
>
> sb->s_scan_count += count;
> ...
> tmp = sb->s_dentry_stat.nr_unused /
> (global_dentry_stat.nr_unused / sb->s_scan_count + 1);
> if (tmp) {
> sb->s_scan_count -= <can't be bothered doing the arith ;)>;
> prune_dcache_sb(sb, tmp);
> }
>
> That could go weird on us if there are sudden swings in
> sb->s_dentry_stat.nr_unused or global_dentry_stat.nr_unused, but
> appropriate boundary checking should fix that?
Hmm - I'll have to run some numbers through this what the curves
look like and determine what we need to do on the decrement. The
count accumulation does solve the resolution problem, though...
Thanks, Andrew.
Cheers,
Dave.
--
Dave Chinner
R&D Software Engineer
SGI Australian Software Group
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-02 2:49 ` Andrew Morton
2006-06-02 4:17 ` David Chinner
@ 2006-06-02 15:33 ` Jan Blunck
1 sibling, 0 replies; 37+ messages in thread
From: Jan Blunck @ 2006-06-02 15:33 UTC (permalink / raw)
To: Andrew Morton; +Cc: David Chinner, linux-kernel, linux-fsdevel, viro, balbir
On Thu, Jun 01, Andrew Morton wrote:
> > Discussed in this thread:
> >
> > http://marc.theaimsgroup.com/?l=linux-fsdevel&m=114890371801114&w=2
> >
> > Short summary of the problem: due to SHRINK_BATCH resolution, a proportional
> > reclaim based on "count" across all superblocks will not shrink anything on
> > lists 2 orders of magnitude smaller than the longest list as tmp will evaluate
> > as zero. Hence to prevent small unused lists from never being reclaimed and
> > pinning memory until >90% of the dentry cache has been reclaimed we need to
> > turn them over slowly. However, if we turn them over too quickly, the dentry
> > cache does no caching for small filesystems.
> >
> > This is not a problem a single global unused list has...
>
> Reasonable. Whatever we do needs to be fully communicated in the comment
> text please.
>
Yes, you are right. As I expected that this isn't the final patch I was a
little bit too lazy. Will do that for the next version.
> > > In particular, `jiffies' has near-to-zero correlation with the rate of
> > > creation and reclaim of these objects, so it looks highly inappropriate
> > > that it's in there. If anything can be used to measure "time" in this code
> > > it is the number of scanned entries, not jiffies.
Ouch! Totally missed that. The measurement should be kind of round-based
instead.
> Don't do a divide?
>
> sb->s_scan_count += count;
> ...
> tmp = sb->s_dentry_stat.nr_unused /
> (global_dentry_stat.nr_unused / sb->s_scan_count + 1);
> if (tmp) {
> sb->s_scan_count -= <can't be bothered doing the arith ;)>;
> prune_dcache_sb(sb, tmp);
> }
>
> That could go weird on us if there are sudden swings in
> sb->s_dentry_stat.nr_unused or global_dentry_stat.nr_unused, but
> appropriate boundary checking should fix that?
if (tmp) {
sb->s_scan_count -= count;
sb->s_scan_count -= sb->s_scan_count ? min(sb->s_scan_count, count/2) : 0;
prune_dcache_sb(sb, tmp);
}
if (!sb->s_dentry_stat.nr_unused)
sb->s_scan_count = 0;
In normal situations, s_scan_count should be zero (add count and subtract it
again).
s_scan_count is increasing when we don't prune anything from that
superblock. If we finally reach the point where the s_scan_count is that high
that we actually prune some dentries, we slowly (count/2) decrease the
s_scan_count level again.
If the superblock doesn't have any unused dentries we reset the s_scan_count to
zero.
So s_scan_count is some kind of badness counter. I hope that this will still
be good enough for you, David.
Jan
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-01 9:51 [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version) jblunck
` (5 preceding siblings ...)
2006-06-02 1:06 ` [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version) Andrew Morton
@ 2006-06-05 1:30 ` Neil Brown
2006-06-16 15:51 ` Jan Blunck
6 siblings, 1 reply; 37+ messages in thread
From: Neil Brown @ 2006-06-05 1:30 UTC (permalink / raw)
To: jblunck; +Cc: linux-kernel, linux-fsdevel, akpm, viro, dgc, balbir
On Thursday June 1, jblunck@suse.de wrote:
> This patch series is an updated version of my original patches.
>
> This is an attempt to have per-superblock unused dentry lists. Since dentries
> are lazy-removed from the unused list, one big list doesn't scale very well
> wrt systems with a huge dentry cache. The dcache shrinkers spend a long time
> traversing the list under the dcache spinlock.
>
> The patches introduce an additional list_head per superblock holding only the
> dentries of the specific superblock. The next dentry can be found quickly so
> the shrinkers don't need to hold the dcache lock for long.
>
> One nice side-effect: the "busy inodes after unmount" race is fixed because
> prune_dcache() is getting the s_umount lock before it starts working on the
> superblock's dentries.
>
> Comments?
I'm wondering if we need to be as intrusive as this patch is, though
I'm not 100% certain of the details of the performance problems that
you are fixing, so I could be wrong...
It seems to me that the 'real' problem is that prune_dcache is used in
two very different circumstances.
Firstly, it is used by shrink_dcache_memory where we simply want to
free the 'n' oldest entries. We don't care which filesystem they
belong to.
Secondly, it is used by shrink_dcache_parent and shrink_dcache_anon
when we want to free some specific dentries. This is done by moving
those dentries to the 'old' end of the list and then calling
prune_dcache.
I understand that this is where problem is because the selected
dentries don't stay at the end of the list very long in some
circumstances. In particular, other filesystems' dentries get mixed
in.
You have addressed this by having multiple unused lists so the
dentries of other filesystems don't get mixed in.
It seems to me that an alternate approach would be:
- get select_parent and shrink_dcache_anon to move the selected
dentries on to some new list.
- pass this list to prune_dcache
- splice any remainder back on to the global list when finished.
This would remove the noise from other filesystems that is causing the
problems, without creating the need to try to shrink multiple lists in
parallel which - due to significantly different sizes - seems to be a
problem.
Following is a patch to try to spell out more clearly what I mean. It
compiles, but has not been tested (or even very thoroughly reviewed).
Would this address the problem that you are measuring?
NeilBrown
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./fs/dcache.c | 56 ++++++++++++++++++++++++++++++--------------------------
1 file changed, 30 insertions(+), 26 deletions(-)
diff ./fs/dcache.c~current~ ./fs/dcache.c
--- ./fs/dcache.c~current~ 2006-06-05 11:07:53.000000000 +1000
+++ ./fs/dcache.c 2006-06-05 11:29:20.000000000 +1000
@@ -383,8 +383,8 @@ static void prune_one_dentry(struct dent
/**
* prune_dcache - shrink the dcache
* @count: number of entries to try and free
- * @sb: if given, ignore dentries for other superblocks
- * which are being unmounted.
+ * @list: If given, remove from this list instead of
+ * from dentry_unused.
*
* Shrink the dcache. This is done when we need
* more memory, or simply when we need to unmount
@@ -393,11 +393,23 @@ static void prune_one_dentry(struct dent
*
* This function may fail to free any resources if
* all the dentries are in use.
+ *
+ * Any dentries that were not removed due to the @count
+ * limit will be splice on to the end of dentry_unused,
+ * so they should already be counted in dentry_stat.nr_unused.
*/
-static void prune_dcache(int count, struct super_block *sb)
+static void prune_dcache(int count, struct list_head *list)
{
+ int have_list = list != NULL;
+ struct list_head alt_head;
spin_lock(&dcache_lock);
+ if (list == NULL) {
+ /* use the dentry_unused list */
+ list_add(&alt_head, &dentry_unused);
+ list_del_init(&dentry_unused);
+ list = &alt_head;
+ }
for (; count ; count--) {
struct dentry *dentry;
struct list_head *tmp;
@@ -405,23 +417,11 @@ static void prune_dcache(int count, stru
cond_resched_lock(&dcache_lock);
- tmp = dentry_unused.prev;
- if (unlikely(sb)) {
- /* Try to find a dentry for this sb, but don't try
- * too hard, if they aren't near the tail they will
- * be moved down again soon
- */
- int skip = count;
- while (skip && tmp != &dentry_unused &&
- list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
- skip--;
- tmp = tmp->prev;
- }
- }
- if (tmp == &dentry_unused)
+ tmp = list->prev;
+ if (tmp == list)
break;
list_del_init(tmp);
- prefetch(dentry_unused.prev);
+ prefetch(list->prev);
dentry_stat.nr_unused--;
dentry = list_entry(tmp, struct dentry, d_lru);
@@ -454,7 +454,7 @@ static void prune_dcache(int count, stru
* If this dentry is for "my" filesystem, then I can prune it
* without taking the s_umount lock (I already hold it).
*/
- if (sb && dentry->d_sb == sb) {
+ if (have_list) {
prune_one_dentry(dentry);
continue;
}
@@ -489,6 +489,8 @@ static void prune_dcache(int count, stru
*/
break;
}
+ /* splice any remaining entries back onto dentry_unused */
+ list_splice(list, dentry_unused.prev);
spin_unlock(&dcache_lock);
}
@@ -607,7 +609,7 @@ positive:
/*
* Search the dentry child list for the specified parent,
- * and move any unused dentries to the end of the unused
+ * and move any unused dentries to the end of a new unused
* list for prune_dcache(). We descend to the next level
* whenever the d_subdirs list is non-empty and continue
* searching.
@@ -619,7 +621,7 @@ positive:
* drop the lock and return early due to latency
* constraints.
*/
-static int select_parent(struct dentry * parent)
+static int select_parent(struct dentry * parent, struct list_head *new)
{
struct dentry *this_parent = parent;
struct list_head *next;
@@ -643,7 +645,7 @@ resume:
* of the unused list for prune_dcache
*/
if (!atomic_read(&dentry->d_count)) {
- list_add_tail(&dentry->d_lru, &dentry_unused);
+ list_add_tail(&dentry->d_lru, new);
dentry_stat.nr_unused++;
found++;
}
@@ -687,9 +689,10 @@ out:
void shrink_dcache_parent(struct dentry * parent)
{
int found;
+ LIST_HEAD(list);
- while ((found = select_parent(parent)) != 0)
- prune_dcache(found, parent->d_sb);
+ while ((found = select_parent(parent, &list)) != 0)
+ prune_dcache(found, &list);
}
/**
@@ -708,6 +711,7 @@ void shrink_dcache_anon(struct super_blo
struct hlist_head *head = &sb->s_anon;
int found;
do {
+ LIST_HEAD(list);
found = 0;
spin_lock(&dcache_lock);
hlist_for_each(lp, head) {
@@ -722,13 +726,13 @@ void shrink_dcache_anon(struct super_blo
* of the unused list for prune_dcache
*/
if (!atomic_read(&this->d_count)) {
- list_add_tail(&this->d_lru, &dentry_unused);
+ list_add_tail(&this->d_lru, &list);
dentry_stat.nr_unused++;
found++;
}
}
spin_unlock(&dcache_lock);
- prune_dcache(found, sb);
+ prune_dcache(found, &list);
} while(found);
}
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-05 1:30 ` Neil Brown
@ 2006-06-16 15:51 ` Jan Blunck
2006-06-16 22:25 ` Neil Brown
0 siblings, 1 reply; 37+ messages in thread
From: Jan Blunck @ 2006-06-16 15:51 UTC (permalink / raw)
To: Neil Brown; +Cc: linux-kernel, linux-fsdevel, akpm, viro, dgc, balbir
On Mon, Jun 05, Neil Brown wrote:
> I understand that this is where problem is because the selected
> dentries don't stay at the end of the list very long in some
> circumstances. In particular, other filesystems' dentries get mixed
> in.
No. The problem is that the LRU list is too long and therefore unmounting
seems to take ages.
> You have addressed this by having multiple unused lists so the
> dentries of other filesystems don't get mixed in.
>
> It seems to me that an alternate approach would be:
>
> - get select_parent and shrink_dcache_anon to move the selected
> dentries on to some new list.
> - pass this list to prune_dcache
> - splice any remainder back on to the global list when finished.
I had this idea too. At least select_parent could use something like that. But
that wouldn't help in the umount situation.
Jan
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-16 15:51 ` Jan Blunck
@ 2006-06-16 22:25 ` Neil Brown
2006-06-18 23:56 ` David Chinner
2006-06-19 9:34 ` Jan Blunck
0 siblings, 2 replies; 37+ messages in thread
From: Neil Brown @ 2006-06-16 22:25 UTC (permalink / raw)
To: Jan Blunck; +Cc: linux-kernel, linux-fsdevel, akpm, viro, dgc, balbir
On Friday June 16, jblunck@suse.de wrote:
> On Mon, Jun 05, Neil Brown wrote:
>
> > I understand that this is where problem is because the selected
> > dentries don't stay at the end of the list very long in some
> > circumstances. In particular, other filesystems' dentries get mixed
> > in.
>
> No. The problem is that the LRU list is too long and therefore unmounting
> seems to take ages.
>
But I cannot see that the whole LRU list needs to be scanned during
unmount.
The only thing that does that is shrink_dcache_sb, which is used:
in do_remount_sb
in __invalidate_device
in a few filesystems (autofs, coda, smbfs)
and not when unmounting the filesystem (despite the comment).
(This is in 2.6.17-ec6-mm2).
I can see that shrink_dcache_sb could take a long time and should be
fixed, which should be as simple as replacing it with
shrink_dcache_parent; shrink_dcache_anon.
But I'm still puzzled as to why a long dcache LRU slows down
unmounting.
Can you give more details?
Thanks,
NeilBrown
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-16 22:25 ` Neil Brown
@ 2006-06-18 23:56 ` David Chinner
2006-06-19 0:27 ` Neil Brown
2006-06-19 9:34 ` Jan Blunck
1 sibling, 1 reply; 37+ messages in thread
From: David Chinner @ 2006-06-18 23:56 UTC (permalink / raw)
To: Neil Brown
Cc: Jan Blunck, linux-kernel, linux-fsdevel, akpm, viro, dgc, balbir
On Sat, Jun 17, 2006 at 08:25:14AM +1000, Neil Brown wrote:
> On Friday June 16, jblunck@suse.de wrote:
> > On Mon, Jun 05, Neil Brown wrote:
> >
> > > I understand that this is where problem is because the selected
> > > dentries don't stay at the end of the list very long in some
> > > circumstances. In particular, other filesystems' dentries get mixed
> > > in.
> >
> > No. The problem is that the LRU list is too long and therefore unmounting
> > seems to take ages.
> >
>
> But I cannot see that the whole LRU list needs to be scanned during
> unmount.
...
> I can see that shrink_dcache_sb could take a long time and should be
> fixed, which should be as simple as replacing it with
> shrink_dcache_parent; shrink_dcache_anon.
But these are not guaranteed to reclaim all the dentries from a given
superblock. Yes, they move the dentries to the LRU, but other activity in the
system means that they may not get reclaimed during the subsequent calls
to prune_dcache() and hence they may live beyond the unmount....
> But I'm still puzzled as to why a long dcache LRU slows down
> unmounting.
>
> Can you give more details?
It's not the unmount that slows down - it's the fact that the dcache lock
is held for so long that the rest of the system halts for the time it takes
to run shrink_dcache_sb(). We've seen up to 50s to do a (touch fred; rm fred)
when the LRU has grown to several million dentries and shrink_dcache_sb()
is running. When this happens, it's not uncommon to see every CPU in the
machine spinning on the dcache_lock...
Cheers,
Dave.
--
Dave Chinner
Principal Engineer
SGI Australian Software Group
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-18 23:56 ` David Chinner
@ 2006-06-19 0:27 ` Neil Brown
2006-06-19 1:00 ` David Chinner
0 siblings, 1 reply; 37+ messages in thread
From: Neil Brown @ 2006-06-19 0:27 UTC (permalink / raw)
To: David Chinner; +Cc: Jan Blunck, linux-kernel, linux-fsdevel, akpm, viro, balbir
On Monday June 19, dgc@sgi.com wrote:
>
> > I can see that shrink_dcache_sb could take a long time and should be
> > fixed, which should be as simple as replacing it with
> > shrink_dcache_parent; shrink_dcache_anon.
>
> But these are not guaranteed to reclaim all the dentries from a given
> superblock. Yes, they move the dentries to the LRU, but other activity in the
> system means that they may not get reclaimed during the subsequent calls
> to prune_dcache() and hence they may live beyond the unmount....
>
My proposed patch earlier in this thread (I can post it again if you
like) addresses exactly this issue. Instead of moving dentries to the
global LRU, it moves them to a private LRU, and then calls prune_dcache
on that. So there is no room for other activity to get in the way of
prune_dcache doing what needs to be done.
I agree that using a single big LRU for everything doesn't work. I
just don't think we need (or want) separate LRUs for each superblock.
Rather we want separate temporary LRUs just for use when unmounting.
> > But I'm still puzzled as to why a long dcache LRU slows down
> > unmounting.
> >
> > Can you give more details?
>
> It's not the unmount that slows down - it's the fact that the dcache lock
> is held for so long that rest of the system halts for time it takes
> to run shrink_dcache_sb(). We've seen up to 50s to do a (touch fred; rm fred)
> when the LRU has grown to several million dentries and shrink_dcache_sb()
> is running. When this happens, it's not uncommon to see every CPU in the
> machine spinning on the dcache_lock...
Definitely a problem.
Maybe it was hoped that the call to cond_resched_lock(&dcache_lock)
would avoid this, but apparently not.
I still maintain that we should replace shrink_dcache_sb with calls to
shrink_dcache_anon and shrink_dcache_parent. That, together with my
previous patch, should fix this problem quite cleanly. If I send you
a combined patch against the latest -mm can you test?
Thanks,
NeilBrown
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-19 0:27 ` Neil Brown
@ 2006-06-19 1:00 ` David Chinner
2006-06-19 1:21 ` Neil Brown
0 siblings, 1 reply; 37+ messages in thread
From: David Chinner @ 2006-06-19 1:00 UTC (permalink / raw)
To: Neil Brown; +Cc: Jan Blunck, linux-kernel, linux-fsdevel, akpm, viro, balbir
On Mon, Jun 19, 2006 at 10:27:39AM +1000, Neil Brown wrote:
> On Monday June 19, dgc@sgi.com wrote:
> >
> > > I can see that shrink_dcache_sb could take a long time and should be
> > > fixed, which should be as simple as replacing it with
> > > shrink_dcache_parent; shrink_dcache_anon.
> >
> > But these are not guaranteed to reclaim all the dentries from a given
> > superblock. Yes, they move the dentries to the LRU, but other activity in the
> > system means that they may not get reclaimed during the subsequent calls
> > to prune_dcache() and hence they may live beyond the unmount....
> >
>
> My proposed patch earlier in this thread (I can post it again if you
> like) addresses exactly this issue. Instead of moving dentries to the
> global LRU, it moves them to a private LRU, and the calls prune_dcache
> on that. So there is no room for other activity to get in the way of
> prune_dcache doing what needs to be done.
Ok. That sounds like it would work.
> > > But I'm still puzzled as to why a long dcache LRU slows down
> > > unmounting.
> > >
> > > Can you give more details?
> >
> > It's not the unmount that slows down - it's the fact that the dcache lock
> > is held for so long that rest of the system halts for time it takes
> > to run shrink_dcache_sb(). We've seen up to 50s to do a (touch fred; rm fred)
> > when the LRU has grown to several million dentries and shrink_dcache_sb()
> > is running. When this happens, it's not uncommon to see every CPU in the
> > machine spinning on the dcache_lock...
>
> Definitely a problem.
> Maybe it was hoped that the call to cond_resched_lock(&dcache_lock)
> would avoid this, but apparently not.
The first pass over the LRU doesn't even do that....
> I still maintain that we should replace shrink_dcache_sb with calls to
> shrink_dcache_anon and shrink_dcache_parent. That, together with my
> previous patch, should fix this problem quite cleanly. If I send you
> a combined patch against the latest -mm can you test?
Ok. Send me the patch and I'll try to get some tests done on it...
Cheers,
Dave.
--
Dave Chinner
Principal Engineer
SGI Australian Software Group
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-19 1:00 ` David Chinner
@ 2006-06-19 1:21 ` Neil Brown
2006-06-19 2:04 ` Andrew Morton
2006-06-19 5:55 ` David Chinner
0 siblings, 2 replies; 37+ messages in thread
From: Neil Brown @ 2006-06-19 1:21 UTC (permalink / raw)
To: David Chinner; +Cc: Jan Blunck, linux-kernel, linux-fsdevel, akpm, viro, balbir
On Monday June 19, dgc@sgi.com wrote:
>
> Ok. Send me the patch and I'll try to get some tests done on it...
>
Below, thanks.
NeilBrown
-------------------------------------
Reduce contention in dentry_unused when unmounting.
When we unmount a filesystem we need to release all dentries.
We currently
- move a collection of dentries to the end of the dentry_unused list
- call prune_dcache to prune that number of dentries.
If lots of other dentries are added to the end of the list while
the prune_dcache proceeds (e.g. another filesystem is unmounted),
this can involve a lot of wasted time wandering through the
list looking for dentries that we had previously found.
This patch allows the dentry_unused list to temporarily be multiple
lists.
When unmounting, dentries that are found to require pruning are moved
to a temporary list, but accounted as though they were on dentry_unused.
Then this list is passed to prune_dcache for freeing. Any entries
that are not pruned for whatever reason are added to the end of
dentry_unused.
Also change shrink_dcache_sb to simply call shrink_dcache_parent and
shrink_dcache_anon. This avoids a long walk of the LRU.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./fs/dcache.c | 111 ++++++++++++++++++----------------------------------------
1 file changed, 35 insertions(+), 76 deletions(-)
diff .prev/fs/dcache.c ./fs/dcache.c
--- .prev/fs/dcache.c 2006-06-19 11:14:02.000000000 +1000
+++ ./fs/dcache.c 2006-06-19 11:19:29.000000000 +1000
@@ -383,8 +383,8 @@ static void prune_one_dentry(struct dent
/**
* prune_dcache - shrink the dcache
* @count: number of entries to try and free
- * @sb: if given, ignore dentries for other superblocks
- * which are being unmounted.
+ * @list: If given, remove from this list instead of
+ * from dentry_unused.
*
* Shrink the dcache. This is done when we need
* more memory, or simply when we need to unmount
@@ -393,11 +393,23 @@ static void prune_one_dentry(struct dent
*
* This function may fail to free any resources if
* all the dentries are in use.
+ *
+ * Any dentries that were not removed due to the @count
+ * limit will be spliced onto the end of dentry_unused,
+ * so they should already be accounted for in dentry_stat.nr_unused.
*/
-static void prune_dcache(int count, struct super_block *sb)
+static void prune_dcache(int count, struct list_head *list)
{
+ int have_list = list != NULL;
+ struct list_head alt_head;
spin_lock(&dcache_lock);
+ if (list == NULL) {
+ /* use the dentry_unused list */
+ list_add(&alt_head, &dentry_unused);
+ list_del_init(&dentry_unused);
+ list = &alt_head;
+ }
for (; count ; count--) {
struct dentry *dentry;
struct list_head *tmp;
@@ -405,23 +417,11 @@ static void prune_dcache(int count, stru
cond_resched_lock(&dcache_lock);
- tmp = dentry_unused.prev;
- if (unlikely(sb)) {
- /* Try to find a dentry for this sb, but don't try
- * too hard, if they aren't near the tail they will
- * be moved down again soon
- */
- int skip = count;
- while (skip && tmp != &dentry_unused &&
- list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
- skip--;
- tmp = tmp->prev;
- }
- }
- if (tmp == &dentry_unused)
+ tmp = list->prev;
+ if (tmp == list)
break;
list_del_init(tmp);
- prefetch(dentry_unused.prev);
+ prefetch(list->prev);
dentry_stat.nr_unused--;
dentry = list_entry(tmp, struct dentry, d_lru);
@@ -454,7 +454,7 @@ static void prune_dcache(int count, stru
* If this dentry is for "my" filesystem, then I can prune it
* without taking the s_umount lock (I already hold it).
*/
- if (sb && dentry->d_sb == sb) {
+ if (have_list) {
prune_one_dentry(dentry);
continue;
}
@@ -489,68 +489,25 @@ static void prune_dcache(int count, stru
*/
break;
}
+ /* splice any remaining entries back onto dentry_unused */
+ list_splice(list, dentry_unused.prev);
spin_unlock(&dcache_lock);
}
-/*
- * Shrink the dcache for the specified super block.
- * This allows us to unmount a device without disturbing
- * the dcache for the other devices.
- *
- * This implementation makes just two traversals of the
- * unused list. On the first pass we move the selected
- * dentries to the most recent end, and on the second
- * pass we free them. The second pass must restart after
- * each dput(), but since the target dentries are all at
- * the end, it's really just a single traversal.
- */
-
/**
* shrink_dcache_sb - shrink dcache for a superblock
* @sb: superblock
*
* Shrink the dcache for the specified super block. This
- * is used to free the dcache before unmounting a file
- * system
+ * is used to reduce the dcache presence of a file system
+ * before re-mounting, and when invalidating the device
+ * holding a file system.
*/
void shrink_dcache_sb(struct super_block * sb)
{
- struct list_head *tmp, *next;
- struct dentry *dentry;
-
- /*
- * Pass one ... move the dentries for the specified
- * superblock to the most recent end of the unused list.
- */
- spin_lock(&dcache_lock);
- list_for_each_safe(tmp, next, &dentry_unused) {
- dentry = list_entry(tmp, struct dentry, d_lru);
- if (dentry->d_sb != sb)
- continue;
- list_move(tmp, &dentry_unused);
- }
-
- /*
- * Pass two ... free the dentries for this superblock.
- */
-repeat:
- list_for_each_safe(tmp, next, &dentry_unused) {
- dentry = list_entry(tmp, struct dentry, d_lru);
- if (dentry->d_sb != sb)
- continue;
- dentry_stat.nr_unused--;
- list_del_init(tmp);
- spin_lock(&dentry->d_lock);
- if (atomic_read(&dentry->d_count)) {
- spin_unlock(&dentry->d_lock);
- continue;
- }
- prune_one_dentry(dentry);
- cond_resched_lock(&dcache_lock);
- goto repeat;
- }
- spin_unlock(&dcache_lock);
+ shrink_dcache_parent(sb->s_root);
+ shrink_dcache_anon(sb);
}
/*
@@ -607,7 +564,7 @@ positive:
/*
* Search the dentry child list for the specified parent,
- * and move any unused dentries to the end of the unused
+ * and move any unused dentries to the end of a new unused
* list for prune_dcache(). We descend to the next level
* whenever the d_subdirs list is non-empty and continue
* searching.
@@ -619,7 +576,7 @@ positive:
* drop the lock and return early due to latency
* constraints.
*/
-static int select_parent(struct dentry * parent)
+static int select_parent(struct dentry * parent, struct list_head *new)
{
struct dentry *this_parent = parent;
struct list_head *next;
@@ -643,7 +600,7 @@ resume:
* of the unused list for prune_dcache
*/
if (!atomic_read(&dentry->d_count)) {
- list_add_tail(&dentry->d_lru, &dentry_unused);
+ list_add_tail(&dentry->d_lru, new);
dentry_stat.nr_unused++;
found++;
}
@@ -687,9 +644,10 @@ out:
void shrink_dcache_parent(struct dentry * parent)
{
int found;
+ LIST_HEAD(list);
- while ((found = select_parent(parent)) != 0)
- prune_dcache(found, parent->d_sb);
+ while ((found = select_parent(parent, &list)) != 0)
+ prune_dcache(found, &list);
}
/**
@@ -708,6 +666,7 @@ void shrink_dcache_anon(struct super_blo
struct hlist_head *head = &sb->s_anon;
int found;
do {
+ LIST_HEAD(list);
found = 0;
spin_lock(&dcache_lock);
hlist_for_each(lp, head) {
@@ -722,13 +681,13 @@ void shrink_dcache_anon(struct super_blo
* of the unused list for prune_dcache
*/
if (!atomic_read(&this->d_count)) {
- list_add_tail(&this->d_lru, &dentry_unused);
+ list_add_tail(&this->d_lru, &list);
dentry_stat.nr_unused++;
found++;
}
}
spin_unlock(&dcache_lock);
- prune_dcache(found, sb);
+ prune_dcache(found, &list);
} while(found);
}
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-19 1:21 ` Neil Brown
@ 2006-06-19 2:04 ` Andrew Morton
2006-06-19 2:25 ` Neil Brown
2006-06-19 5:55 ` David Chinner
1 sibling, 1 reply; 37+ messages in thread
From: Andrew Morton @ 2006-06-19 2:04 UTC (permalink / raw)
To: Neil Brown; +Cc: dgc, jblunck, linux-kernel, linux-fsdevel, viro, balbir
On Mon, 19 Jun 2006 11:21:04 +1000
Neil Brown <neilb@suse.de> wrote:
> static void prune_dcache(int count, struct super_block *sb)
> +static void prune_dcache(int count, struct list_head *list)
> {
> + int have_list = list != NULL;
> + struct list_head alt_head;
> spin_lock(&dcache_lock);
> + if (list == NULL) {
> + /* use the dentry_unused list */
> + list_add(&alt_head, &dentry_unused);
> + list_del_init(&dentry_unused);
> + list = &alt_head;
> + }
This will make dentry_unused appear to be empty.
> for (; count ; count--) {
> struct dentry *dentry;
> struct list_head *tmp;
> @@ -405,23 +417,11 @@ static void prune_dcache(int count, stru
>
> cond_resched_lock(&dcache_lock);
And then it makes that apparent-emptiness globally visible.
Won't this cause concurrent unmounting or memory shrinking to malfunction?
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-19 2:04 ` Andrew Morton
@ 2006-06-19 2:25 ` Neil Brown
0 siblings, 0 replies; 37+ messages in thread
From: Neil Brown @ 2006-06-19 2:25 UTC (permalink / raw)
To: Andrew Morton; +Cc: dgc, jblunck, linux-kernel, linux-fsdevel, viro, balbir
On Sunday June 18, akpm@osdl.org wrote:
> On Mon, 19 Jun 2006 11:21:04 +1000
> Neil Brown <neilb@suse.de> wrote:
>
> > static void prune_dcache(int count, struct super_block *sb)
> > +static void prune_dcache(int count, struct list_head *list)
> > {
> > + int have_list = list != NULL;
> > + struct list_head alt_head;
> > spin_lock(&dcache_lock);
> > + if (list == NULL) {
> > + /* use the dentry_unused list */
> > + list_add(&alt_head, &dentry_unused);
> > + list_del_init(&dentry_unused);
> > + list = &alt_head;
> > + }
>
> This will make dentry_unused appear to be empty.
>
Yep. Appear.
> > for (; count ; count--) {
> > struct dentry *dentry;
> > struct list_head *tmp;
> > @@ -405,23 +417,11 @@ static void prune_dcache(int count, stru
> >
> > cond_resched_lock(&dcache_lock);
>
> And then it makes that apparent-emptiness globally visible.
>
But who will look? and will they care?
> Won't this cause concurrent unmounting or memory shrinking to malfunction?
I don't think so.
Unmounting always passes a list to prune_dcache, so dentry_unused
doesn't get emptied, so shrink_dcache_memory will not get confused.
If there are two concurrent calls to shrink_dcache_memory, then one
will find an empty list and do nothing, and it appears this is
possible - there is no locking between callers to shrink_slab.
That's probably not fatal, but it isn't ideal (I expect....).
I guess I don't need to create a separate list. It seemed cleaner but
does have this awkwardness.
The following patch on top of the previous one changes that behaviour.
I'm wondering now if maybe we should really have two different
'prune_dcache' functions.
One that works on a private list and doesn't bother with
DCACHE_REFERENCED or s_umount, and one that works on the global list
and does the awkward stuff. I might try that later and see what it
looks like.
NeilBrown
### Diffstat output
./fs/dcache.c | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff .prev/fs/dcache.c ./fs/dcache.c
--- .prev/fs/dcache.c 2006-06-19 11:19:29.000000000 +1000
+++ ./fs/dcache.c 2006-06-19 12:20:49.000000000 +1000
@@ -404,12 +404,10 @@ static void prune_dcache(int count, stru
int have_list = list != NULL;
struct list_head alt_head;
spin_lock(&dcache_lock);
- if (list == NULL) {
+ if (list == NULL)
/* use the dentry_unused list */
- list_add(&alt_head, &dentry_unused);
- list_del_init(&dentry_unused);
- list = &alt_head;
- }
+ list = &dentry_unused;
+
for (; count ; count--) {
struct dentry *dentry;
struct list_head *tmp;
@@ -490,7 +488,8 @@ static void prune_dcache(int count, stru
break;
}
/* splice any remaining entries back onto dentry_unused */
- list_splice(list, dentry_unused.prev);
+ if (have_list)
+ list_splice(list, dentry_unused.prev);
spin_unlock(&dcache_lock);
}
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-19 1:21 ` Neil Brown
2006-06-19 2:04 ` Andrew Morton
@ 2006-06-19 5:55 ` David Chinner
2006-06-19 6:33 ` Andrew Morton
1 sibling, 1 reply; 37+ messages in thread
From: David Chinner @ 2006-06-19 5:55 UTC (permalink / raw)
To: Neil Brown; +Cc: Jan Blunck, linux-kernel, linux-fsdevel, akpm, viro, balbir
On Mon, Jun 19, 2006 at 11:21:04AM +1000, Neil Brown wrote:
> On Monday June 19, dgc@sgi.com wrote:
> >
> > Ok. Send me the patch and I'll try to get some tests done on it...
>
> Below, thanks.
Neil,
I doubt I'm going to be able to run any tests for this patch on the
current -mm (2.6.17-rc6-mm2) any time soon. I get an interrupt
related BUG on boot in the sn2 console driver, and then a panic in
the SCSI I/O completion code when running mkfs.xfs. Both are
reproducible.
I don't have any other non-sn2 machine with enough memory in it to
test the patch....
The boot warnings:
....
eth3 device: S2io Inc. Xframe 10 Gigabit Ethernet PCI-X (rev 03)
eth3 configuration: eth-id-00:0c:fc:00:02:c8
irq 60, desc: a0000001009a2d00, depth: 1, count: 0, unhandled: 0
->handle_irq(): 0000000000000000, 0x0
->chip(): a000000100a0fe40, irq_type_sn+0x0/0x80
->action(): e00000b007471b80
->action->handler(): a0000002059373d0, s2io_msi_handle+0x1510/0x660 [s2io] eth3
IP address: 192.168.1.248/24
Unexpected irq vector 0x3c on CPU 3!
BUG: warning at drivers/serial/sn_console.c:976/sn_sal_console_write()
Call Trace:
[<a0000001000125a0>] show_stack+0x40/0xa0
sp=e0000039ee2f7970 bsp=e0000039ee2f14b0
[<a000000100012e30>] dump_stack+0x30/0x60
sp=e0000039ee2f7b40 bsp=e0000039ee2f1498
[<a0000001004e5350>] sn_sal_console_write+0x270/0x3e0
sp=e0000039ee2f7b40 bsp=e0000039ee2f1420
[<a0000001000a1d40>] __call_console_drivers+0x160/0x1c0
sp=e0000039ee2f7b40 bsp=e0000039ee2f13e8
[<a0000001000a1e80>] _call_console_drivers+0xe0/0x100
sp=e0000039ee2f7b40 bsp=e0000039ee2f13b8
[<a0000001000a2460>] release_console_sem+0x2c0/0x460
sp=e0000039ee2f7b40 bsp=e0000039ee2f1378
[<a0000001000a2cf0>] vprintk+0x6f0/0x880
sp=e0000039ee2f7b40 bsp=e0000039ee2f12f0
[<a0000001000a1f10>] printk+0x70/0xa0
sp=e0000039ee2f7b80 bsp=e0000039ee2f1290
[<a00000010000f530>] ack_bad_irq+0x30/0x60
sp=e0000039ee2f7bc0 bsp=e0000039ee2f1270
[<a0000001000f7ab0>] handle_bad_irq+0x410/0x440
sp=e0000039ee2f7bc0 bsp=e0000039ee2f1230
[<a0000001000f7c80>] __do_IRQ+0x80/0x3c0
sp=e0000039ee2f7bc0 bsp=e0000039ee2f11d0
[<a00000010000f790>] ia64_handle_irq+0xb0/0x160
sp=e0000039ee2f7bc0 bsp=e0000039ee2f11a0
[<a00000010000b1a0>] ia64_leave_kernel+0x0/0x290
sp=e0000039ee2f7bc0 bsp=e0000039ee2f11a0
[<a000000100010e80>] default_idle+0x120/0x160
sp=e0000039ee2f7d90 bsp=e0000039ee2f1158
[<a000000100011d90>] cpu_idle+0x1d0/0x2c0
sp=e0000039ee2f7e30 bsp=e0000039ee2f1128
[<a000000100051b70>] start_secondary+0x350/0x380
sp=e0000039ee2f7e30 bsp=e0000039ee2f10e0
[<a000000100008650>] __end_ivt_text+0x330/0x360
sp=e0000039ee2f7e30 bsp=e0000039ee2f10e0
eth3: Link down done
.....
And the panic from mkfs.xfs:
budgie:~/dgc/jbod # sh jbd-mkfs
meta-data=/dev/mapper/dm1 isize=256 agcount=5, agsize=1114112 blks
= sectsz=512 attr=0
data = bsize=16384 blocks=5570560, imaxpct=25
= sunit=0 swidth=0 blks, unwritten=1
naming =version 2 bsize=16384
log =/dev/mapper/dm0 bsize=16384 blocks=8192, version=1
= sectsz=512 sunit=0 blks
realtime =none extsz=65536 blocks=0, rtextents=0
idle[0]: Oops 8813272891392 [1]
Modules linked in: ipv6 s2io sg
Pid: 0, CPU 1, comm: idle
psr : 0000101008022018 ifs : 800000000000038a ip : [<a00000010014c7c0>] Not tainted
ip is at kmem_freepages+0x100/0x200
unat: 0000000000000000 pfs : 000000000000050d rsc : 0000000000000003
rnat: 800000000000048d bsps: 0000000000000000 pr : 80000000e7a95669
ldrs: 0000000000000000 ccv : 0000000000000000 fpsr: 0009804c8a70033f
csd : 0000000000000000 ssd : 0000000000000000
b0 : a00000010014cf30 b6 : a000000100003320 b7 : a000000100105fc0
f6 : 1003e0000000000000045 f7 : 0ffdd8000000000000000
f8 : 1000589ffec9800000000 f9 : 10004a000000000000000
f10 : 1003e0000000000c1e09c f11 : 1003e00000000054d2444
r1 : a000000100d953c0 r2 : 0000000000000001 r3 : 0000000000000001
r8 : 0000000000000000 r9 : a0007fff5d4e0000 r10 : 0000000005bd8ff9
r11 : 00000000068f7ff8 r12 : e0000039ee2bfb50 r13 : e0000039ee2b8000
r14 : 0000000000d1efff r15 : 0000000000000080 r16 : ffffffffffffffff
r17 : a0007fff8b3a8000 r18 : a000000100baece8 r19 : 0000000000000208
r20 : e0000039ee3b98b8 r21 : 0000000000000000 r22 : 0000000000000080
r23 : ffffffffffffff7f r24 : 0000000000000000 r25 : e000003078274000
r26 : e00000347bffc000 r27 : 0000000000000000 r28 : e0000039ee2bfb60
r29 : e00000347bffc020 r30 : 00000000000021b9 r31 : 0000000000000000
Call Trace:
[<a0000001000125a0>] show_stack+0x40/0xa0
sp=e0000039ee2bf700 bsp=e0000039ee2b98c8
[<a000000100012dd0>] show_regs+0x7d0/0x800
sp=e0000039ee2bf8d0 bsp=e0000039ee2b9880
[<a000000100033510>] die+0x230/0x340
sp=e0000039ee2bf8d0 bsp=e0000039ee2b9838
[<a0000001000564e0>] ia64_do_page_fault+0x860/0x9c0
sp=e0000039ee2bf8f0 bsp=e0000039ee2b97d8
[<a00000010000b1a0>] ia64_leave_kernel+0x0/0x290
sp=e0000039ee2bf980 bsp=e0000039ee2b97d8
[<a00000010014c7c0>] kmem_freepages+0x100/0x200
sp=e0000039ee2bfb50 bsp=e0000039ee2b9788
[<a00000010014cf30>] slab_destroy+0x1b0/0x260
sp=e0000039ee2bfb50 bsp=e0000039ee2b9738
[<a00000010014d300>] free_block+0x320/0x3c0
sp=e0000039ee2bfb60 bsp=e0000039ee2b96e0
[<a00000010014ccf0>] __cache_free+0x430/0x4c0
sp=e0000039ee2bfb60 bsp=e0000039ee2b9688
[<a00000010014db80>] kmem_cache_free+0x120/0x180
sp=e0000039ee2bfb70 bsp=e0000039ee2b9658
[<a000000100105ff0>] mempool_free_slab+0x30/0x60
sp=e0000039ee2bfb70 bsp=e0000039ee2b9630
[<a000000100106210>] mempool_free+0x130/0x160
sp=e0000039ee2bfb70 bsp=e0000039ee2b95e8
[<a000000100638760>] dec_pending+0x2c0/0x2e0
sp=e0000039ee2bfb70 bsp=e0000039ee2b95a0
[<a000000100638a80>] clone_endio+0x140/0x180
sp=e0000039ee2bfb70 bsp=e0000039ee2b9558
[<a000000100168970>] bio_endio+0x110/0x140
sp=e0000039ee2bfb70 bsp=e0000039ee2b9520
[<a0000001003dc2b0>] __end_that_request_first+0x390/0xac0
sp=e0000039ee2bfb70 bsp=e0000039ee2b9498
[<a0000001003dca10>] end_that_request_chunk+0x30/0x60
sp=e0000039ee2bfb70 bsp=e0000039ee2b9468
[<a000000100565d30>] scsi_end_request+0x50/0x1e0
sp=e0000039ee2bfb70 bsp=e0000039ee2b9420
[<a0000001005663f0>] scsi_io_completion+0x3b0/0x8a0
sp=e0000039ee2bfb70 bsp=e0000039ee2b9388
[<a0000001005dd220>] sd_rw_intr+0x480/0x4a0
sp=e0000039ee2bfb80 bsp=e0000039ee2b9338
[<a00000010055a5f0>] scsi_finish_command+0x150/0x180
sp=e0000039ee2bfba0 bsp=e0000039ee2b9308
[<a000000100567910>] scsi_softirq_done+0x270/0x2a0
sp=e0000039ee2bfba0 bsp=e0000039ee2b92e0
[<a0000001003d8e60>] blk_done_softirq+0x160/0x1c0
sp=e0000039ee2bfbb0 bsp=e0000039ee2b92c8
[<a0000001000afff0>] __do_softirq+0xd0/0x240
sp=e0000039ee2bfbc0 bsp=e0000039ee2b9250
[<a0000001000b01e0>] do_softirq+0x80/0xe0
sp=e0000039ee2bfbc0 bsp=e0000039ee2b91e8
[<a0000001000b0520>] irq_exit+0x80/0xc0
sp=e0000039ee2bfbc0 bsp=e0000039ee2b91d0
[<a00000010000f810>] ia64_handle_irq+0x130/0x160
sp=e0000039ee2bfbc0 bsp=e0000039ee2b91a0
[<a00000010000b1a0>] ia64_leave_kernel+0x0/0x290
sp=e0000039ee2bfbc0 bsp=e0000039ee2b91a0
[<a000000100407600>] __copy_user+0x20/0x960
sp=e0000039ee2bfd90 bsp=e0000039ee2b9178
[<a000000100010e20>] default_idle+0xc0/0x160
sp=e0000039ee2bfd90 bsp=e0000039ee2b9158
[<a000000100011d90>] cpu_idle+0x1d0/0x2c0
sp=e0000039ee2bfe30 bsp=e0000039ee2b9128
[<a000000100051b70>] start_secondary+0x350/0x380
sp=e0000039ee2bfe30 bsp=e0000039ee2b90e0
[<a000000100008650>] __end_ivt_text+0x330/0x360
sp=e0000039ee2bfe30 bsp=e0000039ee2b90e0
<0>Kernel panic - not syncing: Aiee, killing interrupt handler!
Cheers,
Dave.
--
Dave Chinner
Principal Engineer
SGI Australian Software Group
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-19 5:55 ` David Chinner
@ 2006-06-19 6:33 ` Andrew Morton
2006-06-19 8:30 ` David Chinner
2006-06-19 10:48 ` Thomas Gleixner
0 siblings, 2 replies; 37+ messages in thread
From: Andrew Morton @ 2006-06-19 6:33 UTC (permalink / raw)
To: David Chinner, Ingo Molnar, Thomas Gleixner
Cc: neilb, jblunck, linux-kernel, linux-fsdevel, viro, balbir
On Mon, 19 Jun 2006 15:55:23 +1000
David Chinner <dgc@sgi.com> wrote:
> On Mon, Jun 19, 2006 at 11:21:04AM +1000, Neil Brown wrote:
> > On Monday June 19, dgc@sgi.com wrote:
> > >
> > > Ok. Send me the patch and I'll try to get some tests done on it...
> >
> > Below, thanks.
>
> Neil,
>
> I doubt I'm going to be able to run any tests for this patch on the
> current -mm (2.6.17-rc6-mm2) any time soon. I get an interrupt
> related BUG on boot in the sn2 console driver, and then a panic in
> the scsi I/o completion code when running mkfs.xfs. Both are
> reproducable.
Thanks.
> I don't have any other non-sn2 machine with enough memory in it to
> test the patch....
>
> The boot warnings:
>
> ....
> eth3 device: S2io Inc. Xframe 10 Gigabit Ethernet PCI-X (rev 03)
> eth3 configuration: eth-id-00:0c:fc:00:02:c8
> irq 60, desc: a0000001009a2d00, depth: 1, count: 0, unhandled: 0
> ->handle_irq(): 0000000000000000, 0x0
> ->chip(): a000000100a0fe40, irq_type_sn+0x0/0x80
> ->action(): e00000b007471b80
> ->action->handler(): a0000002059373d0, s2io_msi_handle+0x1510/0x660 [s2io] eth3
> IP address: 192.168.1.248/24
> Unexpected irq vector 0x3c on CPU 3!
I guess that's where things start to go bad. genirq changes?
> BUG: warning at drivers/serial/sn_console.c:976/sn_sal_console_write()
There's a warning patch in -mm which shouts when someone does a busywait
delay of over a millisecond, and sn_sal_console_write() is doing
mdelay(150).
And it's doing that mdelay because it thinks port->sc_port.lock is already
held. I don't know why.
> Call Trace:
> [<a0000001000125a0>] show_stack+0x40/0xa0
> sp=e0000039ee2f7970 bsp=e0000039ee2f14b0
> [<a000000100012e30>] dump_stack+0x30/0x60
> sp=e0000039ee2f7b40 bsp=e0000039ee2f1498
> [<a0000001004e5350>] sn_sal_console_write+0x270/0x3e0
> sp=e0000039ee2f7b40 bsp=e0000039ee2f1420
> [<a0000001000a1d40>] __call_console_drivers+0x160/0x1c0
> sp=e0000039ee2f7b40 bsp=e0000039ee2f13e8
> [<a0000001000a1e80>] _call_console_drivers+0xe0/0x100
> sp=e0000039ee2f7b40 bsp=e0000039ee2f13b8
> [<a0000001000a2460>] release_console_sem+0x2c0/0x460
> sp=e0000039ee2f7b40 bsp=e0000039ee2f1378
> [<a0000001000a2cf0>] vprintk+0x6f0/0x880
> sp=e0000039ee2f7b40 bsp=e0000039ee2f12f0
> [<a0000001000a1f10>] printk+0x70/0xa0
> sp=e0000039ee2f7b80 bsp=e0000039ee2f1290
> [<a00000010000f530>] ack_bad_irq+0x30/0x60
> sp=e0000039ee2f7bc0 bsp=e0000039ee2f1270
> [<a0000001000f7ab0>] handle_bad_irq+0x410/0x440
> sp=e0000039ee2f7bc0 bsp=e0000039ee2f1230
> [<a0000001000f7c80>] __do_IRQ+0x80/0x3c0
> sp=e0000039ee2f7bc0 bsp=e0000039ee2f11d0
> [<a00000010000f790>] ia64_handle_irq+0xb0/0x160
> sp=e0000039ee2f7bc0 bsp=e0000039ee2f11a0
> [<a00000010000b1a0>] ia64_leave_kernel+0x0/0x290
> sp=e0000039ee2f7bc0 bsp=e0000039ee2f11a0
> [<a000000100010e80>] default_idle+0x120/0x160
> sp=e0000039ee2f7d90 bsp=e0000039ee2f1158
> [<a000000100011d90>] cpu_idle+0x1d0/0x2c0
> sp=e0000039ee2f7e30 bsp=e0000039ee2f1128
> [<a000000100051b70>] start_secondary+0x350/0x380
> sp=e0000039ee2f7e30 bsp=e0000039ee2f10e0
> [<a000000100008650>] __end_ivt_text+0x330/0x360
> sp=e0000039ee2f7e30 bsp=e0000039ee2f10e0
> eth3: Link down done
> .....
>
>
> And the panic from mkfs.xfs:
>
> budgie:~/dgc/jbod # sh jbd-mkfs
> meta-data=/dev/mapper/dm1 isize=256 agcount=5, agsize=1114112 blks
> = sectsz=512 attr=0
> data = bsize=16384 blocks=5570560, imaxpct=25
> = sunit=0 swidth=0 blks, unwritten=1
> naming =version 2 bsize=16384
> log =/dev/mapper/dm0 bsize=16384 blocks=8192, version=1
> = sectsz=512 sunit=0 blks
> realtime =none extsz=65536 blocks=0, rtextents=0
> idle[0]: Oops 8813272891392 [1]
> Modules linked in: ipv6 s2io sg
>
> Pid: 0, CPU 1, comm: idle
> psr : 0000101008022018 ifs : 800000000000038a ip : [<a00000010014c7c0>] Not tainted
> ip is at kmem_freepages+0x100/0x200
> unat: 0000000000000000 pfs : 000000000000050d rsc : 0000000000000003
> rnat: 800000000000048d bsps: 0000000000000000 pr : 80000000e7a95669
> ldrs: 0000000000000000 ccv : 0000000000000000 fpsr: 0009804c8a70033f
> csd : 0000000000000000 ssd : 0000000000000000
> b0 : a00000010014cf30 b6 : a000000100003320 b7 : a000000100105fc0
> f6 : 1003e0000000000000045 f7 : 0ffdd8000000000000000
> f8 : 1000589ffec9800000000 f9 : 10004a000000000000000
> f10 : 1003e0000000000c1e09c f11 : 1003e00000000054d2444
> r1 : a000000100d953c0 r2 : 0000000000000001 r3 : 0000000000000001
> r8 : 0000000000000000 r9 : a0007fff5d4e0000 r10 : 0000000005bd8ff9
> r11 : 00000000068f7ff8 r12 : e0000039ee2bfb50 r13 : e0000039ee2b8000
> r14 : 0000000000d1efff r15 : 0000000000000080 r16 : ffffffffffffffff
> r17 : a0007fff8b3a8000 r18 : a000000100baece8 r19 : 0000000000000208
> r20 : e0000039ee3b98b8 r21 : 0000000000000000 r22 : 0000000000000080
> r23 : ffffffffffffff7f r24 : 0000000000000000 r25 : e000003078274000
> r26 : e00000347bffc000 r27 : 0000000000000000 r28 : e0000039ee2bfb60
> r29 : e00000347bffc020 r30 : 00000000000021b9 r31 : 0000000000000000
>
> Call Trace:
> [<a0000001000125a0>] show_stack+0x40/0xa0
> sp=e0000039ee2bf700 bsp=e0000039ee2b98c8
> [<a000000100012dd0>] show_regs+0x7d0/0x800
> sp=e0000039ee2bf8d0 bsp=e0000039ee2b9880
> [<a000000100033510>] die+0x230/0x340
> sp=e0000039ee2bf8d0 bsp=e0000039ee2b9838
> [<a0000001000564e0>] ia64_do_page_fault+0x860/0x9c0
> sp=e0000039ee2bf8f0 bsp=e0000039ee2b97d8
> [<a00000010000b1a0>] ia64_leave_kernel+0x0/0x290
> sp=e0000039ee2bf980 bsp=e0000039ee2b97d8
> [<a00000010014c7c0>] kmem_freepages+0x100/0x200
> sp=e0000039ee2bfb50 bsp=e0000039ee2b9788
> [<a00000010014cf30>] slab_destroy+0x1b0/0x260
> sp=e0000039ee2bfb50 bsp=e0000039ee2b9738
> [<a00000010014d300>] free_block+0x320/0x3c0
> sp=e0000039ee2bfb60 bsp=e0000039ee2b96e0
> [<a00000010014ccf0>] __cache_free+0x430/0x4c0
> sp=e0000039ee2bfb60 bsp=e0000039ee2b9688
> [<a00000010014db80>] kmem_cache_free+0x120/0x180
> sp=e0000039ee2bfb70 bsp=e0000039ee2b9658
> [<a000000100105ff0>] mempool_free_slab+0x30/0x60
> sp=e0000039ee2bfb70 bsp=e0000039ee2b9630
> [<a000000100106210>] mempool_free+0x130/0x160
> sp=e0000039ee2bfb70 bsp=e0000039ee2b95e8
> [<a000000100638760>] dec_pending+0x2c0/0x2e0
> sp=e0000039ee2bfb70 bsp=e0000039ee2b95a0
> [<a000000100638a80>] clone_endio+0x140/0x180
> sp=e0000039ee2bfb70 bsp=e0000039ee2b9558
> [<a000000100168970>] bio_endio+0x110/0x140
> sp=e0000039ee2bfb70 bsp=e0000039ee2b9520
> [<a0000001003dc2b0>] __end_that_request_first+0x390/0xac0
> sp=e0000039ee2bfb70 bsp=e0000039ee2b9498
> [<a0000001003dca10>] end_that_request_chunk+0x30/0x60
> sp=e0000039ee2bfb70 bsp=e0000039ee2b9468
> [<a000000100565d30>] scsi_end_request+0x50/0x1e0
> sp=e0000039ee2bfb70 bsp=e0000039ee2b9420
> [<a0000001005663f0>] scsi_io_completion+0x3b0/0x8a0
> sp=e0000039ee2bfb70 bsp=e0000039ee2b9388
> [<a0000001005dd220>] sd_rw_intr+0x480/0x4a0
> sp=e0000039ee2bfb80 bsp=e0000039ee2b9338
> [<a00000010055a5f0>] scsi_finish_command+0x150/0x180
> sp=e0000039ee2bfba0 bsp=e0000039ee2b9308
> [<a000000100567910>] scsi_softirq_done+0x270/0x2a0
> sp=e0000039ee2bfba0 bsp=e0000039ee2b92e0
> [<a0000001003d8e60>] blk_done_softirq+0x160/0x1c0
> sp=e0000039ee2bfbb0 bsp=e0000039ee2b92c8
> [<a0000001000afff0>] __do_softirq+0xd0/0x240
> sp=e0000039ee2bfbc0 bsp=e0000039ee2b9250
> [<a0000001000b01e0>] do_softirq+0x80/0xe0
> sp=e0000039ee2bfbc0 bsp=e0000039ee2b91e8
> [<a0000001000b0520>] irq_exit+0x80/0xc0
> sp=e0000039ee2bfbc0 bsp=e0000039ee2b91d0
> [<a00000010000f810>] ia64_handle_irq+0x130/0x160
> sp=e0000039ee2bfbc0 bsp=e0000039ee2b91a0
> [<a00000010000b1a0>] ia64_leave_kernel+0x0/0x290
> sp=e0000039ee2bfbc0 bsp=e0000039ee2b91a0
> [<a000000100407600>] __copy_user+0x20/0x960
> sp=e0000039ee2bfd90 bsp=e0000039ee2b9178
> [<a000000100010e20>] default_idle+0xc0/0x160
> sp=e0000039ee2bfd90 bsp=e0000039ee2b9158
> [<a000000100011d90>] cpu_idle+0x1d0/0x2c0
> sp=e0000039ee2bfe30 bsp=e0000039ee2b9128
> [<a000000100051b70>] start_secondary+0x350/0x380
> sp=e0000039ee2bfe30 bsp=e0000039ee2b90e0
> [<a000000100008650>] __end_ivt_text+0x330/0x360
> sp=e0000039ee2bfe30 bsp=e0000039ee2b90e0
slab got messed up - I don't know what did this, either.
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-19 6:33 ` Andrew Morton
@ 2006-06-19 8:30 ` David Chinner
2006-06-19 10:48 ` Thomas Gleixner
1 sibling, 0 replies; 37+ messages in thread
From: David Chinner @ 2006-06-19 8:30 UTC (permalink / raw)
To: Andrew Morton
Cc: Ingo Molnar, Thomas Gleixner, neilb, jblunck, linux-kernel,
linux-fsdevel, viro, balbir
On Sun, Jun 18, 2006 at 11:33:39PM -0700, Andrew Morton wrote:
> On Mon, 19 Jun 2006 15:55:23 +1000
> David Chinner <dgc@sgi.com> wrote:
> >
> > The boot warnings:
> >
> > ....
> > eth3 device: S2io Inc. Xframe 10 Gigabit Ethernet PCI-X (rev 03)
> > eth3 configuration: eth-id-00:0c:fc:00:02:c8
> > irq 60, desc: a0000001009a2d00, depth: 1, count: 0, unhandled: 0
> > ->handle_irq(): 0000000000000000, 0x0
> > ->chip(): a000000100a0fe40, irq_type_sn+0x0/0x80
> > ->action(): e00000b007471b80
> > ->action->handler(): a0000002059373d0, s2io_msi_handle+0x1510/0x660 [s2io] eth3
> > IP address: 192.168.1.248/24
> > Unexpected irq vector 0x3c on CPU 3!
>
> I guess that's where things start to go bad. genirq changes?
Seems likely. Plain 2.6.17-rc6 doesn't give this error.
> > BUG: warning at drivers/serial/sn_console.c:976/sn_sal_console_write()
>
> There's a warning patch in -mm which shouts when someone does a busywait
> delay of over a millisecond, and sn_sal_console_write() is doing
> mdelay(150).
>
> And it's doing that mdelay because it thinks port->sc_port.lock is already
> held. I don't know why.
Ok, that explains why I haven't seen it before...
> > ip is at kmem_freepages+0x100/0x200
....
>
> slab got messed up - I don't know what did this, either.
2.6.17-rc6 seems fine here as well. I've run with slab debugging
turned on and got the same panic, so it's not something obvious....
Cheers,
Dave.
--
Dave Chinner
Principal Engineer
SGI Australian Software Group
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-19 6:33 ` Andrew Morton
2006-06-19 8:30 ` David Chinner
@ 2006-06-19 10:48 ` Thomas Gleixner
2006-06-19 11:01 ` Andrew Morton
1 sibling, 1 reply; 37+ messages in thread
From: Thomas Gleixner @ 2006-06-19 10:48 UTC (permalink / raw)
To: Andrew Morton
Cc: David Chinner, Ingo Molnar, neilb, jblunck, linux-kernel,
linux-fsdevel, viro, balbir
On Sun, 2006-06-18 at 23:33 -0700, Andrew Morton wrote:
> > ....
> > eth3 device: S2io Inc. Xframe 10 Gigabit Ethernet PCI-X (rev 03)
> > eth3 configuration: eth-id-00:0c:fc:00:02:c8
> > irq 60, desc: a0000001009a2d00, depth: 1, count: 0, unhandled: 0
> > ->handle_irq(): 0000000000000000, 0x0
> > ->chip(): a000000100a0fe40, irq_type_sn+0x0/0x80
> > ->action(): e00000b007471b80
> > ->action->handler(): a0000002059373d0, s2io_msi_handle+0x1510/0x660 [s2io] eth3
> > IP address: 192.168.1.248/24
> > Unexpected irq vector 0x3c on CPU 3!
>
> I guess that's where things start to go bad. genirq changes?
Hmm, the extra noisy printout is from genirq. The unhandled interrupt
should be unrelated.
The s2io driver enables interrupts in the card in start_nic() before
requesting the interrupt itself with request_irq(). So I suspect that's a
problem which has been there already; just the noisy printout makes it
more visible.
tglx
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-19 10:48 ` Thomas Gleixner
@ 2006-06-19 11:01 ` Andrew Morton
2006-06-19 17:34 ` Ravinandan Arakali
0 siblings, 1 reply; 37+ messages in thread
From: Andrew Morton @ 2006-06-19 11:01 UTC (permalink / raw)
To: tglx, Ravinandan Arakali
Cc: dgc, mingo, neilb, jblunck, linux-kernel, linux-fsdevel, viro,
balbir
On Mon, 19 Jun 2006 12:48:44 +0200
Thomas Gleixner <tglx@linutronix.de> wrote:
> On Sun, 2006-06-18 at 23:33 -0700, Andrew Morton wrote:
> > > ....
> > > eth3 device: S2io Inc. Xframe 10 Gigabit Ethernet PCI-X (rev 03)
> > > eth3 configuration: eth-id-00:0c:fc:00:02:c8
> > > irq 60, desc: a0000001009a2d00, depth: 1, count: 0, unhandled: 0
> > > ->handle_irq(): 0000000000000000, 0x0
> > > ->chip(): a000000100a0fe40, irq_type_sn+0x0/0x80
> > > ->action(): e00000b007471b80
> > > ->action->handler(): a0000002059373d0, s2io_msi_handle+0x1510/0x660 [s2io] eth3
> > > IP address: 192.168.1.248/24
> > > Unexpected irq vector 0x3c on CPU 3!
> >
> > I guess that's where things start to go bad. genirq changes?
>
> Hmm, the extra noisy printout is from genirq. The unhandled interrupt
> should be unrelated.
>
> The s2io driver enables interrupts in the card in start_nic() before
> requesting the interrupt itself with request_irq(). So I suspect that's a
> problem which has been there already; just the noisy printout makes it
> more visible.
OK, that's not good. It would be strange for the NIC to be asserting an
interrupt in that window though - the machine would end up taking a zillion
interrupts and would disable the whole IRQ line.
Still. Ravinandan, could you take a look at fixing that up, please? Wire
up the IRQ handler before enabling interrupts?
We still don't know why these things happened.
^ permalink raw reply [flat|nested] 37+ messages in thread
* RE: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-19 11:01 ` Andrew Morton
@ 2006-06-19 17:34 ` Ravinandan Arakali
2006-06-20 0:37 ` Andrew Morton
0 siblings, 1 reply; 37+ messages in thread
From: Ravinandan Arakali @ 2006-06-19 17:34 UTC (permalink / raw)
To: 'Andrew Morton', tglx
Cc: dgc, mingo, neilb, jblunck, linux-kernel, linux-fsdevel, viro,
balbir, Ananda. Raju (E-mail), Leonid. Grossman (E-mail)
Andrew,
This is a known problem and has been fixed in our internal source tree. We
will be submitting the patch soon.
Ravi
-----Original Message-----
From: Andrew Morton [mailto:akpm@osdl.org]
Sent: Monday, June 19, 2006 4:01 AM
To: tglx@linutronix.de; Ravinandan Arakali
Cc: dgc@sgi.com; mingo@elte.hu; neilb@suse.de; jblunck@suse.de;
linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
viro@zeniv.linux.org.uk; balbir@in.ibm.com
Subject: Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries
list (2nd version)
On Mon, 19 Jun 2006 12:48:44 +0200
Thomas Gleixner <tglx@linutronix.de> wrote:
> On Sun, 2006-06-18 at 23:33 -0700, Andrew Morton wrote:
> > > ....
> > > eth3 device: S2io Inc. Xframe 10 Gigabit Ethernet PCI-X (rev
03)
> > > eth3 configuration: eth-id-00:0c:fc:00:02:c8
> > > irq 60, desc: a0000001009a2d00, depth: 1, count: 0, unhandled: 0
> > > ->handle_irq(): 0000000000000000, 0x0
> > > ->chip(): a000000100a0fe40, irq_type_sn+0x0/0x80
> > > ->action(): e00000b007471b80
> > > ->action->handler(): a0000002059373d0, s2io_msi_handle+0x1510/0x660
[s2io] eth3
> > > IP address: 192.168.1.248/24
> > > Unexpected irq vector 0x3c on CPU 3!
> >
> > I guess that's where things start to go bad. genirq changes?
>
> Hmm, the extra noisy printout is from genirq. The unhandled interrupt
> should be unrelated.
>
> The s2io driver enables interrupts in the card in start_nic() before
> requesting the interrupt itself with request_irq(). So I suspect that's a
> problem which has been there already; just the noisy printout makes it
> more visible.
OK, that's not good. It would be strange for the NIC to be asserting an
interrupt in that window though - the machine would end up taking a zillion
interrupts and would disable the whole IRQ line.
Still. Ravinandan, could you take a look at fixing that up, please? Wire
up the IRQ handler before enabling interrupts?
We still don't know why these things happened.
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-19 17:34 ` Ravinandan Arakali
@ 2006-06-20 0:37 ` Andrew Morton
2006-06-20 21:34 ` Ravinandan Arakali
0 siblings, 1 reply; 37+ messages in thread
From: Andrew Morton @ 2006-06-20 0:37 UTC (permalink / raw)
To: ravinandan.arakali
Cc: tglx, dgc, mingo, neilb, jblunck, linux-kernel, linux-fsdevel,
viro, balbir, ananda.raju, leonid.grossman, Jes Sorensen
"Ravinandan Arakali" <ravinandan.arakali@neterion.com> wrote:
>
> This is a known problem and has been fixed in our internal source tree. We
> will be submitting the patch soon.
Please send me a copy asap - I urgently need to get the -mm patches vaguely
stabilised.
I'm somewhat surprised that the sn2 failed so seriously - I thought Jes was
testing that fairly regularly.
I think we kinda-sorta have a handle on the s2io->IRQ problem (although the
unexpectedly-held console spinlock is a mystery).
The scsi->BIO->mempool->slab crash is also a mystery.
Thanks.
^ permalink raw reply [flat|nested] 37+ messages in thread
* RE: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-20 0:37 ` Andrew Morton
@ 2006-06-20 21:34 ` Ravinandan Arakali
2006-06-20 22:10 ` Andrew Morton
0 siblings, 1 reply; 37+ messages in thread
From: Ravinandan Arakali @ 2006-06-20 21:34 UTC (permalink / raw)
To: 'Andrew Morton'
Cc: tglx, dgc, mingo, neilb, jblunck, linux-kernel, linux-fsdevel,
viro, balbir, ananda.raju, leonid.grossman,
'Jes Sorensen'
Do you want the patch to be submitted to netdev(the mailing list that we
usually submit to) ?
Ravi
-----Original Message-----
From: Andrew Morton [mailto:akpm@osdl.org]
Sent: Monday, June 19, 2006 5:37 PM
To: ravinandan.arakali@neterion.com
Cc: tglx@linutronix.de; dgc@sgi.com; mingo@elte.hu; neilb@suse.de;
jblunck@suse.de; linux-kernel@vger.kernel.org;
linux-fsdevel@vger.kernel.org; viro@zeniv.linux.org.uk;
balbir@in.ibm.com; ananda.raju@neterion.com;
leonid.grossman@neterion.com; Jes Sorensen
Subject: Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries
list (2nd version)
"Ravinandan Arakali" <ravinandan.arakali@neterion.com> wrote:
>
> This is a known problem and has been fixed in our internal source tree. We
> will be submitting the patch soon.
Please send me a copy asap - I urgently need to get the -mm patches vaguely
stabilised.
I'm somewhat surprised that the sn2 failed so seriously - I thought Jes was
testing that fairly regularly.
I think we kinda-sorta have a handle on the s2io->IRQ problem (although the
unexpectedly-held console spinlock is a mystery).
The scsi->BIO->mempool->slab crash is also a mystery.
Thanks.
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-20 21:34 ` Ravinandan Arakali
@ 2006-06-20 22:10 ` Andrew Morton
2006-06-20 23:56 ` Ravinandan Arakali
0 siblings, 1 reply; 37+ messages in thread
From: Andrew Morton @ 2006-06-20 22:10 UTC (permalink / raw)
To: ravinandan.arakali
Cc: tglx, dgc, mingo, neilb, jblunck, linux-kernel, linux-fsdevel,
viro, balbir, ananda.raju, leonid.grossman, jes
"Ravinandan Arakali" <ravinandan.arakali@neterion.com> wrote:
>
> Do you want the patch to be submitted to netdev(the mailing list that we
> usually submit to) ?
If the patches are a formal submission then please send them to Jeff,
netdev and I'd like a cc too.
If they are not considered quite ready for submission then please just send
them to myself, thanks. A netdev cc would be appropriate as well. Basically
I just want to get that sn2 machine to boot.
Generally it's better to cc too many mailing lists and individuals than too
few.
^ permalink raw reply [flat|nested] 37+ messages in thread
* RE: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-20 22:10 ` Andrew Morton
@ 2006-06-20 23:56 ` Ravinandan Arakali
2006-06-21 0:18 ` Andrew Morton
0 siblings, 1 reply; 37+ messages in thread
From: Ravinandan Arakali @ 2006-06-20 23:56 UTC (permalink / raw)
To: 'Andrew Morton'
Cc: tglx, dgc, mingo, neilb, jblunck, linux-kernel, linux-fsdevel,
viro, balbir, ananda.raju, leonid.grossman, jes
Andrew,
The formal submission will take some more time.
To boot your system, can you use the driver available on our website ? This
has the fix you are looking for. URL is
http://www.neterion.com/support/xframe_customers.html
Ravi
-----Original Message-----
From: Andrew Morton [mailto:akpm@osdl.org]
Sent: Tuesday, June 20, 2006 3:10 PM
To: ravinandan.arakali@neterion.com
Cc: tglx@linutronix.de; dgc@sgi.com; mingo@elte.hu; neilb@suse.de;
jblunck@suse.de; linux-kernel@vger.kernel.org;
linux-fsdevel@vger.kernel.org; viro@zeniv.linux.org.uk;
balbir@in.ibm.com; ananda.raju@neterion.com;
leonid.grossman@neterion.com; jes@trained-monkey.org
Subject: Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries
list (2nd version)
"Ravinandan Arakali" <ravinandan.arakali@neterion.com> wrote:
>
> Do you want the patch to be submitted to netdev(the mailing list that we
> usually submit to) ?
If the patches are a formal submission then please send them to Jeff,
netdev and I'd like a cc too.
If they are not considered quite ready for submission then please just send
them to myself, thanks. A netdev cc would be appropriate as well.
Basically
I just want to get that sn2 machine to boot.
Generally it's better to cc too many mailing lists and individuals than too
few.
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-20 23:56 ` Ravinandan Arakali
@ 2006-06-21 0:18 ` Andrew Morton
2006-06-21 0:31 ` Ravinandan Arakali
0 siblings, 1 reply; 37+ messages in thread
From: Andrew Morton @ 2006-06-21 0:18 UTC (permalink / raw)
To: ravinandan.arakali
Cc: tglx, dgc, mingo, neilb, jblunck, linux-kernel, linux-fsdevel,
viro, balbir, ananda.raju, leonid.grossman, jes
"Ravinandan Arakali" <ravinandan.arakali@neterion.com> wrote:
>
> The formal submission will take some more time.
> To boot your system, can you use the driver available on our website ?
Not really - it's not my system and I'd need to monkey around and generate
a diff which I then cannot test.
The driver you have there (REL_2.0.14.5152_LX.tar.gz) doesn't actually
appear to fix the bug:
int s2io_open(struct net_device *dev)
{
nic_t *sp = dev->priv;
int err = 0;
/*
* Make sure you have link off by default every time
* Nic is initialized
*/
netif_carrier_off(dev);
sp->last_link_state = 0;
/* Initialize H/W and enable interrupts */
err = s2io_card_up(sp);
if (err) {
DBG_PRINT(ERR_DBG, "%s: H/W initialization failed\n",
dev->name);
if (err == -ENODEV)
goto hw_init_failed;
else
goto hw_enable_failed;
}
#ifdef CONFIG_PCI_MSI
/* Store the values of the MSIX table in the nic_t structure */
store_xmsi_data(sp);
/* After proper initialization of H/W, register ISR */
if (sp->intr_type == MSI) {
err = request_irq((int) sp->pdev->irq, s2io_msi_handle,
SA_SHIRQ, sp->name, dev);
It's still calling request_irq() _after_ "enable interrupts".
^ permalink raw reply [flat|nested] 37+ messages in thread* RE: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-21 0:18 ` Andrew Morton
@ 2006-06-21 0:31 ` Ravinandan Arakali
0 siblings, 0 replies; 37+ messages in thread
From: Ravinandan Arakali @ 2006-06-21 0:31 UTC (permalink / raw)
To: 'Andrew Morton'
Cc: tglx, dgc, mingo, neilb, jblunck, linux-kernel, linux-fsdevel,
viro, balbir, ananda.raju, leonid.grossman, jes
You are right. Driver on website does not have the fix yet. It's still in
our internal tree.
We will submit patch to netdev either later today or tomorrow morning.
Ravi
-----Original Message-----
From: Andrew Morton [mailto:akpm@osdl.org]
Sent: Tuesday, June 20, 2006 5:19 PM
To: ravinandan.arakali@neterion.com
Cc: tglx@linutronix.de; dgc@sgi.com; mingo@elte.hu; neilb@suse.de;
jblunck@suse.de; linux-kernel@vger.kernel.org;
linux-fsdevel@vger.kernel.org; viro@zeniv.linux.org.uk;
balbir@in.ibm.com; ananda.raju@neterion.com;
leonid.grossman@neterion.com; jes@trained-monkey.org
Subject: Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries
list (2nd version)
"Ravinandan Arakali" <ravinandan.arakali@neterion.com> wrote:
>
> The formal submission will take some more time.
> To boot your system, can you use the driver available on our website ?
Not really - it's not my system and I'd need to monkey around and generate
a diff which I then cannot test.
The driver you have there (REL_2.0.14.5152_LX.tar.gz) doesn't actually
appear to fix the bug:
int s2io_open(struct net_device *dev)
{
nic_t *sp = dev->priv;
int err = 0;
/*
* Make sure you have link off by default every time
* Nic is initialized
*/
netif_carrier_off(dev);
sp->last_link_state = 0;
/* Initialize H/W and enable interrupts */
err = s2io_card_up(sp);
if (err) {
DBG_PRINT(ERR_DBG, "%s: H/W initialization failed\n",
dev->name);
if (err == -ENODEV)
goto hw_init_failed;
else
goto hw_enable_failed;
}
#ifdef CONFIG_PCI_MSI
/* Store the values of the MSIX table in the nic_t structure */
store_xmsi_data(sp);
/* After proper initialization of H/W, register ISR */
if (sp->intr_type == MSI) {
err = request_irq((int) sp->pdev->irq, s2io_msi_handle,
SA_SHIRQ, sp->name, dev);
It's still calling request_irq() _after_ "enable interrupts".
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 0/5] [PATCH,RFC] vfs: per-superblock unused dentries list (2nd version)
2006-06-16 22:25 ` Neil Brown
2006-06-18 23:56 ` David Chinner
@ 2006-06-19 9:34 ` Jan Blunck
1 sibling, 0 replies; 37+ messages in thread
From: Jan Blunck @ 2006-06-19 9:34 UTC (permalink / raw)
To: Neil Brown; +Cc: linux-kernel, linux-fsdevel, akpm, viro, dgc, balbir
On Sat, Jun 17, Neil Brown wrote:
> But I cannot see that the whole LRU list needs to be scanned during
> unmount.
> The only thing that does that is shrink_dcache_sb, which is used:
> in do_remount_sb
> in __invalidate_device
> in a few filesystems (autofs, coda, smbfs)
> and not when unmounting the filesystem (despite the comment).
>
> (This is in 2.6.17-ec6-mm2).
>
> I can see that shrink_dcache_sb could take a long time and should be
> fixed, which should be as simple as replacing it with
> shrink_dcache_parent; shrink_dcache_anon.
I don't remember exactly, maybe it was remounting instead of
unmounting. Although I believe that we should call shrink_dcache_sb() instead
of shrink_dcache_anon()+parent() when unmounting. I don't see any reason why we
should shrink the dcache with depth-first traversal instead of just killing
every unused dentry that we find (not even talking about the fact that
DCACHE_REFERENCED handling is nonsense in that case).
> But I'm still puzzled as to why a long dcache LRU slows down
> unmounting.
>
> Can you give more details?
I think David already answered that.
Jan
^ permalink raw reply [flat|nested] 37+ messages in thread