* [PATCH] Invalidate_inodes can be very slow
@ 2003-10-13 9:18 Kirill Korotaev
2003-10-13 9:53 ` William Lee Irwin III
0 siblings, 1 reply; 13+ messages in thread
From: Kirill Korotaev @ 2003-10-13 9:18 UTC (permalink / raw)
To: linux-kernel
[-- Attachment #1: Type: text/plain, Size: 636 bytes --]
I'm not sure whom this patch should be send to, so I'm posting it here.
this patch fixes the problem that huge inode cache
can make invalidate_inodes() calls very long. Meanwhile
lock_kernel() and inode_lock are being held and system
can freeze for seconds.
I detected this problem on kernel with 4gb split. When inode cache
was 2.5Gb (1,000,000 of inodes in cache) it took seconds to umount
or turn quota off.
This patch is against 2.4.22 kernel. But as far as I can see the problem
still exists in 2.6 kernels.
This patch introduces per-super block inode list which makes
possible to scan sb's only inodes when doing umount.
Kirill
[-- Attachment #2: diff-invlinodes-20031013 --]
[-- Type: text/plain, Size: 4846 bytes --]
--- ./fs/super.c.ii 2003-08-25 15:44:43.000000000 +0400
+++ ./fs/super.c 2003-10-13 12:08:43.000000000 +0400
@@ -271,6 +271,7 @@ static struct super_block *alloc_super(v
INIT_LIST_HEAD(&s->s_locked_inodes);
INIT_LIST_HEAD(&s->s_files);
INIT_LIST_HEAD(&s->s_instances);
+ INIT_LIST_HEAD(&s->s_inodes);
init_rwsem(&s->s_umount);
sema_init(&s->s_lock, 1);
down_write(&s->s_umount);
--- ./fs/inode.c.ii 2003-08-25 15:44:43.000000000 +0400
+++ ./fs/inode.c 2003-10-13 12:18:25.000000000 +0400
@@ -604,7 +604,7 @@ static void dispose_list(struct list_hea
/*
* Invalidate all inodes for a device.
*/
-static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose)
+static int invalidate_list(struct list_head *head, struct list_head * dispose)
{
struct list_head *next;
int busy = 0, count = 0;
@@ -617,12 +617,11 @@ static int invalidate_list(struct list_h
next = next->next;
if (tmp == head)
break;
- inode = list_entry(tmp, struct inode, i_list);
- if (inode->i_sb != sb)
- continue;
+ inode = list_entry(tmp, struct inode, i_sb_list);
invalidate_inode_buffers(inode);
if (!atomic_read(&inode->i_count)) {
list_del_init(&inode->i_hash);
+ list_del(&inode->i_sb_list);
list_del(&inode->i_list);
list_add(&inode->i_list, dispose);
inode->i_state |= I_FREEING;
@@ -659,10 +658,7 @@ int invalidate_inodes(struct super_block
LIST_HEAD(throw_away);
spin_lock(&inode_lock);
- busy = invalidate_list(&inode_in_use, sb, &throw_away);
- busy |= invalidate_list(&inode_unused, sb, &throw_away);
- busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
- busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away);
+ busy = invalidate_list(&sb->s_inodes, &throw_away);
spin_unlock(&inode_lock);
dispose_list(&throw_away);
@@ -738,6 +734,7 @@ void prune_icache(int goal)
list_del(tmp);
list_del(&inode->i_hash);
INIT_LIST_HEAD(&inode->i_hash);
+ list_del(&inode->i_sb_list);
list_add(tmp, freeable);
inode->i_state |= I_FREEING;
count++;
@@ -827,6 +824,7 @@ struct inode * new_inode(struct super_bl
spin_lock(&inode_lock);
inodes_stat.nr_inodes++;
list_add(&inode->i_list, &inode_in_use);
+ list_add(&inode->i_sb_list, &sb->s_inodes);
inode->i_ino = ++last_ino;
inode->i_state = 0;
spin_unlock(&inode_lock);
@@ -854,6 +852,7 @@ static struct inode * get_new_inode(stru
if (!old) {
inodes_stat.nr_inodes++;
list_add(&inode->i_list, &inode_in_use);
+ list_add(&inode->i_sb_list, &sb->s_inodes);
list_add(&inode->i_hash, head);
inode->i_ino = ino;
inode->i_state = I_LOCK;
@@ -1046,6 +1045,7 @@ void iput(struct inode *inode)
INIT_LIST_HEAD(&inode->i_hash);
list_del(&inode->i_list);
INIT_LIST_HEAD(&inode->i_list);
+ list_del(&inode->i_sb_list);
inode->i_state|=I_FREEING;
inodes_stat.nr_inodes--;
spin_unlock(&inode_lock);
@@ -1079,6 +1079,7 @@ void iput(struct inode *inode)
list_del_init(&inode->i_hash);
}
list_del_init(&inode->i_list);
+ list_del(&inode->i_sb_list);
inode->i_state|=I_FREEING;
inodes_stat.nr_inodes--;
spin_unlock(&inode_lock);
@@ -1239,26 +1240,10 @@ void remove_dquot_ref(struct super_block
lock_kernel(); /* This lock is for quota code */
spin_lock(&inode_lock); /* This lock is for inodes code */
- list_for_each(act_head, &inode_in_use) {
- inode = list_entry(act_head, struct inode, i_list);
- if (inode->i_sb == sb && IS_QUOTAINIT(inode))
- remove_inode_dquot_ref(inode, type, &tofree_head);
- }
- list_for_each(act_head, &inode_unused) {
- inode = list_entry(act_head, struct inode, i_list);
- if (inode->i_sb == sb && IS_QUOTAINIT(inode))
- remove_inode_dquot_ref(inode, type, &tofree_head);
- }
- list_for_each(act_head, &sb->s_dirty) {
- inode = list_entry(act_head, struct inode, i_list);
+ list_for_each_entry(inode, act_head, &sb->s_inodes)
if (IS_QUOTAINIT(inode))
remove_inode_dquot_ref(inode, type, &tofree_head);
- }
- list_for_each(act_head, &sb->s_locked_inodes) {
- inode = list_entry(act_head, struct inode, i_list);
- if (IS_QUOTAINIT(inode))
- remove_inode_dquot_ref(inode, type, &tofree_head);
- }
+
spin_unlock(&inode_lock);
unlock_kernel();
--- ./include/linux/fs.h.ii 2003-08-25 15:44:44.000000000 +0400
+++ ./include/linux/fs.h 2003-10-13 12:23:53.000000000 +0400
@@ -438,6 +438,7 @@ struct block_device {
struct inode {
struct list_head i_hash;
struct list_head i_list;
+ struct list_head i_sb_list;
struct list_head i_dentry;
struct list_head i_dirty_buffers;
@@ -756,6 +757,7 @@ struct super_block {
int s_count;
atomic_t s_active;
+ struct list_head s_inodes; /* all inodes */
struct list_head s_dirty; /* dirty inodes */
struct list_head s_locked_inodes;/* inodes being synced */
struct list_head s_files;
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH] Invalidate_inodes can be very slow 2003-10-13 9:18 [PATCH] Invalidate_inodes can be very slow Kirill Korotaev @ 2003-10-13 9:53 ` William Lee Irwin III 2003-10-13 10:46 ` Kirill Korotaev 2003-10-13 11:08 ` Andrew Morton 0 siblings, 2 replies; 13+ messages in thread From: William Lee Irwin III @ 2003-10-13 9:53 UTC (permalink / raw) To: Kirill Korotaev; +Cc: linux-kernel On Mon, Oct 13, 2003 at 01:18:09PM +0400, Kirill Korotaev wrote: > I'm not sure whom this patch should be send to, so I'm posting it here. > this patch fixes the problem that huge inode cache > can make invalidate_inodes() calls very long. Meanwhile > lock_kernel() and inode_lock are being held and system > can freeze for seconds. > I detected this problem on kernel with 4gb split. When inode cache > was 2.5Gb (1,000,000 of inodes in cache) it took seconds to umount > or turn quota off. > This patch is against 2.4.22 kernel. But as far as I can see the problem > still exists in 2.6 kernels. > This patch introduces per-super block inode list which makes > possible to scan sb's only inodes when doing umount. Untested brute-force forward port to 2.6.0-test7-bk4. No idea if the locking is correct or if list movement is done in all needed places. diff -prauN linux-2.6.0-test7-bk4/fs/inode.c inode-2.6.0-test7-bk4-1/fs/inode.c --- linux-2.6.0-test7-bk4/fs/inode.c 2003-10-08 12:24:51.000000000 -0700 +++ inode-2.6.0-test7-bk4-1/fs/inode.c 2003-10-13 02:38:59.000000000 -0700 @@ -285,7 +285,7 @@ static void dispose_list(struct list_hea /* * Invalidate all inodes for a device. */ -static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose) +static int invalidate_list(struct list_head *head, struct list_head *dispose) { struct list_head *next; int busy = 0, count = 0; @@ -298,13 +298,11 @@ static int invalidate_list(struct list_h next = next->next; if (tmp == head) break; - inode = list_entry(tmp, struct inode, i_list); - if (inode->i_sb != sb) - continue; + inode = list_entry(tmp, struct inode, i_sb_list); invalidate_inode_buffers(inode); if (!atomic_read(&inode->i_count)) { hlist_del_init(&inode->i_hash); - list_del(&inode->i_list); + list_del(&inode->i_sb_list); list_add(&inode->i_list, dispose); inode->i_state |= I_FREEING; count++; @@ -340,10 +338,7 @@ int invalidate_inodes(struct super_block down(&iprune_sem); spin_lock(&inode_lock); - busy = invalidate_list(&inode_in_use, sb, &throw_away); - busy |= invalidate_list(&inode_unused, sb, &throw_away); - busy |= invalidate_list(&sb->s_dirty, sb, &throw_away); - busy |= invalidate_list(&sb->s_io, sb, &throw_away); + busy = invalidate_list(&sb->s_inodes, &throw_away); spin_unlock(&inode_lock); dispose_list(&throw_away); @@ -443,6 +438,7 @@ static void prune_icache(int nr_to_scan) continue; } hlist_del_init(&inode->i_hash); + list_del_init(&inode->i_sb_list); list_move(&inode->i_list, &freeable); inode->i_state |= I_FREEING; nr_pruned++; @@ -553,6 +549,7 @@ struct inode *new_inode(struct super_blo spin_lock(&inode_lock); inodes_stat.nr_inodes++; list_add(&inode->i_list, &inode_in_use); + list_add(&inode->i_sb_list, &sb->s_inodes); inode->i_ino = ++last_ino; inode->i_state = 0; spin_unlock(&inode_lock); @@ -601,6 +598,7 @@ static struct inode * get_new_inode(stru inodes_stat.nr_inodes++; list_add(&inode->i_list, &inode_in_use); + list_add(&inode->i_sb_list, &sb->s_inodes); hlist_add_head(&inode->i_hash, head); inode->i_state = I_LOCK|I_NEW; spin_unlock(&inode_lock); @@ -984,6 +982,7 @@ void generic_delete_inode(struct inode * struct super_operations *op = inode->i_sb->s_op; list_del_init(&inode->i_list); + list_del_init(&inode->i_sb_list); inode->i_state|=I_FREEING; inodes_stat.nr_inodes--; spin_unlock(&inode_lock); @@ -1031,6 +1030,7 @@ static void generic_forget_inode(struct hlist_del_init(&inode->i_hash); } list_del_init(&inode->i_list); + list_del_init(&inode->i_sb_list); inode->i_state|=I_FREEING; inodes_stat.nr_inodes--; spin_unlock(&inode_lock); @@ -1228,27 +1228,11 @@ void remove_dquot_ref(struct super_block return; /* nothing to do */ spin_lock(&inode_lock); /* This lock is for inodes code */ /* We don't have to lock against quota code - test IS_QUOTAINIT is just for speedup... */ - - list_for_each(act_head, &inode_in_use) { - inode = list_entry(act_head, struct inode, i_list); - if (inode->i_sb == sb && IS_QUOTAINIT(inode)) - remove_inode_dquot_ref(inode, type, &tofree_head); - } - list_for_each(act_head, &inode_unused) { - inode = list_entry(act_head, struct inode, i_list); - if (inode->i_sb == sb && IS_QUOTAINIT(inode)) - remove_inode_dquot_ref(inode, type, &tofree_head); - } - list_for_each(act_head, &sb->s_dirty) { - inode = list_entry(act_head, struct inode, i_list); - if (IS_QUOTAINIT(inode)) - remove_inode_dquot_ref(inode, type, &tofree_head); - } - list_for_each(act_head, &sb->s_io) { - inode = list_entry(act_head, struct inode, i_list); + + list_for_each_entry(inode, &sb->s_inodes) if (IS_QUOTAINIT(inode)) remove_inode_dquot_ref(inode, type, &tofree_head); - } + spin_unlock(&inode_lock); put_dquot_list(&tofree_head); diff -prauN linux-2.6.0-test7-bk4/fs/super.c inode-2.6.0-test7-bk4-1/fs/super.c --- linux-2.6.0-test7-bk4/fs/super.c 2003-10-08 12:24:04.000000000 -0700 +++ inode-2.6.0-test7-bk4-1/fs/super.c 2003-10-13 02:28:22.000000000 -0700 @@ -66,6 +66,7 @@ static struct super_block *alloc_super(v INIT_LIST_HEAD(&s->s_files); INIT_LIST_HEAD(&s->s_instances); INIT_HLIST_HEAD(&s->s_anon); + INIT_LIST_HEAD(&s->s_inodes); init_rwsem(&s->s_umount); sema_init(&s->s_lock, 1); down_write(&s->s_umount); diff -prauN linux-2.6.0-test7-bk4/include/linux/fs.h inode-2.6.0-test7-bk4-1/include/linux/fs.h --- linux-2.6.0-test7-bk4/include/linux/fs.h 2003-10-08 12:24:03.000000000 -0700 +++ inode-2.6.0-test7-bk4-1/include/linux/fs.h 2003-10-13 02:37:13.000000000 -0700 @@ -369,6 +369,7 @@ struct block_device { struct inode { struct hlist_node i_hash; struct list_head i_list; + struct list_head i_sb_list; struct list_head i_dentry; unsigned long i_ino; atomic_t i_count; @@ -687,6 +688,7 @@ struct super_block { atomic_t s_active; void *s_security; + struct list_head s_inodes; /* all inodes */ struct list_head s_dirty; /* dirty inodes */ struct list_head s_io; /* parked for writeback */ struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Invalidate_inodes can be very slow 2003-10-13 9:53 ` William Lee Irwin III @ 2003-10-13 10:46 ` Kirill Korotaev 2003-10-13 11:06 ` William Lee Irwin III 2003-10-13 11:08 ` Andrew Morton 1 sibling, 1 reply; 13+ messages in thread From: Kirill Korotaev @ 2003-10-13 10:46 UTC (permalink / raw) To: William Lee Irwin III; +Cc: linux-kernel > Untested brute-force forward port to 2.6.0-test7-bk4. No idea if the > locking is correct or if list movement is done in all needed places. First of all, thanks for forward port. I think this patch is quite usefull for both 2.4 and 2.6 kernels, but I can't check it on all kernels myself. comments: 1. why have you replaced list_del with list_del_init in my patch? It's not required. 2. locks are ok, as for i_list, i_hash lists. 3. Missed +list_add(&inode->i_sb_list, &sb->s_inodes); in get_new_inode_fast() function. Please add it. I looked through, everything else looks ok. [your patch] Kirill ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Invalidate_inodes can be very slow 2003-10-13 10:46 ` Kirill Korotaev @ 2003-10-13 11:06 ` William Lee Irwin III 0 siblings, 0 replies; 13+ messages in thread From: William Lee Irwin III @ 2003-10-13 11:06 UTC (permalink / raw) To: Kirill Korotaev; +Cc: linux-kernel At some point in the past, I wrote: >> Untested brute-force forward port to 2.6.0-test7-bk4. No idea if the >> locking is correct or if list movement is done in all needed places. On Mon, Oct 13, 2003 at 02:46:25PM +0400, Kirill Korotaev wrote: > First of all, thanks for forward port. I think this patch is quite usefull > for both 2.4 and 2.6 kernels, but I can't check it on all kernels myself. > comments: > 1. why have you replaced list_del with list_del_init in my patch? > It's not required. > 2. locks are ok, as for i_list, i_hash lists. > 3. Missed > +list_add(&inode->i_sb_list, &sb->s_inodes); > in get_new_inode_fast() function. Please add it. > I looked through, everything else looks ok. > [your patch] list_del_init() was for consistency with the surrounding ->i_list deletions, hence erring on the side of caution. Amended patch below, addressing point 3. diff -prauN linux-2.6.0-test7-bk4/fs/inode.c inode-2.6.0-test7-bk4-1/fs/inode.c --- linux-2.6.0-test7-bk4/fs/inode.c 2003-10-08 12:24:51.000000000 -0700 +++ inode-2.6.0-test7-bk4-1/fs/inode.c 2003-10-13 03:44:06.000000000 -0700 @@ -285,7 +285,7 @@ static void dispose_list(struct list_hea /* * Invalidate all inodes for a device. */ -static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose) +static int invalidate_list(struct list_head *head, struct list_head *dispose) { struct list_head *next; int busy = 0, count = 0; @@ -298,13 +298,11 @@ static int invalidate_list(struct list_h next = next->next; if (tmp == head) break; - inode = list_entry(tmp, struct inode, i_list); - if (inode->i_sb != sb) - continue; + inode = list_entry(tmp, struct inode, i_sb_list); invalidate_inode_buffers(inode); if (!atomic_read(&inode->i_count)) { hlist_del_init(&inode->i_hash); - list_del(&inode->i_list); + list_del(&inode->i_sb_list); list_add(&inode->i_list, dispose); inode->i_state |= I_FREEING; count++; @@ -340,10 +338,7 @@ int invalidate_inodes(struct super_block down(&iprune_sem); spin_lock(&inode_lock); - busy = invalidate_list(&inode_in_use, sb, &throw_away); - busy |= invalidate_list(&inode_unused, sb, &throw_away); - busy |= invalidate_list(&sb->s_dirty, sb, &throw_away); - busy |= invalidate_list(&sb->s_io, sb, &throw_away); + busy = invalidate_list(&sb->s_inodes, &throw_away); spin_unlock(&inode_lock); dispose_list(&throw_away); @@ -443,6 +438,7 @@ static void prune_icache(int nr_to_scan) continue; } hlist_del_init(&inode->i_hash); + list_del_init(&inode->i_sb_list); list_move(&inode->i_list, &freeable); inode->i_state |= I_FREEING; nr_pruned++; @@ -553,6 +549,7 @@ struct inode *new_inode(struct super_blo spin_lock(&inode_lock); inodes_stat.nr_inodes++; list_add(&inode->i_list, &inode_in_use); + list_add(&inode->i_sb_list, &sb->s_inodes); inode->i_ino = ++last_ino; inode->i_state = 0; spin_unlock(&inode_lock); @@ -601,6 +598,7 @@ static struct inode * get_new_inode(stru inodes_stat.nr_inodes++; list_add(&inode->i_list, &inode_in_use); + list_add(&inode->i_sb_list, &sb->s_inodes); hlist_add_head(&inode->i_hash, head); inode->i_state = I_LOCK|I_NEW; spin_unlock(&inode_lock); @@ -649,6 +647,7 @@ static struct inode * get_new_inode_fast inode->i_ino = ino; inodes_stat.nr_inodes++; list_add(&inode->i_list, &inode_in_use); + list_add(&inode->i_sb_list, &sb->s_inodes); hlist_add_head(&inode->i_hash, head); inode->i_state = I_LOCK|I_NEW; spin_unlock(&inode_lock); @@ -984,6 +983,7 @@ void generic_delete_inode(struct inode * struct super_operations *op = inode->i_sb->s_op; list_del_init(&inode->i_list); + list_del_init(&inode->i_sb_list); inode->i_state|=I_FREEING; inodes_stat.nr_inodes--; spin_unlock(&inode_lock); @@ -1031,6 +1031,7 @@ static void generic_forget_inode(struct hlist_del_init(&inode->i_hash); } list_del_init(&inode->i_list); + list_del_init(&inode->i_sb_list); inode->i_state|=I_FREEING; inodes_stat.nr_inodes--; spin_unlock(&inode_lock); @@ -1228,27 +1229,11 @@ void remove_dquot_ref(struct super_block return; /* nothing to do */ spin_lock(&inode_lock); /* This lock is for inodes code */ /* We don't have to lock against quota code - test IS_QUOTAINIT is just for speedup... */ - - list_for_each(act_head, &inode_in_use) { - inode = list_entry(act_head, struct inode, i_list); - if (inode->i_sb == sb && IS_QUOTAINIT(inode)) - remove_inode_dquot_ref(inode, type, &tofree_head); - } - list_for_each(act_head, &inode_unused) { - inode = list_entry(act_head, struct inode, i_list); - if (inode->i_sb == sb && IS_QUOTAINIT(inode)) - remove_inode_dquot_ref(inode, type, &tofree_head); - } - list_for_each(act_head, &sb->s_dirty) { - inode = list_entry(act_head, struct inode, i_list); - if (IS_QUOTAINIT(inode)) - remove_inode_dquot_ref(inode, type, &tofree_head); - } - list_for_each(act_head, &sb->s_io) { - inode = list_entry(act_head, struct inode, i_list); + + list_for_each_entry(inode, &sb->s_inodes) if (IS_QUOTAINIT(inode)) remove_inode_dquot_ref(inode, type, &tofree_head); - } + spin_unlock(&inode_lock); put_dquot_list(&tofree_head); diff -prauN linux-2.6.0-test7-bk4/fs/super.c inode-2.6.0-test7-bk4-1/fs/super.c --- linux-2.6.0-test7-bk4/fs/super.c 2003-10-08 12:24:04.000000000 -0700 +++ inode-2.6.0-test7-bk4-1/fs/super.c 2003-10-13 02:28:22.000000000 -0700 @@ -66,6 +66,7 @@ static struct super_block *alloc_super(v INIT_LIST_HEAD(&s->s_files); INIT_LIST_HEAD(&s->s_instances); INIT_HLIST_HEAD(&s->s_anon); + INIT_LIST_HEAD(&s->s_inodes); init_rwsem(&s->s_umount); sema_init(&s->s_lock, 1); down_write(&s->s_umount); diff -prauN linux-2.6.0-test7-bk4/include/linux/fs.h inode-2.6.0-test7-bk4-1/include/linux/fs.h --- linux-2.6.0-test7-bk4/include/linux/fs.h 2003-10-08 12:24:03.000000000 -0700 +++ inode-2.6.0-test7-bk4-1/include/linux/fs.h 2003-10-13 02:37:13.000000000 -0700 @@ -369,6 +369,7 @@ struct block_device { struct inode { struct hlist_node i_hash; struct list_head i_list; + struct list_head i_sb_list; struct list_head i_dentry; unsigned long i_ino; atomic_t i_count; @@ -687,6 +688,7 @@ struct super_block { atomic_t s_active; void *s_security; + struct list_head s_inodes; /* all inodes */ struct list_head s_dirty; /* dirty inodes */ struct list_head s_io; /* parked for writeback */ struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Invalidate_inodes can be very slow 2003-10-13 9:53 ` William Lee Irwin III 2003-10-13 10:46 ` Kirill Korotaev @ 2003-10-13 11:08 ` Andrew Morton 2003-10-13 11:19 ` William Lee Irwin III 1 sibling, 1 reply; 13+ messages in thread From: Andrew Morton @ 2003-10-13 11:08 UTC (permalink / raw) To: William Lee Irwin III; +Cc: kk, linux-kernel William Lee Irwin III <wli@holomorphy.com> wrote: > > Untested brute-force forward port to 2.6.0-test7-bk4. No idea if the > locking is correct or if list movement is done in all needed places. My preferred approach to this would be to move all the global inode lists into the superblock so both they and inode_lock become per-sb. It is a big change though. And, amazingly, nobody has yet hit sufficient inode_lock contention to warrant it. Yes, I bet that this search will hurt like hell on a really big box which has thousands of auto-expiring NFS mounts. Please test your patch and I'll queue it up while we think about it some more. ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Invalidate_inodes can be very slow 2003-10-13 11:08 ` Andrew Morton @ 2003-10-13 11:19 ` William Lee Irwin III 2003-10-13 11:45 ` Kirill Korotaev 0 siblings, 1 reply; 13+ messages in thread From: William Lee Irwin III @ 2003-10-13 11:19 UTC (permalink / raw) To: Andrew Morton; +Cc: kk, linux-kernel William Lee Irwin III <wli@holomorphy.com> wrote: >> Untested brute-force forward port to 2.6.0-test7-bk4. No idea if the >> locking is correct or if list movement is done in all needed places. On Mon, Oct 13, 2003 at 04:08:21AM -0700, Andrew Morton wrote: > My preferred approach to this would be to move all the global inode lists > into the superblock so both they and inode_lock become per-sb. > It is a big change though. And, amazingly, nobody has yet hit sufficient > inode_lock contention to warrant it. > Yes, I bet that this search will hurt like hell on a really big box which > has thousands of auto-expiring NFS mounts. Please test your patch and I'll > queue it up while we think about it some more. Generally dcache_lock stands in front of inode_lock, even with the current hashtable RCU code. inode_lock has been seen before in unusual situations I don't remember offhand, though generally it's not #1. The workloads used for, say, benchmark testing don't adequately model situations like what you just mentioned (or a number of other real-life usage cases), so per-sb inode_lock may be worth considering on a priori grounds, though it would probably be better to actually set something up to test that scenario. -- wli ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Invalidate_inodes can be very slow 2003-10-13 11:19 ` William Lee Irwin III @ 2003-10-13 11:45 ` Kirill Korotaev 2003-10-13 11:54 ` William Lee Irwin III 0 siblings, 1 reply; 13+ messages in thread From: Kirill Korotaev @ 2003-10-13 11:45 UTC (permalink / raw) To: William Lee Irwin III, Andrew Morton; +Cc: linux-kernel > William Lee Irwin III <wli@holomorphy.com> wrote: > >> Untested brute-force forward port to 2.6.0-test7-bk4. No idea if the > >> locking is correct or if list movement is done in all needed places. > > On Mon, Oct 13, 2003 at 04:08:21AM -0700, Andrew Morton wrote: > > My preferred approach to this would be to move all the global inode lists > > into the superblock so both they and inode_lock become per-sb. > > It is a big change though. And, amazingly, nobody has yet hit sufficient > > inode_lock contention to warrant it. > > Yes, I bet that this search will hurt like hell on a really big box which > > has thousands of auto-expiring NFS mounts. Please test your patch and > > I'll queue it up while we think about it some more. > > Generally dcache_lock stands in front of inode_lock, even with the > current hashtable RCU code. inode_lock has been seen before in unusual > situations I don't remember offhand, though generally it's not #1. > The workloads used for, say, benchmark testing don't adequately model > situations like what you just mentioned (or a number of other real-life > usage cases), so per-sb inode_lock may be worth considering on a priori > grounds, though it would probably be better to actually set something > up to test that scenario. This patch can be tested quite easily. Main changes are in invalidate_list. This path can be triggered by umount/quota off. So I tested it the following way: 1. mounting/working/umounting partition (and turning quota on/off) 2. running memeater to call prune_icache when partition was mounted to test that lists are ok. All other places should be ok, since simple list_add/list_del is inserted. Kirill ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Invalidate_inodes can be very slow 2003-10-13 11:45 ` Kirill Korotaev @ 2003-10-13 11:54 ` William Lee Irwin III 2003-10-13 12:02 ` Kirill Korotaev 0 siblings, 1 reply; 13+ messages in thread From: William Lee Irwin III @ 2003-10-13 11:54 UTC (permalink / raw) To: Kirill Korotaev; +Cc: Andrew Morton, linux-kernel William Lee Irwin III <wli@holomorphy.com> wrote: >> Generally dcache_lock stands in front of inode_lock, even with the >> current hashtable RCU code. inode_lock has been seen before in unusual >> situations I don't remember offhand, though generally it's not #1. >> The workloads used for, say, benchmark testing don't adequately model >> situations like what you just mentioned (or a number of other real-life >> usage cases), so per-sb inode_lock may be worth considering on a priori >> grounds, though it would probably be better to actually set something >> up to test that scenario. On Mon, Oct 13, 2003 at 03:45:01PM +0400, Kirill Korotaev wrote: > This patch can be tested quite easily. Main changes are in invalidate_list. > This path can be triggered by umount/quota off. > So I tested it the following way: > 1. mounting/working/umounting partition (and turning quota on/off) > 2. running memeater to call prune_icache when partition was mounted > to test that lists are ok. > All other places should be ok, since simple list_add/list_del is inserted. Sorry if I was unclear, I had in mind SMP performance testing of mount and unmount -heavy workloads, like uni setups with many automounted fs's, not stability testing per se. -- wli ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Invalidate_inodes can be very slow 2003-10-13 11:54 ` William Lee Irwin III @ 2003-10-13 12:02 ` Kirill Korotaev 2003-10-13 12:11 ` William Lee Irwin III 0 siblings, 1 reply; 13+ messages in thread From: Kirill Korotaev @ 2003-10-13 12:02 UTC (permalink / raw) To: William Lee Irwin III; +Cc: Andrew Morton, linux-kernel > > This patch can be tested quite easily. Main changes are in > > invalidate_list. This path can be triggered by umount/quota off. > > So I tested it the following way: > > 1. mounting/working/umounting partition (and turning quota on/off) > > 2. running memeater to call prune_icache when partition was mounted > > to test that lists are ok. > > All other places should be ok, since simple list_add/list_del is > > inserted. > > Sorry if I was unclear, I had in mind SMP performance testing of mount > and unmount -heavy workloads, like uni setups with many automounted fs's, > not stability testing per se. Oh, sorry for misunderstanding. In our internal testcase on 8-CPU 8Gb RAM machine with 4gb split kernel w/o this patch mount/umount test longs in many-many (>10) times longer. Moreover, during the test machine is very slow (due to lock_kernel) and typing simple commands takes up to 30 seconds or so. I think such a long hangs are due to number of umounts executed subsequently. But ofcourse it's not numbers, just for you to know where the patch comes from :) Kirill ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Invalidate_inodes can be very slow 2003-10-13 12:02 ` Kirill Korotaev @ 2003-10-13 12:11 ` William Lee Irwin III 2003-10-13 12:21 ` Kirill Korotaev 0 siblings, 1 reply; 13+ messages in thread From: William Lee Irwin III @ 2003-10-13 12:11 UTC (permalink / raw) To: Kirill Korotaev; +Cc: Andrew Morton, linux-kernel At some point in the past, I wrote: >> Sorry if I was unclear, I had in mind SMP performance testing of mount >> and unmount -heavy workloads, like uni setups with many automounted fs's, >> not stability testing per se. On Mon, Oct 13, 2003 at 04:02:20PM +0400, Kirill Korotaev wrote: > Oh, sorry for misunderstanding. > In our internal testcase on 8-CPU 8Gb RAM machine with 4gb split kernel > w/o this patch mount/umount test longs in many-many (>10) times longer. > Moreover, during the test machine is very slow (due to lock_kernel) > and typing simple commands takes up to 30 seconds or so. > I think such a long hangs are due to number of umounts executed > subsequently. But ofcourse it's not numbers, just for you to know where > the patch comes from :) Is this testcase available and/or trivial? Actually, even if it's trivial it might just save us the pain of writing the scripts ourselves. -- wli ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Invalidate_inodes can be very slow 2003-10-13 12:11 ` William Lee Irwin III @ 2003-10-13 12:21 ` Kirill Korotaev 2003-10-13 12:29 ` Nikita Danilov 0 siblings, 1 reply; 13+ messages in thread From: Kirill Korotaev @ 2003-10-13 12:21 UTC (permalink / raw) To: William Lee Irwin III; +Cc: Andrew Morton, linux-kernel > At some point in the past, I wrote: > >> Sorry if I was unclear, I had in mind SMP performance testing of mount > >> and unmount -heavy workloads, like uni setups with many automounted > >> fs's, not stability testing per se. > > On Mon, Oct 13, 2003 at 04:02:20PM +0400, Kirill Korotaev wrote: > > Oh, sorry for misunderstanding. > > In our internal testcase on 8-CPU 8Gb RAM machine with 4gb split kernel > > w/o this patch mount/umount test longs in many-many (>10) times longer. > > Moreover, during the test machine is very slow (due to lock_kernel) > > and typing simple commands takes up to 30 seconds or so. > > I think such a long hangs are due to number of umounts executed > > subsequently. But ofcourse it's not numbers, just for you to know where > > the patch comes from :) > > Is this testcase available and/or trivial? Actually, even if it's trivial > it might just save us the pain of writing the scripts ourselves. no, testcase is not available :( And it uses functionality not available in mainstream kernel. But the problem can be hit with very simple script instead: 1. mount N filesystems. 2. work on them, so that inode cache grows to its maximum possible size (it was 1,000,000 of inodes in our case). 3. umount these filesystems. During operation #3 node is very slow and it is quite noticable on ssh console when typing commands. Kirill ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Invalidate_inodes can be very slow 2003-10-13 12:21 ` Kirill Korotaev @ 2003-10-13 12:29 ` Nikita Danilov 2003-10-13 13:08 ` Kirill Korotaev 0 siblings, 1 reply; 13+ messages in thread From: Nikita Danilov @ 2003-10-13 12:29 UTC (permalink / raw) To: kk; +Cc: William Lee Irwin III, Andrew Morton, linux-kernel Kirill Korotaev writes: > > At some point in the past, I wrote: > > >> Sorry if I was unclear, I had in mind SMP performance testing of mount > > >> and unmount -heavy workloads, like uni setups with many automounted > > >> fs's, not stability testing per se. > > > > On Mon, Oct 13, 2003 at 04:02:20PM +0400, Kirill Korotaev wrote: > > > Oh, sorry for misunderstanding. > > > In our internal testcase on 8-CPU 8Gb RAM machine with 4gb split kernel > > > w/o this patch mount/umount test longs in many-many (>10) times longer. > > > Moreover, during the test machine is very slow (due to lock_kernel) > > > and typing simple commands takes up to 30 seconds or so. > > > I think such a long hangs are due to number of umounts executed > > > subsequently. But ofcourse it's not numbers, just for you to know where > > > the patch comes from :) > > > > Is this testcase available and/or trivial? Actually, even if it's trivial > > it might just save us the pain of writing the scripts ourselves. > no, testcase is not available :( And it uses functionality > not available in mainstream kernel. But the problem can be hit with > very simple script instead: > > 1. mount N filesystems. > 2. work on them, so that inode cache grows to its maximum > possible size (it was 1,000,000 of inodes in our case). > 3. umount these filesystems. > > During operation #3 node is very slow and it is quite noticable > on ssh console when typing commands. This can be due to a number of reasons (worst case behavior of shrink_dcache_parent() for example). What /proc/profile shows? > > Kirill Nikita. > ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Invalidate_inodes can be very slow 2003-10-13 12:29 ` Nikita Danilov @ 2003-10-13 13:08 ` Kirill Korotaev 0 siblings, 0 replies; 13+ messages in thread From: Kirill Korotaev @ 2003-10-13 13:08 UTC (permalink / raw) To: Nikita Danilov; +Cc: linux-kernel > > no, testcase is not available :( And it uses functionality > > not available in mainstream kernel. But the problem can be hit with > > very simple script instead: > > > > 1. mount N filesystems. > > 2. work on them, so that inode cache grows to its maximum > > possible size (it was 1,000,000 of inodes in our case). > > 3. umount these filesystems. > > > > During operation #3 node is very slow and it is quite noticable > > on ssh console when typing commands. > > This can be due to a number of reasons (worst case behavior of > shrink_dcache_parent() for example). What /proc/profile shows? I used cycles measuring patch. It showed that > 50% CPU during the test was spent in invalidate_inodes(). Kirill ^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2003-10-13 13:07 UTC | newest] Thread overview: 13+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2003-10-13 9:18 [PATCH] Invalidate_inodes can be very slow Kirill Korotaev 2003-10-13 9:53 ` William Lee Irwin III 2003-10-13 10:46 ` Kirill Korotaev 2003-10-13 11:06 ` William Lee Irwin III 2003-10-13 11:08 ` Andrew Morton 2003-10-13 11:19 ` William Lee Irwin III 2003-10-13 11:45 ` Kirill Korotaev 2003-10-13 11:54 ` William Lee Irwin III 2003-10-13 12:02 ` Kirill Korotaev 2003-10-13 12:11 ` William Lee Irwin III 2003-10-13 12:21 ` Kirill Korotaev 2003-10-13 12:29 ` Nikita Danilov 2003-10-13 13:08 ` Kirill Korotaev
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.