public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Significant performace improvements on reiserfs systems, kupdated bugfixes
@ 2001-09-20  7:12 Beau Kuiper
  2001-09-20 12:16 ` Chris Mason
  2001-09-21 13:26 ` Matthias Andree
  0 siblings, 2 replies; 14+ messages in thread
From: Beau Kuiper @ 2001-09-20  7:12 UTC (permalink / raw)
  To: linux-kernel; +Cc: tovarlds

Hi,

Resierfs on 2.4 has always been bog slow.

I have identified kupdated as the culprit, and have 3 patches that fix the
peformance problems I have had been suffering.

I would like these patches to be reviewed an put into the mainline kernel
so that others can testthe changes.

Patch 1.

This patch fixes reiserfs to use the kupdated code path when told to
resync its super block, like it did in 2.2.19. This is the culpit for bad
reiserfs performace in 2.4. Unfortunately, this fix relies on the second
patch to work properly.

Patch 2

This patch implements a simple mechinism to ensure that each superblock
only gets told to be flushed once. With reiserfs and the first patch, the
superblock is still dirty after being told to sync (probably becasue it
doesn't want to write out the entire journal every 5 seconds when kupdate
calls it). This caused an infinite loop because sync_supers would
always find the reiserfs superblock dirty when called from kupdated. I am
not convinced that this patch is the best one for this problem
(suggestions?)

Patch 3

This patch was generated as I was exploring the buffer cache, wondering
why reiserfs was so slow on 2.4. I found that kupdated may write buffers
that are not actually old back to disk. Eg

Imagine that there are 20 dirty buffers. 16 of them are more that 30
seconds old (and should be written back), but the other 4 are younger than
30 seconds. The current code would force all 20 out to disk, interrupting
programs still using the young 4 until the disk write was complete.

I know that it isn't a major problem, but I found it and I have written
the patch for it :-)

Please try out these patches and give comments about style, performace
ect. They fixed my problems, sliced almost a minute off 2.2.19 kernel
compile time on my duron 700 (from 4min 30sec to 3min 45sec)

Thanks
Beau Kuiper
kuib-kl@ljbc.wa.edu.au

------------------------- PATCH 1:

--- linux-2.4.10pre11/fs/reiserfs/journal.c	Fri Sep 21 14:35:25 2001
+++ linux/fs/reiserfs/journal.c	Fri Sep 21 11:04:36 2001
@@ -2388,7 +2388,7 @@
   if (SB_JOURNAL_LIST_INDEX(p_s_sb) < 0) {
     return 0  ;
   }
-  if (!strcmp(current->comm, "kupdate")) {
+  if (!strcmp(current->comm, "kupdated")) {
     immediate = 0 ;
     keep_dirty = 1 ;
   }

------------------------- PATCH 2:

--- linux-2.4.10pre11/fs/super.c	Fri Sep 21 14:35:25 2001
+++ linux/fs/super.c	Fri Sep 21 12:15:12 2001
@@ -708,11 +708,14 @@
 		return;
 	}
 restart:
+	// since reiserfs does not garrentee super is not dirty (journal may
+	// be dirty still with kupdated), have to do this the hard way
 	spin_lock(&sb_lock);
 	sb = sb_entry(super_blocks.next);
 	while (sb != sb_entry(&super_blocks))
-		if (sb->s_dirt) {
+		if (sb->s_dirt && !(sb->s_flushed)) {
 			sb->s_count++;
+			sb->s_flushed = 1;
 			spin_unlock(&sb_lock);
 			down_read(&sb->s_umount);
 			write_super(sb);
@@ -720,6 +723,16 @@
 			goto restart;
 		} else
 			sb = sb_entry(sb->s_list.next);
+
+	// now unflush all supers
+
+	sb = sb_entry(super_blocks.next);
+	while (sb != sb_entry(&super_blocks))
+	{
+		sb->s_flushed = 0;
+		sb = sb_entry(sb->s_list.next);
+	}
+
 	spin_unlock(&sb_lock);
 }

@@ -805,6 +818,7 @@
 		sema_init(&s->s_dquot.dqio_sem, 1);
 		sema_init(&s->s_dquot.dqoff_sem, 1);
 		s->s_maxbytes = MAX_NON_LFS;
+		s->s_flushed = 0;
 	}
 	return s;
 }
--- linux-2.4.10pre11/include/linux/fs.h	Fri Sep 21 14:35:31 2001
+++ linux/include/linux/fs.h	Fri Sep 21 12:11:49 2001
@@ -689,6 +689,7 @@
 	unsigned long		s_blocksize;
 	unsigned char		s_blocksize_bits;
 	unsigned char		s_dirt;
+	unsigned char		s_flushed;
 	unsigned long long	s_maxbytes;	/* Max file size */
 	struct file_system_type	*s_type;
 	struct super_operations	*s_op;

----------------------- PATCH 3

--- linux-2.4.10pre11/fs/buffer.c	Fri Sep 21 14:35:23 2001
+++ linux/fs/buffer.c	Fri Sep 21 12:05:06 2001
@@ -201,7 +201,7 @@
  * return without it!
  */
 #define NRSYNC (32)
-static int write_some_buffers(kdev_t dev)
+static int write_some_buffers(kdev_t dev, int oldonly)
 {
 	struct buffer_head *next;
 	struct buffer_head *array[NRSYNC];
@@ -217,6 +217,8 @@

 		if (dev && bh->b_dev != dev)
 			continue;
+		if (oldonly && time_before(jiffies, bh->b_flushtime))
+			break;
 		if (test_and_set_bit(BH_Lock, &bh->b_state))
 			continue;
 		if (atomic_set_buffer_clean(bh)) {
@@ -247,7 +249,7 @@
 {
 	do {
 		spin_lock(&lru_list_lock);
-	} while (write_some_buffers(dev));
+	} while (write_some_buffers(dev, 0));
 	run_task_queue(&tq_disk);
 }

@@ -1239,7 +1241,7 @@

 	/* If we're getting into imbalance, start write-out */
 	spin_lock(&lru_list_lock);
-	write_some_buffers(NODEV);
+	write_some_buffers(NODEV, 0);

 	/*
 	 * And if we're _really_ out of balance, wait for
@@ -2768,7 +2770,7 @@
 		bh = lru_list[BUF_DIRTY];
 		if (!bh || time_before(jiffies, bh->b_flushtime))
 			break;
-		if (write_some_buffers(NODEV))
+		if (write_some_buffers(NODEV, 1))
 			continue;
 		return 0;
 	}
@@ -2867,7 +2869,7 @@
 		CHECK_EMERGENCY_SYNC

 		spin_lock(&lru_list_lock);
-		if (!write_some_buffers(NODEV) || balance_dirty_state() < 0) {
+		if (!write_some_buffers(NODEV, 0) || balance_dirty_state() < 0) {
 			wait_for_some_buffers(NODEV);
 			interruptible_sleep_on(&bdflush_wait);
 		}



^ permalink raw reply	[flat|nested] 14+ messages in thread
* Re: [PATCH] Significant performace improvements on reiserfs systems, kupdated bugfixes
@ 2001-09-20 17:23 Chris Mason
  0 siblings, 0 replies; 14+ messages in thread
From: Chris Mason @ 2001-09-20 17:23 UTC (permalink / raw)
  To: Beau Kuiper; +Cc: linux-kernel



On Thursday, September 20, 2001 11:20:26 PM +0800 Beau Kuiper <kuib-kl@ljbc.wa.edu.au> wrote:

> I don't think that this could happen until 2.5.x though, as either
> solution touches every file system. However, if we added an extra methed,
> we could do this while only slightly touching the other filesystems (where
> kupdated sync == real sync) Simply see if the method exists (is non-null)
> and call that instead with a kupdate sync instead of the normal
> super_sync. Are you interested in me writing a patch to do this?

If the patch below doesn't fix things for you, I've got another one
that adds a new super_operation.

> 
> 
>> 
>> It is possible to get almost the same behaviour as 2.2.x by changing the
>> metadata sync interval in bdflush to 30 seconds.
>> 
> 
> But then kupdate doesn't flush normal data as regularly as it should, plus
> it is almost as messy as Patch 1 :-)

Tuning through /proc/sys/vm/bdflush is much nicer than doing a
strcmp on the current process name.

The real problem is probably the way commits are done.  When there 
are other FS writers, the commits are done outside the journal
lock, but if you hit things at just the wrong time, you might
trigger writing the commit blocks inside the lock, which slows
things down.  The patch below tries harder to keep log block io outside
the journal lock.

This way you can play with the kupdate interval to find the right
balance for your workload.

> 
>> > 
>> > Patch 2
>> > 
>> > This patch implements a simple mechinism to ensure that each superblock
>> > only gets told to be flushed once. With reiserfs and the first patch,
>> > the superblock is still dirty after being told to sync (probably
>> > becasue it doesn't want to write out the entire journal every 5
>> > seconds when kupdate calls it). This caused an infinite loop because
>> > sync_supers would always find the reiserfs superblock dirty when
>> > called from kupdated. I am not convinced that this patch is the best
>> > one for this problem (suggestions?)
>> 
>> It is ok to leave the superblock dirty, after all, since the commit
>> wasn't done, the super is still dirty.  If the checks from
>> reiserfs_write_super are actually slowing things down, then it is
>> probably best to fix the checks.
> 
> I meant, there might be better wway to prevent the endless loop than
> adding an extra field to the superblock data structure. I beleive (I
> havn't explored reiserfs code much) the slowdown is caused by the journal
> being synced with the superblock, thus causing:

The dirty bit is cleared by reiserfs_write_super when something has
actually been written to disk.  When reiserfs_write_super doesn't
do the commit, it leaves the super dirty, which is important because
sys_sync and other people rely on the dirty bit.

> 
> 1) Too much contention for disk resources.
> 2) A huge increase in the number of times programs must be suspended to
> wait for the disk
> 3) Poor CPU utilization in code that uses the filesystem regularly (like
> compiling)

The way to avoid these is to not call write_super until you really
want to see things on disk ;-)

> Patch 1 makes a huge difference because it stops reiserfs from reacting
> badly on a kupdated.

-chris

--- linux/fs/reiserfs/journal.c	Sat Sep  8 08:05:32 2001
+++ linux/fs/reiserfs/journal.c	Thu Sep 20 13:15:04 2001
@@ -2872,17 +2872,12 @@
   /* write any buffers that must hit disk before this commit is done */
   fsync_inode_buffers(&(SB_JOURNAL(p_s_sb)->j_dummy_inode)) ;
 
-  /* honor the flush and async wishes from the caller */
+  /* honor the flush wishes from the caller, simple commits can
+  ** be done outside the journal lock, they are done below
+  */
   if (flush) {
-  
     flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ;
     flush_journal_list(p_s_sb,  SB_JOURNAL_LIST(p_s_sb) + orig_jindex , 1) ;  
-  } else if (commit_now) {
-    if (wait_on_commit) {
-      flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ;
-    } else {
-      commit_flush_async(p_s_sb, orig_jindex) ; 
-    }
   }
 
   /* reset journal values for the next transaction */
@@ -2944,6 +2939,16 @@
   atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 0) ;
   /* wake up any body waiting to join. */
   wake_up(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ;
+  
+  if (!flush && commit_now) {
+    if (current->need_resched)
+      schedule() ;
+    if (wait_on_commit) {
+      flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ;
+    } else {
+      commit_flush_async(p_s_sb, orig_jindex) ; 
+    }
+  }
   return 0 ;
 }
 




^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2001-09-24 14:37 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2001-09-20  7:12 [PATCH] Significant performace improvements on reiserfs systems, kupdated bugfixes Beau Kuiper
2001-09-20 12:16 ` Chris Mason
2001-09-20 15:20   ` Beau Kuiper
2001-09-20 16:15     ` Andreas Dilger
2001-09-20 16:22       ` Beau Kuiper
2001-09-21 20:12         ` Lehmann 
2001-09-21 20:57           ` Rik van Riel
2001-09-21 22:50             ` Lehmann 
2001-09-21 13:26 ` Matthias Andree
2001-09-22 18:38   ` Beau Kuiper
2001-09-22 19:32     ` Matthias Andree
2001-09-23 15:24       ` Chris Mason
2001-09-24 14:37   ` Stephen C. Tweedie
  -- strict thread matches above, loose matches on Subject: below --
2001-09-20 17:23 Chris Mason

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox