All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Mason <mason@suse.com>
To: Manuel Krause <manuelkrause@netscape.net>
Cc: reiserfs-list <reiserfs-list@namesys.com>
Subject: Re: udpated data logging available
Date: 01 Jul 2003 21:44:21 -0400	[thread overview]
Message-ID: <1057110260.20904.878.camel@tiny.suse.com> (raw)
In-Reply-To: <3F021C41.9090100@netscape.net>

[-- Attachment #1: Type: text/plain, Size: 1860 bytes --]

On Tue, 2003-07-01 at 19:41, Manuel Krause wrote:
>  
> > Does the search_reada-4 contradict the new code or is it even dangerous
> > to combine them (which I luckily haven't triggered so far)?
> > 
> > Thanks,
> > 
> >  Manuel
> 
> No answer needed so far upon search_reada-4 ?!?!
> 

Sorry, I've been doing some final testing on search_reada-5, which is
attached.  It doesn't help quite as much as search_reada-4, but it also
doesn't hurt the random io case anywhere near as badly.  It tries to be
smarter about only doing read ahead for the same object you are
searching for.  

I'll upload in the morning.

> If I may remind you, only that patch brought 2.4.20+  +reiserfs
> +data-logging to the high throughput values of 2.4.19 +reiserfs
> +data-logging when copying my backup partition (around 5GB) via cp.
> 
> 
> 
> O.K. -- The new (experimental) patches run fine on all my previous
> simple test patterns _with_ search_reada-4 (cp my backup-partitions,
> home usage with NS 7.1 and OOo 1.1betas; VMware 3.2.1 sessions with
> defrag/SpeedDisk in Win98) with 2.4.21 +data-logging +rml-preempt-kernel.
> 
> I didn't post definite timings upon my data as using the first new
> experimental data-logging patches led to a throughput/speed improvement
> of only 3% (compared to without the exp patches), which is within the
> typical fluctuation (copying via cp). And I avoided testing without
> search_reada so far, for the reason of needed retesting back to 2.4.19
> (disk content changed).
> So, at least, I can say "It didn't get slower - but may be a bit faster
> or even another bit more. - Depends..."
> 

Most of the improvement comes in fsync heavy workloads.  The
data=ordered io is a little smoother as well, for better latencies in
general.

> 
> Many thanks, your work is great indeed !
> 

Thanks for your continued tests, they are very helpful.

-chris


[-- Attachment #2: search_reada-5.diff --]
[-- Type: text/plain, Size: 3648 bytes --]

===== fs/reiserfs/stree.c 1.22 vs edited =====
--- 1.22/fs/reiserfs/stree.c	Mon Jun 30 12:45:49 2003
+++ edited/fs/reiserfs/stree.c	Mon Jun 30 13:33:02 2003
@@ -598,26 +598,32 @@
 
 
 
-#ifdef SEARCH_BY_KEY_READA
+#define SEARCH_BY_KEY_READA 8
 
 /* The function is NOT SCHEDULE-SAFE! */
-static void search_by_key_reada (struct super_block * s, int blocknr)
+static void search_by_key_reada (struct super_block * s, 
+                                 struct buffer_head **bh, 
+				 unsigned long *b, int num)
 {
-    struct buffer_head * bh;
+    int i,j;
   
-    if (blocknr == 0)
-	return;
-
-    bh = getblk (s->s_dev, blocknr, s->s_blocksize);
-  
-    if (!buffer_uptodate (bh)) {
-	ll_rw_block (READA, 1, &bh);
+    for (i = 0 ; i < num ; i++) {
+	bh[i] = sb_getblk (s, b[i]);
+	if (buffer_uptodate(bh[i])) {
+	    brelse(bh[i]);
+	    break;
+	}
+	touch_buffer(bh[i]);
+    } 
+    if (i) {
+	ll_rw_block(READA, i, bh);
+    }
+    for(j = 0 ; j < i ; j++) {
+        if (bh[j])
+	    brelse(bh[j]);
     }
-    bh->b_count --;
 }
 
-#endif
-
 /**************************************************************************
  * Algorithm   SearchByKey                                                *
  *             look for item in the Disk S+Tree by its key                *
@@ -660,6 +666,9 @@
     int				n_node_level, n_retval;
     int 			right_neighbor_of_leaf_node;
     int				fs_gen;
+    struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
+    unsigned long      reada_blocks[SEARCH_BY_KEY_READA];
+    int reada_count = 0;
 
 #ifdef CONFIG_REISERFS_CHECK
     int n_repeat_counter = 0;
@@ -696,11 +705,11 @@
 	fs_gen = get_generation (p_s_sb);
 	expected_level --;
 
-#ifdef SEARCH_BY_KEY_READA
-	/* schedule read of right neighbor */
-	search_by_key_reada (p_s_sb, right_neighbor_of_leaf_node);
-#endif
-
+	/* schedule read of right neighbors */
+	if (reada_count) {
+	    search_by_key_reada (p_s_sb, reada_bh, reada_blocks, reada_count);
+	    reada_count = 0;
+	}
 	/* Read the next tree node, and set the last element in the path to
            have a pointer to it. */
 	if ( ! (p_s_bh = p_s_last_element->pe_buffer =
@@ -787,12 +796,37 @@
 	   an internal node.  Now we calculate child block number by
 	   position in the node. */
 	n_block_number = B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position);
-
-#ifdef SEARCH_BY_KEY_READA
-	/* if we are going to read leaf node, then calculate its right neighbor if possible */
-	if (n_node_level == DISK_LEAF_NODE_LEVEL + 1 && p_s_last_element->pe_position < B_NR_ITEMS (p_s_bh))
-	    right_neighbor_of_leaf_node = B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position + 1);
-#endif
+	
+	/* if we are going to read leaf nodes, try for read ahead as well */
+	if (n_node_level == DISK_LEAF_NODE_LEVEL + 1 && 
+	    p_s_last_element->pe_position < B_NR_ITEMS (p_s_bh) &&
+	    !is_direct_cpu_key(p_s_key) && 
+	    !is_statdata_cpu_key(p_s_key))
+	{
+	    int pos = p_s_last_element->pe_position;
+	    int limit = B_NR_ITEMS(p_s_bh);
+	    struct buffer_head *tmp_bh;
+	    struct key *le_key;
+
+	    /* don't try to readahead if the leaf is already
+	     * in ram.  get_hash_table doesn't schedule, so this
+	     * is safe
+	     */
+	    tmp_bh = sb_get_hash_table(p_s_sb, n_block_number);
+	    if (tmp_bh) {
+	        brelse(tmp_bh);
+		continue;
+	    }
+	    while(pos <= limit && reada_count < SEARCH_BY_KEY_READA) { 
+		le_key = B_N_PDELIM_KEY(p_s_bh, pos);
+		if (le32_to_cpu(le_key->k_objectid) != 
+		    p_s_key->on_disk_key.k_objectid)
+		{
+		    break;
+		}
+	        reada_blocks[reada_count++] = B_N_CHILD_NUM(p_s_bh, pos++);
+	    }
+        }
     }
 }
 

  reply	other threads:[~2003-07-02  1:44 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-06-16 11:47 udpated data logging available Chris Mason
2003-06-18 13:56 ` Chris Mason
2003-06-23  2:45   ` Chris Mason
2003-06-23 16:53     ` Christian Mayrhuber
2003-06-25 19:15       ` Chris Mason
2003-06-26  0:16         ` Christian Mayrhuber
2003-06-26  1:47           ` Chris Mason
2003-06-26 11:42             ` Dieter Nützel
2003-06-26 12:53               ` Chris Mason
2003-06-26 13:36                 ` Manuel Krause
2003-07-01 23:41                   ` Manuel Krause
2003-07-02  1:44                     ` Chris Mason [this message]
2003-06-26 17:19                 ` Dieter Nützel
2003-07-02  0:46               ` Manuel Krause
2003-07-02  1:46                 ` Chris Mason
2003-06-26 11:48             ` Dieter Nützel
2003-06-26 12:18               ` Philippe Gramoullé
2003-06-26 12:35                 ` Dieter Nützel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1057110260.20904.878.camel@tiny.suse.com \
    --to=mason@suse.com \
    --cc=manuelkrause@netscape.net \
    --cc=reiserfs-list@namesys.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.